Aaron: 1, GenomeLoc: 0. I changed our GenomeLoc class, separating the creation of a genome loc (with the reference setup) into a parser class. GenomeLoc now just represents the actual genomic position. The constructors are now package-protected (to enforce using the parser), but we may want to expose some constructors in the future.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1069 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-22 14:39:41 +00:00
parent 26eb362f52
commit bcb64d92e9
53 changed files with 931 additions and 658 deletions

View File

@ -101,7 +101,7 @@ public class GenomeAnalysisEngine {
// Prepare the sort ordering w.r.t. the sequence dictionary
if (argCollection.referenceFile != null) {
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
GenomeLoc.setupRefContigOrdering(refFile);
GenomeLocParser.setupRefContigOrdering(refFile);
}
// Determine the validation stringency. Default to ValidationStringency.STRICT.
@ -145,7 +145,7 @@ public class GenomeAnalysisEngine {
// Prepare the sort ordering w.r.t. the sequence dictionary
if (argCollection.referenceFile != null) {
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
GenomeLoc.setupRefContigOrdering(refFile);
GenomeLocParser.setupRefContigOrdering(refFile);
}
// Determine the validation stringency. Default to ValidationStringency.STRICT.
@ -222,10 +222,10 @@ public class GenomeAnalysisEngine {
if ( intervalsString != null) {
if (new File(intervalsString).exists()) {
if (! quiet) logger.info("Intervals argument specifies a file. Loading intervals from file.");
locs = GenomeLoc.IntervalFileToList(intervalsString);
locs = GenomeLocParser.intervalFileToList(intervalsString);
} else {
if (! quiet) logger.info("Intervals argument does not specify a file. Trying to parse it as a simple string.");
locs = GenomeLoc.parseGenomeLocs(intervalsString);
locs = GenomeLocParser.parseGenomeLocs(intervalsString);
}
}
return locs;

View File

@ -5,6 +5,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Iterator;
/**
@ -63,7 +64,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
LocusShardStrategy( SAMSequenceDictionary dic ) {
this.dic = dic;
limitingFactor = -1;
mLoc = new GenomeLoc(0, 0, 0);
mLoc = GenomeLocParser.createGenomeLoc(0, 0, 0);
if (dic.getSequences().size() > 0) {
nextContig = true;
}
@ -98,7 +99,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
throw new IllegalArgumentException("Interval files must contain at least one interval");
}
GenomeLoc loc = intervals.iterator().next();
mLoc = new GenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1);
mLoc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1);
if (dic.getSequences().size() > 0) {
nextContig = true;
}
@ -174,7 +175,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
intervals.removeRegion(loc);
return new IntervalShard(loc);
} else {
GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1);
GenomeLoc subLoc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1);
intervals.removeRegion(subLoc);
return new IntervalShard(subLoc);
}
@ -193,7 +194,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
// can we fit it into the current seq size?
if (nextStart + proposedSize - 1 < length) {
lastGenomeLocSize = proposedSize;
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
return LocusShard.toShard(mLoc);
}
// else we can't make it in the current location, we have to stitch one together
@ -207,7 +208,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
// move to the next contig
// the next sequence should start at the begining of the next contig
Shard ret = LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1));
Shard ret = LocusShard.toShard(GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1));
// now jump ahead to the next contig
jumpContig();
@ -226,7 +227,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
return;
}
logger.debug("Next contig, index = " + dic.getSequence(seqLoc).getSequenceIndex());
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), 0, 0);
mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), 0, 0);
}

View File

@ -14,6 +14,7 @@ import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.File;
import java.util.ArrayList;
@ -184,7 +185,7 @@ public class SAMDataSource implements SimpleDataSource {
if (!intoUnmappedReads) {
if (lastReadPos == null) {
lastReadPos = new GenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
iter = iteratorPool.iterator(lastReadPos);
return InitialReadIterator(shard.getSize(), iter);
} else {
@ -280,7 +281,8 @@ public class SAMDataSource implements SimpleDataSource {
++x;
} else {
// jump contigs
if (lastReadPos.toNextContig() == false) {
lastReadPos = GenomeLocParser.toNextContig(lastReadPos);
if (lastReadPos == null) {
// check to see if we're using unmapped reads, if not return, we're done
readsTaken = 0;
intoUnmappedReads = true;
@ -304,7 +306,7 @@ public class SAMDataSource implements SimpleDataSource {
else if (rec != null) {
int stopPos = rec.getAlignmentStart();
if (stopPos < lastReadPos.getStart()) {
lastReadPos = new GenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos);
lastReadPos = GenomeLocParser.createGenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos);
} else {
lastReadPos.setStart(rec.getAlignmentStart());
}

View File

@ -18,9 +18,9 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.io.File;
@ -202,7 +202,7 @@ public abstract class MicroScheduler {
catch (FileNotFoundException ex) {
throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex);
}
GenomeLoc.setupRefContigOrdering(ref);
GenomeLocParser.setupRefContigOrdering(ref);
return ref;
}
}

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.gatk.iterators;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.NoSuchElementException;
/**
@ -39,7 +39,7 @@ public class GenomeLocusIterator implements LocusIterator {
*/
public GenomeLocusIterator( GenomeLoc completeLocus ) {
this.completeLocus = completeLocus;
this.currentLocus = new GenomeLoc(completeLocus.getContig(),completeLocus.getStart());
this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart());
}
/**

View File

@ -1,15 +1,16 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.AlignmentBlock;
import org.broadinstitute.sting.utils.*;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.RefHanger;
import org.broadinstitute.sting.utils.Utils;
import java.util.Iterator;
import org.broadinstitute.sting.utils.RefHanger;
import org.broadinstitute.sting.gatk.LocusContext;
import org.apache.log4j.Logger;
/**
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
*/
@ -103,12 +104,12 @@ public class LocusContextIteratorByHanger extends LocusContextIterator {
}
protected void hangRead(final SAMRecord read) {
GenomeLoc readLoc = new GenomeLoc(read);
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
for ( AlignmentBlock block : read.getAlignmentBlocks() ) {
if ( DEBUG ) logger.debug(String.format("Processing block %s len=%d", block, block.getLength()));
for ( int i = 0; i < block.getLength(); i++ ) {
GenomeLoc offset = new GenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i);
GenomeLoc offset = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i);
readHanger.expandingPut(offset, read);
offsetHanger.expandingPut(offset, block.getReadStart() + i - 1);
if ( DEBUG ) logger.debug(String.format(" # Added %s", offset));
@ -134,7 +135,7 @@ public class LocusContextIteratorByHanger extends LocusContextIterator {
return true;
else {
final SAMRecord read = it.peek();
GenomeLoc readLoc = new GenomeLoc(read);
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
final boolean coveredP = currentPositionIsFullyCovered(readLoc);
//System.out.printf("CoverP = %s => %b%n", readLoc, coveredP);
return coveredP;
@ -161,7 +162,7 @@ public class LocusContextIteratorByHanger extends LocusContextIterator {
SAMRecord read = it.next();
justCleared = false;
GenomeLoc readLoc = new GenomeLoc(read);
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
if ( DEBUG ) {
logger.debug(String.format(" Expanding window sizes %d with %d : left=%s, right=%s, readLoc = %s, cmp=%d",
readHanger.size(), incrementSize,

View File

@ -5,6 +5,7 @@ import net.sf.samtools.util.RuntimeIOException;
import net.sf.samtools.util.StringUtil;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import java.util.Iterator;
@ -65,7 +66,7 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
}
public GenomeLoc getLocation() {
return new GenomeLoc(getCurrentContig().getName(), getPosition());
return GenomeLocParser.createGenomeLoc(getCurrentContig().getName(), getPosition());
}
// --------------------------------------------------------------------------------------------------------------
@ -137,11 +138,11 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
if (seekOffset < offset ) {
// bad boy -- can't go backward safely
throw new IllegalArgumentException(String.format("Invalid seek %s => %s, which is usually due to out of order reads%n",
new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, seekOffset)));
GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset)));
} else if (seekOffset >= currentContig.length()) {
// bad boy -- can't go beyond the contig length
throw new IllegalArgumentException(String.format("Invalid seek to %s, which is beyond the end of the contig%n",
new GenomeLoc(currentContig.getName(), seekOffset + 1)));
GenomeLocParser.createGenomeLoc(currentContig.getName(), seekOffset + 1)));
} else {
offset = seekOffset - 1;
return next();
@ -160,15 +161,16 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
if (DEBUG)
logger.debug(String.format(" -> Seeking to %s %d from %s %d%n", seekContigName, seekOffset, currentContig.getName(), offset));
int cmpContigs = GenomeLoc.compareContigs(seekContigName, currentContig.getName());
int cmpContigs = GenomeLocParser.compareContigs(seekContigName,currentContig.getName());
if ( cmpContigs < 0 && GenomeLoc.hasKnownContigOrdering() ) { // if we know the order of contigs and we are already past the contig we seek, it's too late!
if ( cmpContigs < 0 && GenomeLocParser.hasKnownContigOrdering() ) { // if we know the order of contigs and we are already past the contig we seek, it's too late!
// The contig we are looking for is before the currentContig -- it's an error
throw new IllegalArgumentException(String.format("Invalid seek %s => %s, contigs/sequences are out of order%n",
new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, seekOffset)));
GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset)));
}
if ( cmpContigs > 0 || (! GenomeLoc.hasKnownContigOrdering() ) && cmpContigs != 0 ) { // if contig we seek is still ahead, or if we have no idea what the order is and current contig is not what we seek
if ( cmpContigs > 0 || (! GenomeLocParser.hasKnownContigOrdering() ) && cmpContigs != 0 ) { // if contig we seek is still ahead, or if we have no idea what the order is and current contig is not what we seek
// then try to seek forward in the reference file until we get the contig we need
if (DEBUG)
logger.debug(String.format(" -> Seeking in the fasta file to %s from %s%n", seekContigName, currentContig.getName()));
@ -176,7 +178,7 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
if (!refFile.seekToContig(seekContigName)) { // ok, do the seek
// a false result indicates a failure, throw a somewhat cryptic call
throw new RuntimeIOException(String.format("Unexpected seek failure from %s to %s%n",
new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, seekOffset)));
GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset)));
}
readNextContig(); // since we haven't failed, we just read in the next contig (which is seekContigName)

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.RuntimeIOException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.Reads;
import java.util.Iterator;
@ -63,8 +64,8 @@ public class VerifyingSamIterator implements StingSAMIterator {
if ( last == null || cur.getReadUnmappedFlag() )
return false;
else {
GenomeLoc lastLoc = new GenomeLoc( last );
GenomeLoc curLoc = new GenomeLoc( cur );
GenomeLoc lastLoc = GenomeLocParser.createGenomeLoc( last );
GenomeLoc curLoc = GenomeLocParser.createGenomeLoc( cur );
return curLoc.compareTo(lastLoc) == -1;
}
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.List;
import java.util.Arrays;
@ -79,7 +80,7 @@ public class HapMapAlleleFrequenciesROD extends BasicReferenceOrderedDatum {
varFreq = Double.parseDouble(parts[11]); // CEU_var_freq
totalCounts = Integer.parseInt(parts[12]); // CEU_var
loc = GenomeLoc.parseGenomeLoc(contig, start, stop);
loc = GenomeLocParser.parseGenomeLoc(contig, start, stop);
} catch ( RuntimeException e ) {
System.out.printf(" Exception caught during parsing HapMap Allele Freq %s%n", Utils.join(" <=> ", parts));

View File

@ -1,19 +1,9 @@
package org.broadinstitute.sting.gatk.refdata;
import java.util.*;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.xReadLines;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
* loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom
@ -28,7 +18,7 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes {
}
public GenomeLoc getLocation() {
loc = new GenomeLoc(this.get("0"), Long.parseLong(this.get("1")));
loc = GenomeLocParser.createGenomeLoc(this.get("0"), Long.parseLong(this.get("1")));
return loc;
}
public String getRefBasesFWD() { return this.get("2"); }

View File

@ -6,10 +6,7 @@ import java.util.*;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.Pileup;
import org.broadinstitute.sting.utils.xReadLines;
import org.broadinstitute.sting.utils.*;
import net.sf.picard.reference.ReferenceSequenceFileWalker;
@ -97,8 +94,8 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
if ( refBaseChar == '*' ) {
parseIndels(observedString) ;
if ( varType == DELETION_VARIANT ) loc = new GenomeLoc(contig, start, start+eventLength-1);
else loc = new GenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
if ( varType == DELETION_VARIANT ) loc = GenomeLocParser.createGenomeLoc(contig, start, start+eventLength-1);
else loc = GenomeLocParser.createGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
} else {
parseBasesAndQuals(line,pos[7]+1,pos[8], pos[8]+1, ( pos.length > 9 ? pos[9] : line.length()) );
// parseBasesAndQuals(line.substring(pos[7]+1,pos[8]), line.substring(pos[8]+1, ( pos.length > 9 ? pos[9] : line.length()) ) );
@ -108,7 +105,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
refBases = line.substring(pos[1]+1, pos[2]).toUpperCase();
eventLength = 1;
//loc = new GenomeLoc(contig, start, start+1);
loc = new GenomeLoc(contig, start, start);
loc = GenomeLocParser.createGenomeLoc(contig, start, start);
char ch = observedString.charAt(0);
@ -170,8 +167,8 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
if ( refBaseChar == '*' ) {
parseIndels(parts[3]) ;
if ( varType == DELETION_VARIANT ) loc = GenomeLoc.parseGenomeLoc(contig, start, start+eventLength-1);
else loc = GenomeLoc.parseGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
if ( varType == DELETION_VARIANT ) loc = GenomeLocParser.parseGenomeLoc(contig, start, start+eventLength-1);
else loc = GenomeLocParser.parseGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
}
else {
parseBasesAndQuals(parts[8], parts[9]);
@ -181,7 +178,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
refBases = parts[2].toUpperCase();
eventLength = 1;
//loc = GenomeLoc.parseGenomeLoc(contig, start, start+1);
loc = GenomeLoc.parseGenomeLoc(contig, start, start);
loc = GenomeLocParser.parseGenomeLoc(contig, start, start);
char ch = parts[3].charAt(0);
@ -623,7 +620,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
System.exit(1);
}
GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
int counter = 0;

View File

@ -1,17 +1,16 @@
package org.broadinstitute.sting.gatk.refdata;
import java.util.*;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.xReadLines;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.apache.log4j.Logger;
/**
@ -201,7 +200,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
String s = get(header.get(0));
if ( s == null )
return null;
return GenomeLoc.parseGenomeLoc(s);
return GenomeLocParser.parseGenomeLoc(s);
}
public ArrayList<String> getHeader() {

View File

@ -5,6 +5,7 @@ import java.util.List;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
/** Holds a single transcript annotation: refseq id, gene name, genomic locations of the locus, of the coding region
* and of all the exons.
@ -75,8 +76,8 @@ public class Transcript {
else throw new StingException("Expected strand symbol (+/-), found: "+fields[3]);
String contig_name = fields[2];
transcript_interval = GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
transcript_coding_interval = GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]));
transcript_interval = GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
transcript_coding_interval = GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]));
gene_name = fields[12];
String[] exon_starts = fields[9].split(",");
String[] exon_stops = fields[10].split(",");
@ -89,7 +90,7 @@ public class Transcript {
exon_frames = new ArrayList<Integer>(eframes.length);
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
exons.add(GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
exon_frames.add(Integer.decode(eframes[i]));
}
}

View File

@ -4,10 +4,10 @@ import net.sf.picard.util.SequenceUtil;
import java.util.*;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.MalformedGenomeLocException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
/**
@ -162,7 +162,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
String contig = parts[1];
long start = Long.parseLong(parts[2]) + 1; // The final is 0 based
long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based
loc = GenomeLoc.parseGenomeLoc(contig, start, stop-1);
loc = GenomeLocParser.parseGenomeLoc(contig, start, stop-1);
name = parts[4];
refBases = parts[5];

View File

@ -1,15 +1,15 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import java.util.HashMap;
import java.util.Scanner;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
/**
* Class for representing arbitrary reference ordered data sets
*
@ -73,7 +73,7 @@ public class rodGFF extends BasicReferenceOrderedDatum {
}
public GenomeLoc getLocation() {
return GenomeLoc.parseGenomeLoc(contig, start, stop);
return GenomeLocParser.parseGenomeLoc(contig, start, stop);
}
public String getAttribute(final String key) {

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.Transcript;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.xReadLines;
import org.broadinstitute.sting.utils.GenomeLocParser;
public class rodRefSeq extends BasicReferenceOrderedDatum {
@ -178,8 +179,8 @@ class refSeqIterator implements Iterator<rodRefSeq> {
while ( reader.hasNext() ) {
Transcript t = reader.peek();
int ci1 = GenomeLoc.getContigIndex(curr_contig_name);
int ci2 = GenomeLoc.getContigIndex( t.getLocation().getContig() );
int ci1 = GenomeLocParser.getContigIndex(curr_contig_name);
int ci2 = GenomeLocParser.getContigIndex( t.getLocation().getContig() );
if ( ci1 > ci2 ) throw new StingException("RefSeq track seems to be not contig-ordered");
if ( ci1 < ci2 ) break; // next transcript is on the next contig, we do not need it yet...
if ( t.getLocation().getStart() > curr_position ) break; // next transcript is on the same contig but starts after the current position; we are done
@ -192,7 +193,7 @@ class refSeqIterator implements Iterator<rodRefSeq> {
// 'records' and current position are fully updated. We can now create new rod and return it (NOTE: this iterator will break if the list
// of pre-loaded records is meddled with by the clients between iterations, so we return them as unmodifiable list)
rodRefSeq rod = new rodRefSeq(name,GenomeLoc.parseGenomeLoc(curr_contig_name,curr_position, curr_position),Collections.unmodifiableList(records));
rodRefSeq rod = new rodRefSeq(name, GenomeLocParser.parseGenomeLoc(curr_contig_name,curr_position, curr_position),Collections.unmodifiableList(records));
// if ( (++z) % 1000000 == 0 ) {
// System.out.println(rod.getLocation()+": holding "+records.size()+ "; time per 1M ref positions: "+((double)(System.currentTimeMillis()-t)/1000.0)+" s");
// z = 0;

View File

@ -7,6 +7,7 @@ import java.util.*;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import net.sf.picard.reference.ReferenceSequenceFileWalker;
@ -192,7 +193,7 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
System.exit(1);
}
GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
int counter = 0;

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.IOException;
@ -22,7 +23,7 @@ public class rodVariants extends BasicReferenceOrderedDatum {
public boolean parseLine(Object header, String[] parts) throws IOException {
if (!parts[0].startsWith("#")) {
loc = new GenomeLoc(parts[0], Long.valueOf(parts[1]));
loc = GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1]));
refBase = parts[2].charAt(0);
depth = Integer.valueOf(parts[3]);
maxMappingQuality = Integer.valueOf(parts[4]);

View File

@ -443,7 +443,7 @@ public abstract class TraversalEngine {
//this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName);
this.refFile = new FastaSequenceFile2(refFileName); // todo: replace when FastaSequenceFile2 is in picard
this.refIter = new ReferenceIterator(this.refFile);
if (!GenomeLoc.setupRefContigOrdering(this.refFile)) {
if (!GenomeLocParser.setupRefContigOrdering(this.refFile)) {
// We couldn't process the reference contig ordering, fail since we need it
Utils.scareUser(String.format("We couldn't load the contig dictionary associated with %s. At the current time we require this dictionary file to efficiently access the FASTA file. Please use /seq/software/picard/current/bin/CreateSequenceDictionary.jar to create a sequence dictionary for your file", refFileName));
}
@ -470,7 +470,7 @@ public abstract class TraversalEngine {
while (true) {
ReferenceSequence ref = refFile.nextSequence();
logger.debug(String.format("%s %d %d", ref.getName(), ref.length(), System.currentTimeMillis()));
printProgress(true, "loci", new GenomeLoc("foo", 1));
printProgress(true, "loci", GenomeLocParser.createGenomeLoc("foo", 1));
}
}

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.*;
import java.io.File;
@ -141,7 +142,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
walker.nonIntervalReadAction(read);
}
else {
GenomeLoc loc = new GenomeLoc(read);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
// if we're in the current interval, add it to the list
if ( currentInterval.overlapsP(loc) ) {
intervalReads.add(read);
@ -243,7 +244,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
}
}
GenomeLoc window = new GenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
LocusContext locus = new LocusContext(window, reads, null);
if ( DOWNSAMPLE_BY_COVERAGE )
locus.downsampleToCoverage(downsamplingCoverage);
@ -255,8 +256,8 @@ public class TraverseByLocusWindows extends TraversalEngine {
GenomeLoc loc = locus1.getLocation().merge(locus2.getLocation());
TreeSet<SAMRecord> set = new TreeSet<SAMRecord>(new Comparator<SAMRecord>() {
public int compare(SAMRecord obj1, SAMRecord obj2) {
GenomeLoc myLoc = new GenomeLoc(obj1);
GenomeLoc hisLoc = new GenomeLoc(obj2);
GenomeLoc myLoc = GenomeLocParser.createGenomeLoc(obj1);
GenomeLoc hisLoc = GenomeLocParser.createGenomeLoc(obj2);
int comparison = myLoc.compareTo(hisLoc);
// if the reads have the same start position, we must give a non-zero comparison
// (because java Sets often require "consistency with equals")

View File

@ -1,25 +1,28 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair;
import java.io.File;
import java.util.*;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
/**
*
@ -66,12 +69,12 @@ public class TraverseDuplicates extends TraversalEngine {
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter)
{
GenomeLoc site = new GenomeLoc(read);
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
l.add(read);
for (SAMRecord read2: iter) {
GenomeLoc site2 = new GenomeLoc(read2);
GenomeLoc site2 = GenomeLocParser.createGenomeLoc(read2);
// the next read starts too late
if ( site2.getStart() != site.getStart() ) {
@ -105,12 +108,12 @@ public class TraverseDuplicates extends TraversalEngine {
// At this point, there are two possibilities, we have found at least one dup or not
// if it's a dup, add it to the dups list, otherwise add it to the uniques list
if ( key != null ) {
final GenomeLoc keyLoc = new GenomeLoc(key);
final GenomeLoc keyMateLoc = new GenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
final GenomeLoc keyLoc = GenomeLocParser.createGenomeLoc(key);
final GenomeLoc keyMateLoc = GenomeLocParser.createGenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
for ( SAMRecord read : reads ) {
final GenomeLoc readLoc = new GenomeLoc(read);
final GenomeLoc readMateLoc = new GenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
final GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
final GenomeLoc readMateLoc = GenomeLocParser.createGenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
if (DEBUG) logger.debug(String.format("Examining reads at %s vs. %s at %s / %s vs. %s / %s%n", key.getReadName(), read.getReadName(), keyLoc, keyMateLoc, readLoc, readMateLoc));
// read and key start at the same place, and either the this read and the key
@ -150,7 +153,7 @@ public class TraverseDuplicates extends TraversalEngine {
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(readIter);
for (SAMRecord read: iter) {
// get the genome loc from the read
GenomeLoc site = new GenomeLoc(read);
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
List<SAMRecord> reads = readsAtLoc(read, iter);
Pair<List<SAMRecord>, List<SAMRecord>> split = splitDuplicates(reads);
List<SAMRecord> uniqueReads = split.getFirst();

View File

@ -16,6 +16,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.File;
import java.util.ArrayList;
@ -121,7 +122,7 @@ public class TraverseReads extends TraversalEngine {
if (needsReferenceBasesP && read.getReferenceIndex() >= 0) {
// get the genome loc from the read
GenomeLoc site = new GenomeLoc(read);
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
// Jump forward in the reference to this locus location
locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0));

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.playground.gatk;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.cmdline.Option;
@ -11,8 +10,6 @@ import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.gatk.refdata.*;
import java.io.*;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
public class PrepareROD extends CommandLineProgram {
@ -33,7 +30,7 @@ public class PrepareROD extends CommandLineProgram {
// Prepare the sort ordering w.r.t. the sequence dictionary
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG);
GenomeLoc.setupRefContigOrdering(refFile);
GenomeLocParser.setupRefContigOrdering(refFile);
Class<? extends ReferenceOrderedDatum> rodClass = ReferenceOrderedData.Types.get(ROD_TYPE.toLowerCase()).type;

View File

@ -1,24 +1,20 @@
package org.broadinstitute.sting.playground.gatk.walkers.indels;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileWriter;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.Map;
import java.util.HashMap;
import java.util.Queue;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.List;
import java.util.*;
/**
* User: hanna
* User: hanna
* Date: Jun 10, 2009
* Time: 2:40:19 PM
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
@ -99,7 +95,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
*/
@Override
public Integer map(char[] ref, SAMRecord read) {
GenomeLoc loc = new GenomeLoc(read);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
while( loc.isPast(interval) && intervals.size() > 0 ) {
interval = intervals.remove();

View File

@ -18,10 +18,10 @@ import org.broadinstitute.sting.gatk.refdata.RODIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.rodRefSeq;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.playground.indels.AlignmentUtils;
import org.broadinstitute.sting.playground.utils.CircularArray;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -82,7 +82,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
int nSams = getToolkit().getArguments().samFiles.size();
location = new GenomeLoc(0,1);
location = GenomeLocParser.createGenomeLoc(0,1);
if ( call_somatic ) {
if ( nSams != 2 ) {

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.AlignmentBlock;
@ -35,8 +36,8 @@ public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval
long indelLeftEdge = read.getAlignmentStart() + blocks.get(0).getLength() - 1;
long indelRightEdge = read.getAlignmentEnd() - blocks.get(blocks.size()-1).getLength() + 1;
GenomeLoc indelLoc = new GenomeLoc(read.getReferenceIndex(), indelLeftEdge, indelRightEdge);
GenomeLoc refLoc = new GenomeLoc(read);
GenomeLoc indelLoc = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(), indelLeftEdge, indelRightEdge);
GenomeLoc refLoc = GenomeLocParser.createGenomeLoc(read);
return new Interval(refLoc, indelLoc);
}
@ -81,7 +82,7 @@ public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval
public Interval merge(Interval i) {
long indelLeftEdge = Math.min(this.indelLoc.getStart(), i.indelLoc.getStart());
long indelRightEdge = Math.max(this.indelLoc.getStop(), i.indelLoc.getStop());
GenomeLoc mergedIndelLoc = new GenomeLoc(this.indelLoc.getContigIndex(), indelLeftEdge, indelRightEdge);
GenomeLoc mergedIndelLoc = GenomeLocParser.createGenomeLoc(this.indelLoc.getContigIndex(), indelLeftEdge, indelRightEdge);
Interval mergedInterval = new Interval(this.readLoc.merge(i.readLoc), mergedIndelLoc);
mergedInterval.indelCount = this.indelCount + i.indelCount;
return mergedInterval;

View File

@ -915,7 +915,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
reads.add(r5);
reads.add(r6);
reads.add(r7);
clean(reads, reference, new GenomeLoc(0,0));
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
}
private void testCleanWithDeletion() {
@ -971,7 +971,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
reads.add(r6);
reads.add(r7);
reads.add(r8);
clean(reads, reference, new GenomeLoc(0,0));
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
}
public static String cigarToString(Cigar cig) {

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.playground.indels;
import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
@ -16,6 +15,7 @@ import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
public class IndelInspectorMain extends CommandLineProgram {
@ -55,10 +55,10 @@ public class IndelInspectorMain extends CommandLineProgram {
System.out.println("No reference sequence dictionary found. Abort.");
}
GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
GenomeLoc location = null;
if ( GENOME_LOCATION != null ) {
location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION);
location = GenomeLocParser.parseGenomeLoc(GENOME_LOCATION);
}
if ( COUNT_CUTOFF == null ) COUNT_CUTOFF = 2;
@ -117,8 +117,8 @@ public class IndelInspectorMain extends CommandLineProgram {
cur_contig = r.getReferenceName();
System.out.println("Contig "+cur_contig);
// if contig is specified and we are past that contig, we are done:
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == 1 ) break;
if ( location == null || GenomeLoc.compareContigs(cur_contig, location.getContig()) == 0 ) {
if ( location != null && GenomeLocParser.compareContigs(cur_contig, location.getContig()) == 1 ) break;
if ( location == null || GenomeLocParser.compareContigs(cur_contig, location.getContig()) == 0 ) {
if ( location != null ) System.out.println("Time spent to scroll input bam file to the specified chromosome: "+ ((System.currentTimeMillis()-tc)/1000) + " seconds.");
tc = System.currentTimeMillis();
contig_seq = reference.get(r.getReferenceIndex());
@ -130,7 +130,7 @@ public class IndelInspectorMain extends CommandLineProgram {
}
// if contig is specified and we did not reach it yet, skip the records until we reach that contig:
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == -1 ) continue;
if ( location != null && GenomeLocParser.compareContigs(cur_contig, location.getContig()) == -1 ) continue;
if ( location != null && r.getAlignmentEnd() < location.getStart() ) continue;
@ -327,7 +327,7 @@ public class IndelInspectorMain extends CommandLineProgram {
setDefaultContigOrdering();
return;
}
GenomeLoc.setupRefContigOrdering(h.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(h.getSequenceDictionary());
}
private void setDefaultContigOrdering() {

View File

@ -1,17 +1,10 @@
package org.broadinstitute.sting.utils;
import edu.mit.broad.picard.util.Interval;
import edu.mit.broad.picard.directed.IntervalList;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import java.io.File;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -26,253 +19,49 @@ import java.util.regex.Pattern;
public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
private static Logger logger = Logger.getLogger(GenomeLoc.class);
/**
* the basic components of a genome loc, its contig index,
* start and stop position, and (optionally) the contig name
*/
private int contigIndex;
private long start;
private long stop;
// --------------------------------------------------------------------------------------------------------------
//
// Ugly global variable defining the optional ordering of contig elements
//
// --------------------------------------------------------------------------------------------------------------
//public static Map<String, Integer> refContigOrdering = null;
private static SAMSequenceDictionary contigInfo = null;
public static boolean hasKnownContigOrdering() {
return contigInfo != null;
}
public static SAMSequenceRecord getContigInfo( final String contig ) {
return contigInfo.getSequence(contig);
}
/**
* Returns the contig index of a specified string version of the contig
* @param contig the contig string
* @return the contig index, -1 if not found
*/
public static int getContigIndex( final String contig ) {
if (contigInfo.getSequenceIndex(contig) == -1)
Utils.scareUser(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
return contigInfo.getSequenceIndex(contig);
}
public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) {
return setupRefContigOrdering(refFile.getSequenceDictionary());
}
public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) {
if (seqDict == null) { // we couldn't load the reference dictionary
logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
Utils.scareUser("Failed to load reference dictionary");
return false;
} else if ( contigInfo == null ){
contigInfo = seqDict;
logger.debug(String.format("Prepared reference sequence contig dictionary"));
for (SAMSequenceRecord contig : seqDict.getSequences() ) {
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
}
}
return true;
}
private String contigName;
static int MAX_CONTIG;
// --------------------------------------------------------------------------------------------------------------
//
// constructors
//
// --------------------------------------------------------------------------------------------------------------
public GenomeLoc( int contigIndex, final long start, final long stop ) {
if(contigInfo == null) { throw new StingException("Contig info has not been setup in the GenomeLoc context yet."); }
if (!isSequenceIndexValid(contigIndex)) {
throw new StingException("Contig info has not been setup in the GenomeLoc context yet.");
}
/*GenomeLoc( int contigIndex, final long start, final long stop ) {
MAX_CONTIG = Integer.MAX_VALUE;
if (start < 0) { throw new StingException("Bad start position " + start);}
if (stop < -1) { throw new StingException("Bad stop position " + stop); } // a negative -1 indicates it's not a meaningful end position
this.contigIndex = contigIndex;
this.start = start;
this.contigName = null; // we just don't know
this.stop = stop == -1 ? start : stop;
}*/
GenomeLoc(final SAMRecord read) {
this(read.getHeader().getSequence(read.getReferenceIndex()).getSequenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
}
public GenomeLoc(final SAMRecord read) {
this(read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
GenomeLoc( final String contig, final int contigIndex, final long start, final long stop ) {
this.contigName = contig;
this.contigIndex = contigIndex;
this.start = start;
this.stop = stop;
}
public GenomeLoc( final String contig, final long start, final long stop ) {
this(contigInfo.getSequenceIndex(contig), start, stop);
}
public GenomeLoc( final String contig, final long pos ) {
this(contig, pos, pos);
}
public GenomeLoc( final int contig, final long pos ) {
/*GenomeLoc( final int contig, final long pos ) {
this(contig, pos, pos );
}
public GenomeLoc( final GenomeLoc toCopy ) {
this( toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() );
}
// --------------------------------------------------------------------------------------------------------------
//
// Parsing string representations
//
// --------------------------------------------------------------------------------------------------------------
private static long parsePosition( final String pos ) {
String x = pos.replaceAll(",", "");
return Long.parseLong(x);
}
/**
* Use this static constructor when the input data is under limited control (i.e. parsing user data).
* @param contig Contig to parse.
* @param start Starting point.
* @param stop Stop point.
* @return The genome location, or a MalformedGenomeLocException if unparseable.
*/
public static GenomeLoc parseGenomeLoc( final String contig, long start, long stop ) {
if( !isContigValid(contig) )
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
return new GenomeLoc(contig,start,stop);
}
public static GenomeLoc parseGenomeLoc( final String str ) {
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
//System.out.printf("Parsing location '%s'%n", str);
final Pattern regex1 = Pattern.compile("([\\w&&[^:]]+)$"); // matches case 1
final Pattern regex2 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)$"); // matches case 2
final Pattern regex3 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)-([\\d,]+)$"); // matches case 3
final Pattern regex4 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)\\+"); // matches case 4
String contig = null;
long start = 1;
long stop = Integer.MAX_VALUE;
boolean bad = false;
Matcher match1 = regex1.matcher(str);
Matcher match2 = regex2.matcher(str);
Matcher match3 = regex3.matcher(str);
Matcher match4 = regex4.matcher(str);
try {
if ( match1.matches() ) {
contig = match1.group(1);
}
else if ( match2.matches() ) {
contig = match2.group(1);
start = parsePosition(match2.group(2));
stop = start;
}
else if ( match4.matches() ) {
contig = match4.group(1);
start = parsePosition(match4.group(2));
}
else if ( match3.matches() ) {
contig = match3.group(1);
start = parsePosition(match3.group(2));
stop = parsePosition(match3.group(3));
if ( start > stop )
bad = true;
}
else {
bad = true;
}
} catch ( Exception e ) {
bad = true;
}
if ( bad ) {
throw new StingException("Invalid Genome Location string: " + str);
}
if ( stop == Integer.MAX_VALUE && hasKnownContigOrdering() ) {
// lookup the actually stop position!
stop = getContigInfo(contig).getSequenceLength();
}
if( !isContigValid(contig) )
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
GenomeLoc loc = parseGenomeLoc(contig,start,stop);
// System.out.printf(" => Parsed location '%s' into %s%n", str, loc);
return loc;
}
/**
* Useful utility function that parses a location string into a coordinate-order sorted
* array of GenomeLoc objects
*
* @param str String representation of genome locs. Null string corresponds to no filter.
* @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order
*/
public static List<GenomeLoc> parseGenomeLocs(final String str) {
// Null string means no filter.
if( str == null ) return null;
// Of the form: loc1;loc2;...
// Where each locN can be:
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
try {
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
for( String loc: str.split(";") )
locs.add( parseGenomeLoc(loc.trim()) );
Collections.sort(locs);
//logger.info(String.format("Going to process %d locations", locs.length));
locs = mergeOverlappingLocations(locs);
logger.debug("Locations are:" + Utils.join(", ", locs));
return locs;
} catch (Exception e) {
e.printStackTrace();
Utils.scareUser(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str));
return null;
}
}
public static List<GenomeLoc> mergeOverlappingLocations(final List<GenomeLoc> raw) {
logger.debug(" Raw locations are:\n" + Utils.join("\n", raw));
if ( raw.size() <= 1 )
return raw;
else {
ArrayList<GenomeLoc> merged = new ArrayList<GenomeLoc>();
Iterator<GenomeLoc> it = raw.iterator();
GenomeLoc prev = it.next();
while ( it.hasNext() ) {
GenomeLoc curr = it.next();
if ( prev.contiguousP(curr) ) {
prev = prev.merge(curr);
} else {
merged.add(prev);
prev = curr;
}
}
merged.add(prev);
return merged;
}
}
/**
* Move this Genome loc to the next contig, with a start
* and stop of 1.
* @return true if we are not out of contigs, otherwise false if we're
* at the end of the genome (no more contigs to jump to).
*/
public boolean toNextContig() {
if ((contigIndex + 1) < GenomeLoc.contigInfo.size()) {
this.contigIndex++;
this.start = 1;
this.stop = 1;
return true;
}
return false;
*/
GenomeLoc( final GenomeLoc toCopy ) {
this( toCopy.getContig(), toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() );
}
@ -319,7 +108,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
return returnTrueIfEmpty;
// skip loci before intervals begin
if ( hasKnownContigOrdering() && curr.contigIndex < locs.get(0).contigIndex )
if ( curr.contigIndex < locs.get(0).contigIndex )
return false;
for ( GenomeLoc loc : locs ) {
@ -336,23 +125,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
// Accessors and setters
//
public final String getContig() {
//this.contigIndex != -1;
if (!(contigInfo != null && contigInfo.getSequences() != null)) {
throw new StingException("The contig information or it's sequences are null");
}
if ((this.contigIndex < 0) || (this.contigIndex >= contigInfo.getSequences().size())) {
throw new StingException("The contig index is not bounded by the zero and seqeunce count, contig index: " + contigIndex);
}
if (contigInfo.getSequence(this.contigIndex) == null ||
contigInfo.getSequence(this.contigIndex).getSequenceName() == null) {
throw new StingException("The associated sequence index for contig " + contigIndex + " is null");
}
return contigInfo.getSequence(this.contigIndex).getSequenceName();
//if (contigInfo != null && contigInfo.getSequence(this.contigIndex) != null) {
// return contigInfo.getSequence(this.contigIndex).getSequenceName();
//}
//return null;
return this.contigName;
}
public final int getContigIndex() { return this.contigIndex; }
@ -372,7 +145,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
public final boolean atBeginningOfContigP() { return this.start == 1; }
public void setContig(String contig) {
this.contigIndex = contigInfo.getSequenceIndex(contig);
this.contigName = contig;
}
public void setStart(long start) {
@ -411,7 +184,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
throw new StingException("The two genome loc's need to be contigous");
}
return new GenomeLoc(getContig(),
return new GenomeLoc(getContig(), this.contigIndex,
Math.min(getStart(), that.getStart()),
Math.max( getStop(), that.getStop()) );
}
@ -497,54 +270,11 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
return new GenomeLoc(this);
}
//
// Comparison operations
//
// TODO: get rid of this method because it's sloooooooooooooow
@Deprecated
public static int compareContigs( final String thisContig, final String thatContig )
{
if ( thisContig == thatContig )
{
// Optimization. If the pointers are equal, then the contigs are equal.
return 0;
}
if ( hasKnownContigOrdering() )
{
int thisIndex = getContigIndex(thisContig);
int thatIndex = getContigIndex(thatContig);
if ( thisIndex == -1 )
{
if ( thatIndex == -1 )
{
// Use regular sorted order
return thisContig.compareTo(thatContig);
}
else
{
// this is always bigger if that is in the key set
return 1;
}
}
else if ( thatIndex == -1 )
{
return -1;
}
else
{
if ( thisIndex < thatIndex ) return -1;
if ( thisIndex > thatIndex ) return 1;
return 0;
}
}
else
{
return thisContig.compareTo(thatContig);
}
}
/**
* compare this genomeLoc's contig to another genome loc
* @param that
* @return
*/
public final int compareContigs( GenomeLoc that ) {
if (this.contigIndex == that.contigIndex)
return 0;
@ -570,64 +300,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
}
/**
* Read a file of genome locations to process.
* regions specified by the location string. The string is of the form:
* Of the form: loc1;loc2;...
* Where each locN can be:
* 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
*
* @param file_name
*/
public static List<GenomeLoc> IntervalFileToList(final String file_name) {
// first try to read it as an interval file since that's well structured
// we'll fail quickly if it's not a valid file. Then try to parse it as
// a location string file
List<GenomeLoc> ret = null;
try {
IntervalList il = IntervalList.fromFile(new File(file_name));
// iterate through the list of merged intervals and add then as GenomeLocs
ret = new ArrayList<GenomeLoc>();
for(Interval interval : il.getUniqueIntervals()) {
ret.add(new GenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd()));
}
return ret;
} catch (Exception e) {
try {
xReadLines reader = new xReadLines(new File(file_name));
List<String> lines = reader.readLines();
reader.close();
String locStr = Utils.join(";", lines);
logger.debug("locStr: " + locStr);
ret = parseGenomeLocs(locStr);
return ret;
} catch (Exception e2) {
logger.error("Attempt to parse interval file in GATK format failed: "+e2.getMessage());
e2.printStackTrace();
throw new StingException("Unable to parse out interval file in either format", e);
}
}
}
/**
* Determines whether the given contig is valid with respect to the sequence dictionary
* already installed in the GenomeLoc.
* @return True if the contig is valid. False otherwise.
*/
private static boolean isContigValid( String contig ) {
int contigIndex = contigInfo.getSequenceIndex(contig);
return isSequenceIndexValid(contigIndex);
}
/**
* Determines whether the given sequence index is valid with respect to the sequence dictionary.
* @param sequenceIndex sequence index
* @return True if the sequence index is valid, false otherwise.
*/
private static boolean isSequenceIndexValid( int sequenceIndex ) {
return sequenceIndex >= 0 && sequenceIndex < contigInfo.size();
}
}

View File

@ -0,0 +1,484 @@
package org.broadinstitute.sting.utils;
import edu.mit.broad.picard.directed.IntervalList;
import edu.mit.broad.picard.util.Interval;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Created by IntelliJ IDEA.
* User: aaronmckenna
* Date: Jun 18, 2009
* Time: 11:17:01 PM
* To change this template use File | Settings | File Templates.
*/
public class GenomeLocParser {
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
// --------------------------------------------------------------------------------------------------------------
//
// Ugly global variable defining the optional ordering of contig elements
//
// --------------------------------------------------------------------------------------------------------------
//public static Map<String, Integer> refContigOrdering = null;
private static SAMSequenceDictionary contigInfo = null;
/**
 * Reports whether a reference sequence dictionary has been installed in this parser.
 *
 * @return true when the contig dictionary is non-null, false otherwise
 */
public static boolean hasKnownContigOrdering() {
    final boolean dictionaryInstalled = (contigInfo != null);
    return dictionaryInstalled;
}
/**
 * Looks a contig up by name in the installed sequence dictionary.
 * <p>
 * There is no null guard on the dictionary here, so the contig ordering must
 * already have been set up before this is called.
 *
 * @param contig the name of the contig to look up
 *
 * @return the result of the dictionary's getSequence(contig) call for that name
 */
public static SAMSequenceRecord getContigInfo(final String contig) {
    final SAMSequenceRecord record = contigInfo.getSequence(contig);
    return record;
}
/**
 * Returns the index of the named contig in the installed sequence dictionary.
 * <p>
 * When the dictionary reports -1 for the name, the problem is handed to
 * Utils.scareUser; should that call return normally, the -1 is passed back
 * to the caller.
 *
 * @param contig the contig name
 *
 * @return the contig's index in the dictionary, or -1 as described above
 */
public static int getContigIndex(final String contig) {
    // look the contig up once and reuse the answer for both the check and the return value
    final int index = contigInfo.getSequenceIndex(contig);
    if (index == -1) {
        Utils.scareUser(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
    }
    return index;
}
/**
 * Installs the contig ordering using the sequence dictionary carried by a reference file.
 *
 * @param refFile the reference file whose sequence dictionary should be used
 *
 * @return the result of the dictionary-based setupRefContigOrdering overload
 */
public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) {
    final SAMSequenceDictionary dictionary = refFile.getSequenceDictionary();
    return setupRefContigOrdering(dictionary);
}
/**
 * Installs the given sequence dictionary as the parser's contig ordering.
 * <p>
 * Only the first non-null dictionary is stored; if one is already installed,
 * the argument is ignored. In both of those cases GenomeLoc.MAX_CONTIG is
 * refreshed from the installed dictionary's sequence count.
 *
 * @param seqDict the sequence dictionary to install
 *
 * @return false when seqDict is null (after logging and calling Utils.scareUser), true otherwise
 */
public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) {
    // without a dictionary there is nothing to install
    if (seqDict == null) {
        logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
        Utils.scareUser("Failed to load reference dictionary");
        return false;
    }

    // first caller wins: an already-installed dictionary is left untouched
    if (contigInfo == null) {
        contigInfo = seqDict;
        logger.debug("Prepared reference sequence contig dictionary");
        for (final SAMSequenceRecord record : seqDict.getSequences()) {
            logger.debug(String.format(" %s (%d bp)", record.getSequenceName(), record.getSequenceLength()));
        }
    }

    GenomeLoc.MAX_CONTIG = contigInfo.getSequences().size();
    return true;
}
/** Matches a bare contig name, e.g. 'chr2'. */
private static final Pattern CONTIG_ONLY_PATTERN = Pattern.compile("([\\w&&[^:]]+)$");
/** Matches a contig plus a single position, e.g. 'chr2:1000000'. */
private static final Pattern SINGLE_POSITION_PATTERN = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)$");
/** Matches a contig plus a start-stop range, e.g. 'chr2:1,000,000-2,000,000'. */
private static final Pattern RANGE_PATTERN = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)-([\\d,]+)$");
/** Matches a contig plus an open-ended start, e.g. 'chr2:1,000,000+'. */
private static final Pattern OPEN_ENDED_PATTERN = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)\\+");

/**
 * Parses a genome location from its string form.
 * <p>
 * Accepted forms are 'chr2', 'chr2:1000000', 'chr2:1,000,000-2,000,000' and
 * 'chr2:1,000,000+'. Commas inside positions are ignored. When no stop is
 * given (the bare-contig and '+' forms) and a sequence dictionary is installed,
 * the stop becomes the contig's sequence length.
 *
 * @param str the string to parse
 *
 * @return a GenomeLoc representing the String
 *
 * @throws StingException if the string matches none of the accepted forms, a position
 *                        cannot be parsed as a number, or start is greater than stop
 * @throws MalformedGenomeLocException if the contig is not valid for the installed dictionary
 */
public static GenomeLoc parseGenomeLoc(final String str) {
    String contig = null;
    long start = 1;
    long stop = Integer.MAX_VALUE; // sentinel meaning "no explicit stop was given"
    boolean bad = false;

    final Matcher contigOnly = CONTIG_ONLY_PATTERN.matcher(str);
    final Matcher singlePosition = SINGLE_POSITION_PATTERN.matcher(str);
    final Matcher range = RANGE_PATTERN.matcher(str);
    final Matcher openEnded = OPEN_ENDED_PATTERN.matcher(str);

    try {
        if (contigOnly.matches()) {
            contig = contigOnly.group(1);
        } else if (singlePosition.matches()) {
            contig = singlePosition.group(1);
            start = parsePosition(singlePosition.group(2));
            stop = start;
        } else if (openEnded.matches()) {
            contig = openEnded.group(1);
            start = parsePosition(openEnded.group(2));
        } else if (range.matches()) {
            contig = range.group(1);
            start = parsePosition(range.group(2));
            stop = parsePosition(range.group(3));
            bad = start > stop;
        } else {
            bad = true;
        }
    } catch (NumberFormatException e) {
        // a position such as ',,,' or an over-long digit run; keep the cause for diagnosis
        throw new StingException("Invalid Genome Location string: " + str, e);
    }

    if (bad) {
        throw new StingException("Invalid Genome Location string: " + str);
    }

    // validate the contig BEFORE touching its dictionary record: for an unknown contig the
    // record lookup below would otherwise be dereferenced and fail with a NullPointerException
    if (!isContigValid(contig))
        throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");

    if (stop == Integer.MAX_VALUE && hasKnownContigOrdering()) {
        // look up the actual stop position
        stop = getContigInfo(contig).getSequenceLength();
    }

    return parseGenomeLoc(contig, start, stop);
}
/**
 * Parses a ';'-separated list of location strings into a sorted, merged list of GenomeLocs.
 * <p>
 * Each entry is trimmed and handed to parseGenomeLoc, so it may be any form that method
 * accepts, e.g. 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'. The parsed locations
 * are sorted with Collections.sort and then passed through mergeOverlappingLocations.
 *
 * @param str String representation of genome locs. A null string corresponds to no filter.
 *
 * @return the sorted, merged locations, or null when str is null
 */
public static List<GenomeLoc> parseGenomeLocs(final String str) {
    // a null string means "no filter"
    if (str == null) {
        return null;
    }

    try {
        final String[] pieces = str.split(";");
        List<GenomeLoc> parsed = new ArrayList<GenomeLoc>();
        for (int i = 0; i < pieces.length; i++) {
            parsed.add(parseGenomeLoc(pieces[i].trim()));
        }
        Collections.sort(parsed);
        parsed = mergeOverlappingLocations(parsed);
        logger.debug("Locations are:" + Utils.join(", ", parsed));
        return parsed;
    } catch (Exception e) {
        // TODO: surface the underlying exception's message in the reported text
        final String message = String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str);
        throw new StingException(message, e);
    }
}
// --------------------------------------------------------------------------------------------------------------
//
// Parsing string representations
//
// --------------------------------------------------------------------------------------------------------------
/**
 * Converts a position string into a number, ignoring any commas in it
 * (so "1,000,000" is read as 1000000).
 *
 * @param pos the position text
 *
 * @return the position as a long; Long.parseLong throws NumberFormatException if the
 *         remaining text is not a valid number
 */
private static long parsePosition(final String pos) {
    // plain (non-regex) replacement is enough to drop the separators
    return Long.parseLong(pos.replace(",", ""));
}
/**
 * Collapses runs of neighbouring list entries for which contiguousP() holds into single
 * locations by repeatedly calling merge().
 * <p/>
 * Only adjacent entries are compared, so presumably the input is expected to be sorted
 * already (parseGenomeLocs sorts before calling this method).
 *
 * @param raw the unchecked genome loc list
 *
 * @return the list of merged locations; lists with zero or one element are returned as-is
 */
public static List<GenomeLoc> mergeOverlappingLocations(final List<GenomeLoc> raw) {
    logger.debug(" Raw locations are:\n" + Utils.join("\n", raw));

    // nothing to merge
    if (raw.size() <= 1) {
        return raw;
    }

    final List<GenomeLoc> result = new ArrayList<GenomeLoc>();
    // the location currently being grown; emitted once a non-contiguous entry is met
    GenomeLoc pending = raw.get(0);
    for (final GenomeLoc next : raw.subList(1, raw.size())) {
        if (pending.contiguousP(next)) {
            pending = pending.merge(next);
        } else {
            result.add(pending);
            pending = next;
        }
    }
    // the last run has not been emitted yet
    result.add(pending);
    return result;
}
/**
 * Checks a contig name against the installed sequence dictionary: the name is looked up
 * in contigInfo and the resulting index is range-checked.
 *
 * @param contig the contig name to check
 *
 * @return True if the contig is valid. False otherwise.
 */
private static boolean isContigValid(String contig) {
    return isSequenceIndexValid(contigInfo.getSequenceIndex(contig));
}
/**
 * Checks that a sequence index lies within [0, contigInfo.size()).
 *
 * @param sequenceIndex sequence index
 *
 * @return True if the sequence index is valid, false otherwise.
 */
private static boolean isSequenceIndexValid(int sequenceIndex) {
    if (sequenceIndex < 0) {
        return false;
    }
    return sequenceIndex < contigInfo.size();
}
/**
 * Use this static constructor when the input data is under limited control (i.e. parsing user data).
 * The contig name is checked against the sequence dictionary before the location is built.
 *
 * @param contig Contig to parse.
 * @param start  Starting point.
 * @param stop   Stop point.
 *
 * @return The genome location.
 *
 * @throws MalformedGenomeLocException if the contig is not valid according to isContigValid
 */
public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) {
    if (isContigValid(contig)) {
        return new GenomeLoc(contig, getContigIndex(contig), start, stop);
    }
    throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
}
/**
 * Read a file of genome locations to process.
 * <p/>
 * The file is first read with IntervalList.fromFile. If that fails for any reason, the
 * file's lines are joined with ';' and handed to parseGenomeLocs, i.e. the content is
 * treated as loc1;loc2;... where each locN can be
 * 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'.
 *
 * @param file_name path of the interval file
 *
 * @return the genome locations read from the file
 *
 * @throws StingException if the file cannot be parsed in either format; the cause is the
 *                        failure of the first (interval list) attempt
 */
public static List<GenomeLoc> intervalFileToList(final String file_name) {
    // First try to read it as an interval file since that's well structured;
    // we'll fail quickly if it's not a valid file. Then try to parse it as
    // a location string file.
    try {
        IntervalList il = IntervalList.fromFile(new File(file_name));

        // iterate through the list of merged intervals and add them as GenomeLocs
        List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
        for (Interval interval : il.getUniqueIntervals()) {
            ret.add(new GenomeLoc(interval.getSequence(), getContigIndex(interval.getSequence()), interval.getStart(), interval.getEnd()));
        }
        return ret;
    } catch (Exception e) {
        try {
            xReadLines reader = new xReadLines(new File(file_name));
            List<String> lines;
            try {
                lines = reader.readLines();
            } finally {
                // always close the reader, even when reading the lines fails
                reader.close();
            }
            String locStr = Utils.join(";", lines);
            logger.debug("locStr: " + locStr);
            return parseGenomeLocs(locStr);
        } catch (Exception e2) {
            // route the stack trace through the logger instead of printing it to stderr
            logger.error("Attempt to parse interval file in GATK format failed: " + e2.getMessage(), e2);
            throw new StingException("Unable to parse out interval file in either format", e);
        }
    }
}
/**
 * Looks up the sequence stored at the given index in contigInfo and returns its name.
 *
 * @param contigIndex the index of the contig in the sequence dictionary
 *
 * @return the name of the contig at that index
 */
private static String getSequenceNameFromIndex(int contigIndex) {
    return contigInfo.getSequence(contigIndex).getSequenceName();
}
/**
 * Builds a genome loc from a contig name and an interval. The contig index is resolved
 * through getContigIndex and the result is checked by verifyGenomeLoc.
 *
 * @param contig the contig name
 * @param start  the starting position
 * @param stop   the stop position
 *
 * @return a new, verified genome loc
 */
public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) {
    checkSetup();
    final GenomeLoc created = new GenomeLoc(contig, getContigIndex(contig), start, stop);
    return verifyGenomeLoc(created);
}
/**
 * create a genome loc, given the contig index, start, and stop
 * <p/>
 * Unlike the other factory methods this one does not go through verifyGenomeLoc: a stop
 * of -1 is accepted, since it indicates that the end position is not meaningful.
 *
 * @param contigIndex the contig index
 * @param start       the start position
 * @param stop        the stop position
 *
 * @return a new genome loc
 *
 * @throws StingException if the parser has not been set up, start is negative, stop is
 *                        below -1, or the contig index is not in the sequence dictionary
 */
public static GenomeLoc createGenomeLoc(int contigIndex, final long start, final long stop) {
    checkSetup();
    if (start < 0) {
        throw new StingException("Bad start position " + start);
    }
    if (stop < -1) {
        throw new StingException("Bad stop position " + stop);
    } // a negative -1 indicates it's not a meaningful end position
    // reject unknown indices here: the name lookup below dereferences the dictionary entry
    // without any check of its own
    if (!isSequenceIndexValid(contigIndex)) {
        throw new StingException("Bad contig index " + contigIndex);
    }
    return new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop);
}
/**
 * Builds a genome loc from a read, using the read's reference name, reference index,
 * alignment start and alignment end. The result is checked by verifyGenomeLoc.
 *
 * @param read the read to take the location from
 *
 * @return a new, verified genome loc spanning the read's alignment
 */
public static GenomeLoc createGenomeLoc(final SAMRecord read) {
    checkSetup();
    final GenomeLoc readLoc = new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
    return verifyGenomeLoc(readLoc);
}
/**
 * create a new genome loc, given the contig index, and a single position
 *
 * @param contig the contig index
 * @param pos    the position
 *
 * @return a genome loc representing a single base at the specified position on the contig
 *
 * @throws StingException if the parser has not been set up, the contig index is not in
 *                        the sequence dictionary, or the location fails verifyGenomeLoc
 */
public static GenomeLoc createGenomeLoc(final int contig, final long pos) {
    checkSetup();
    // The contig name is resolved before verifyGenomeLoc runs, so an unknown index has to
    // be rejected up front; otherwise the lookup fails before verification is reached.
    if (!isSequenceIndexValid(contig)) {
        throw new StingException("Parameters to GenomeLocParser are incorrect: the contig index " + contig + " is not in the stored sequence dictionary");
    }
    return verifyGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos));
}
/**
 * Builds a single-base genome loc from a contig name and a position; start and stop are
 * both set to pos. The result is checked by verifyGenomeLoc.
 *
 * @param contig the contig name
 * @param pos    the position
 *
 * @return a genome loc representing a single base at the specified position on the contig
 */
public static GenomeLoc createGenomeLoc(final String contig, final long pos) {
    checkSetup();
    final GenomeLoc singleBase = new GenomeLoc(contig, getContigIndex(contig), pos, pos);
    return verifyGenomeLoc(singleBase);
}
/**
 * Builds a new genome loc with the same contig, contig index, start and stop as an
 * existing one. The copy is checked by verifyGenomeLoc.
 *
 * @param toCopy the genome loc to copy
 *
 * @return a new, verified genome loc with the same coordinates
 */
public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) {
    checkSetup();
    final GenomeLoc copy = new GenomeLoc(toCopy.getContig(), toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop());
    return verifyGenomeLoc(copy);
}
/**
 * Validates a freshly built genome loc: start and stop must not be negative and the
 * contig index must lie within the stored sequence list.
 * <p/>
 * The ordering of start and stop is deliberately not checked; per the original note, a
 * start of zero is currently used to indicate infinite read count.
 *
 * @param toReturn the genome loc we're about to return
 *
 * @return the genome loc if it's valid, otherwise we throw an exception
 */
private static GenomeLoc verifyGenomeLoc(GenomeLoc toReturn) {
    // first failing condition wins; null means the location passed every check
    String failure = null;
    if (toReturn.getStart() < 0) {
        failure = "Parameters to GenomeLocParser are incorrect: the start position is less than 0";
    } else if (toReturn.getStop() < 0) {
        failure = "Parameters to GenomeLocParser are incorrect: the stop position is less than 0";
    } else if (toReturn.getContigIndex() < 0) {
        failure = "Parameters to GenomeLocParser are incorrect: the contig index is less than 0";
    } else if (toReturn.getContigIndex() >= contigInfo.getSequences().size()) {
        failure = "Parameters to GenomeLocParser are incorrect: the contig index is greater then the stored sequence count";
    }

    if (failure != null) {
        throw new StingException(failure);
    }
    return toReturn;
}
/**
 * Creates a genome loc on the contig that follows the given location's contig, with a
 * start and stop of 1. The location passed in is not modified.
 *
 * @param current the location whose contig index is used as the starting point
 *
 * @return a new genome loc at position 1 of the next contig, or null if we're at the end
 *         of the genome (no more contigs to jump to).
 */
public static GenomeLoc toNextContig(GenomeLoc current) {
    final int nextIndex = current.getContigIndex() + 1;
    if (nextIndex >= contigInfo.getSequences().size()) {
        // already on the last contig
        return null;
    }
    return verifyGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(nextIndex), nextIndex, 1, 1));
}
/**
 * Guard used by the factory methods: fails with a StingException when contigInfo has not
 * been set yet.
 */
private static void checkSetup() {
    if (contigInfo != null) {
        return;
    }
    throw new StingException("The GenomeLocParser hasn't been setup with a contig sequence yet");
}
/**
 * Orders two contig names by the contig indices that getContigIndex reports for them.
 *
 * @param firstContig  the first contig name
 * @param secondContig the second contig name
 *
 * @return -1, 0 or 1 when the first contig's index is less than, equal to, or greater
 *         than the second's
 */
public static int compareContigs(String firstContig, String secondContig) {
    checkSetup();
    final int firstIndex = getContigIndex(firstContig);
    final int secondIndex = getContigIndex(secondContig);
    if (firstIndex < secondIndex) {
        return -1;
    }
    return firstIndex == secondIndex ? 0 : 1;
}
}

View File

@ -168,8 +168,8 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
* |------| + |--------|
*
*/
GenomeLoc before = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
GenomeLoc after = new GenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
GenomeLoc before = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
GenomeLoc after = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
int index = mArray.indexOf(g);
if (after.getStop() - after.getStart() > 0) {
mArray.add(index, after);
@ -207,9 +207,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
*/
if (e.getStart() < g.getStart()) {
l = new GenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
l = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
} else {
l = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
l = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
}
// replace g with the new region
mArray.set(mArray.indexOf(g), l);
@ -230,7 +230,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet();
for (SAMSequenceRecord record : dict.getSequences()) {
returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength()));
returnSortedSet.add(GenomeLocParser.createGenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength()));
}
return returnSortedSet;
}
@ -258,7 +258,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
GenomeLocSortedSet ret = new GenomeLocSortedSet();
for (GenomeLoc loc : this.mArray) {
// ensure a deep copy
ret.mArray.add(new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()));
ret.mArray.add(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()));
}
return ret;
}

View File

@ -7,8 +7,6 @@ import net.sf.samtools.util.CloseableIterator;
import java.io.InputStream;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import java.io.File;
import java.util.Collections;
import java.util.List;
import java.util.Arrays;
import java.util.Iterator;
@ -16,8 +14,7 @@ import java.util.ArrayList;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLocParser;
/**
* User: hanna
* Date: Jun 11, 2009
@ -54,11 +51,11 @@ public class ArtificialSAMFileReader extends SAMFileReader {
*/
@Override
public CloseableIterator<SAMRecord> query(final String sequence, final int start, final int end, final boolean contained) {
GenomeLoc region = new GenomeLoc(sequence, start, end);
GenomeLoc region = GenomeLocParser.createGenomeLoc(sequence, start, end);
List<SAMRecord> coveredSubset = new ArrayList<SAMRecord>();
for( SAMRecord read: reads ) {
GenomeLoc readPosition = new GenomeLoc(read);
GenomeLoc readPosition = GenomeLocParser.createGenomeLoc(read);
if( contained && region.containsP(readPosition) ) coveredSubset.add(read);
else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read);
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import org.junit.Assert;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.LocusContext;
import net.sf.samtools.SAMRecord;
@ -40,13 +41,13 @@ public class AllLocusViewTest extends LocusViewTemplate {
AllLocusView allLocusView = (AllLocusView)view;
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
GenomeLoc site = new GenomeLoc("chr1",i);
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);
LocusContext locusContext = allLocusView.next();
Assert.assertEquals("Locus context location is incorrect", site, locusContext.getLocation() );
int expectedReadsAtSite = 0;
for( SAMRecord read: reads ) {
if(new GenomeLoc(read).containsP(locusContext.getLocation())) {
if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) {
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
expectedReadsAtSite++;
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import org.junit.Assert;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.LocusContext;
import net.sf.samtools.SAMRecord;
@ -43,11 +44,11 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
CoveredLocusView coveredLocusView = (CoveredLocusView)view;
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
GenomeLoc site = new GenomeLoc("chr1",i);
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);
int expectedReadsAtSite = 0;
for( SAMRecord read: reads ) {
if( new GenomeLoc(read).containsP(site) )
if( GenomeLocParser.createGenomeLoc(read).containsP(site) )
expectedReadsAtSite++;
}
@ -61,7 +62,7 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size());
for( SAMRecord read: reads ) {
if(new GenomeLoc(read).containsP(locusContext.getLocation()))
if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation()))
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
}
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
import org.junit.Test;
import org.junit.Assert;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
@ -32,7 +33,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
*/
@Test(expected=InvalidPositionException.class)
public void testSingleBPFailure() {
Shard shard = new LocusShard( new GenomeLoc(0,1,50) );
Shard shard = new LocusShard( GenomeLocParser.createGenomeLoc(0,1,50) );
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
@ -45,12 +46,12 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
*/
@Test(expected=InvalidPositionException.class)
public void testBoundsFailure() {
Shard shard = new LocusShard( new GenomeLoc(0,1,50) );
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0,1,50) );
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
LocusReferenceView view = new LocusReferenceView(dataProvider);
view.getReferenceBase(new GenomeLoc(0,51));
view.getReferenceBase(GenomeLocParser.createGenomeLoc(0,51));
}

View File

@ -1,31 +1,21 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.junit.BeforeClass;
import org.junit.Test;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
import org.broadinstitute.sting.gatk.Reads;
import java.io.FileNotFoundException;
import java.io.File;
import java.util.Collections;
import java.util.List;
import java.util.Iterator;
import java.util.ArrayList;
import java.util.Arrays;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import java.io.FileNotFoundException;
import java.util.*;
/**
* User: hanna
* Date: May 13, 2009
@ -49,14 +39,14 @@ public abstract class LocusViewTemplate extends BaseTest {
@BeforeClass
public static void setupGenomeLoc() throws FileNotFoundException {
sequenceSourceFile = fakeReferenceSequenceFile();
GenomeLoc.setupRefContigOrdering(sequenceSourceFile);
GenomeLocParser.setupRefContigOrdering(sequenceSourceFile);
}
@Test
public void emptyLocusContextTest() {
SAMRecordIterator iterator = new SAMRecordIterator();
GenomeLoc shardBounds = new GenomeLoc("chr1",1,5);
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1",1,5);
Shard shard = new LocusShard(shardBounds);
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
@ -70,7 +60,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read = buildSAMRecord("chr1",1,5);
SAMRecordIterator iterator = new SAMRecordIterator(read);
GenomeLoc shardBounds = new GenomeLoc("chr1",1,5);
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1",1,5);
Shard shard = new LocusShard(shardBounds);
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
@ -84,7 +74,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read = buildSAMRecord("chr1",1,5);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -96,7 +86,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read = buildSAMRecord("chr1",6,10);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -108,7 +98,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read = buildSAMRecord("chr1",3,7);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -120,7 +110,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read = buildSAMRecord("chr1",1,10);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",6,15));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -132,7 +122,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read = buildSAMRecord("chr1",6,15);
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -145,7 +135,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read2 = buildSAMRecord("chr1",6,10);
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2);
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -162,7 +152,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read4 = buildSAMRecord("chr1",6,10);
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4);
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -179,7 +169,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read4 = buildSAMRecord("chr1",5,9);
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4);
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -198,7 +188,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecord read6 = buildSAMRecord("chr1",6,10);
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4,read5,read6);
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );
@ -224,7 +214,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read01,read02,read03,read04,read05,read06,
read07,read08,read09,read10,read11,read12);
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",6,15));
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
LocusView view = createView( dataProvider );

View File

@ -4,7 +4,7 @@ import org.junit.Test;
import org.junit.BeforeClass;
import org.junit.Assert;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -44,7 +44,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
public static void init() throws FileNotFoundException {
// sequence
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
GenomeLoc.setupRefContigOrdering(seq);
GenomeLocParser.setupRefContigOrdering(seq);
}
/**
@ -52,11 +52,11 @@ public class ReferenceOrderedViewTest extends BaseTest {
*/
@Test
public void testNoBindings() {
Shard shard = new LocusShard(new GenomeLoc("chrM",1,30));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30));
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
ReferenceOrderedView view = new ReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",10));
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10));
Assert.assertNull("The tracker should not have produced any data", tracker.lookup("tableTest",null));
}
@ -69,12 +69,12 @@ public class ReferenceOrderedViewTest extends BaseTest {
ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class);
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(rod);
Shard shard = new LocusShard(new GenomeLoc("chrM",1,30));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30));
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.singletonList(dataSource));
ReferenceOrderedView view = new ReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",20));
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
TabularROD datum = (TabularROD)tracker.lookup("tableTest",null);
Assert.assertEquals("datum parameter for COL1 is incorrect", "C", datum.get("COL1"));
@ -95,12 +95,12 @@ public class ReferenceOrderedViewTest extends BaseTest {
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(rod2);
Shard shard = new LocusShard(new GenomeLoc("chrM",1,30));
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30));
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Arrays.asList(dataSource1,dataSource2));
ReferenceOrderedView view = new ReferenceOrderedView( provider );
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",20));
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
TabularROD datum1 = (TabularROD)tracker.lookup("tableTest1",null);
Assert.assertEquals("datum1 parameter for COL1 is incorrect", "C", datum1.get("COL1"));

View File

@ -1,15 +1,15 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.FileNotFoundException;
import java.io.File;
import net.sf.samtools.SAMSequenceRecord;
import java.io.FileNotFoundException;
/**
* User: hanna
* Date: May 27, 2009
@ -43,7 +43,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
@BeforeClass
public static void initialize() throws FileNotFoundException {
sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
GenomeLoc.setupRefContigOrdering(sequenceFile);
GenomeLocParser.setupRefContigOrdering(sequenceFile);
}
/**
@ -51,7 +51,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
*/
@Test
public void testReferenceStart() {
validateLocation( new GenomeLoc(0,1,25) );
validateLocation( GenomeLocParser.createGenomeLoc(0,1,25) );
}
/**
@ -63,7 +63,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1);
final long contigStart = selectedContig.getSequenceLength() - 24;
final long contigStop = selectedContig.getSequenceLength();
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
}
/**
@ -74,7 +74,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
// Test the last 25 bases of the first contig.
int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2;
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition);
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),1,25) );
validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),1,25) );
}
@ -88,7 +88,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition);
final long contigStart = selectedContig.getSequenceLength() - 24;
final long contigStop = selectedContig.getSequenceLength();
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
}
protected abstract void validateLocation( GenomeLoc loc );

View File

@ -7,6 +7,7 @@ import static org.junit.Assert.assertTrue;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.BaseTest;
import net.sf.samtools.SAMFileHeader;
@ -60,7 +61,7 @@ public class IntervalShardStrategyTest extends BaseTest {
@Before
public void setup() {
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
mSortedSet = new GenomeLocSortedSet();
}
@ -71,7 +72,7 @@ public class IntervalShardStrategyTest extends BaseTest {
@Test
public void testSingleChromosomeFunctionality() {
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000);
mSortedSet.add(loc);
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
int counter = 0;
@ -87,7 +88,7 @@ public class IntervalShardStrategyTest extends BaseTest {
@Test
public void testMultipleChromosomeFunctionality() {
for (int x = 0; x < 5; x++) {
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
mSortedSet.add(loc);
}
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
@ -104,7 +105,7 @@ public class IntervalShardStrategyTest extends BaseTest {
@Test
public void testOddSizeShardFunctionality() {
for (int x = 0; x < 5; x++) {
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
mSortedSet.add(loc);
}
IntervalShardStrategy strat = new IntervalShardStrategy(789, mSortedSet);
@ -122,7 +123,7 @@ public class IntervalShardStrategyTest extends BaseTest {
@Test
public void testInfiniteShardSize() {
for (int x = 0; x < 5; x++) {
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
mSortedSet.add(loc);
}
IntervalShardStrategy strat = new IntervalShardStrategy(Long.MAX_VALUE, mSortedSet);
@ -137,7 +138,7 @@ public class IntervalShardStrategyTest extends BaseTest {
@Test(expected = UnsupportedOperationException.class)
public void testRemove() {
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000);
mSortedSet.add(loc);
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
strat.remove();

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Before;
import org.junit.Test;
@ -51,20 +52,20 @@ public class IntervalShardTest extends BaseTest {
@Before
public void setup() {
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
}
@Test
public void simpleReturn() {
GenomeLoc loc = new GenomeLoc(1, 1, 100);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
intervalShard = new IntervalShard(loc);
assertTrue(intervalShard.getGenomeLoc().equals(loc));
}
@Test
public void ensureNotReference() {
GenomeLoc loc = new GenomeLoc(1, 1, 100);
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
intervalShard = new IntervalShard(loc);
assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc));
}

View File

@ -1,7 +1,7 @@
package org.broadinstitute.sting.gatk.datasources.shards;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.BaseTest;
import org.junit.Before;
@ -52,7 +52,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
@Before
public void setup() {
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
}
@Test

View File

@ -6,6 +6,7 @@ import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.*;
import static org.junit.Assert.assertTrue;
@ -42,7 +43,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
@Before
public void setup() {
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
set = new GenomeLocSortedSet();
}
@ -54,7 +55,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
@Test
public void testReadInterval() {
GenomeLoc l = new GenomeLoc(0,1,100);
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
set.add(l);
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set);
assertTrue(st instanceof IntervalShardStrategy);
@ -74,7 +75,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
@Test
public void testExpInterval() {
GenomeLoc l = new GenomeLoc(0,1,100);
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
set.add(l);
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100,set);
assertTrue(st instanceof ExpGrowthLocusShardStrategy);
@ -82,7 +83,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
@Test
public void testLinearInterval() {
GenomeLoc l = new GenomeLoc(0,1,100);
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
set.add(l);
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,header.getSequenceDictionary(),100,set);
assertTrue(st instanceof LinearLocusShardStrategy);

View File

@ -8,6 +8,7 @@ import static org.junit.Assert.assertTrue;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.TabularROD;
@ -38,13 +39,13 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
private ReferenceOrderedData<? extends ReferenceOrderedDatum> rod = null;
private final GenomeLoc testSite1 = new GenomeLoc("chrM",10);
private final GenomeLoc testSite2 = new GenomeLoc("chrM",20);
private final GenomeLoc testSite3 = new GenomeLoc("chrM",30);
private final GenomeLoc testSite1 = GenomeLocParser.createGenomeLoc("chrM",10);
private final GenomeLoc testSite2 = GenomeLocParser.createGenomeLoc("chrM",20);
private final GenomeLoc testSite3 = GenomeLocParser.createGenomeLoc("chrM",30);
@BeforeClass
public static void init() throws FileNotFoundException {
GenomeLoc.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile));
GenomeLocParser.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile));
TabularROD.setDelimiter(TabularROD.DEFAULT_DELIMITER, TabularROD.DEFAULT_DELIMITER_REGEX);
}

View File

@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import org.junit.After;
import org.junit.Before;
@ -60,7 +60,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
// sequence
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary());
}
/**

View File

@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator;
import static org.junit.Assert.assertEquals;
@ -76,7 +77,7 @@ public class SAMByReadsTest extends BaseTest {
@Test
public void testToUnmappedReads() {
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
try {
int unmappedReadsSeen = 0;
int iterations = 0;
@ -109,7 +110,7 @@ public class SAMByReadsTest extends BaseTest {
@Test
public void testShardingOfReadsSize14() {
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
targetReadCount = 14;
try {
int iterations = 0;
@ -159,7 +160,7 @@ public class SAMByReadsTest extends BaseTest {
@Test
public void testShardingOfReadsSize25() {
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
targetReadCount = 25;
try {
int iterations = 0;

View File

@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SimpleDataSourceLoadException;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
@ -73,7 +73,7 @@ public class BoundedReadIteratorTest extends BaseTest {
// sequence
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary());
}

View File

@ -8,15 +8,13 @@ import org.junit.*;
import static org.junit.Assert.assertTrue;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import org.broadinstitute.sting.utils.RefHanger;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.io.File;
import java.io.PrintStream;
import java.io.FileOutputStream;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.List;
import java.util.ArrayList;
/**
@ -33,7 +31,7 @@ public class TabularRODTest extends BaseTest {
public static void init() {
// sequence
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
GenomeLoc.setupRefContigOrdering(seq);
GenomeLocParser.setupRefContigOrdering(seq);
}
@Before
@ -50,7 +48,7 @@ public class TabularRODTest extends BaseTest {
logger.warn("Executing test1");
TabularROD one = (TabularROD)iter.next();
assertTrue(one.size() == 4);
assertTrue(one.getLocation().equals(new GenomeLoc("chrM", 10)));
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one.get("COL1").equals("A"));
assertTrue(one.get("COL2").equals("B"));
assertTrue(one.get("COL3").equals("C"));
@ -62,7 +60,7 @@ public class TabularRODTest extends BaseTest {
TabularROD one = (TabularROD)iter.next();
TabularROD two = (TabularROD)iter.next();
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 20)));
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20)));
assertTrue(two.get("COL1").equals("C"));
assertTrue(two.get("COL2").equals("D"));
assertTrue(two.get("COL3").equals("E"));
@ -75,7 +73,7 @@ public class TabularRODTest extends BaseTest {
TabularROD two = (TabularROD)iter.next();
TabularROD three = (TabularROD)iter.next();
assertTrue(three.size() == 4);
assertTrue(three.getLocation().equals(new GenomeLoc("chrM", 30)));
assertTrue(three.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 30)));
assertTrue(three.get("COL1").equals("F"));
assertTrue(three.get("COL2").equals("G"));
assertTrue(three.get("COL3").equals("H"));
@ -93,9 +91,9 @@ public class TabularRODTest extends BaseTest {
@Test
public void testSeek() {
logger.warn("Executing testSeek");
TabularROD two = (TabularROD)iter.seekForward(new GenomeLoc("chrM", 20));
TabularROD two = (TabularROD)iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20));
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 20)));
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20)));
assertTrue(two.get("COL1").equals("C"));
assertTrue(two.get("COL2").equals("D"));
assertTrue(two.get("COL3").equals("E"));
@ -118,7 +116,7 @@ public class TabularRODTest extends BaseTest {
logger.warn("Executing testDelim1");
TabularROD one2 = (TabularROD)iter_commas.next();
assertTrue(one2.size() == 5);
assertTrue(one2.getLocation().equals(new GenomeLoc("chrM", 10)));
assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one2.get("COL1").equals("A"));
assertTrue(one2.get("COL2").equals("B"));
assertTrue(one2.get("COL3").equals("C"));
@ -135,7 +133,7 @@ public class TabularRODTest extends BaseTest {
logger.warn("Executing testDelim1");
TabularROD one2 = (TabularROD)iter_commas.next();
assertTrue(one2.size() == 5);
assertTrue(one2.getLocation().equals(new GenomeLoc("chrM", 10)));
assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
assertTrue(one2.get("COL1").equals("A"));
assertTrue(one2.get("COL2").equals("B"));
assertTrue(one2.get("COL3").equals("C"));
@ -148,7 +146,7 @@ public class TabularRODTest extends BaseTest {
ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3"));
String rowData = String.format("%d %d %d", 1, 2, 3);
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" "));
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
System.out.println(">>>>> " + row.toString());
assertTrue(row.toString().equals("chrM:1\t1\t2\t3"));
}
@ -166,11 +164,11 @@ public class TabularRODTest extends BaseTest {
out.println(TabularROD.headerString(header));
String rowData = String.format("%d %d %d", 1, 2, 3);
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" "));
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
out.println(row.toString());
rowData = String.format("%d %d %d", 3, 4, 5);
row = new TabularROD("myName", header, new GenomeLoc("chrM", 2), rowData.split(" "));
row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 2), rowData.split(" "));
out.println(row.toString());
ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", outputFile, TabularROD.class);
@ -178,14 +176,14 @@ public class TabularRODTest extends BaseTest {
TabularROD one = (TabularROD)iter_commas.next();
assertTrue(one.size() == 4);
assertTrue(one.getLocation().equals(new GenomeLoc("chrM", 1)));
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 1)));
assertTrue(one.get("col1").equals("1"));
assertTrue(one.get("col2").equals("2"));
assertTrue(one.get("col3").equals("3"));
TabularROD two = (TabularROD)iter_commas.next();
assertTrue(two.size() == 4);
assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 2)));
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 2)));
assertTrue(two.get("col1").equals("3"));
assertTrue(two.get("col2").equals("4"));
assertTrue(two.get("col3").equals("5"));
@ -195,14 +193,14 @@ public class TabularRODTest extends BaseTest {
public void testBadHeader1() {
logger.warn("Executing testBadHeader1");
ArrayList<String> header = new ArrayList<String>();
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1));
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1));
}
@Test (expected=RuntimeException.class )
public void testBadHeader2() {
logger.warn("Executing testBadHeader2");
ArrayList<String> header = new ArrayList<String>(Arrays.asList("col1", "col2", "col3"));
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1));
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1));
}
@Test (expected=RuntimeException.class )
@ -211,6 +209,6 @@ public class TabularRODTest extends BaseTest {
ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3"));
String rowData = String.format("%d %d %d %d", 1, 2, 3, 4);
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" "));
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.traversals;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
@ -10,8 +11,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import static org.junit.Assert.fail;
@ -116,7 +116,7 @@ public class TraverseReadsTest extends BaseTest {
catch (FileNotFoundException ex) {
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
}
GenomeLoc.setupRefContigOrdering(ref);
GenomeLocParser.setupRefContigOrdering(ref);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
ref.getSequenceDictionary(),
@ -148,7 +148,7 @@ public class TraverseReadsTest extends BaseTest {
fail("Count read walker should return an interger.");
}
if (((Integer) accumulator) != 9721) {
fail("there should be 9721 mapped reads in the index file");
fail("there should be 9721 mapped reads in the index file, there was " + ((Integer) accumulator) );
}
}
@ -156,7 +156,6 @@ public class TraverseReadsTest extends BaseTest {
/** Test out that we can shard the file and iterate over every read */
@Test
public void testUnmappedReadCount() {
IndexedFastaSequenceFile ref = null;
try {
ref = new IndexedFastaSequenceFile(refFile);
@ -164,7 +163,7 @@ public class TraverseReadsTest extends BaseTest {
catch (FileNotFoundException ex) {
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
}
GenomeLoc.setupRefContigOrdering(ref);
GenomeLocParser.setupRefContigOrdering(ref);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
ref.getSequenceDictionary(),
@ -195,7 +194,7 @@ public class TraverseReadsTest extends BaseTest {
fail("Count read walker should return an interger.");
}
if (((Integer) accumulator) != 10000) {
fail("there should be 10000 mapped reads in the index file");
fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator));
}
}

View File

@ -3,8 +3,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.indels;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.OutputTracker;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileReader;
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
@ -47,7 +47,7 @@ public class CleanedReadInjectorTest extends BaseTest {
@BeforeClass
public static void initialize() throws FileNotFoundException {
sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
GenomeLoc.setupRefContigOrdering(sequenceFile);
GenomeLocParser.setupRefContigOrdering(sequenceFile);
}
@Test

View File

@ -0,0 +1,126 @@
package org.broadinstitute.sting.utils;
import static junit.framework.Assert.assertTrue;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.junit.Assert.assertEquals;
import org.junit.Test;
/**
 * @author aaron
 * <p/>
 * Class GenomeLocParserTest
 * <p/>
 * Tests the functionality of the genome loc parser: contig-ordering setup, contig lookups,
 * parsing of location strings, and the createGenomeLoc factory overloads.
 * <p/>
 * NOTE(review): the parser's contig ordering appears to be static state shared between tests.
 * testUnsetupException and the first assertion in testKnownContigOrder presumably only hold
 * while no earlier test has called setupRefContigOrdering -- confirm against the test runner's
 * execution order.
 */
public class GenomeLocParserTest extends BaseTest {

    /** Creating a location is expected to throw while no contig ordering has been set up. */
    @Test(expected = StingException.class)
    public void testUnsetupException() {
        GenomeLocParser.createGenomeLoc(0, 0, 0);
    }

    /** hasKnownContigOrdering should flip from false to true once a dictionary is registered. */
    @Test
    public void testKnownContigOrder() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
        // assert that it's false when the contig ordering is not setup
        assertTrue(!GenomeLocParser.hasKnownContigOrdering());
        GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
        // assert that it's true when it is setup
        assertTrue(GenomeLocParser.hasKnownContigOrdering());
    }

    /** Looking up the index of an unknown contig is expected to throw. */
    @Test(expected = RuntimeException.class)
    public void testGetContigIndex() {
        assertEquals(-1, GenomeLocParser.getContigIndex("blah")); // "blah" should NOT be in the reference
    }

    /** A contig present in the registered dictionary resolves to its index. */
    @Test
    public void testGetContigIndexValid() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
        // register the dictionary here so this test does not depend on another test having done it
        GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
        assertEquals(0, GenomeLocParser.getContigIndex("chr1")); // should be in the reference
    }

    /** Looking up the sequence record of an unknown contig yields null. */
    @Test
    public void testGetContigInfoUnknownContig() {
        assertEquals(null, GenomeLocParser.getContigInfo("blah")); // "blah" should NOT be in the reference
    }

    /** A contig present in the registered dictionary returns a record carrying its name. */
    @Test
    public void testGetContigInfoKnownContig() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
        // register the dictionary here so this test does not depend on another test having done it
        GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
        assertEquals("chr1", GenomeLocParser.getContigInfo("chr1").getSequenceName()); // should be in the reference
    }

    /** A location string naming an unknown contig is rejected. */
    @Test(expected = StingException.class)
    public void testParseBadString() {
        GenomeLocParser.parseGenomeLoc("Bad:0-1");
    }

    /** A well-formed "contig:start-stop" string parses into the matching location. */
    @Test
    public void testParseGoodString() {
        GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-100");
        assertEquals(0, loc.getContigIndex());
        assertEquals(100, loc.getStop());
        assertEquals(1, loc.getStart());
    }

    /** A location list containing a bad entry is rejected as a whole. */
    @Test(expected = RuntimeException.class)
    public void testParseBadLocations() {
        GenomeLocParser.parseGenomeLocs("chr1:1-1;badChr:1-0");
    }

    /** A semicolon-separated list of valid locations parses without error. */
    @Test
    public void testParseGoodLocations() {
        GenomeLocParser.parseGenomeLocs("chr1:1-1;chr1:5-9");
    }

    /** Empty entries produced by doubled or trailing semicolons are rejected. */
    @Test(expected = RuntimeException.class)
    public void testParseGoodLocationsTooManySemiColons() {
        GenomeLocParser.parseGenomeLocs("chr1:1-1;;chr1:5-9;");
    }

    /** Factory overload: contig name, start, stop. */
    @Test
    public void testCreateGenomeLoc1() {
        GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1, 100);
        assertEquals(0, loc.getContigIndex());
        assertEquals(100, loc.getStop());
        assertEquals(1, loc.getStart());
    }

    /** Factory overload: contig index, start, stop. */
    @Test
    public void testCreateGenomeLoc2() {
        GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100);
        assertEquals(0, loc.getContigIndex());
        assertEquals(100, loc.getStop());
        assertEquals(1, loc.getStart());
    }

    /** Factory overload: contig index and a single position (start == stop). */
    @Test
    public void testCreateGenomeLoc3() {
        GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1);
        assertEquals(0, loc.getContigIndex());
        assertEquals(1, loc.getStop());
        assertEquals(1, loc.getStart());
    }

    /** Factory overload: contig name and a single position (start == stop). */
    @Test
    public void testCreateGenomeLoc4() {
        GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1);
        assertEquals(0, loc.getContigIndex());
        assertEquals(1, loc.getStop());
        assertEquals(1, loc.getStart());
    }

    /** Factory overload: copying an existing location preserves contig, start and stop. */
    @Test
    public void testCreateGenomeLoc5() {
        GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100);
        GenomeLoc copy = GenomeLocParser.createGenomeLoc(loc);
        assertEquals(0, copy.getContigIndex());
        assertEquals(100, copy.getStop());
        assertEquals(1, copy.getStart());
    }
}

View File

@ -44,13 +44,13 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Before
public void setup() {
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
mSortedSet = new GenomeLocSortedSet();
}
@Test
public void testAdd() {
GenomeLoc g = new GenomeLoc(1, 0, 0);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0);
assertTrue(mSortedSet.size() == 0);
mSortedSet.add(g);
assertTrue(mSortedSet.size() == 1);
@ -59,7 +59,7 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Test
public void testRemove() {
assertTrue(mSortedSet.size() == 0);
GenomeLoc g = new GenomeLoc(1, 0, 0);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0);
mSortedSet.add(g);
assertTrue(mSortedSet.size() == 1);
mSortedSet.remove(g);
@ -69,9 +69,9 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Test
public void addRegion() {
assertTrue(mSortedSet.size() == 0);
GenomeLoc g = new GenomeLoc(1, 1, 50);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 1, 50);
mSortedSet.add(g);
GenomeLoc f = new GenomeLoc(1, 30, 80);
GenomeLoc f = GenomeLocParser.createGenomeLoc(1, 30, 80);
mSortedSet.addRegion(f);
assertTrue(mSortedSet.size() == 1);
@ -81,7 +81,7 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Test(expected = IllegalArgumentException.class)
public void testAddDupplicate() {
assertTrue(mSortedSet.size() == 0);
GenomeLoc g = new GenomeLoc(1, 0, 0);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0);
mSortedSet.add(g);
assertTrue(mSortedSet.size() == 1);
mSortedSet.add(g);
@ -89,8 +89,8 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Test
public void mergingOverlappingBelow() {
GenomeLoc g = new GenomeLoc(1, 0, 50);
GenomeLoc e = new GenomeLoc(1, 49, 100);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 50);
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 49, 100);
assertTrue(mSortedSet.size() == 0);
mSortedSet.add(g);
assertTrue(mSortedSet.size() == 1);
@ -105,8 +105,8 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Test
public void mergingOverlappingAbove() {
GenomeLoc e = new GenomeLoc(1, 0, 50);
GenomeLoc g = new GenomeLoc(1, 49, 100);
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 0, 50);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 49, 100);
assertTrue(mSortedSet.size() == 0);
mSortedSet.add(g);
assertTrue(mSortedSet.size() == 1);
@ -121,13 +121,13 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Test
public void deleteSubRegion() {
GenomeLoc e = new GenomeLoc(1, 0, 50);
GenomeLoc g = new GenomeLoc(1, 49, 100);
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 0, 50);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 49, 100);
mSortedSet.add(g);
mSortedSet.addRegion(e);
// now delete a region
GenomeLoc d = new GenomeLoc(1, 25, 75);
GenomeLoc d = GenomeLocParser.createGenomeLoc(1, 25, 75);
mSortedSet.removeRegion(d);
Iterator<GenomeLoc> iter = mSortedSet.iterator();
GenomeLoc loc = iter.next();
@ -143,20 +143,20 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Test
public void deleteAllByRegion() {
GenomeLoc e = new GenomeLoc(1, 1, 100);
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100);
mSortedSet.add(e);
for (int x = 1; x < 101; x++) {
GenomeLoc del = new GenomeLoc(1,x,x);
GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x);
mSortedSet.removeRegion(del);
}
assertTrue(mSortedSet.isEmpty());
}
@Test
public void deleteSomeByRegion() {
GenomeLoc e = new GenomeLoc(1, 1, 100);
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100);
mSortedSet.add(e);
for (int x = 1; x < 50; x++) {
GenomeLoc del = new GenomeLoc(1,x,x);
GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x);
mSortedSet.removeRegion(del);
}
assertTrue(!mSortedSet.isEmpty());
@ -169,13 +169,13 @@ public class GenomeLocSortedSetTest extends BaseTest {
@Test
public void deleteSuperRegion() {
GenomeLoc e = new GenomeLoc(1, 10, 20);
GenomeLoc g = new GenomeLoc(1, 70, 100);
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 10, 20);
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 70, 100);
mSortedSet.add(g);
mSortedSet.addRegion(e);
assertTrue(mSortedSet.size() == 2);
// now delete a region
GenomeLoc d = new GenomeLoc(1, 15, 75);
GenomeLoc d = GenomeLocParser.createGenomeLoc(1, 15, 75);
mSortedSet.removeRegion(d);
Iterator<GenomeLoc> iter = mSortedSet.iterator();
GenomeLoc loc = iter.next();

View File

@ -22,7 +22,7 @@ public class GenomeLocTest extends BaseTest {
public static void init() {
// sequence
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
GenomeLoc.setupRefContigOrdering(seq);
GenomeLocParser.setupRefContigOrdering(seq);
}
/**
@ -32,10 +32,10 @@ public class GenomeLocTest extends BaseTest {
public void testIsBetween() {
logger.warn("Executing testIsBetween");
GenomeLoc locMiddle = new GenomeLoc("chr1", 3, 3);
GenomeLoc locMiddle = GenomeLocParser.createGenomeLoc("chr1", 3, 3);
GenomeLoc locLeft = new GenomeLoc("chr1", 1, 1);
GenomeLoc locRight = new GenomeLoc("chr1", 5, 5);
GenomeLoc locLeft = GenomeLocParser.createGenomeLoc("chr1", 1, 1);
GenomeLoc locRight = GenomeLocParser.createGenomeLoc("chr1", 5, 5);
Assert.assertTrue(locMiddle.isBetween(locLeft, locRight));
Assert.assertFalse(locLeft.isBetween(locMiddle, locRight));
@ -45,15 +45,15 @@ public class GenomeLocTest extends BaseTest {
@Test
public void testContigIndex() {
logger.warn("Executing testContigIndex");
GenomeLoc locOne = new GenomeLoc("chr1",1,1);
GenomeLoc locOne = GenomeLocParser.createGenomeLoc("chr1",1,1);
Assert.assertEquals(locOne.getContigIndex(), 1);
Assert.assertEquals(locOne.getContig(), "chr1");
GenomeLoc locX = new GenomeLoc("chrX",1,1);
GenomeLoc locX = GenomeLocParser.createGenomeLoc("chrX",1,1);
Assert.assertEquals(locX.getContigIndex(), 23);
Assert.assertEquals(locX.getContig(), "chrX");
GenomeLoc locNumber = new GenomeLoc(1,1,1);
GenomeLoc locNumber = GenomeLocParser.createGenomeLoc(1,1,1);
Assert.assertEquals(locNumber.getContigIndex(), 1);
Assert.assertEquals(locNumber.getContig(), "chr1");
Assert.assertEquals(locOne.compareTo(locNumber), 0);
@ -63,15 +63,15 @@ public class GenomeLocTest extends BaseTest {
@Test
public void testCompareTo() {
logger.warn("Executing testCompareTo");
GenomeLoc twoOne = new GenomeLoc("chr2", 1);
GenomeLoc twoFive = new GenomeLoc("chr2", 5);
GenomeLoc twoOtherFive = new GenomeLoc("chr2", 5);
GenomeLoc twoOne = GenomeLocParser.createGenomeLoc("chr2", 1);
GenomeLoc twoFive = GenomeLocParser.createGenomeLoc("chr2", 5);
GenomeLoc twoOtherFive = GenomeLocParser.createGenomeLoc("chr2", 5);
Assert.assertEquals(0, twoFive.compareTo(twoOtherFive));
Assert.assertEquals(-1, twoOne.compareTo(twoFive));
Assert.assertEquals(1, twoFive.compareTo(twoOne));
GenomeLoc oneOne = new GenomeLoc("chr1", 5);
GenomeLoc oneOne = GenomeLocParser.createGenomeLoc("chr1", 5);
Assert.assertEquals(-1, oneOne.compareTo(twoOne));
Assert.assertEquals(1, twoOne.compareTo(oneOne));
}

View File

@ -4,11 +4,12 @@ package org.broadinstitute.sting.utils;
// the imports for unit testing.
import org.junit.*;
import static org.junit.Assert.assertTrue;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
import org.broadinstitute.sting.utils.RefHanger;
import static org.junit.Assert.assertTrue;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.util.Arrays;
@ -69,7 +70,7 @@ public class RefHangerTest extends BaseTest {
public static void init() {
// sequence
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
GenomeLoc.setupRefContigOrdering(seq);
GenomeLocParser.setupRefContigOrdering(seq);
System.out.printf("Filled hanger is %n%s%n", makeFilledHanger());
}
@ -81,7 +82,7 @@ public class RefHangerTest extends BaseTest {
l3 = Arrays.asList(6);
l4 = Arrays.asList(7, 8);
l5 = Arrays.asList(9, 10);
p1 = new GenomeLoc(0, 1, 1);
p1 = GenomeLocParser.createGenomeLoc(0, 1, 1);
p2 = new GenomeLoc(p1).nextLoc();
p3 = new GenomeLoc(p2).nextLoc();
p4 = new GenomeLoc(p3).nextLoc();
@ -94,7 +95,7 @@ public class RefHangerTest extends BaseTest {
@Before
public void setupHanger() {
startLoc = new GenomeLoc(0, 1, 1); // chrM 1
startLoc = GenomeLocParser.createGenomeLoc(0, 1, 1); // chrM 1
emptyHanger = new RefHanger<Integer>();
filledHanger = makeFilledHanger();
@ -145,7 +146,7 @@ public class RefHangerTest extends BaseTest {
assertTrue(filledHanger.hasLocation(p3));
assertTrue(filledHanger.hasLocation(p4));
assertTrue(filledHanger.hasLocation(p5));
assertTrue(! filledHanger.hasLocation(new GenomeLoc(0, 6, 6)));
assertTrue(! filledHanger.hasLocation(GenomeLocParser.createGenomeLoc(0, 6, 6)));
assertTrue(filledHanger.getHanger(0) != null);
assertTrue(filledHanger.getHanger(1) != null);