Aaron: 1, GenomeLoc: 0. I changed our GenomeLoc class, separating the creation of a GenomeLoc (with the reference setup) into a parser class. GenomeLoc now just represents the actual genomic position. The constructors are now package-protected (to enforce using the parser), but we may want to expose some constructors in the future.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1069 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
26eb362f52
commit
bcb64d92e9
|
|
@ -101,7 +101,7 @@ public class GenomeAnalysisEngine {
|
|||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
||||
if (argCollection.referenceFile != null) {
|
||||
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
|
||||
GenomeLoc.setupRefContigOrdering(refFile);
|
||||
GenomeLocParser.setupRefContigOrdering(refFile);
|
||||
}
|
||||
|
||||
// Determine the validation stringency. Default to ValidationStringency.STRICT.
|
||||
|
|
@ -145,7 +145,7 @@ public class GenomeAnalysisEngine {
|
|||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
||||
if (argCollection.referenceFile != null) {
|
||||
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(argCollection.referenceFile);
|
||||
GenomeLoc.setupRefContigOrdering(refFile);
|
||||
GenomeLocParser.setupRefContigOrdering(refFile);
|
||||
}
|
||||
|
||||
// Determine the validation stringency. Default to ValidationStringency.STRICT.
|
||||
|
|
@ -222,10 +222,10 @@ public class GenomeAnalysisEngine {
|
|||
if ( intervalsString != null) {
|
||||
if (new File(intervalsString).exists()) {
|
||||
if (! quiet) logger.info("Intervals argument specifies a file. Loading intervals from file.");
|
||||
locs = GenomeLoc.IntervalFileToList(intervalsString);
|
||||
locs = GenomeLocParser.intervalFileToList(intervalsString);
|
||||
} else {
|
||||
if (! quiet) logger.info("Intervals argument does not specify a file. Trying to parse it as a simple string.");
|
||||
locs = GenomeLoc.parseGenomeLocs(intervalsString);
|
||||
locs = GenomeLocParser.parseGenomeLocs(intervalsString);
|
||||
}
|
||||
}
|
||||
return locs;
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Iterator;
|
||||
/**
|
||||
|
|
@ -63,7 +64,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
|||
LocusShardStrategy( SAMSequenceDictionary dic ) {
|
||||
this.dic = dic;
|
||||
limitingFactor = -1;
|
||||
mLoc = new GenomeLoc(0, 0, 0);
|
||||
mLoc = GenomeLocParser.createGenomeLoc(0, 0, 0);
|
||||
if (dic.getSequences().size() > 0) {
|
||||
nextContig = true;
|
||||
}
|
||||
|
|
@ -98,7 +99,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
|||
throw new IllegalArgumentException("Interval files must contain at least one interval");
|
||||
}
|
||||
GenomeLoc loc = intervals.iterator().next();
|
||||
mLoc = new GenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1);
|
||||
mLoc = GenomeLocParser.createGenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1);
|
||||
if (dic.getSequences().size() > 0) {
|
||||
nextContig = true;
|
||||
}
|
||||
|
|
@ -174,7 +175,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
|||
intervals.removeRegion(loc);
|
||||
return new IntervalShard(loc);
|
||||
} else {
|
||||
GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1);
|
||||
GenomeLoc subLoc = GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1);
|
||||
intervals.removeRegion(subLoc);
|
||||
return new IntervalShard(subLoc);
|
||||
}
|
||||
|
|
@ -193,7 +194,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
|||
// can we fit it into the current seq size?
|
||||
if (nextStart + proposedSize - 1 < length) {
|
||||
lastGenomeLocSize = proposedSize;
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
|
||||
mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
|
||||
return LocusShard.toShard(mLoc);
|
||||
}
|
||||
// else we can't make it in the current location, we have to stitch one together
|
||||
|
|
@ -207,7 +208,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
|||
|
||||
// move to the next contig
|
||||
// the next sequence should start at the begining of the next contig
|
||||
Shard ret = LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1));
|
||||
Shard ret = LocusShard.toShard(GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + lastGenomeLocSize - 1));
|
||||
|
||||
// now jump ahead to the next contig
|
||||
jumpContig();
|
||||
|
|
@ -226,7 +227,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
|
|||
return;
|
||||
}
|
||||
logger.debug("Next contig, index = " + dic.getSequence(seqLoc).getSequenceIndex());
|
||||
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), 0, 0);
|
||||
mLoc = GenomeLocParser.createGenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), 0, 0);
|
||||
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import org.broadinstitute.sting.gatk.Reads;
|
|||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -184,7 +185,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
|
||||
if (!intoUnmappedReads) {
|
||||
if (lastReadPos == null) {
|
||||
lastReadPos = new GenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
|
||||
lastReadPos = GenomeLocParser.createGenomeLoc(getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, Integer.MAX_VALUE);
|
||||
iter = iteratorPool.iterator(lastReadPos);
|
||||
return InitialReadIterator(shard.getSize(), iter);
|
||||
} else {
|
||||
|
|
@ -280,7 +281,8 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
++x;
|
||||
} else {
|
||||
// jump contigs
|
||||
if (lastReadPos.toNextContig() == false) {
|
||||
lastReadPos = GenomeLocParser.toNextContig(lastReadPos);
|
||||
if (lastReadPos == null) {
|
||||
// check to see if we're using unmapped reads, if not return, we're done
|
||||
readsTaken = 0;
|
||||
intoUnmappedReads = true;
|
||||
|
|
@ -304,7 +306,7 @@ public class SAMDataSource implements SimpleDataSource {
|
|||
else if (rec != null) {
|
||||
int stopPos = rec.getAlignmentStart();
|
||||
if (stopPos < lastReadPos.getStart()) {
|
||||
lastReadPos = new GenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos);
|
||||
lastReadPos = GenomeLocParser.createGenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos);
|
||||
} else {
|
||||
lastReadPos.setStart(rec.getAlignmentStart());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,9 +18,9 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -202,7 +202,7 @@ public abstract class MicroScheduler {
|
|||
catch (FileNotFoundException ex) {
|
||||
throw new StingException("I/O error while opening fasta file: " + ex.getMessage(), ex);
|
||||
}
|
||||
GenomeLoc.setupRefContigOrdering(ref);
|
||||
GenomeLocParser.setupRefContigOrdering(ref);
|
||||
return ref;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
/**
|
||||
|
|
@ -39,7 +39,7 @@ public class GenomeLocusIterator implements LocusIterator {
|
|||
*/
|
||||
public GenomeLocusIterator( GenomeLoc completeLocus ) {
|
||||
this.completeLocus = completeLocus;
|
||||
this.currentLocus = new GenomeLoc(completeLocus.getContig(),completeLocus.getStart());
|
||||
this.currentLocus = GenomeLocParser.createGenomeLoc(completeLocus.getContig(),completeLocus.getStart());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,15 +1,16 @@
|
|||
package org.broadinstitute.sting.gatk.iterators;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.AlignmentBlock;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.RefHanger;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.broadinstitute.sting.utils.RefHanger;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Iterator that traverses a SAM File, accumulating information on a per-locus basis
|
||||
*/
|
||||
|
|
@ -103,12 +104,12 @@ public class LocusContextIteratorByHanger extends LocusContextIterator {
|
|||
}
|
||||
|
||||
protected void hangRead(final SAMRecord read) {
|
||||
GenomeLoc readLoc = new GenomeLoc(read);
|
||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
||||
|
||||
for ( AlignmentBlock block : read.getAlignmentBlocks() ) {
|
||||
if ( DEBUG ) logger.debug(String.format("Processing block %s len=%d", block, block.getLength()));
|
||||
for ( int i = 0; i < block.getLength(); i++ ) {
|
||||
GenomeLoc offset = new GenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i);
|
||||
GenomeLoc offset = GenomeLocParser.createGenomeLoc(readLoc.getContigIndex(), block.getReferenceStart() + i);
|
||||
readHanger.expandingPut(offset, read);
|
||||
offsetHanger.expandingPut(offset, block.getReadStart() + i - 1);
|
||||
if ( DEBUG ) logger.debug(String.format(" # Added %s", offset));
|
||||
|
|
@ -134,7 +135,7 @@ public class LocusContextIteratorByHanger extends LocusContextIterator {
|
|||
return true;
|
||||
else {
|
||||
final SAMRecord read = it.peek();
|
||||
GenomeLoc readLoc = new GenomeLoc(read);
|
||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
||||
final boolean coveredP = currentPositionIsFullyCovered(readLoc);
|
||||
//System.out.printf("CoverP = %s => %b%n", readLoc, coveredP);
|
||||
return coveredP;
|
||||
|
|
@ -161,7 +162,7 @@ public class LocusContextIteratorByHanger extends LocusContextIterator {
|
|||
SAMRecord read = it.next();
|
||||
justCleared = false;
|
||||
|
||||
GenomeLoc readLoc = new GenomeLoc(read);
|
||||
GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
||||
if ( DEBUG ) {
|
||||
logger.debug(String.format(" Expanding window sizes %d with %d : left=%s, right=%s, readLoc = %s, cmp=%d",
|
||||
readHanger.size(), incrementSize,
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import net.sf.samtools.util.RuntimeIOException;
|
|||
import net.sf.samtools.util.StringUtil;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
|
@ -65,7 +66,7 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
|
|||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return new GenomeLoc(getCurrentContig().getName(), getPosition());
|
||||
return GenomeLocParser.createGenomeLoc(getCurrentContig().getName(), getPosition());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -137,11 +138,11 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
|
|||
if (seekOffset < offset ) {
|
||||
// bad boy -- can't go backward safely
|
||||
throw new IllegalArgumentException(String.format("Invalid seek %s => %s, which is usually due to out of order reads%n",
|
||||
new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, seekOffset)));
|
||||
GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset)));
|
||||
} else if (seekOffset >= currentContig.length()) {
|
||||
// bad boy -- can't go beyond the contig length
|
||||
throw new IllegalArgumentException(String.format("Invalid seek to %s, which is beyond the end of the contig%n",
|
||||
new GenomeLoc(currentContig.getName(), seekOffset + 1)));
|
||||
GenomeLocParser.createGenomeLoc(currentContig.getName(), seekOffset + 1)));
|
||||
} else {
|
||||
offset = seekOffset - 1;
|
||||
return next();
|
||||
|
|
@ -160,15 +161,16 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
|
|||
if (DEBUG)
|
||||
logger.debug(String.format(" -> Seeking to %s %d from %s %d%n", seekContigName, seekOffset, currentContig.getName(), offset));
|
||||
|
||||
int cmpContigs = GenomeLoc.compareContigs(seekContigName, currentContig.getName());
|
||||
|
||||
int cmpContigs = GenomeLocParser.compareContigs(seekContigName,currentContig.getName());
|
||||
|
||||
if ( cmpContigs < 0 && GenomeLoc.hasKnownContigOrdering() ) { // if we know the order of contigs and we are already past the contig we seek, it's too late!
|
||||
if ( cmpContigs < 0 && GenomeLocParser.hasKnownContigOrdering() ) { // if we know the order of contigs and we are already past the contig we seek, it's too late!
|
||||
// The contig we are looking for is before the currentContig -- it's an error
|
||||
throw new IllegalArgumentException(String.format("Invalid seek %s => %s, contigs/sequences are out of order%n",
|
||||
new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, seekOffset)));
|
||||
GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset)));
|
||||
}
|
||||
|
||||
if ( cmpContigs > 0 || (! GenomeLoc.hasKnownContigOrdering() ) && cmpContigs != 0 ) { // if contig we seek is still ahead, or if we have no idea what the order is and current contig is not what we seek
|
||||
if ( cmpContigs > 0 || (! GenomeLocParser.hasKnownContigOrdering() ) && cmpContigs != 0 ) { // if contig we seek is still ahead, or if we have no idea what the order is and current contig is not what we seek
|
||||
// then try to seek forward in the reference file until we get the contig we need
|
||||
if (DEBUG)
|
||||
logger.debug(String.format(" -> Seeking in the fasta file to %s from %s%n", seekContigName, currentContig.getName()));
|
||||
|
|
@ -176,7 +178,7 @@ public class ReferenceIterator implements Iterator<ReferenceIterator> {
|
|||
if (!refFile.seekToContig(seekContigName)) { // ok, do the seek
|
||||
// a false result indicates a failure, throw a somewhat cryptic call
|
||||
throw new RuntimeIOException(String.format("Unexpected seek failure from %s to %s%n",
|
||||
new GenomeLoc(currentContig.getName(), offset), new GenomeLoc(seekContigName, seekOffset)));
|
||||
GenomeLocParser.createGenomeLoc(currentContig.getName(), offset), GenomeLocParser.createGenomeLoc(seekContigName, seekOffset)));
|
||||
}
|
||||
|
||||
readNextContig(); // since we haven't failed, we just read in the next contig (which is seekContigName)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.iterators;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
|
@ -63,8 +64,8 @@ public class VerifyingSamIterator implements StingSAMIterator {
|
|||
if ( last == null || cur.getReadUnmappedFlag() )
|
||||
return false;
|
||||
else {
|
||||
GenomeLoc lastLoc = new GenomeLoc( last );
|
||||
GenomeLoc curLoc = new GenomeLoc( cur );
|
||||
GenomeLoc lastLoc = GenomeLocParser.createGenomeLoc( last );
|
||||
GenomeLoc curLoc = GenomeLocParser.createGenomeLoc( cur );
|
||||
return curLoc.compareTo(lastLoc) == -1;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -79,7 +80,7 @@ public class HapMapAlleleFrequenciesROD extends BasicReferenceOrderedDatum {
|
|||
varFreq = Double.parseDouble(parts[11]); // CEU_var_freq
|
||||
totalCounts = Integer.parseInt(parts[12]); // CEU_var
|
||||
|
||||
loc = GenomeLoc.parseGenomeLoc(contig, start, stop);
|
||||
loc = GenomeLocParser.parseGenomeLoc(contig, start, stop);
|
||||
|
||||
} catch ( RuntimeException e ) {
|
||||
System.out.printf(" Exception caught during parsing HapMap Allele Freq %s%n", Utils.join(" <=> ", parts));
|
||||
|
|
|
|||
|
|
@ -1,19 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
/**
|
||||
* loc ref alt EM_alt_freq discovery_likelihood discovery_null discovery_prior discovery_lod EM_N n_ref n_het n_hom
|
||||
|
|
@ -28,7 +18,7 @@ public class KGenomesSNPROD extends TabularROD implements SNPCallFromGenotypes {
|
|||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
loc = new GenomeLoc(this.get("0"), Long.parseLong(this.get("1")));
|
||||
loc = GenomeLocParser.createGenomeLoc(this.get("0"), Long.parseLong(this.get("1")));
|
||||
return loc;
|
||||
}
|
||||
public String getRefBasesFWD() { return this.get("2"); }
|
||||
|
|
|
|||
|
|
@ -6,10 +6,7 @@ import java.util.*;
|
|||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.Pileup;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFileWalker;
|
||||
|
||||
|
|
@ -97,8 +94,8 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
if ( refBaseChar == '*' ) {
|
||||
|
||||
parseIndels(observedString) ;
|
||||
if ( varType == DELETION_VARIANT ) loc = new GenomeLoc(contig, start, start+eventLength-1);
|
||||
else loc = new GenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
if ( varType == DELETION_VARIANT ) loc = GenomeLocParser.createGenomeLoc(contig, start, start+eventLength-1);
|
||||
else loc = GenomeLocParser.createGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
} else {
|
||||
parseBasesAndQuals(line,pos[7]+1,pos[8], pos[8]+1, ( pos.length > 9 ? pos[9] : line.length()) );
|
||||
// parseBasesAndQuals(line.substring(pos[7]+1,pos[8]), line.substring(pos[8]+1, ( pos.length > 9 ? pos[9] : line.length()) ) );
|
||||
|
|
@ -108,7 +105,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
refBases = line.substring(pos[1]+1, pos[2]).toUpperCase();
|
||||
eventLength = 1;
|
||||
//loc = new GenomeLoc(contig, start, start+1);
|
||||
loc = new GenomeLoc(contig, start, start);
|
||||
loc = GenomeLocParser.createGenomeLoc(contig, start, start);
|
||||
|
||||
char ch = observedString.charAt(0);
|
||||
|
||||
|
|
@ -170,8 +167,8 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
if ( refBaseChar == '*' ) {
|
||||
|
||||
parseIndels(parts[3]) ;
|
||||
if ( varType == DELETION_VARIANT ) loc = GenomeLoc.parseGenomeLoc(contig, start, start+eventLength-1);
|
||||
else loc = GenomeLoc.parseGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
if ( varType == DELETION_VARIANT ) loc = GenomeLocParser.parseGenomeLoc(contig, start, start+eventLength-1);
|
||||
else loc = GenomeLocParser.parseGenomeLoc(contig, start, start-1); // if it's not a deletion and we are biallelic, this got to be an insertion; otherwise the state is inconsistent!!!!
|
||||
}
|
||||
else {
|
||||
parseBasesAndQuals(parts[8], parts[9]);
|
||||
|
|
@ -181,7 +178,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
refBases = parts[2].toUpperCase();
|
||||
eventLength = 1;
|
||||
//loc = GenomeLoc.parseGenomeLoc(contig, start, start+1);
|
||||
loc = GenomeLoc.parseGenomeLoc(contig, start, start);
|
||||
loc = GenomeLocParser.parseGenomeLoc(contig, start, start);
|
||||
|
||||
char ch = parts[3].charAt(0);
|
||||
|
||||
|
|
@ -623,7 +620,7 @@ class SAMPileupRecord implements Genotype, GenotypeList, Pileup {
|
|||
System.exit(1);
|
||||
}
|
||||
|
||||
GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||
|
||||
int counter = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,17 +1,16 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
|
|
@ -201,7 +200,7 @@ public class TabularROD extends BasicReferenceOrderedDatum implements Map<String
|
|||
String s = get(header.get(0));
|
||||
if ( s == null )
|
||||
return null;
|
||||
return GenomeLoc.parseGenomeLoc(s);
|
||||
return GenomeLocParser.parseGenomeLoc(s);
|
||||
}
|
||||
|
||||
public ArrayList<String> getHeader() {
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import java.util.List;
|
|||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
/** Holds a single transcript annotation: refseq id, gene name, genomic locations of the locus, of the coding region
|
||||
* and of all the exons.
|
||||
|
|
@ -75,8 +76,8 @@ public class Transcript {
|
|||
else throw new StingException("Expected strand symbol (+/-), found: "+fields[3]);
|
||||
|
||||
String contig_name = fields[2];
|
||||
transcript_interval = GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
|
||||
transcript_coding_interval = GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]));
|
||||
transcript_interval = GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
|
||||
transcript_coding_interval = GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]));
|
||||
gene_name = fields[12];
|
||||
String[] exon_starts = fields[9].split(",");
|
||||
String[] exon_stops = fields[10].split(",");
|
||||
|
|
@ -89,7 +90,7 @@ public class Transcript {
|
|||
exon_frames = new ArrayList<Integer>(eframes.length);
|
||||
|
||||
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
|
||||
exons.add(GenomeLoc.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
||||
exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
||||
exon_frames.add(Integer.decode(eframes[i]));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,10 +4,10 @@ import net.sf.picard.util.SequenceUtil;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.MalformedGenomeLocException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.refdata.AllelicVariant;
|
||||
|
||||
/**
|
||||
|
|
@ -162,7 +162,7 @@ public class rodDbSNP extends BasicReferenceOrderedDatum implements AllelicVaria
|
|||
String contig = parts[1];
|
||||
long start = Long.parseLong(parts[2]) + 1; // The final is 0 based
|
||||
long stop = Long.parseLong(parts[3]) + 1; // The final is 0 based
|
||||
loc = GenomeLoc.parseGenomeLoc(contig, start, stop-1);
|
||||
loc = GenomeLocParser.parseGenomeLoc(contig, start, stop-1);
|
||||
|
||||
name = parts[4];
|
||||
refBases = parts[5];
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Scanner;
|
||||
import java.util.Map;
|
||||
import java.util.Scanner;
|
||||
import java.util.regex.MatchResult;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
/**
|
||||
* Class for representing arbitrary reference ordered data sets
|
||||
*
|
||||
|
|
@ -73,7 +73,7 @@ public class rodGFF extends BasicReferenceOrderedDatum {
|
|||
}
|
||||
|
||||
public GenomeLoc getLocation() {
|
||||
return GenomeLoc.parseGenomeLoc(contig, start, stop);
|
||||
return GenomeLocParser.parseGenomeLoc(contig, start, stop);
|
||||
}
|
||||
|
||||
public String getAttribute(final String key) {
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.Transcript;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
public class rodRefSeq extends BasicReferenceOrderedDatum {
|
||||
|
||||
|
|
@ -178,8 +179,8 @@ class refSeqIterator implements Iterator<rodRefSeq> {
|
|||
|
||||
while ( reader.hasNext() ) {
|
||||
Transcript t = reader.peek();
|
||||
int ci1 = GenomeLoc.getContigIndex(curr_contig_name);
|
||||
int ci2 = GenomeLoc.getContigIndex( t.getLocation().getContig() );
|
||||
int ci1 = GenomeLocParser.getContigIndex(curr_contig_name);
|
||||
int ci2 = GenomeLocParser.getContigIndex( t.getLocation().getContig() );
|
||||
if ( ci1 > ci2 ) throw new StingException("RefSeq track seems to be not contig-ordered");
|
||||
if ( ci1 < ci2 ) break; // next transcript is on the next contig, we do not need it yet...
|
||||
if ( t.getLocation().getStart() > curr_position ) break; // next transcript is on the same contig but starts after the current position; we are done
|
||||
|
|
@ -192,7 +193,7 @@ class refSeqIterator implements Iterator<rodRefSeq> {
|
|||
|
||||
// 'records' and current position are fully updated. We can now create new rod and return it (NOTE: this iterator will break if the list
|
||||
// of pre-loaded records is meddled with by the clients between iterations, so we return them as unmodifiable list)
|
||||
rodRefSeq rod = new rodRefSeq(name,GenomeLoc.parseGenomeLoc(curr_contig_name,curr_position, curr_position),Collections.unmodifiableList(records));
|
||||
rodRefSeq rod = new rodRefSeq(name, GenomeLocParser.parseGenomeLoc(curr_contig_name,curr_position, curr_position),Collections.unmodifiableList(records));
|
||||
// if ( (++z) % 1000000 == 0 ) {
|
||||
// System.out.println(rod.getLocation()+": holding "+records.size()+ "; time per 1M ref positions: "+((double)(System.currentTimeMillis()-t)/1000.0)+" s");
|
||||
// z = 0;
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import java.util.*;
|
|||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFileWalker;
|
||||
|
||||
|
|
@ -192,7 +193,7 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
|
|||
System.exit(1);
|
||||
}
|
||||
|
||||
GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||
|
||||
int counter = 0;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
|
@ -22,7 +23,7 @@ public class rodVariants extends BasicReferenceOrderedDatum {
|
|||
|
||||
public boolean parseLine(Object header, String[] parts) throws IOException {
|
||||
if (!parts[0].startsWith("#")) {
|
||||
loc = new GenomeLoc(parts[0], Long.valueOf(parts[1]));
|
||||
loc = GenomeLocParser.createGenomeLoc(parts[0], Long.valueOf(parts[1]));
|
||||
refBase = parts[2].charAt(0);
|
||||
depth = Integer.valueOf(parts[3]);
|
||||
maxMappingQuality = Integer.valueOf(parts[4]);
|
||||
|
|
|
|||
|
|
@ -443,7 +443,7 @@ public abstract class TraversalEngine {
|
|||
//this.refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(refFileName);
|
||||
this.refFile = new FastaSequenceFile2(refFileName); // todo: replace when FastaSequenceFile2 is in picard
|
||||
this.refIter = new ReferenceIterator(this.refFile);
|
||||
if (!GenomeLoc.setupRefContigOrdering(this.refFile)) {
|
||||
if (!GenomeLocParser.setupRefContigOrdering(this.refFile)) {
|
||||
// We couldn't process the reference contig ordering, fail since we need it
|
||||
Utils.scareUser(String.format("We couldn't load the contig dictionary associated with %s. At the current time we require this dictionary file to efficiently access the FASTA file. Please use /seq/software/picard/current/bin/CreateSequenceDictionary.jar to create a sequence dictionary for your file", refFileName));
|
||||
}
|
||||
|
|
@ -470,7 +470,7 @@ public abstract class TraversalEngine {
|
|||
while (true) {
|
||||
ReferenceSequence ref = refFile.nextSequence();
|
||||
logger.debug(String.format("%s %d %d", ref.getName(), ref.length(), System.currentTimeMillis()));
|
||||
printProgress(true, "loci", new GenomeLoc("foo", 1));
|
||||
printProgress(true, "loci", GenomeLocParser.createGenomeLoc("foo", 1));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
|
|||
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.File;
|
||||
|
|
@ -141,7 +142,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
walker.nonIntervalReadAction(read);
|
||||
}
|
||||
else {
|
||||
GenomeLoc loc = new GenomeLoc(read);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
|
||||
// if we're in the current interval, add it to the list
|
||||
if ( currentInterval.overlapsP(loc) ) {
|
||||
intervalReads.add(read);
|
||||
|
|
@ -243,7 +244,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
}
|
||||
}
|
||||
|
||||
GenomeLoc window = new GenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||
GenomeLoc window = GenomeLocParser.createGenomeLoc(interval.getContig(), leftmostIndex, rightmostIndex);
|
||||
LocusContext locus = new LocusContext(window, reads, null);
|
||||
if ( DOWNSAMPLE_BY_COVERAGE )
|
||||
locus.downsampleToCoverage(downsamplingCoverage);
|
||||
|
|
@ -255,8 +256,8 @@ public class TraverseByLocusWindows extends TraversalEngine {
|
|||
GenomeLoc loc = locus1.getLocation().merge(locus2.getLocation());
|
||||
TreeSet<SAMRecord> set = new TreeSet<SAMRecord>(new Comparator<SAMRecord>() {
|
||||
public int compare(SAMRecord obj1, SAMRecord obj2) {
|
||||
GenomeLoc myLoc = new GenomeLoc(obj1);
|
||||
GenomeLoc hisLoc = new GenomeLoc(obj2);
|
||||
GenomeLoc myLoc = GenomeLocParser.createGenomeLoc(obj1);
|
||||
GenomeLoc hisLoc = GenomeLocParser.createGenomeLoc(obj2);
|
||||
int comparison = myLoc.compareTo(hisLoc);
|
||||
// if the reads have the same start position, we must give a non-zero comparison
|
||||
// (because java Sets often require "consistency with equals")
|
||||
|
|
|
|||
|
|
@ -1,25 +1,28 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
import net.sf.picard.filter.FilteringIterator;
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -66,12 +69,12 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
|
||||
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter)
|
||||
{
|
||||
GenomeLoc site = new GenomeLoc(read);
|
||||
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
|
||||
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
|
||||
|
||||
l.add(read);
|
||||
for (SAMRecord read2: iter) {
|
||||
GenomeLoc site2 = new GenomeLoc(read2);
|
||||
GenomeLoc site2 = GenomeLocParser.createGenomeLoc(read2);
|
||||
|
||||
// the next read starts too late
|
||||
if ( site2.getStart() != site.getStart() ) {
|
||||
|
|
@ -105,12 +108,12 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
// At this point, there are two possibilities, we have found at least one dup or not
|
||||
// if it's a dup, add it to the dups list, otherwise add it to the uniques list
|
||||
if ( key != null ) {
|
||||
final GenomeLoc keyLoc = new GenomeLoc(key);
|
||||
final GenomeLoc keyMateLoc = new GenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
|
||||
final GenomeLoc keyLoc = GenomeLocParser.createGenomeLoc(key);
|
||||
final GenomeLoc keyMateLoc = GenomeLocParser.createGenomeLoc(key.getMateReferenceIndex(), key.getMateAlignmentStart(), key.getMateAlignmentStart());
|
||||
|
||||
for ( SAMRecord read : reads ) {
|
||||
final GenomeLoc readLoc = new GenomeLoc(read);
|
||||
final GenomeLoc readMateLoc = new GenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
|
||||
final GenomeLoc readLoc = GenomeLocParser.createGenomeLoc(read);
|
||||
final GenomeLoc readMateLoc = GenomeLocParser.createGenomeLoc(read.getMateReferenceIndex(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
|
||||
if (DEBUG) logger.debug(String.format("Examining reads at %s vs. %s at %s / %s vs. %s / %s%n", key.getReadName(), read.getReadName(), keyLoc, keyMateLoc, readLoc, readMateLoc));
|
||||
|
||||
// read and key start at the same place, and either the this read and the key
|
||||
|
|
@ -150,7 +153,7 @@ public class TraverseDuplicates extends TraversalEngine {
|
|||
PushbackIterator<SAMRecord> iter = new PushbackIterator<SAMRecord>(readIter);
|
||||
for (SAMRecord read: iter) {
|
||||
// get the genome loc from the read
|
||||
GenomeLoc site = new GenomeLoc(read);
|
||||
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
|
||||
List<SAMRecord> reads = readsAtLoc(read, iter);
|
||||
Pair<List<SAMRecord>, List<SAMRecord>> split = splitDuplicates(reads);
|
||||
List<SAMRecord> uniqueReads = split.getFirst();
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -121,7 +122,7 @@ public class TraverseReads extends TraversalEngine {
|
|||
|
||||
if (needsReferenceBasesP && read.getReferenceIndex() >= 0) {
|
||||
// get the genome loc from the read
|
||||
GenomeLoc site = new GenomeLoc(read);
|
||||
GenomeLoc site = GenomeLocParser.createGenomeLoc(read);
|
||||
|
||||
// Jump forward in the reference to this locus location
|
||||
locus = new LocusContext(site, Arrays.asList(read), Arrays.asList(0));
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.playground.gatk;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.picard.cmdline.CommandLineProgram;
|
||||
import net.sf.picard.cmdline.Usage;
|
||||
import net.sf.picard.cmdline.Option;
|
||||
|
|
@ -11,8 +10,6 @@ import org.broadinstitute.sting.utils.*;
|
|||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class PrepareROD extends CommandLineProgram {
|
||||
|
|
@ -33,7 +30,7 @@ public class PrepareROD extends CommandLineProgram {
|
|||
|
||||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
||||
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG);
|
||||
GenomeLoc.setupRefContigOrdering(refFile);
|
||||
GenomeLocParser.setupRefContigOrdering(refFile);
|
||||
|
||||
Class<? extends ReferenceOrderedDatum> rodClass = ReferenceOrderedData.Types.get(ROD_TYPE.toLowerCase()).type;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,24 +1,20 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.Queue;
|
||||
import java.util.LinkedList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
/**
|
||||
* User: hanna
|
||||
* User: hanna
|
||||
* Date: Jun 10, 2009
|
||||
* Time: 2:40:19 PM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
|
|
@ -99,7 +95,7 @@ public class CleanedReadInjector extends ReadWalker<Integer,Integer> {
|
|||
*/
|
||||
@Override
|
||||
public Integer map(char[] ref, SAMRecord read) {
|
||||
GenomeLoc loc = new GenomeLoc(read);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(read);
|
||||
|
||||
while( loc.isPast(interval) && intervals.size() > 0 ) {
|
||||
interval = intervals.remove();
|
||||
|
|
|
|||
|
|
@ -18,10 +18,10 @@ import org.broadinstitute.sting.gatk.refdata.RODIterator;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodRefSeq;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.playground.indels.AlignmentUtils;
|
||||
import org.broadinstitute.sting.playground.utils.CircularArray;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
|
||||
|
|
@ -82,7 +82,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
int nSams = getToolkit().getArguments().samFiles.size();
|
||||
|
||||
location = new GenomeLoc(0,1);
|
||||
location = GenomeLocParser.createGenomeLoc(0,1);
|
||||
|
||||
if ( call_somatic ) {
|
||||
if ( nSams != 2 ) {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.AlignmentBlock;
|
||||
|
||||
|
|
@ -35,8 +36,8 @@ public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval
|
|||
long indelLeftEdge = read.getAlignmentStart() + blocks.get(0).getLength() - 1;
|
||||
long indelRightEdge = read.getAlignmentEnd() - blocks.get(blocks.size()-1).getLength() + 1;
|
||||
|
||||
GenomeLoc indelLoc = new GenomeLoc(read.getReferenceIndex(), indelLeftEdge, indelRightEdge);
|
||||
GenomeLoc refLoc = new GenomeLoc(read);
|
||||
GenomeLoc indelLoc = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(), indelLeftEdge, indelRightEdge);
|
||||
GenomeLoc refLoc = GenomeLocParser.createGenomeLoc(read);
|
||||
|
||||
return new Interval(refLoc, indelLoc);
|
||||
}
|
||||
|
|
@ -81,7 +82,7 @@ public class IndelIntervalWalker extends ReadWalker<IndelIntervalWalker.Interval
|
|||
public Interval merge(Interval i) {
|
||||
long indelLeftEdge = Math.min(this.indelLoc.getStart(), i.indelLoc.getStart());
|
||||
long indelRightEdge = Math.max(this.indelLoc.getStop(), i.indelLoc.getStop());
|
||||
GenomeLoc mergedIndelLoc = new GenomeLoc(this.indelLoc.getContigIndex(), indelLeftEdge, indelRightEdge);
|
||||
GenomeLoc mergedIndelLoc = GenomeLocParser.createGenomeLoc(this.indelLoc.getContigIndex(), indelLeftEdge, indelRightEdge);
|
||||
Interval mergedInterval = new Interval(this.readLoc.merge(i.readLoc), mergedIndelLoc);
|
||||
mergedInterval.indelCount = this.indelCount + i.indelCount;
|
||||
return mergedInterval;
|
||||
|
|
|
|||
|
|
@ -915,7 +915,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
reads.add(r5);
|
||||
reads.add(r6);
|
||||
reads.add(r7);
|
||||
clean(reads, reference, new GenomeLoc(0,0));
|
||||
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
|
||||
}
|
||||
|
||||
private void testCleanWithDeletion() {
|
||||
|
|
@ -971,7 +971,7 @@ public class IntervalCleanerWalker extends LocusWindowWalker<Integer, Integer>
|
|||
reads.add(r6);
|
||||
reads.add(r7);
|
||||
reads.add(r8);
|
||||
clean(reads, reference, new GenomeLoc(0,0));
|
||||
clean(reads, reference, GenomeLocParser.createGenomeLoc(0,0));
|
||||
}
|
||||
|
||||
public static String cigarToString(Cigar cig) {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.playground.indels;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
|
|
@ -16,6 +15,7 @@ import net.sf.picard.reference.ReferenceSequence;
|
|||
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
public class IndelInspectorMain extends CommandLineProgram {
|
||||
|
||||
|
|
@ -55,10 +55,10 @@ public class IndelInspectorMain extends CommandLineProgram {
|
|||
System.out.println("No reference sequence dictionary found. Abort.");
|
||||
}
|
||||
|
||||
GenomeLoc.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(reference.getSequenceDictionary());
|
||||
GenomeLoc location = null;
|
||||
if ( GENOME_LOCATION != null ) {
|
||||
location = GenomeLoc.parseGenomeLoc(GENOME_LOCATION);
|
||||
location = GenomeLocParser.parseGenomeLoc(GENOME_LOCATION);
|
||||
}
|
||||
|
||||
if ( COUNT_CUTOFF == null ) COUNT_CUTOFF = 2;
|
||||
|
|
@ -117,8 +117,8 @@ public class IndelInspectorMain extends CommandLineProgram {
|
|||
cur_contig = r.getReferenceName();
|
||||
System.out.println("Contig "+cur_contig);
|
||||
// if contig is specified and we are past that contig, we are done:
|
||||
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == 1 ) break;
|
||||
if ( location == null || GenomeLoc.compareContigs(cur_contig, location.getContig()) == 0 ) {
|
||||
if ( location != null && GenomeLocParser.compareContigs(cur_contig, location.getContig()) == 1 ) break;
|
||||
if ( location == null || GenomeLocParser.compareContigs(cur_contig, location.getContig()) == 0 ) {
|
||||
if ( location != null ) System.out.println("Time spent to scroll input bam file to the specified chromosome: "+ ((System.currentTimeMillis()-tc)/1000) + " seconds.");
|
||||
tc = System.currentTimeMillis();
|
||||
contig_seq = reference.get(r.getReferenceIndex());
|
||||
|
|
@ -130,7 +130,7 @@ public class IndelInspectorMain extends CommandLineProgram {
|
|||
}
|
||||
|
||||
// if contig is specified and we did not reach it yet, skip the records until we reach that contig:
|
||||
if ( location != null && GenomeLoc.compareContigs(cur_contig, location.getContig()) == -1 ) continue;
|
||||
if ( location != null && GenomeLocParser.compareContigs(cur_contig, location.getContig()) == -1 ) continue;
|
||||
|
||||
if ( location != null && r.getAlignmentEnd() < location.getStart() ) continue;
|
||||
|
||||
|
|
@ -327,7 +327,7 @@ public class IndelInspectorMain extends CommandLineProgram {
|
|||
setDefaultContigOrdering();
|
||||
return;
|
||||
}
|
||||
GenomeLoc.setupRefContigOrdering(h.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(h.getSequenceDictionary());
|
||||
}
|
||||
|
||||
private void setDefaultContigOrdering() {
|
||||
|
|
|
|||
|
|
@ -1,17 +1,10 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import edu.mit.broad.picard.util.Interval;
|
||||
import edu.mit.broad.picard.directed.IntervalList;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -26,253 +19,49 @@ import java.util.regex.Pattern;
|
|||
public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
||||
private static Logger logger = Logger.getLogger(GenomeLoc.class);
|
||||
|
||||
/**
|
||||
* the basic components of a genome loc, its contig index,
|
||||
* start and stop position, and (optionally) the contig name
|
||||
*/
|
||||
private int contigIndex;
|
||||
private long start;
|
||||
private long stop;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Ugly global variable defining the optional ordering of contig elements
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//public static Map<String, Integer> refContigOrdering = null;
|
||||
private static SAMSequenceDictionary contigInfo = null;
|
||||
|
||||
public static boolean hasKnownContigOrdering() {
|
||||
return contigInfo != null;
|
||||
}
|
||||
|
||||
|
||||
public static SAMSequenceRecord getContigInfo( final String contig ) {
|
||||
return contigInfo.getSequence(contig);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the contig index of a specified string version of the contig
|
||||
* @param contig the contig string
|
||||
* @return the contig index, -1 if not found
|
||||
*/
|
||||
public static int getContigIndex( final String contig ) {
|
||||
if (contigInfo.getSequenceIndex(contig) == -1)
|
||||
Utils.scareUser(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
|
||||
|
||||
return contigInfo.getSequenceIndex(contig);
|
||||
}
|
||||
|
||||
public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) {
|
||||
return setupRefContigOrdering(refFile.getSequenceDictionary());
|
||||
}
|
||||
|
||||
public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) {
|
||||
if (seqDict == null) { // we couldn't load the reference dictionary
|
||||
logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
|
||||
Utils.scareUser("Failed to load reference dictionary");
|
||||
return false;
|
||||
} else if ( contigInfo == null ){
|
||||
contigInfo = seqDict;
|
||||
logger.debug(String.format("Prepared reference sequence contig dictionary"));
|
||||
for (SAMSequenceRecord contig : seqDict.getSequences() ) {
|
||||
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
private String contigName;
|
||||
static int MAX_CONTIG;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
public GenomeLoc( int contigIndex, final long start, final long stop ) {
|
||||
if(contigInfo == null) { throw new StingException("Contig info has not been setup in the GenomeLoc context yet."); }
|
||||
|
||||
if (!isSequenceIndexValid(contigIndex)) {
|
||||
throw new StingException("Contig info has not been setup in the GenomeLoc context yet.");
|
||||
}
|
||||
/*GenomeLoc( int contigIndex, final long start, final long stop ) {
|
||||
MAX_CONTIG = Integer.MAX_VALUE;
|
||||
if (start < 0) { throw new StingException("Bad start position " + start);}
|
||||
if (stop < -1) { throw new StingException("Bad stop position " + stop); } // a negative -1 indicates it's not a meaningful end position
|
||||
|
||||
this.contigIndex = contigIndex;
|
||||
this.start = start;
|
||||
this.contigName = null; // we just don't know
|
||||
this.stop = stop == -1 ? start : stop;
|
||||
}*/
|
||||
|
||||
GenomeLoc(final SAMRecord read) {
|
||||
this(read.getHeader().getSequence(read.getReferenceIndex()).getSequenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
|
||||
}
|
||||
|
||||
public GenomeLoc(final SAMRecord read) {
|
||||
this(read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd());
|
||||
GenomeLoc( final String contig, final int contigIndex, final long start, final long stop ) {
|
||||
this.contigName = contig;
|
||||
this.contigIndex = contigIndex;
|
||||
this.start = start;
|
||||
this.stop = stop;
|
||||
}
|
||||
|
||||
public GenomeLoc( final String contig, final long start, final long stop ) {
|
||||
this(contigInfo.getSequenceIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
public GenomeLoc( final String contig, final long pos ) {
|
||||
this(contig, pos, pos);
|
||||
}
|
||||
|
||||
public GenomeLoc( final int contig, final long pos ) {
|
||||
/*GenomeLoc( final int contig, final long pos ) {
|
||||
this(contig, pos, pos );
|
||||
}
|
||||
|
||||
public GenomeLoc( final GenomeLoc toCopy ) {
|
||||
this( toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() );
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Parsing string representations
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
private static long parsePosition( final String pos ) {
|
||||
String x = pos.replaceAll(",", "");
|
||||
return Long.parseLong(x);
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this static constructor when the input data is under limited control (i.e. parsing user data).
|
||||
* @param contig Contig to parse.
|
||||
* @param start Starting point.
|
||||
* @param stop Stop point.
|
||||
* @return The genome location, or a MalformedGenomeLocException if unparseable.
|
||||
*/
|
||||
public static GenomeLoc parseGenomeLoc( final String contig, long start, long stop ) {
|
||||
if( !isContigValid(contig) )
|
||||
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
|
||||
return new GenomeLoc(contig,start,stop);
|
||||
}
|
||||
|
||||
public static GenomeLoc parseGenomeLoc( final String str ) {
|
||||
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||
//System.out.printf("Parsing location '%s'%n", str);
|
||||
|
||||
final Pattern regex1 = Pattern.compile("([\\w&&[^:]]+)$"); // matches case 1
|
||||
final Pattern regex2 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)$"); // matches case 2
|
||||
final Pattern regex3 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)-([\\d,]+)$"); // matches case 3
|
||||
final Pattern regex4 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)\\+"); // matches case 4
|
||||
|
||||
String contig = null;
|
||||
long start = 1;
|
||||
long stop = Integer.MAX_VALUE;
|
||||
boolean bad = false;
|
||||
|
||||
Matcher match1 = regex1.matcher(str);
|
||||
Matcher match2 = regex2.matcher(str);
|
||||
Matcher match3 = regex3.matcher(str);
|
||||
Matcher match4 = regex4.matcher(str);
|
||||
|
||||
try {
|
||||
if ( match1.matches() ) {
|
||||
contig = match1.group(1);
|
||||
}
|
||||
else if ( match2.matches() ) {
|
||||
contig = match2.group(1);
|
||||
start = parsePosition(match2.group(2));
|
||||
stop = start;
|
||||
}
|
||||
else if ( match4.matches() ) {
|
||||
contig = match4.group(1);
|
||||
start = parsePosition(match4.group(2));
|
||||
}
|
||||
else if ( match3.matches() ) {
|
||||
contig = match3.group(1);
|
||||
start = parsePosition(match3.group(2));
|
||||
stop = parsePosition(match3.group(3));
|
||||
|
||||
if ( start > stop )
|
||||
bad = true;
|
||||
}
|
||||
else {
|
||||
bad = true;
|
||||
}
|
||||
} catch ( Exception e ) {
|
||||
bad = true;
|
||||
}
|
||||
|
||||
if ( bad ) {
|
||||
throw new StingException("Invalid Genome Location string: " + str);
|
||||
}
|
||||
|
||||
if ( stop == Integer.MAX_VALUE && hasKnownContigOrdering() ) {
|
||||
// lookup the actually stop position!
|
||||
stop = getContigInfo(contig).getSequenceLength();
|
||||
}
|
||||
|
||||
if( !isContigValid(contig) )
|
||||
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
|
||||
|
||||
GenomeLoc loc = parseGenomeLoc(contig,start,stop);
|
||||
// System.out.printf(" => Parsed location '%s' into %s%n", str, loc);
|
||||
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Useful utility function that parses a location string into a coordinate-order sorted
|
||||
* array of GenomeLoc objects
|
||||
*
|
||||
* @param str String representation of genome locs. Null string corresponds to no filter.
|
||||
* @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order
|
||||
*/
|
||||
public static List<GenomeLoc> parseGenomeLocs(final String str) {
|
||||
// Null string means no filter.
|
||||
if( str == null ) return null;
|
||||
|
||||
// Of the form: loc1;loc2;...
|
||||
// Where each locN can be:
|
||||
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||
try {
|
||||
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
||||
for( String loc: str.split(";") )
|
||||
locs.add( parseGenomeLoc(loc.trim()) );
|
||||
Collections.sort(locs);
|
||||
//logger.info(String.format("Going to process %d locations", locs.length));
|
||||
locs = mergeOverlappingLocations(locs);
|
||||
logger.debug("Locations are:" + Utils.join(", ", locs));
|
||||
return locs;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
Utils.scareUser(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str));
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
public static List<GenomeLoc> mergeOverlappingLocations(final List<GenomeLoc> raw) {
|
||||
logger.debug(" Raw locations are:\n" + Utils.join("\n", raw));
|
||||
if ( raw.size() <= 1 )
|
||||
return raw;
|
||||
else {
|
||||
ArrayList<GenomeLoc> merged = new ArrayList<GenomeLoc>();
|
||||
Iterator<GenomeLoc> it = raw.iterator();
|
||||
GenomeLoc prev = it.next();
|
||||
while ( it.hasNext() ) {
|
||||
GenomeLoc curr = it.next();
|
||||
if ( prev.contiguousP(curr) ) {
|
||||
prev = prev.merge(curr);
|
||||
} else {
|
||||
merged.add(prev);
|
||||
prev = curr;
|
||||
}
|
||||
}
|
||||
merged.add(prev);
|
||||
return merged;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Move this Genome loc to the next contig, with a start
|
||||
* and stop of 1.
|
||||
* @return true if we are not out of contigs, otherwise false if we're
|
||||
* at the end of the genome (no more contigs to jump to).
|
||||
*/
|
||||
public boolean toNextContig() {
|
||||
if ((contigIndex + 1) < GenomeLoc.contigInfo.size()) {
|
||||
this.contigIndex++;
|
||||
this.start = 1;
|
||||
this.stop = 1;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
*/
|
||||
GenomeLoc( final GenomeLoc toCopy ) {
|
||||
this( toCopy.getContig(), toCopy.contigIndex, toCopy.getStart(), toCopy.getStop() );
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -319,7 +108,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
return returnTrueIfEmpty;
|
||||
|
||||
// skip loci before intervals begin
|
||||
if ( hasKnownContigOrdering() && curr.contigIndex < locs.get(0).contigIndex )
|
||||
if ( curr.contigIndex < locs.get(0).contigIndex )
|
||||
return false;
|
||||
|
||||
for ( GenomeLoc loc : locs ) {
|
||||
|
|
@ -336,23 +125,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
// Accessors and setters
|
||||
//
|
||||
public final String getContig() {
|
||||
//this.contigIndex != -1;
|
||||
if (!(contigInfo != null && contigInfo.getSequences() != null)) {
|
||||
throw new StingException("The contig information or it's sequences are null");
|
||||
}
|
||||
if ((this.contigIndex < 0) || (this.contigIndex >= contigInfo.getSequences().size())) {
|
||||
throw new StingException("The contig index is not bounded by the zero and seqeunce count, contig index: " + contigIndex);
|
||||
}
|
||||
if (contigInfo.getSequence(this.contigIndex) == null ||
|
||||
contigInfo.getSequence(this.contigIndex).getSequenceName() == null) {
|
||||
throw new StingException("The associated sequence index for contig " + contigIndex + " is null");
|
||||
}
|
||||
return contigInfo.getSequence(this.contigIndex).getSequenceName();
|
||||
//if (contigInfo != null && contigInfo.getSequence(this.contigIndex) != null) {
|
||||
// return contigInfo.getSequence(this.contigIndex).getSequenceName();
|
||||
//}
|
||||
|
||||
//return null;
|
||||
return this.contigName;
|
||||
}
|
||||
|
||||
public final int getContigIndex() { return this.contigIndex; }
|
||||
|
|
@ -372,7 +145,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
public final boolean atBeginningOfContigP() { return this.start == 1; }
|
||||
|
||||
public void setContig(String contig) {
|
||||
this.contigIndex = contigInfo.getSequenceIndex(contig);
|
||||
this.contigName = contig;
|
||||
}
|
||||
|
||||
public void setStart(long start) {
|
||||
|
|
@ -411,7 +184,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
throw new StingException("The two genome loc's need to be contigous");
|
||||
}
|
||||
|
||||
return new GenomeLoc(getContig(),
|
||||
return new GenomeLoc(getContig(), this.contigIndex,
|
||||
Math.min(getStart(), that.getStart()),
|
||||
Math.max( getStop(), that.getStop()) );
|
||||
}
|
||||
|
|
@ -497,54 +270,11 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
return new GenomeLoc(this);
|
||||
}
|
||||
|
||||
//
|
||||
// Comparison operations
|
||||
//
|
||||
// TODO: get rid of this method because it's sloooooooooooooow
|
||||
@Deprecated
|
||||
public static int compareContigs( final String thisContig, final String thatContig )
|
||||
{
|
||||
if ( thisContig == thatContig )
|
||||
{
|
||||
// Optimization. If the pointers are equal, then the contigs are equal.
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( hasKnownContigOrdering() )
|
||||
{
|
||||
int thisIndex = getContigIndex(thisContig);
|
||||
int thatIndex = getContigIndex(thatContig);
|
||||
|
||||
if ( thisIndex == -1 )
|
||||
{
|
||||
if ( thatIndex == -1 )
|
||||
{
|
||||
// Use regular sorted order
|
||||
return thisContig.compareTo(thatContig);
|
||||
}
|
||||
else
|
||||
{
|
||||
// this is always bigger if that is in the key set
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
else if ( thatIndex == -1 )
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( thisIndex < thatIndex ) return -1;
|
||||
if ( thisIndex > thatIndex ) return 1;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
return thisContig.compareTo(thatContig);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* conpare this genomeLoc's contig to another genome loc
|
||||
* @param that
|
||||
* @return
|
||||
*/
|
||||
public final int compareContigs( GenomeLoc that ) {
|
||||
if (this.contigIndex == that.contigIndex)
|
||||
return 0;
|
||||
|
|
@ -570,64 +300,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Read a file of genome locations to process.
|
||||
* regions specified by the location string. The string is of the form:
|
||||
* Of the form: loc1;loc2;...
|
||||
* Where each locN can be:
|
||||
* 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||
*
|
||||
* @param file_name
|
||||
*/
|
||||
public static List<GenomeLoc> IntervalFileToList(final String file_name) {
|
||||
// first try to read it as an interval file since that's well structured
|
||||
// we'll fail quickly if it's not a valid file. Then try to parse it as
|
||||
// a location string file
|
||||
List<GenomeLoc> ret = null;
|
||||
try {
|
||||
IntervalList il = IntervalList.fromFile(new File(file_name));
|
||||
|
||||
// iterate through the list of merged intervals and add then as GenomeLocs
|
||||
ret = new ArrayList<GenomeLoc>();
|
||||
for(Interval interval : il.getUniqueIntervals()) {
|
||||
ret.add(new GenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd()));
|
||||
}
|
||||
return ret;
|
||||
|
||||
} catch (Exception e) {
|
||||
try {
|
||||
xReadLines reader = new xReadLines(new File(file_name));
|
||||
List<String> lines = reader.readLines();
|
||||
reader.close();
|
||||
String locStr = Utils.join(";", lines);
|
||||
logger.debug("locStr: " + locStr);
|
||||
ret = parseGenomeLocs(locStr);
|
||||
return ret;
|
||||
} catch (Exception e2) {
|
||||
logger.error("Attempt to parse interval file in GATK format failed: "+e2.getMessage());
|
||||
e2.printStackTrace();
|
||||
throw new StingException("Unable to parse out interval file in either format", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the given contig is valid with respect to the sequence dictionary
|
||||
* already installed in the GenomeLoc.
|
||||
* @return True if the contig is valid. False otherwise.
|
||||
*/
|
||||
private static boolean isContigValid( String contig ) {
|
||||
int contigIndex = contigInfo.getSequenceIndex(contig);
|
||||
return isSequenceIndexValid(contigIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the given sequence index is valid with respect to the sequence dictionary.
|
||||
* @param sequenceIndex sequence index
|
||||
* @return True if the sequence index is valid, false otherwise.
|
||||
*/
|
||||
private static boolean isSequenceIndexValid( int sequenceIndex ) {
|
||||
return sequenceIndex >= 0 && sequenceIndex < contigInfo.size();
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,484 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import edu.mit.broad.picard.directed.IntervalList;
|
||||
import edu.mit.broad.picard.util.Interval;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
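/**
 * Static parser and factory for {@link GenomeLoc} objects.
 *
 * Holds the reference sequence dictionary in a static field and uses it to turn
 * location strings, interval files, reads, and contig name/index coordinates into
 * GenomeLoc instances.
 *
 * User: aaronmckenna
 * Date: Jun 18, 2009
 * Time: 11:17:01 PM
 */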
|
||||
public class GenomeLocParser {
|
||||
private static Logger logger = Logger.getLogger(GenomeLocParser.class);
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Ugly global variable defining the optional ordering of contig elements
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//public static Map<String, Integer> refContigOrdering = null;
|
||||
private static SAMSequenceDictionary contigInfo = null;
|
||||
|
||||
/**
|
||||
* do we have a contig ordering setup?
|
||||
*
|
||||
* @return true if the contig order is setup
|
||||
*/
|
||||
public static boolean hasKnownContigOrdering() {
|
||||
return contigInfo != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the contig's SAMSequenceRecord
|
||||
*
|
||||
* @param contig the string name of the contig
|
||||
*
|
||||
* @return the sam sequence record
|
||||
*/
|
||||
public static SAMSequenceRecord getContigInfo(final String contig) {
|
||||
return contigInfo.getSequence(contig);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the contig index of a specified string version of the contig
|
||||
*
|
||||
* @param contig the contig string
|
||||
*
|
||||
* @return the contig index, -1 if not found
|
||||
*/
|
||||
public static int getContigIndex(final String contig) {
|
||||
if (contigInfo.getSequenceIndex(contig) == -1)
|
||||
Utils.scareUser(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
|
||||
|
||||
return contigInfo.getSequenceIndex(contig);
|
||||
}
|
||||
|
||||
/**
|
||||
* set our internal reference contig order
|
||||
*
|
||||
* @param refFile the reference file
|
||||
*
|
||||
* @return true if we were successful
|
||||
*/
|
||||
public static boolean setupRefContigOrdering(final ReferenceSequenceFile refFile) {
|
||||
return setupRefContigOrdering(refFile.getSequenceDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
* setup our internal reference contig order
|
||||
*
|
||||
* @param seqDict the sequence dictionary
|
||||
*
|
||||
* @return true if we were successful
|
||||
*/
|
||||
public static boolean setupRefContigOrdering(final SAMSequenceDictionary seqDict) {
|
||||
if (seqDict == null) { // we couldn't load the reference dictionary
|
||||
logger.info("Failed to load reference dictionary, falling back to lexicographic order for contigs");
|
||||
Utils.scareUser("Failed to load reference dictionary");
|
||||
return false;
|
||||
} else if (contigInfo == null) {
|
||||
contigInfo = seqDict;
|
||||
logger.debug(String.format("Prepared reference sequence contig dictionary"));
|
||||
for (SAMSequenceRecord contig : seqDict.getSequences()) {
|
||||
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
|
||||
}
|
||||
}
|
||||
GenomeLoc.MAX_CONTIG = contigInfo.getSequences().size();
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* parse a genome location, from a location string
|
||||
*
|
||||
* @param str the string to parse
|
||||
*
|
||||
* @return a GenomeLoc representing the String
|
||||
*/
|
||||
public static GenomeLoc parseGenomeLoc(final String str) {
|
||||
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||
//System.out.printf("Parsing location '%s'%n", str);
|
||||
|
||||
final Pattern regex1 = Pattern.compile("([\\w&&[^:]]+)$"); // matches case 1
|
||||
final Pattern regex2 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)$"); // matches case 2
|
||||
final Pattern regex3 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)-([\\d,]+)$"); // matches case 3
|
||||
final Pattern regex4 = Pattern.compile("([\\w&&[^:]]+):([\\d,]+)\\+"); // matches case 4
|
||||
|
||||
String contig = null;
|
||||
long start = 1;
|
||||
long stop = Integer.MAX_VALUE;
|
||||
boolean bad = false;
|
||||
|
||||
Matcher match1 = regex1.matcher(str);
|
||||
Matcher match2 = regex2.matcher(str);
|
||||
Matcher match3 = regex3.matcher(str);
|
||||
Matcher match4 = regex4.matcher(str);
|
||||
|
||||
try {
|
||||
if (match1.matches()) {
|
||||
contig = match1.group(1);
|
||||
} else if (match2.matches()) {
|
||||
contig = match2.group(1);
|
||||
start = parsePosition(match2.group(2));
|
||||
stop = start;
|
||||
} else if (match4.matches()) {
|
||||
contig = match4.group(1);
|
||||
start = parsePosition(match4.group(2));
|
||||
} else if (match3.matches()) {
|
||||
contig = match3.group(1);
|
||||
start = parsePosition(match3.group(2));
|
||||
stop = parsePosition(match3.group(3));
|
||||
|
||||
if (start > stop)
|
||||
bad = true;
|
||||
} else {
|
||||
bad = true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
bad = true;
|
||||
}
|
||||
|
||||
if (bad) {
|
||||
throw new StingException("Invalid Genome Location string: " + str);
|
||||
}
|
||||
|
||||
if (stop == Integer.MAX_VALUE && hasKnownContigOrdering()) {
|
||||
// lookup the actually stop position!
|
||||
stop = getContigInfo(contig).getSequenceLength();
|
||||
}
|
||||
|
||||
if (!isContigValid(contig))
|
||||
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
|
||||
|
||||
GenomeLoc loc = parseGenomeLoc(contig, start, stop);
|
||||
// System.out.printf(" => Parsed location '%s' into %s%n", str, loc);
|
||||
|
||||
return loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Useful utility function that parses a location string into a coordinate-order sorted
|
||||
* array of GenomeLoc objects
|
||||
*
|
||||
* @param str String representation of genome locs. Null string corresponds to no filter.
|
||||
*
|
||||
* @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order
|
||||
*/
|
||||
public static List<GenomeLoc> parseGenomeLocs(final String str) {
|
||||
// Null string means no filter.
|
||||
if (str == null) return null;
|
||||
|
||||
// Of the form: loc1;loc2;...
|
||||
// Where each locN can be:
|
||||
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||
try {
|
||||
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
||||
for (String loc : str.split(";"))
|
||||
locs.add(parseGenomeLoc(loc.trim()));
|
||||
Collections.sort(locs);
|
||||
//logger.info(String.format("Going to process %d locations", locs.length));
|
||||
locs = mergeOverlappingLocations(locs);
|
||||
logger.debug("Locations are:" + Utils.join(", ", locs));
|
||||
return locs;
|
||||
} catch (Exception e) { // TODO: fix this so that it passes the message from the exception, and doesn't print it out
|
||||
throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str),e);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Parsing string representations
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
private static long parsePosition(final String pos) {
|
||||
String x = pos.replaceAll(",", "");
|
||||
return Long.parseLong(x);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* merge a list of genome locs that may be overlapping, returning the list of unique genomic locations
|
||||
*
|
||||
* @param raw the unchecked genome loc list
|
||||
*
|
||||
* @return the list of merged locations
|
||||
*/
|
||||
public static List<GenomeLoc> mergeOverlappingLocations(final List<GenomeLoc> raw) {
|
||||
logger.debug(" Raw locations are:\n" + Utils.join("\n", raw));
|
||||
if (raw.size() <= 1)
|
||||
return raw;
|
||||
else {
|
||||
ArrayList<GenomeLoc> merged = new ArrayList<GenomeLoc>();
|
||||
Iterator<GenomeLoc> it = raw.iterator();
|
||||
GenomeLoc prev = it.next();
|
||||
while (it.hasNext()) {
|
||||
GenomeLoc curr = it.next();
|
||||
if (prev.contiguousP(curr)) {
|
||||
prev = prev.merge(curr);
|
||||
} else {
|
||||
merged.add(prev);
|
||||
prev = curr;
|
||||
}
|
||||
}
|
||||
merged.add(prev);
|
||||
return merged;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the given contig is valid with respect to the sequence dictionary
|
||||
* already installed in the GenomeLoc.
|
||||
*
|
||||
* @return True if the contig is valid. False otherwise.
|
||||
*/
|
||||
private static boolean isContigValid(String contig) {
|
||||
int contigIndex = contigInfo.getSequenceIndex(contig);
|
||||
return isSequenceIndexValid(contigIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the given sequence index is valid with respect to the sequence dictionary.
|
||||
*
|
||||
* @param sequenceIndex sequence index
|
||||
*
|
||||
* @return True if the sequence index is valid, false otherwise.
|
||||
*/
|
||||
private static boolean isSequenceIndexValid(int sequenceIndex) {
|
||||
return sequenceIndex >= 0 && sequenceIndex < contigInfo.size();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this static constructor when the input data is under limited control (i.e. parsing user data).
|
||||
*
|
||||
* @param contig Contig to parse.
|
||||
* @param start Starting point.
|
||||
* @param stop Stop point.
|
||||
*
|
||||
* @return The genome location, or a MalformedGenomeLocException if unparseable.
|
||||
*/
|
||||
public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) {
|
||||
if (!isContigValid(contig))
|
||||
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
|
||||
return new GenomeLoc(contig, getContigIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Read a file of genome locations to process.
|
||||
* regions specified by the location string. The string is of the form:
|
||||
* Of the form: loc1;loc2;...
|
||||
* Where each locN can be:
|
||||
* 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
|
||||
*
|
||||
* @param file_name
|
||||
*/
|
||||
public static List<GenomeLoc> intervalFileToList(final String file_name) {
|
||||
/**
|
||||
* first try to read it as an interval file since that's well structured
|
||||
* we'll fail quickly if it's not a valid file. Then try to parse it as
|
||||
* a location string file
|
||||
*/
|
||||
List<GenomeLoc> ret = null;
|
||||
try {
|
||||
IntervalList il = IntervalList.fromFile(new File(file_name));
|
||||
|
||||
// iterate through the list of merged intervals and add then as GenomeLocs
|
||||
ret = new ArrayList<GenomeLoc>();
|
||||
for (Interval interval : il.getUniqueIntervals()) {
|
||||
ret.add(new GenomeLoc(interval.getSequence(), getContigIndex(interval.getSequence()), interval.getStart(), interval.getEnd()));
|
||||
}
|
||||
return ret;
|
||||
|
||||
} catch (Exception e) {
|
||||
try {
|
||||
xReadLines reader = new xReadLines(new File(file_name));
|
||||
List<String> lines = reader.readLines();
|
||||
reader.close();
|
||||
String locStr = Utils.join(";", lines);
|
||||
logger.debug("locStr: " + locStr);
|
||||
ret = parseGenomeLocs(locStr);
|
||||
return ret;
|
||||
} catch (Exception e2) {
|
||||
logger.error("Attempt to parse interval file in GATK format failed: " + e2.getMessage());
|
||||
e2.printStackTrace();
|
||||
throw new StingException("Unable to parse out interval file in either format", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* get the sequence name from a sequence index
|
||||
*
|
||||
* @param contigIndex get the contig index
|
||||
*
|
||||
* @return the string that represents that contig name
|
||||
*/
|
||||
private static String getSequenceNameFromIndex(int contigIndex) {
|
||||
return GenomeLocParser.contigInfo.getSequence(contigIndex).getSequenceName();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome loc, given the contig name, start, and stop
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param start the starting position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return a new genome loc
|
||||
*/
|
||||
public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) {
|
||||
checkSetup();
|
||||
return verifyGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig), start, stop));
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome loc, given the contig index, start, and stop
|
||||
*
|
||||
* @param contigIndex the contig index
|
||||
* @param start the start position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return a new genome loc
|
||||
*/
|
||||
public static GenomeLoc createGenomeLoc(int contigIndex, final long start, final long stop) {
|
||||
checkSetup();
|
||||
if (start < 0) {
|
||||
throw new StingException("Bad start position " + start);
|
||||
}
|
||||
if (stop < -1) {
|
||||
throw new StingException("Bad stop position " + stop);
|
||||
} // a negative -1 indicates it's not a meaningful end position
|
||||
|
||||
|
||||
return new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome loc, given a read
|
||||
*
|
||||
* @param read
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static GenomeLoc createGenomeLoc(final SAMRecord read) {
|
||||
checkSetup();
|
||||
return verifyGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* create a new genome loc, given the contig position, and a single position
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param pos the postion
|
||||
*
|
||||
* @return a genome loc representing a single base at the specified postion on the contig
|
||||
*/
|
||||
public static GenomeLoc createGenomeLoc(final int contig, final long pos) {
|
||||
checkSetup();
|
||||
return verifyGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos));
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc, given the contig name, and a single position
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param pos the postion
|
||||
*
|
||||
* @return a genome loc representing a single base at the specified postion on the contig
|
||||
*/
|
||||
public static GenomeLoc createGenomeLoc(final String contig, final long pos) {
|
||||
checkSetup();
|
||||
return verifyGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig), pos, pos));
|
||||
}
|
||||
|
||||
public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) {
|
||||
checkSetup();
|
||||
return verifyGenomeLoc(new GenomeLoc(toCopy.getContig(), toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop()));
|
||||
}
|
||||
|
||||
/**
|
||||
* verify the specified genome loc is valid, if it's not, throw an exception
|
||||
*
|
||||
* @param toReturn teh genome loc we're about to return
|
||||
*
|
||||
* @return the genome loc if it's valid, otherwise we throw an exception
|
||||
*/
|
||||
private static GenomeLoc verifyGenomeLoc(GenomeLoc toReturn) {
|
||||
// conditions to fail on - we currently use a start of zero to indicate infinite read count, so don't check for that
|
||||
//if ((toReturn.getStop() < toReturn.getStart())) {
|
||||
// throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is after the stop (Start = " + toReturn.getStart() + " stop = " + toReturn.getStop() + ")");
|
||||
//}
|
||||
if (toReturn.getStart() < 0) {
|
||||
throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0");
|
||||
}
|
||||
if (toReturn.getStop() < 0) {
|
||||
throw new StingException("Parameters to GenomeLocParser are incorrect: the stop position is less than 0");
|
||||
}
|
||||
if (toReturn.getContigIndex() < 0) {
|
||||
throw new StingException("Parameters to GenomeLocParser are incorrect: the contig index is less than 0");
|
||||
}
|
||||
if (toReturn.getContigIndex() >= contigInfo.getSequences().size()) {
|
||||
throw new StingException("Parameters to GenomeLocParser are incorrect: the contig index is greater then the stored sequence count");
|
||||
|
||||
}
|
||||
return toReturn;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Move this Genome loc to the next contig, with a start
|
||||
* and stop of 1.
|
||||
*
|
||||
* @return true if we are not out of contigs, otherwise false if we're
|
||||
* at the end of the genome (no more contigs to jump to).
|
||||
*/
|
||||
public static GenomeLoc toNextContig(GenomeLoc current) {
|
||||
if (current.getContigIndex() + 1 >= contigInfo.getSequences().size()) {
|
||||
return null;
|
||||
} else
|
||||
return verifyGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(current.getContigIndex() + 1), current.getContigIndex() + 1, 1, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* check to make sure that we've setup the contig information
|
||||
*/
|
||||
private static void checkSetup() {
|
||||
if (contigInfo == null) {
|
||||
throw new StingException("The GenomeLocParser hasn't been setup with a contig sequence yet");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* compare two contig names, in the current context
|
||||
*
|
||||
* @param firstContig
|
||||
* @param secondContig
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public static int compareContigs(String firstContig, String secondContig) {
|
||||
checkSetup();
|
||||
Integer ref1 = GenomeLocParser.getContigIndex(firstContig);
|
||||
Integer ref2 = GenomeLocParser.getContigIndex(secondContig);
|
||||
return ref1.compareTo(ref2);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -168,8 +168,8 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
* |------| + |--------|
|
||||
*
|
||||
*/
|
||||
GenomeLoc before = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
|
||||
GenomeLoc after = new GenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
|
||||
GenomeLoc before = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
|
||||
GenomeLoc after = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
|
||||
int index = mArray.indexOf(g);
|
||||
if (after.getStop() - after.getStart() > 0) {
|
||||
mArray.add(index, after);
|
||||
|
|
@ -207,9 +207,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
*/
|
||||
|
||||
if (e.getStart() < g.getStart()) {
|
||||
l = new GenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
|
||||
l = GenomeLocParser.createGenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
|
||||
} else {
|
||||
l = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
|
||||
l = GenomeLocParser.createGenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
|
||||
}
|
||||
// replace g with the new region
|
||||
mArray.set(mArray.indexOf(g), l);
|
||||
|
|
@ -230,7 +230,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
|
||||
GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet();
|
||||
for (SAMSequenceRecord record : dict.getSequences()) {
|
||||
returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength()));
|
||||
returnSortedSet.add(GenomeLocParser.createGenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength()));
|
||||
}
|
||||
return returnSortedSet;
|
||||
}
|
||||
|
|
@ -258,7 +258,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
GenomeLocSortedSet ret = new GenomeLocSortedSet();
|
||||
for (GenomeLoc loc : this.mArray) {
|
||||
// ensure a deep copy
|
||||
ret.mArray.add(new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()));
|
||||
ret.mArray.add(GenomeLocParser.createGenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,8 +7,6 @@ import net.sf.samtools.util.CloseableIterator;
|
|||
import java.io.InputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
|
|
@ -16,8 +14,7 @@ import java.util.ArrayList;
|
|||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: Jun 11, 2009
|
||||
|
|
@ -54,11 +51,11 @@ public class ArtificialSAMFileReader extends SAMFileReader {
|
|||
*/
|
||||
@Override
|
||||
public CloseableIterator<SAMRecord> query(final String sequence, final int start, final int end, final boolean contained) {
|
||||
GenomeLoc region = new GenomeLoc(sequence, start, end);
|
||||
GenomeLoc region = GenomeLocParser.createGenomeLoc(sequence, start, end);
|
||||
List<SAMRecord> coveredSubset = new ArrayList<SAMRecord>();
|
||||
|
||||
for( SAMRecord read: reads ) {
|
||||
GenomeLoc readPosition = new GenomeLoc(read);
|
||||
GenomeLoc readPosition = GenomeLocParser.createGenomeLoc(read);
|
||||
if( contained && region.containsP(readPosition) ) coveredSubset.add(read);
|
||||
else if( !contained && readPosition.overlapsP(region) ) coveredSubset.add(read);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
|||
|
||||
import org.junit.Assert;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
|
@ -40,13 +41,13 @@ public class AllLocusViewTest extends LocusViewTemplate {
|
|||
AllLocusView allLocusView = (AllLocusView)view;
|
||||
|
||||
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
||||
GenomeLoc site = new GenomeLoc("chr1",i);
|
||||
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);
|
||||
LocusContext locusContext = allLocusView.next();
|
||||
Assert.assertEquals("Locus context location is incorrect", site, locusContext.getLocation() );
|
||||
int expectedReadsAtSite = 0;
|
||||
|
||||
for( SAMRecord read: reads ) {
|
||||
if(new GenomeLoc(read).containsP(locusContext.getLocation())) {
|
||||
if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation())) {
|
||||
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
|
||||
expectedReadsAtSite++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
|||
|
||||
import org.junit.Assert;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
|
|
@ -43,11 +44,11 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
|
|||
CoveredLocusView coveredLocusView = (CoveredLocusView)view;
|
||||
|
||||
for( long i = bounds.getStart(); i <= bounds.getStop(); i++ ) {
|
||||
GenomeLoc site = new GenomeLoc("chr1",i);
|
||||
GenomeLoc site = GenomeLocParser.createGenomeLoc("chr1",i);
|
||||
|
||||
int expectedReadsAtSite = 0;
|
||||
for( SAMRecord read: reads ) {
|
||||
if( new GenomeLoc(read).containsP(site) )
|
||||
if( GenomeLocParser.createGenomeLoc(read).containsP(site) )
|
||||
expectedReadsAtSite++;
|
||||
}
|
||||
|
||||
|
|
@ -61,7 +62,7 @@ public class CoveredLocusViewTest extends LocusViewTemplate {
|
|||
Assert.assertEquals("Found wrong number of reads at site", expectedReadsAtSite, locusContext.getReads().size());
|
||||
|
||||
for( SAMRecord read: reads ) {
|
||||
if(new GenomeLoc(read).containsP(locusContext.getLocation()))
|
||||
if(GenomeLocParser.createGenomeLoc(read).containsP(locusContext.getLocation()))
|
||||
Assert.assertTrue("Target locus context does not contain reads", locusContext.getReads().contains(read) );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.datasources.providers;
|
|||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||
|
|
@ -32,7 +33,7 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
*/
|
||||
@Test(expected=InvalidPositionException.class)
|
||||
public void testSingleBPFailure() {
|
||||
Shard shard = new LocusShard( new GenomeLoc(0,1,50) );
|
||||
Shard shard = new LocusShard( GenomeLocParser.createGenomeLoc(0,1,50) );
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
|
@ -45,12 +46,12 @@ public class LocusReferenceViewTest extends ReferenceViewTemplate {
|
|||
*/
|
||||
@Test(expected=InvalidPositionException.class)
|
||||
public void testBoundsFailure() {
|
||||
Shard shard = new LocusShard( new GenomeLoc(0,1,50) );
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc(0,1,50) );
|
||||
|
||||
ShardDataProvider dataProvider = new ShardDataProvider(shard,null,sequenceFile,null);
|
||||
LocusReferenceView view = new LocusReferenceView(dataProvider);
|
||||
|
||||
view.getReferenceBase(new GenomeLoc(0,51));
|
||||
view.getReferenceBase(GenomeLocParser.createGenomeLoc(0,51));
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,31 +1,21 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.*;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.LocusShard;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Iterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 13, 2009
|
||||
|
|
@ -49,14 +39,14 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
@BeforeClass
|
||||
public static void setupGenomeLoc() throws FileNotFoundException {
|
||||
sequenceSourceFile = fakeReferenceSequenceFile();
|
||||
GenomeLoc.setupRefContigOrdering(sequenceSourceFile);
|
||||
GenomeLocParser.setupRefContigOrdering(sequenceSourceFile);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void emptyLocusContextTest() {
|
||||
SAMRecordIterator iterator = new SAMRecordIterator();
|
||||
|
||||
GenomeLoc shardBounds = new GenomeLoc("chr1",1,5);
|
||||
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1",1,5);
|
||||
Shard shard = new LocusShard(shardBounds);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
|
||||
|
|
@ -70,7 +60,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read = buildSAMRecord("chr1",1,5);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
GenomeLoc shardBounds = new GenomeLoc("chr1",1,5);
|
||||
GenomeLoc shardBounds = GenomeLocParser.createGenomeLoc("chr1",1,5);
|
||||
Shard shard = new LocusShard(shardBounds);
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
|
||||
|
|
@ -84,7 +74,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read = buildSAMRecord("chr1",1,5);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -96,7 +86,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read = buildSAMRecord("chr1",6,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -108,7 +98,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read = buildSAMRecord("chr1",3,7);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -120,7 +110,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read = buildSAMRecord("chr1",1,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",6,15));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -132,7 +122,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read = buildSAMRecord("chr1",6,15);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -145,7 +135,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read2 = buildSAMRecord("chr1",6,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -162,7 +152,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read4 = buildSAMRecord("chr1",6,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -179,7 +169,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read4 = buildSAMRecord("chr1",5,9);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -198,7 +188,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecord read6 = buildSAMRecord("chr1",6,10);
|
||||
SAMRecordIterator iterator = new SAMRecordIterator(read1,read2,read3,read4,read5,read6);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",1,10));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",1,10));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
@ -224,7 +214,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
SAMRecordIterator iterator = new SAMRecordIterator(read01,read02,read03,read04,read05,read06,
|
||||
read07,read08,read09,read10,read11,read12);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chr1",6,15));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chr1",6,15));
|
||||
ShardDataProvider dataProvider = new ShardDataProvider( shard, iterator );
|
||||
LocusView view = createView( dataProvider );
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import org.junit.Test;
|
|||
import org.junit.BeforeClass;
|
||||
import org.junit.Assert;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -44,7 +44,7 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
public static void init() throws FileNotFoundException {
|
||||
// sequence
|
||||
seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLoc.setupRefContigOrdering(seq);
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -52,11 +52,11 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
*/
|
||||
@Test
|
||||
public void testNoBindings() {
|
||||
Shard shard = new LocusShard(new GenomeLoc("chrM",1,30));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30));
|
||||
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
|
||||
ReferenceOrderedView view = new ReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",10));
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",10));
|
||||
Assert.assertNull("The tracker should not have produced any data", tracker.lookup("tableTest",null));
|
||||
}
|
||||
|
||||
|
|
@ -69,12 +69,12 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
ReferenceOrderedData rod = new ReferenceOrderedData("tableTest", file, TabularROD.class);
|
||||
ReferenceOrderedDataSource dataSource = new ReferenceOrderedDataSource(rod);
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chrM",1,30));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30));
|
||||
|
||||
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Collections.singletonList(dataSource));
|
||||
ReferenceOrderedView view = new ReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",20));
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
|
||||
TabularROD datum = (TabularROD)tracker.lookup("tableTest",null);
|
||||
|
||||
Assert.assertEquals("datum parameter for COL1 is incorrect", "C", datum.get("COL1"));
|
||||
|
|
@ -95,12 +95,12 @@ public class ReferenceOrderedViewTest extends BaseTest {
|
|||
ReferenceOrderedDataSource dataSource2 = new ReferenceOrderedDataSource(rod2);
|
||||
|
||||
|
||||
Shard shard = new LocusShard(new GenomeLoc("chrM",1,30));
|
||||
Shard shard = new LocusShard(GenomeLocParser.createGenomeLoc("chrM",1,30));
|
||||
|
||||
ShardDataProvider provider = new ShardDataProvider(shard, null, seq, Arrays.asList(dataSource1,dataSource2));
|
||||
ReferenceOrderedView view = new ReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(new GenomeLoc("chrM",20));
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(GenomeLocParser.createGenomeLoc("chrM",20));
|
||||
TabularROD datum1 = (TabularROD)tracker.lookup("tableTest1",null);
|
||||
|
||||
Assert.assertEquals("datum1 parameter for COL1 is incorrect", "C", datum1.get("COL1"));
|
||||
|
|
|
|||
|
|
@ -1,15 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import java.io.FileNotFoundException;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: May 27, 2009
|
||||
|
|
@ -43,7 +43,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
|
|||
@BeforeClass
|
||||
public static void initialize() throws FileNotFoundException {
|
||||
sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
|
||||
GenomeLoc.setupRefContigOrdering(sequenceFile);
|
||||
GenomeLocParser.setupRefContigOrdering(sequenceFile);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -51,7 +51,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
|
|||
*/
|
||||
@Test
|
||||
public void testReferenceStart() {
|
||||
validateLocation( new GenomeLoc(0,1,25) );
|
||||
validateLocation( GenomeLocParser.createGenomeLoc(0,1,25) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -63,7 +63,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
|
|||
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1);
|
||||
final long contigStart = selectedContig.getSequenceLength() - 24;
|
||||
final long contigStop = selectedContig.getSequenceLength();
|
||||
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
|
||||
validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -74,7 +74,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
|
|||
// Test the last 25 bases of the first contig.
|
||||
int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2;
|
||||
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition);
|
||||
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),1,25) );
|
||||
validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),1,25) );
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -88,7 +88,7 @@ public abstract class ReferenceViewTemplate extends BaseTest {
|
|||
SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition);
|
||||
final long contigStart = selectedContig.getSequenceLength() - 24;
|
||||
final long contigStop = selectedContig.getSequenceLength();
|
||||
validateLocation( new GenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
|
||||
validateLocation( GenomeLocParser.createGenomeLoc(selectedContig.getSequenceIndex(),contigStart,contigStop) );
|
||||
}
|
||||
|
||||
protected abstract void validateLocation( GenomeLoc loc );
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ import static org.junit.Assert.assertTrue;
|
|||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
|
|
@ -60,7 +61,7 @@ public class IntervalShardStrategyTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
mSortedSet = new GenomeLocSortedSet();
|
||||
}
|
||||
|
||||
|
|
@ -71,7 +72,7 @@ public class IntervalShardStrategyTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testSingleChromosomeFunctionality() {
|
||||
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
|
||||
int counter = 0;
|
||||
|
|
@ -87,7 +88,7 @@ public class IntervalShardStrategyTest extends BaseTest {
|
|||
@Test
|
||||
public void testMultipleChromosomeFunctionality() {
|
||||
for (int x = 0; x < 5; x++) {
|
||||
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
}
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
|
||||
|
|
@ -104,7 +105,7 @@ public class IntervalShardStrategyTest extends BaseTest {
|
|||
@Test
|
||||
public void testOddSizeShardFunctionality() {
|
||||
for (int x = 0; x < 5; x++) {
|
||||
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
}
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(789, mSortedSet);
|
||||
|
|
@ -122,7 +123,7 @@ public class IntervalShardStrategyTest extends BaseTest {
|
|||
@Test
|
||||
public void testInfiniteShardSize() {
|
||||
for (int x = 0; x < 5; x++) {
|
||||
GenomeLoc loc = new GenomeLoc(x, 1, 1000);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(x, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
}
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(Long.MAX_VALUE, mSortedSet);
|
||||
|
|
@ -137,7 +138,7 @@ public class IntervalShardStrategyTest extends BaseTest {
|
|||
|
||||
@Test(expected = UnsupportedOperationException.class)
|
||||
public void testRemove() {
|
||||
GenomeLoc loc = new GenomeLoc(1, 1, 1000);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 1000);
|
||||
mSortedSet.add(loc);
|
||||
IntervalShardStrategy strat = new IntervalShardStrategy(100, mSortedSet);
|
||||
strat.remove();
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.datasources.shards;
|
|||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
|
@ -51,20 +52,20 @@ public class IntervalShardTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void simpleReturn() {
|
||||
GenomeLoc loc = new GenomeLoc(1, 1, 100);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
|
||||
intervalShard = new IntervalShard(loc);
|
||||
assertTrue(intervalShard.getGenomeLoc().equals(loc));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void ensureNotReference() {
|
||||
GenomeLoc loc = new GenomeLoc(1, 1, 100);
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(1, 1, 100);
|
||||
intervalShard = new IntervalShard(loc);
|
||||
assertTrue(intervalShard.getGenomeLoc() != loc && intervalShard.getGenomeLoc().equals(loc));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.shards;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Before;
|
||||
|
|
@ -52,7 +52,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.*;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
|
@ -42,7 +43,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
set = new GenomeLocSortedSet();
|
||||
}
|
||||
|
||||
|
|
@ -54,7 +55,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testReadInterval() {
|
||||
GenomeLoc l = new GenomeLoc(0,1,100);
|
||||
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
|
||||
set.add(l);
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,header.getSequenceDictionary(),100,set);
|
||||
assertTrue(st instanceof IntervalShardStrategy);
|
||||
|
|
@ -74,7 +75,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testExpInterval() {
|
||||
GenomeLoc l = new GenomeLoc(0,1,100);
|
||||
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
|
||||
set.add(l);
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL,header.getSequenceDictionary(),100,set);
|
||||
assertTrue(st instanceof ExpGrowthLocusShardStrategy);
|
||||
|
|
@ -82,7 +83,7 @@ public class ShardStrategyFactoryTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testLinearInterval() {
|
||||
GenomeLoc l = new GenomeLoc(0,1,100);
|
||||
GenomeLoc l = GenomeLocParser.createGenomeLoc(0,1,100);
|
||||
set.add(l);
|
||||
ShardStrategy st = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,header.getSequenceDictionary(),100,set);
|
||||
assertTrue(st instanceof LinearLocusShardStrategy);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import static org.junit.Assert.assertTrue;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.TabularROD;
|
||||
|
|
@ -38,13 +39,13 @@ public class ReferenceOrderedDataPoolTest extends BaseTest {
|
|||
|
||||
private ReferenceOrderedData<? extends ReferenceOrderedDatum> rod = null;
|
||||
|
||||
private final GenomeLoc testSite1 = new GenomeLoc("chrM",10);
|
||||
private final GenomeLoc testSite2 = new GenomeLoc("chrM",20);
|
||||
private final GenomeLoc testSite3 = new GenomeLoc("chrM",30);
|
||||
private final GenomeLoc testSite1 = GenomeLocParser.createGenomeLoc("chrM",10);
|
||||
private final GenomeLoc testSite2 = GenomeLocParser.createGenomeLoc("chrM",20);
|
||||
private final GenomeLoc testSite3 = GenomeLocParser.createGenomeLoc("chrM",30);
|
||||
|
||||
@BeforeClass
|
||||
public static void init() throws FileNotFoundException {
|
||||
GenomeLoc.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile));
|
||||
GenomeLocParser.setupRefContigOrdering(new IndexedFastaSequenceFile(sequenceFile));
|
||||
TabularROD.setDelimiter(TabularROD.DEFAULT_DELIMITER, TabularROD.DEFAULT_DELIMITER_REGEX);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
|||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
|
@ -60,7 +60,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
|
|||
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.iterators.BoundedReadIterator;
|
|||
import org.broadinstitute.sting.gatk.iterators.*;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMQueryIterator;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
|
@ -76,7 +77,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
@Test
|
||||
public void testToUnmappedReads() {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
|
||||
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
try {
|
||||
int unmappedReadsSeen = 0;
|
||||
int iterations = 0;
|
||||
|
|
@ -109,7 +110,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
@Test
|
||||
public void testShardingOfReadsSize14() {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
|
||||
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
targetReadCount = 14;
|
||||
try {
|
||||
int iterations = 0;
|
||||
|
|
@ -159,7 +160,7 @@ public class SAMByReadsTest extends BaseTest {
|
|||
@Test
|
||||
public void testShardingOfReadsSize25() {
|
||||
ArtificialResourcePool gen = new ArtificialResourcePool(1,10,100,1000);
|
||||
GenomeLoc.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(gen.getHeader().getSequenceDictionary());
|
||||
targetReadCount = 25;
|
||||
try {
|
||||
int iterations = 0;
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategyFactory;
|
|||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SimpleDataSourceLoadException;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
|
@ -73,7 +73,7 @@ public class BoundedReadIteratorTest extends BaseTest {
|
|||
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(seq.getSequenceDictionary());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -8,15 +8,13 @@ import org.junit.*;
|
|||
import static org.junit.Assert.assertTrue;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import org.broadinstitute.sting.utils.RefHanger;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
|
|
@ -33,7 +31,7 @@ public class TabularRODTest extends BaseTest {
|
|||
public static void init() {
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLoc.setupRefContigOrdering(seq);
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
|
||||
@Before
|
||||
|
|
@ -50,7 +48,7 @@ public class TabularRODTest extends BaseTest {
|
|||
logger.warn("Executing test1");
|
||||
TabularROD one = (TabularROD)iter.next();
|
||||
assertTrue(one.size() == 4);
|
||||
assertTrue(one.getLocation().equals(new GenomeLoc("chrM", 10)));
|
||||
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
|
||||
assertTrue(one.get("COL1").equals("A"));
|
||||
assertTrue(one.get("COL2").equals("B"));
|
||||
assertTrue(one.get("COL3").equals("C"));
|
||||
|
|
@ -62,7 +60,7 @@ public class TabularRODTest extends BaseTest {
|
|||
TabularROD one = (TabularROD)iter.next();
|
||||
TabularROD two = (TabularROD)iter.next();
|
||||
assertTrue(two.size() == 4);
|
||||
assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 20)));
|
||||
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20)));
|
||||
assertTrue(two.get("COL1").equals("C"));
|
||||
assertTrue(two.get("COL2").equals("D"));
|
||||
assertTrue(two.get("COL3").equals("E"));
|
||||
|
|
@ -75,7 +73,7 @@ public class TabularRODTest extends BaseTest {
|
|||
TabularROD two = (TabularROD)iter.next();
|
||||
TabularROD three = (TabularROD)iter.next();
|
||||
assertTrue(three.size() == 4);
|
||||
assertTrue(three.getLocation().equals(new GenomeLoc("chrM", 30)));
|
||||
assertTrue(three.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 30)));
|
||||
assertTrue(three.get("COL1").equals("F"));
|
||||
assertTrue(three.get("COL2").equals("G"));
|
||||
assertTrue(three.get("COL3").equals("H"));
|
||||
|
|
@ -93,9 +91,9 @@ public class TabularRODTest extends BaseTest {
|
|||
@Test
|
||||
public void testSeek() {
|
||||
logger.warn("Executing testSeek");
|
||||
TabularROD two = (TabularROD)iter.seekForward(new GenomeLoc("chrM", 20));
|
||||
TabularROD two = (TabularROD)iter.seekForward(GenomeLocParser.createGenomeLoc("chrM", 20));
|
||||
assertTrue(two.size() == 4);
|
||||
assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 20)));
|
||||
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 20)));
|
||||
assertTrue(two.get("COL1").equals("C"));
|
||||
assertTrue(two.get("COL2").equals("D"));
|
||||
assertTrue(two.get("COL3").equals("E"));
|
||||
|
|
@ -118,7 +116,7 @@ public class TabularRODTest extends BaseTest {
|
|||
logger.warn("Executing testDelim1");
|
||||
TabularROD one2 = (TabularROD)iter_commas.next();
|
||||
assertTrue(one2.size() == 5);
|
||||
assertTrue(one2.getLocation().equals(new GenomeLoc("chrM", 10)));
|
||||
assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
|
||||
assertTrue(one2.get("COL1").equals("A"));
|
||||
assertTrue(one2.get("COL2").equals("B"));
|
||||
assertTrue(one2.get("COL3").equals("C"));
|
||||
|
|
@ -135,7 +133,7 @@ public class TabularRODTest extends BaseTest {
|
|||
logger.warn("Executing testDelim1");
|
||||
TabularROD one2 = (TabularROD)iter_commas.next();
|
||||
assertTrue(one2.size() == 5);
|
||||
assertTrue(one2.getLocation().equals(new GenomeLoc("chrM", 10)));
|
||||
assertTrue(one2.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 10)));
|
||||
assertTrue(one2.get("COL1").equals("A"));
|
||||
assertTrue(one2.get("COL2").equals("B"));
|
||||
assertTrue(one2.get("COL3").equals("C"));
|
||||
|
|
@ -148,7 +146,7 @@ public class TabularRODTest extends BaseTest {
|
|||
ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
|
||||
assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3"));
|
||||
String rowData = String.format("%d %d %d", 1, 2, 3);
|
||||
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" "));
|
||||
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
|
||||
System.out.println(">>>>> " + row.toString());
|
||||
assertTrue(row.toString().equals("chrM:1\t1\t2\t3"));
|
||||
}
|
||||
|
|
@ -166,11 +164,11 @@ public class TabularRODTest extends BaseTest {
|
|||
out.println(TabularROD.headerString(header));
|
||||
|
||||
String rowData = String.format("%d %d %d", 1, 2, 3);
|
||||
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" "));
|
||||
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
|
||||
out.println(row.toString());
|
||||
|
||||
rowData = String.format("%d %d %d", 3, 4, 5);
|
||||
row = new TabularROD("myName", header, new GenomeLoc("chrM", 2), rowData.split(" "));
|
||||
row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 2), rowData.split(" "));
|
||||
out.println(row.toString());
|
||||
|
||||
ReferenceOrderedData ROD_commas = new ReferenceOrderedData("tableTest", outputFile, TabularROD.class);
|
||||
|
|
@ -178,14 +176,14 @@ public class TabularRODTest extends BaseTest {
|
|||
|
||||
TabularROD one = (TabularROD)iter_commas.next();
|
||||
assertTrue(one.size() == 4);
|
||||
assertTrue(one.getLocation().equals(new GenomeLoc("chrM", 1)));
|
||||
assertTrue(one.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 1)));
|
||||
assertTrue(one.get("col1").equals("1"));
|
||||
assertTrue(one.get("col2").equals("2"));
|
||||
assertTrue(one.get("col3").equals("3"));
|
||||
|
||||
TabularROD two = (TabularROD)iter_commas.next();
|
||||
assertTrue(two.size() == 4);
|
||||
assertTrue(two.getLocation().equals(new GenomeLoc("chrM", 2)));
|
||||
assertTrue(two.getLocation().equals(GenomeLocParser.createGenomeLoc("chrM", 2)));
|
||||
assertTrue(two.get("col1").equals("3"));
|
||||
assertTrue(two.get("col2").equals("4"));
|
||||
assertTrue(two.get("col3").equals("5"));
|
||||
|
|
@ -195,14 +193,14 @@ public class TabularRODTest extends BaseTest {
|
|||
public void testBadHeader1() {
|
||||
logger.warn("Executing testBadHeader1");
|
||||
ArrayList<String> header = new ArrayList<String>();
|
||||
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1));
|
||||
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1));
|
||||
}
|
||||
|
||||
@Test (expected=RuntimeException.class )
|
||||
public void testBadHeader2() {
|
||||
logger.warn("Executing testBadHeader2");
|
||||
ArrayList<String> header = new ArrayList<String>(Arrays.asList("col1", "col2", "col3"));
|
||||
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1));
|
||||
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1));
|
||||
}
|
||||
|
||||
@Test (expected=RuntimeException.class )
|
||||
|
|
@ -211,6 +209,6 @@ public class TabularRODTest extends BaseTest {
|
|||
ArrayList<String> header = new ArrayList<String>(Arrays.asList("HEADER", "col1", "col2", "col3"));
|
||||
assertTrue(TabularROD.headerString(header).equals("HEADER\tcol1\tcol2\tcol3"));
|
||||
String rowData = String.format("%d %d %d %d", 1, 2, 3, 4);
|
||||
TabularROD row = new TabularROD("myName", header, new GenomeLoc("chrM", 1), rowData.split(" "));
|
||||
TabularROD row = new TabularROD("myName", header, GenomeLocParser.createGenomeLoc("chrM", 1), rowData.split(" "));
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
|
||||
|
|
@ -10,8 +11,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.CountReadsWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.Reads;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import static org.junit.Assert.fail;
|
||||
|
|
@ -116,7 +116,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
||||
}
|
||||
GenomeLoc.setupRefContigOrdering(ref);
|
||||
GenomeLocParser.setupRefContigOrdering(ref);
|
||||
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
||||
ref.getSequenceDictionary(),
|
||||
|
|
@ -148,7 +148,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
fail("Count read walker should return an interger.");
|
||||
}
|
||||
if (((Integer) accumulator) != 9721) {
|
||||
fail("there should be 9721 mapped reads in the index file");
|
||||
fail("there should be 9721 mapped reads in the index file, there was " + ((Integer) accumulator) );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -156,7 +156,6 @@ public class TraverseReadsTest extends BaseTest {
|
|||
/** Test out that we can shard the file and iterate over every read */
|
||||
@Test
|
||||
public void testUnmappedReadCount() {
|
||||
|
||||
IndexedFastaSequenceFile ref = null;
|
||||
try {
|
||||
ref = new IndexedFastaSequenceFile(refFile);
|
||||
|
|
@ -164,7 +163,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
catch (FileNotFoundException ex) {
|
||||
throw new RuntimeException("File not found opening fasta file; please do this check before MicroManaging", ex);
|
||||
}
|
||||
GenomeLoc.setupRefContigOrdering(ref);
|
||||
GenomeLocParser.setupRefContigOrdering(ref);
|
||||
|
||||
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,
|
||||
ref.getSequenceDictionary(),
|
||||
|
|
@ -195,7 +194,7 @@ public class TraverseReadsTest extends BaseTest {
|
|||
fail("Count read walker should return an interger.");
|
||||
}
|
||||
if (((Integer) accumulator) != 10000) {
|
||||
fail("there should be 10000 mapped reads in the index file");
|
||||
fail("there should be 10000 mapped reads in the index file, there was " + ((Integer) accumulator));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ package org.broadinstitute.sting.playground.gatk.walkers.indels;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.OutputTracker;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileReader;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMFileWriter;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
|
@ -47,7 +47,7 @@ public class CleanedReadInjectorTest extends BaseTest {
|
|||
@BeforeClass
|
||||
public static void initialize() throws FileNotFoundException {
|
||||
sequenceFile = new IndexedFastaSequenceFile( new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta") );
|
||||
GenomeLoc.setupRefContigOrdering(sequenceFile);
|
||||
GenomeLocParser.setupRefContigOrdering(sequenceFile);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -0,0 +1,126 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import static junit.framework.Assert.assertTrue;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import org.junit.Test;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenomeLocParserTest
|
||||
* <p/>
|
||||
* Test out the functionality of the new genome loc parser
|
||||
*/
|
||||
public class GenomeLocParserTest extends BaseTest {
|
||||
|
||||
@Test(expected = StingException.class)
|
||||
public void testUnsetupException() {
|
||||
GenomeLocParser.createGenomeLoc(0, 0, 0);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKnownContigOrder() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
// assert that it's false when the contig ordering is not setup
|
||||
assertTrue(!GenomeLocParser.hasKnownContigOrdering());
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
// assert that it's true when it is setup
|
||||
assertTrue(GenomeLocParser.hasKnownContigOrdering());
|
||||
}
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
public void testGetContigIndex() {
|
||||
assertEquals(-1, GenomeLocParser.getContigIndex("blah")); // should be in the reference
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetContigIndexValid() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
assertEquals(0, GenomeLocParser.getContigIndex("chr1")); // should be in the reference
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetContigInfoUnknownContig() {
|
||||
assertEquals(null, GenomeLocParser.getContigInfo("blah")); // should be in the reference
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testGetContigInfoKnownContig() {
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||
assertEquals("chr1".compareTo(GenomeLocParser.getContigInfo("chr1").getSequenceName()), 0); // should be in the reference
|
||||
}
|
||||
|
||||
@Test(expected = StingException.class)
|
||||
public void testParseBadString() {
|
||||
GenomeLocParser.parseGenomeLoc("Bad:0-1");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseGoodString() {
|
||||
GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1-100");
|
||||
assertEquals(loc.getContigIndex(), 0);
|
||||
assertEquals(100, loc.getStop());
|
||||
assertEquals(1, loc.getStart());
|
||||
}
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
public void testParseBadLocations() {
|
||||
GenomeLocParser.parseGenomeLocs("chr1:1-1;badChr:1-0");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseGoodLocations() {
|
||||
GenomeLocParser.parseGenomeLocs("chr1:1-1;chr1:5-9");
|
||||
}
|
||||
|
||||
@Test(expected = RuntimeException.class)
|
||||
public void testParseGoodLocationsTooManySemiColons() {
|
||||
GenomeLocParser.parseGenomeLocs("chr1:1-1;;chr1:5-9;");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateGenomeLoc1() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1, 100);
|
||||
assertEquals(loc.getContigIndex(), 0);
|
||||
assertEquals(100, loc.getStop());
|
||||
assertEquals(1, loc.getStart());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateGenomeLoc2() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1, 100);
|
||||
assertEquals(loc.getContigIndex(), 0);
|
||||
assertEquals(100, loc.getStop());
|
||||
assertEquals(1, loc.getStart());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateGenomeLoc3() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0, 1);
|
||||
assertEquals(loc.getContigIndex(), 0);
|
||||
assertEquals(1, loc.getStop());
|
||||
assertEquals(1, loc.getStart());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateGenomeLoc4() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1);
|
||||
assertEquals(loc.getContigIndex(), 0);
|
||||
assertEquals(1, loc.getStop());
|
||||
assertEquals(1, loc.getStart());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateGenomeLoc5() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,1,100);
|
||||
GenomeLoc copy = GenomeLocParser.createGenomeLoc(loc);
|
||||
assertEquals(copy.getContigIndex(), 0);
|
||||
assertEquals(100, copy.getStop());
|
||||
assertEquals(1, copy .getStart());
|
||||
}
|
||||
}
|
||||
|
|
@ -44,13 +44,13 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void setup() {
|
||||
GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
mSortedSet = new GenomeLocSortedSet();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAdd() {
|
||||
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0);
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
|
|
@ -59,7 +59,7 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
@Test
|
||||
public void testRemove() {
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
mSortedSet.remove(g);
|
||||
|
|
@ -69,9 +69,9 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
@Test
|
||||
public void addRegion() {
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
GenomeLoc g = new GenomeLoc(1, 1, 50);
|
||||
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 1, 50);
|
||||
mSortedSet.add(g);
|
||||
GenomeLoc f = new GenomeLoc(1, 30, 80);
|
||||
GenomeLoc f = GenomeLocParser.createGenomeLoc(1, 30, 80);
|
||||
mSortedSet.addRegion(f);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
|
||||
|
|
@ -81,7 +81,7 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
@Test(expected = IllegalArgumentException.class)
|
||||
public void testAddDupplicate() {
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
GenomeLoc g = new GenomeLoc(1, 0, 0);
|
||||
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
mSortedSet.add(g);
|
||||
|
|
@ -89,8 +89,8 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void mergingOverlappingBelow() {
|
||||
GenomeLoc g = new GenomeLoc(1, 0, 50);
|
||||
GenomeLoc e = new GenomeLoc(1, 49, 100);
|
||||
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 0, 50);
|
||||
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 49, 100);
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
|
|
@ -105,8 +105,8 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void mergingOverlappingAbove() {
|
||||
GenomeLoc e = new GenomeLoc(1, 0, 50);
|
||||
GenomeLoc g = new GenomeLoc(1, 49, 100);
|
||||
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 0, 50);
|
||||
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 49, 100);
|
||||
assertTrue(mSortedSet.size() == 0);
|
||||
mSortedSet.add(g);
|
||||
assertTrue(mSortedSet.size() == 1);
|
||||
|
|
@ -121,13 +121,13 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void deleteSubRegion() {
|
||||
GenomeLoc e = new GenomeLoc(1, 0, 50);
|
||||
GenomeLoc g = new GenomeLoc(1, 49, 100);
|
||||
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 0, 50);
|
||||
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 49, 100);
|
||||
mSortedSet.add(g);
|
||||
mSortedSet.addRegion(e);
|
||||
|
||||
// now delete a region
|
||||
GenomeLoc d = new GenomeLoc(1, 25, 75);
|
||||
GenomeLoc d = GenomeLocParser.createGenomeLoc(1, 25, 75);
|
||||
mSortedSet.removeRegion(d);
|
||||
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
|
|
@ -143,20 +143,20 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void deleteAllByRegion() {
|
||||
GenomeLoc e = new GenomeLoc(1, 1, 100);
|
||||
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100);
|
||||
mSortedSet.add(e);
|
||||
for (int x = 1; x < 101; x++) {
|
||||
GenomeLoc del = new GenomeLoc(1,x,x);
|
||||
GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x);
|
||||
mSortedSet.removeRegion(del);
|
||||
}
|
||||
assertTrue(mSortedSet.isEmpty());
|
||||
}
|
||||
@Test
|
||||
public void deleteSomeByRegion() {
|
||||
GenomeLoc e = new GenomeLoc(1, 1, 100);
|
||||
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 1, 100);
|
||||
mSortedSet.add(e);
|
||||
for (int x = 1; x < 50; x++) {
|
||||
GenomeLoc del = new GenomeLoc(1,x,x);
|
||||
GenomeLoc del = GenomeLocParser.createGenomeLoc(1,x,x);
|
||||
mSortedSet.removeRegion(del);
|
||||
}
|
||||
assertTrue(!mSortedSet.isEmpty());
|
||||
|
|
@ -169,13 +169,13 @@ public class GenomeLocSortedSetTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void deleteSuperRegion() {
|
||||
GenomeLoc e = new GenomeLoc(1, 10, 20);
|
||||
GenomeLoc g = new GenomeLoc(1, 70, 100);
|
||||
GenomeLoc e = GenomeLocParser.createGenomeLoc(1, 10, 20);
|
||||
GenomeLoc g = GenomeLocParser.createGenomeLoc(1, 70, 100);
|
||||
mSortedSet.add(g);
|
||||
mSortedSet.addRegion(e);
|
||||
assertTrue(mSortedSet.size() == 2);
|
||||
// now delete a region
|
||||
GenomeLoc d = new GenomeLoc(1, 15, 75);
|
||||
GenomeLoc d = GenomeLocParser.createGenomeLoc(1, 15, 75);
|
||||
mSortedSet.removeRegion(d);
|
||||
Iterator<GenomeLoc> iter = mSortedSet.iterator();
|
||||
GenomeLoc loc = iter.next();
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ public class GenomeLocTest extends BaseTest {
|
|||
public static void init() {
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLoc.setupRefContigOrdering(seq);
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -32,10 +32,10 @@ public class GenomeLocTest extends BaseTest {
|
|||
public void testIsBetween() {
|
||||
logger.warn("Executing testIsBetween");
|
||||
|
||||
GenomeLoc locMiddle = new GenomeLoc("chr1", 3, 3);
|
||||
GenomeLoc locMiddle = GenomeLocParser.createGenomeLoc("chr1", 3, 3);
|
||||
|
||||
GenomeLoc locLeft = new GenomeLoc("chr1", 1, 1);
|
||||
GenomeLoc locRight = new GenomeLoc("chr1", 5, 5);
|
||||
GenomeLoc locLeft = GenomeLocParser.createGenomeLoc("chr1", 1, 1);
|
||||
GenomeLoc locRight = GenomeLocParser.createGenomeLoc("chr1", 5, 5);
|
||||
|
||||
Assert.assertTrue(locMiddle.isBetween(locLeft, locRight));
|
||||
Assert.assertFalse(locLeft.isBetween(locMiddle, locRight));
|
||||
|
|
@ -45,15 +45,15 @@ public class GenomeLocTest extends BaseTest {
|
|||
@Test
|
||||
public void testContigIndex() {
|
||||
logger.warn("Executing testContigIndex");
|
||||
GenomeLoc locOne = new GenomeLoc("chr1",1,1);
|
||||
GenomeLoc locOne = GenomeLocParser.createGenomeLoc("chr1",1,1);
|
||||
Assert.assertEquals(locOne.getContigIndex(), 1);
|
||||
Assert.assertEquals(locOne.getContig(), "chr1");
|
||||
|
||||
GenomeLoc locX = new GenomeLoc("chrX",1,1);
|
||||
GenomeLoc locX = GenomeLocParser.createGenomeLoc("chrX",1,1);
|
||||
Assert.assertEquals(locX.getContigIndex(), 23);
|
||||
Assert.assertEquals(locX.getContig(), "chrX");
|
||||
|
||||
GenomeLoc locNumber = new GenomeLoc(1,1,1);
|
||||
GenomeLoc locNumber = GenomeLocParser.createGenomeLoc(1,1,1);
|
||||
Assert.assertEquals(locNumber.getContigIndex(), 1);
|
||||
Assert.assertEquals(locNumber.getContig(), "chr1");
|
||||
Assert.assertEquals(locOne.compareTo(locNumber), 0);
|
||||
|
|
@ -63,15 +63,15 @@ public class GenomeLocTest extends BaseTest {
|
|||
@Test
|
||||
public void testCompareTo() {
|
||||
logger.warn("Executing testCompareTo");
|
||||
GenomeLoc twoOne = new GenomeLoc("chr2", 1);
|
||||
GenomeLoc twoFive = new GenomeLoc("chr2", 5);
|
||||
GenomeLoc twoOtherFive = new GenomeLoc("chr2", 5);
|
||||
GenomeLoc twoOne = GenomeLocParser.createGenomeLoc("chr2", 1);
|
||||
GenomeLoc twoFive = GenomeLocParser.createGenomeLoc("chr2", 5);
|
||||
GenomeLoc twoOtherFive = GenomeLocParser.createGenomeLoc("chr2", 5);
|
||||
Assert.assertEquals(0, twoFive.compareTo(twoOtherFive));
|
||||
|
||||
Assert.assertEquals(-1, twoOne.compareTo(twoFive));
|
||||
Assert.assertEquals(1, twoFive.compareTo(twoOne));
|
||||
|
||||
GenomeLoc oneOne = new GenomeLoc("chr1", 5);
|
||||
GenomeLoc oneOne = GenomeLocParser.createGenomeLoc("chr1", 5);
|
||||
Assert.assertEquals(-1, oneOne.compareTo(twoOne));
|
||||
Assert.assertEquals(1, twoOne.compareTo(oneOne));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,11 +4,12 @@ package org.broadinstitute.sting.utils;
|
|||
|
||||
// the imports for unit testing.
|
||||
|
||||
import org.junit.*;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
|
||||
import org.broadinstitute.sting.utils.RefHanger;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import org.junit.Before;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -69,7 +70,7 @@ public class RefHangerTest extends BaseTest {
|
|||
public static void init() {
|
||||
// sequence
|
||||
seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
|
||||
GenomeLoc.setupRefContigOrdering(seq);
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
System.out.printf("Filled hanger is %n%s%n", makeFilledHanger());
|
||||
}
|
||||
|
|
@ -81,7 +82,7 @@ public class RefHangerTest extends BaseTest {
|
|||
l3 = Arrays.asList(6);
|
||||
l4 = Arrays.asList(7, 8);
|
||||
l5 = Arrays.asList(9, 10);
|
||||
p1 = new GenomeLoc(0, 1, 1);
|
||||
p1 = GenomeLocParser.createGenomeLoc(0, 1, 1);
|
||||
p2 = new GenomeLoc(p1).nextLoc();
|
||||
p3 = new GenomeLoc(p2).nextLoc();
|
||||
p4 = new GenomeLoc(p3).nextLoc();
|
||||
|
|
@ -94,7 +95,7 @@ public class RefHangerTest extends BaseTest {
|
|||
|
||||
@Before
|
||||
public void setupHanger() {
|
||||
startLoc = new GenomeLoc(0, 1, 1); // chrM 1
|
||||
startLoc = GenomeLocParser.createGenomeLoc(0, 1, 1); // chrM 1
|
||||
emptyHanger = new RefHanger<Integer>();
|
||||
filledHanger = makeFilledHanger();
|
||||
|
||||
|
|
@ -145,7 +146,7 @@ public class RefHangerTest extends BaseTest {
|
|||
assertTrue(filledHanger.hasLocation(p3));
|
||||
assertTrue(filledHanger.hasLocation(p4));
|
||||
assertTrue(filledHanger.hasLocation(p5));
|
||||
assertTrue(! filledHanger.hasLocation(new GenomeLoc(0, 6, 6)));
|
||||
assertTrue(! filledHanger.hasLocation(GenomeLocParser.createGenomeLoc(0, 6, 6)));
|
||||
|
||||
assertTrue(filledHanger.getHanger(0) != null);
|
||||
assertTrue(filledHanger.getHanger(1) != null);
|
||||
|
|
|
|||
Loading…
Reference in New Issue