Fixed genome loc to be immutable; the only way to now change its values is through the GenomeLocParser.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1132 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-30 19:17:24 +00:00
parent 455275996f
commit f5cba5a6bb
14 changed files with 165 additions and 111 deletions

View File

@ -1,15 +1,10 @@
package org.broadinstitute.sting.gatk;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeSet;
import java.util.Random;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;
/**
* Useful class for forwarding on locusContext data from this iterator
@ -81,7 +76,9 @@ public class LocusContext {
/** @return the contig reported by this context's location */
public String getContig() {
    final GenomeLoc here = getLocation();
    return here.getContig();
}
/** @return the start coordinate of this context's location */
public long getPosition() {
    final GenomeLoc here = getLocation();
    return here.getStart();
}
/** @return the location currently held by this context */
public GenomeLoc getLocation() {
    return this.loc;
}
/**
 * Replaces the location held by this context.
 *
 * @param loc the new location; a clone of it is stored rather than the argument itself
 */
public void setLocation(GenomeLoc loc) {
    final GenomeLoc copy = loc.clone();
    this.loc = copy;
}
/**
* Returns the entire reference sequence contig associated with these reads
*

View File

@ -180,7 +180,7 @@ public class SAMDataSource implements SimpleDataSource {
iter = iteratorPool.iterator(new MappedStreamSegment(lastReadPos));
return InitialReadIterator(shard.getSize(), iter);
} else {
lastReadPos.setStop(-1);
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
iter = fastMappedReadSeek(shard.getSize(), StingSAMIteratorAdapter.adapt(reads, iteratorPool.iterator(new MappedStreamSegment(lastReadPos))));
}
@ -267,7 +267,7 @@ public class SAMDataSource implements SimpleDataSource {
} else {
readsTaken = readCount;
readsSeenAtLastPos = 0;
lastReadPos.setStop(-1);
lastReadPos = GenomeLocParser.setStop(lastReadPos,-1);
CloseableIterator<SAMRecord> ret = iteratorPool.iterator(new MappedStreamSegment(lastReadPos));
return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, ret), readCount);
}
@ -285,7 +285,7 @@ public class SAMDataSource implements SimpleDataSource {
if (stopPos < lastReadPos.getStart()) {
lastReadPos = GenomeLocParser.createGenomeLoc(lastReadPos.getContigIndex() + 1, stopPos, stopPos);
} else {
lastReadPos.setStart(rec.getAlignmentStart());
lastReadPos = GenomeLocParser.setStart(lastReadPos,rec.getAlignmentStart());
}
}
// in case we're run out of reads, get out

View File

@ -34,7 +34,7 @@ public class GenomeLocusIterator implements LocusIterator {
/**
* Creates an iterator that can traverse over the entire
* reference specified in the given ShardDataProvider.
* @param provider Data provider to use as a backing source.
* @param completeLocus Locus bounding the region this iterator traverses.
* Provider must have a reference (hasReference() == true).
*/
public GenomeLocusIterator( GenomeLoc completeLocus ) {
@ -58,7 +58,7 @@ public class GenomeLocusIterator implements LocusIterator {
if( !hasNext() )
throw new NoSuchElementException("No elements remaining in bounded reference region.");
GenomeLoc toReturn = (GenomeLoc)currentLocus.clone();
currentLocus.incPos();
currentLocus = GenomeLocParser.incPos(currentLocus);
return toReturn;
}

View File

@ -1,21 +1,31 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.picard.sam.SamFileHeaderMerger;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.Reads;
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.walkers.LocusWindowWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.iterators.MergingSamRecordIterator2;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.fasta.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import java.util.*;
import java.io.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.util.CloseableIterator;
import net.sf.picard.sam.SamFileHeaderMerger;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -136,7 +146,7 @@ public class TraverseByLocusWindows extends TraversalEngine {
String refSuffix = "";
if ( window.getLocation().getStop() > contigLength ) {
refSuffix = Utils.dupString('x', (int)window.getLocation().getStop() - contigLength);
window.getLocation().setStop(contigLength);
window.setLocation(GenomeLocParser.setStop(window.getLocation(),contigLength));
}
StringBuffer refBases = new StringBuffer(new String(sequenceFile.getSubsequenceAt(window.getContig(),window.getLocation().getStart(),window.getLocation().getStop()).getBases()));

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RefWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Pair;
// create a fasta sequence file from a reference and intervals
@ -34,7 +35,7 @@ public class FastaReferenceWalker extends RefWalker<Pair<GenomeLoc, Character>,
}
// otherwise, merge them
else {
sum.first.setStop(value.first.getStop());
sum.first = GenomeLocParser.setStop(sum.first,value.first.getStop());
sum.second = new String(sum.second + value.second);
}
return sum;

View File

@ -58,7 +58,7 @@ public class CoverageGapIntervalWalker extends LocusWalker<Pair<GenomeLoc, Integ
public GenomeLoc reduce(Pair<GenomeLoc, Integer> value, GenomeLoc sum) {
if ( value.second > 1000 ) {
if ( sum != null )
sum.setStop(value.first.getStop());
sum = GenomeLocParser.setStop(sum,value.first.getStop());
else
sum = value.first;
} else if ( sum != null ) {

View File

@ -1,29 +1,22 @@
package org.broadinstitute.sting.playground.gatk.walkers.indels;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.refdata.RODIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.rodRefSeq;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.playground.utils.CircularArray;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
@Argument(fullName="bed", shortName="bed", doc="BED output file name", required=true)
@ -189,7 +182,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
currentContigIndex = read.getReferenceIndex();
currentPosition = read.getAlignmentStart();
refName = new String(read.getReferenceName());
location.setContig(refName);
location = GenomeLocParser.setContig(location,refName);
coverage.clear(); // reset coverage window; this will also set reference position to 0
if ( call_somatic) normal_coverage.clear();
@ -315,7 +308,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
continue; // too dirty
}
location.setStart(pos); location.setStop(pos); // retrieve annotation data
location = GenomeLocParser.setStart(location,pos); location = GenomeLocParser.setStop(location,pos); // retrieve annotation data
rodRefSeq annotation = (refseqIterator == null ? null : refseqIterator.seekForward(location));
int total_variant_count = 0;
@ -412,7 +405,7 @@ public class IndelGenotyperWalker extends ReadWalker<Integer,Integer> {
"\tTUMOR TOO DIRTY\t"+total_mismatches_tumor);
continue; // too dirty
}
location.setStart(pos); location.setStop(pos); // retrieve annotation data
location = GenomeLocParser.setStart(location,pos); location = GenomeLocParser.setStop(location,pos); // retrieve annotation data
rodRefSeq annotation = (refseqIterator == null ? null : refseqIterator.seekForward(location));

View File

@ -84,7 +84,7 @@ public class IntervalMergerWalker extends ReadWalker<Integer,Integer> {
// at this point, we're in the first interval.
// now we can merge any other intervals which we overlap
while ( intervals.size() > 0 && loc.overlapsP(intervals.getFirst()) )
firstInterval.setStop(intervals.removeFirst().getStop());
firstInterval = GenomeLocParser.setStop(firstInterval,intervals.removeFirst().getStop());
return 1;
}

View File

@ -93,17 +93,17 @@ public class MismatchIntervalWalker extends LocusWalker<Pair<GenomeLoc, Boolean>
// if there is no interval to the left, then this is the first one
if ( sum.second == null ) {
sum.second = value.first;
sum.second.setStart(sum.second.getStart() - windowSize + firstMismatch + 1);
sum.second = GenomeLocParser.setStart(sum.second, sum.second.getStart() - windowSize + firstMismatch + 1);
}
// if the intervals don't overlap, print out the leftmost one and start a new one
else if ( value.first.getStop() - sum.second.getStop() > windowSize ) {
out.println(sum.second);
sum.second = value.first;
sum.second.setStart(sum.second.getStart() - windowSize + firstMismatch + 1);
sum.second = GenomeLocParser.setStart(sum.second,sum.second.getStart() - windowSize + firstMismatch + 1);
}
// otherwise, merge them
else {
sum.second.setStop(value.first.getStop());
sum.second = GenomeLocParser.setStop(sum.second, value.first.getStop());
}
}

View File

@ -54,7 +54,7 @@ public class SNPClusterWalker extends RefWalker<GenomeLoc, GenomeLoc> {
// if the last SNP location was within a window, merge them
if ( value.getStart() - sum.getStop() <= windowSize ) {
sum.setStop(value.getStart());
sum = GenomeLocParser.setStop(sum,value.getStart());
return sum;
}

View File

@ -23,12 +23,11 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
* the basic components of a genome loc, its contig index,
* start and stop position, and (optionally) the contig name
*/
private int contigIndex;
private long start;
private long stop;
private String contigName;
static int MAX_CONTIG;
protected final int contigIndex;
protected final long start;
protected final long stop;
protected final String contigName;
// --------------------------------------------------------------------------------------------------------------
//
// constructors
@ -146,28 +145,6 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
/** @return true when the stop coordinate equals {@code Integer.MAX_VALUE} */
public final boolean throughEndOfContigP() {
    return Integer.MAX_VALUE == this.stop;
}
/** @return true when the start coordinate is 1 */
public final boolean atBeginningOfContigP() {
    return 1 == this.start;
}
/**
 * Sets contig name. UNSAFE since it 1) does NOT update contig index; 2) does not validate the name
 *
 * @param contig the contig name to store; it is assigned as-is
 */
public void setContig(String contig) {
this.contigName = contig;
}
/**
 * Sets contig index. UNSAFE since it 1) does NOT update contig name; 2) does not validate the index
 *
 * @param contig the contig index to store; it is assigned as-is
 */
public void setContigIndex(int contig) {
this.contigIndex = contig;
}
/**
 * Overwrites the start position of this location in place; no validation is performed.
 *
 * @param start the new start position
 */
public void setStart(long start) {
this.start = start;
}
/**
 * Overwrites the stop position of this location in place; no validation is performed.
 *
 * @param stop the new stop position
 */
public void setStop(long stop) {
this.stop = stop;
}
/** @return true when start and stop are the same coordinate */
public final boolean isSingleBP() {
    return start == stop;
}
@ -230,19 +207,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStop() ));
}
/** Shifts this location by one position, in place; delegates to {@code incPos(1)}. */
public final void incPos() {
incPos(1);
}
/**
 * Shifts this location in place by adding the same offset to both start and stop.
 *
 * @param by the amount added to start and to stop
 */
public final void incPos(long by) {
this.start += by;
this.stop += by;
}
/**
 * Builds a new GenomeLoc from this one, increments the new object's position by one, and returns it.
 *
 * @return the newly built, incremented location
 */
public final GenomeLoc nextLoc() {
// construct a separate object from this one, so the increment below is applied to n
GenomeLoc n = new GenomeLoc(this);
n.incPos();
return n;
}
/**
* Check to see whether two genomeLocs are equal.

View File

@ -124,7 +124,6 @@ public class GenomeLocParser {
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
}
}
GenomeLoc.MAX_CONTIG = contigInfo.getSequences().size();
return true;
}
@ -221,7 +220,7 @@ public class GenomeLocParser {
logger.debug("Locations are:" + Utils.join(", ", locs));
return locs;
} catch (Exception e) { // TODO: fix this so that it passes the message from the exception, and doesn't print it out
throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str),e);
throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str), e);
}
}
@ -446,14 +445,10 @@ public class GenomeLocParser {
* @return the genome loc if it's valid, otherwise we throw an exception
*/
private static GenomeLoc verifyGenomeLoc(GenomeLoc toReturn) {
// conditions to fail on - we currently use a start of zero to indicate infinite read count, so don't check for that
//if ((toReturn.getStop() < toReturn.getStart())) {
// throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is after the stop (Start = " + toReturn.getStart() + " stop = " + toReturn.getStop() + ")");
//}
if (toReturn.getStart() < 0) {
throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0");
}
if (toReturn.getStop() < 0) {
if ((toReturn.getStop() != -1) && (toReturn.getStop() < 0)) {
throw new StingException("Parameters to GenomeLocParser are incorrect: the stop position is less than 0");
}
if (toReturn.getContigIndex() < 0) {
@ -483,8 +478,102 @@ public class GenomeLocParser {
}
/**
 * create a new genome loc, given an old location and a new contig
 *
 * @param loc    the old location
 * @param contig the new contig to set
 *
 * @return a new genome loc with an updated contig name and index
 * @throws StingException if the parser has not been set up with contig information,
 *                        or the contig name is not in the sequence dictionary
 */
public static GenomeLoc setContig(GenomeLoc loc, String contig) {
    checkSetup();
    // Resolve the name through the dictionary itself. getSequences() is a list of sequence
    // records, so contains(String)/indexOf(String) on that list can never match a contig name.
    final int contigIndex = GenomeLocParser.contigInfo.getSequenceIndex(contig);
    if (contigIndex < 0) {
        throw new StingException("Contig name ( " + contig + " ) not in the set sequence dictionary.");
    }
    return verifyGenomeLoc(new GenomeLoc(contig, contigIndex, loc.start, loc.getStop()));
}
/**
 * create a new genome loc from an existing loc, placed on the contig with the given index
 *
 * @param loc    the old location
 * @param contig the index of the new contig; it must lie inside the sequence dictionary
 *
 * @return a new genome loc whose contig name and index both correspond to the given index
 */
public static GenomeLoc setContigIndex(GenomeLoc loc, int contig) {
    checkSetup();
    final boolean inDictionary = (contig >= 0) && (contig < GenomeLocParser.contigInfo.getSequences().size());
    if (!inDictionary) {
        throw new StingException("Contig index ( " + contig + " ) is not in the sequence dictionary set.");
    }
    final String contigName = GenomeLocParser.contigInfo.getSequence(contig).getSequenceName();
    return verifyGenomeLoc(new GenomeLoc(contigName, contig, loc.start, loc.getStop()));
}
/**
 * create a new genome loc from an existing loc, with a new start position
 *
 * @param loc   the old location
 * @param start a new start position
 *
 * @return the newly created genome loc
 * @throws StingException if the parser has not been set up, the loc's contig index is outside
 *                        the sequence dictionary, or the start is past the end of the contig
 */
public static GenomeLoc setStart(GenomeLoc loc, long start) {
    checkSetup();
    final int contigIndex = loc.getContigIndex();
    if (contigIndex < 0 || contigIndex >= contigInfo.getSequences().size()) {
        throw new StingException("Genome loc passed in to setStart has a contig index outside the range of our current sequence dictionary");
    }
    if (start > GenomeLocParser.contigInfo.getSequences().get(contigIndex).getSequenceLength()) {
        // unlike setStop, no -1 sentinel is exempted here, so the message does not mention one
        throw new StingException("start value of " + start + " is greater than the contig length.");
    }
    return verifyGenomeLoc(new GenomeLoc(loc.getContig(), contigIndex, start, loc.getStop()));
}
/**
 * create a new genome loc from an existing loc, with a new stop position
 *
 * @param loc  the old location
 * @param stop a new stop position; -1 is exempt from the contig-length check
 *
 * @return the newly created genome loc
 * @throws StingException if the parser has not been set up, the loc's contig index is outside
 *                        the sequence dictionary, or the stop is past the end of the contig
 */
public static GenomeLoc setStop(GenomeLoc loc, long stop) {
    checkSetup();
    final int contigIndex = loc.getContigIndex();
    if (contigIndex < 0 || contigIndex >= contigInfo.getSequences().size()) {
        throw new StingException("Genome loc passed in to setStop has a contig index outside the range of our current sequence dictionary");
    }
    // a stop of -1 skips the length check; any other value must fit inside the contig
    if ((stop != -1) && (stop > GenomeLocParser.contigInfo.getSequences().get(contigIndex).getSequenceLength())) {
        throw new StingException("stop value of " + stop + " is greater than the contig length, and is not -1.");
    }
    return verifyGenomeLoc(new GenomeLoc(loc.getContig(), contigIndex, loc.start, stop));
}
/**
 * return a new genome loc, moved forward by a single position
 * @param loc the old location
 * @return a new genome loc
 */
public static GenomeLoc incPos(GenomeLoc loc) {
    final long step = 1;
    return incPos(loc, step);
}
/**
 * return a new genome loc whose start and stop are both offset by the given amount
 * @param loc the old location
 * @param by the offset added to both the start and the stop
 * @return a new genome loc
 */
public static GenomeLoc incPos(GenomeLoc loc, long by) {
    final long shiftedStart = loc.start + by;
    final long shiftedStop = loc.stop + by;
    final GenomeLoc shifted = new GenomeLoc(loc.getContig(), loc.getContigIndex(), shiftedStart, shiftedStop);
    return verifyGenomeLoc(shifted);
}
/**
 * create a new genome loc one position past the given one
 * @param loc the location
 * @return a new genome loc
 */
public static GenomeLoc nextLoc(GenomeLoc loc) {
    final GenomeLoc successor = incPos(loc);
    return successor;
}
/** check to make sure that we've setup the contig information */
private static void checkSetup() {
if (contigInfo == null) {
throw new StingException("The GenomeLocParser hasn't been setup with a contig sequence yet");

View File

@ -1,11 +1,10 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.apache.log4j.Logger;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Hanging data off the reference sequence
@ -271,12 +270,12 @@ public class RefHanger<T> {
else {
//assert pos.compareTo(getRightLoc()) == 1 : pos + " " + getRightLoc() + " => " + pos.compareTo(getRightLoc());
GenomeLoc nextRight = getRightLoc().nextLoc();
GenomeLoc nextRight = GenomeLocParser.nextLoc(getRightLoc());
while (pos.compareTo(nextRight) == 1) {
//printState();
//System.out.printf(" *** Extending %s, heading for %s%n", nextRight, pos);
ensurePos(nextRight);
nextRight = nextRight.nextLoc();
nextRight = GenomeLocParser.nextLoc(nextRight);
}
ensurePos(pos);

View File

@ -83,10 +83,10 @@ public class RefHangerTest extends BaseTest {
l4 = Arrays.asList(7, 8);
l5 = Arrays.asList(9, 10);
p1 = GenomeLocParser.createGenomeLoc(0, 1, 1);
p2 = new GenomeLoc(p1).nextLoc();
p3 = new GenomeLoc(p2).nextLoc();
p4 = new GenomeLoc(p3).nextLoc();
p5 = new GenomeLoc(p4).nextLoc();
p2 = GenomeLocParser.nextLoc(p1);
p3 = GenomeLocParser.nextLoc(p2);
p4 = GenomeLocParser.nextLoc(p3);
p5 = GenomeLocParser.nextLoc(p4);
filledHanger.addDataList(Arrays.asList(p1, p2, p3, p4, p5),
Arrays.asList(l1, l2, l3, l4, l5));