2009-03-12 04:58:01 +08:00
|
|
|
package org.broadinstitute.sting.utils;
|
2009-03-03 02:18:48 +08:00
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
import com.google.java.contract.Ensures;
|
|
|
|
|
import com.google.java.contract.Requires;
|
2010-09-12 23:07:38 +08:00
|
|
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
2009-03-29 04:37:27 +08:00
|
|
|
|
2009-10-28 05:53:40 +08:00
|
|
|
import java.io.Serializable;
|
2011-11-02 22:49:40 +08:00
|
|
|
import java.util.ArrayList;
|
|
|
|
|
import java.util.Arrays;
|
|
|
|
|
import java.util.Collections;
|
|
|
|
|
import java.util.List;
|
2009-03-29 04:37:27 +08:00
|
|
|
|
2009-03-03 02:18:48 +08:00
|
|
|
/**
|
|
|
|
|
* Created by IntelliJ IDEA.
|
|
|
|
|
* User: mdepristo
|
|
|
|
|
* Date: Mar 2, 2009
|
|
|
|
|
* Time: 8:50:11 AM
|
|
|
|
|
*
|
2011-05-21 10:01:59 +08:00
|
|
|
* Genome location representation. It is *** 1 *** based and closed (both start and stop are inclusive). Note that a GenomeLoc's start and stop values
|
|
|
|
|
* can be any positive or negative number, by design. Bound validation is a feature of the GenomeLocParser,
|
|
|
|
|
* and not a fundamental constraint of the GenomeLoc
|
2009-03-03 02:18:48 +08:00
|
|
|
*/
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenomeLocation {
|
2009-06-22 22:39:41 +08:00
|
|
|
/**
|
|
|
|
|
* the basic components of a genome loc, its contig index,
|
|
|
|
|
* start and stop position, and (optionally) the contig name
|
|
|
|
|
*/
|
2009-07-01 03:17:24 +08:00
|
|
|
protected final int contigIndex; // index of the contig; the protected constructor's contract requires >= 0, the special constants use -1
|
2010-11-11 01:59:50 +08:00
|
|
|
protected final int start; // first position of the interval (inclusive)
|
|
|
|
|
protected final int stop; // last position of the interval (inclusive); Integer.MAX_VALUE is treated as "through end of contig"
|
2009-07-01 03:17:24 +08:00
|
|
|
protected final String contigName; // contig name; null for the UNMAPPED constant
|
2010-12-10 03:51:48 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* A static constant to use when referring to the unmapped section of a data
|
|
|
|
|
* file. The unmapped region cannot be subdivided. Only this instance of
|
|
|
|
|
* the object may be used to refer to the region, as '==' comparisons are used
|
|
|
|
|
* in comparators, etc.
|
|
|
|
|
*/
|
2011-01-18 05:23:09 +08:00
|
|
|
// TODO - WARNING WARNING WARNING code somehow depends on the name of the contig being null!
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
public static final GenomeLoc UNMAPPED = new GenomeLoc((String)null); // sentinel with a null contig name; recognised by identity in isUnmapped()
|
|
|
|
|
public static final GenomeLoc WHOLE_GENOME = new GenomeLoc("all"); // sentinel named "all"; like UNMAPPED it has contigIndex -1 and start = stop = 0
|
|
|
|
|
|
2011-01-18 05:23:09 +08:00
|
|
|
public static final boolean isUnmapped(GenomeLoc loc) {
|
|
|
|
|
return loc == UNMAPPED;
|
|
|
|
|
}
|
2011-01-20 20:54:03 +08:00
|
|
|
|
2009-03-29 04:37:27 +08:00
|
|
|
// --------------------------------------------------------------------------------------------------------------
|
|
|
|
|
//
|
|
|
|
|
// constructors
|
|
|
|
|
//
|
|
|
|
|
// --------------------------------------------------------------------------------------------------------------
|
2009-03-03 02:18:48 +08:00
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
/**
 * Builds a location from its raw components. No validation is done in the body;
 * the preconditions are expressed only through the contract annotation below.
 *
 * @param contig      name of the contig
 * @param contigIndex index of the contig
 * @param start       first position of the interval
 * @param stop        last position of the interval
 */
@Requires({
        "contig != null",
        "contigIndex >= 0", // I believe we aren't allowed to create GenomeLocs without a valid contigIndex
        "start <= stop"})
protected GenomeLoc( final String contig, final int contigIndex, final int start, final int stop ) {
    this.start = start;
    this.stop = stop;
    this.contigIndex = contigIndex;
    this.contigName = contig;
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
/**
 * Unsafe constructor for special constant genome locs.
 * The resulting object has contig index -1 and start = stop = 0, so it deliberately
 * does not go through the contract-checked constructor.
 *
 * @param contig name to store for the constant; may be null
 */
private GenomeLoc( final String contig ) {
    this.start = 0;
    this.stop = 0;
    this.contigIndex = -1;
    this.contigName = contig;
}
|
|
|
|
|
|
2009-03-03 02:18:48 +08:00
|
|
|
//
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
// Accessors
|
2009-03-03 02:18:48 +08:00
|
|
|
//
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Ensures("result != null")
|
2011-01-20 20:54:03 +08:00
|
|
|
public final GenomeLoc getLocation() { return this; }
|
|
|
|
|
|
2011-06-23 06:53:37 +08:00
|
|
|
/**
 * Creates a single-position location at the start of this one.
 *
 * @return a new GenomeLoc on the same contig whose start and stop both equal {@link #getStart()}
 */
public final GenomeLoc getStartLocation() {
    final int position = getStart();
    return new GenomeLoc(getContig(), getContigIndex(), position, position);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Creates a single-position location at the end of this one.
 *
 * @return a new GenomeLoc on the same contig whose start and stop both equal {@link #getStop()}
 */
public final GenomeLoc getStopLocation() {
    final int position = getStop();
    return new GenomeLoc(getContig(), getContigIndex(), position, position);
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
/**
|
|
|
|
|
* @return the name of the contig of this GenomeLoc
|
|
|
|
|
*/
|
2009-04-13 08:48:21 +08:00
|
|
|
public final String getContig() {
|
2009-06-22 22:39:41 +08:00
|
|
|
return this.contigName;
|
2009-04-13 08:48:21 +08:00
|
|
|
}
|
|
|
|
|
|
2009-04-12 10:25:17 +08:00
|
|
|
/**
 * @return the contig index stored in this GenomeLoc
 */
public final int getContigIndex() {
    return contigIndex;
}
|
2010-11-11 01:59:50 +08:00
|
|
|
/**
 * @return the start position stored in this GenomeLoc
 */
public final int getStart() {
    return start;
}
|
|
|
|
|
/**
 * @return the stop position stored in this GenomeLoc
 */
public final int getStop() {
    return stop;
}
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
|
|
|
|
|
@Ensures("result != null")
|
2009-03-03 02:18:48 +08:00
|
|
|
public final String toString() {
|
2011-01-18 05:23:09 +08:00
|
|
|
if(GenomeLoc.isUnmapped(this)) return "unmapped";
|
2009-03-03 02:18:48 +08:00
|
|
|
if ( throughEndOfContigP() && atBeginningOfContigP() )
|
|
|
|
|
return getContig();
|
|
|
|
|
else if ( throughEndOfContigP() || getStart() == getStop() )
|
|
|
|
|
return String.format("%s:%d", getContig(), getStart());
|
|
|
|
|
else
|
|
|
|
|
return String.format("%s:%d-%d", getContig(), getStart(), getStop());
|
|
|
|
|
}
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
|
2010-11-11 01:59:50 +08:00
|
|
|
/** @return true when stop holds Integer.MAX_VALUE, the marker used for "through the end of the contig" */
private boolean throughEndOfContigP() {
    return Integer.MAX_VALUE == this.stop;
}
|
2009-03-03 02:18:48 +08:00
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
/** @return true when start is 1, the first position of a contig */
private boolean atBeginningOfContigP() {
    return 1 == this.start;
}
|
|
|
|
|
|
|
|
|
|
@Requires("that != null")
|
2009-03-03 02:18:48 +08:00
|
|
|
public final boolean disjointP(GenomeLoc that) {
|
2009-06-23 00:01:59 +08:00
|
|
|
return this.contigIndex != that.contigIndex || this.start > that.stop || that.start > this.stop;
|
2009-03-03 02:18:48 +08:00
|
|
|
}
|
2009-03-13 07:30:19 +08:00
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2009-03-29 04:37:27 +08:00
|
|
|
public final boolean discontinuousP(GenomeLoc that) {
|
2009-06-23 00:01:59 +08:00
|
|
|
return this.contigIndex != that.contigIndex || (this.start - 1) > that.stop || (that.start - 1) > this.stop;
|
2009-03-29 04:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2009-03-03 02:18:48 +08:00
|
|
|
public final boolean overlapsP(GenomeLoc that) {
|
|
|
|
|
return ! disjointP( that );
|
|
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2009-03-29 04:37:27 +08:00
|
|
|
public final boolean contiguousP(GenomeLoc that) {
|
|
|
|
|
return ! discontinuousP( that );
|
|
|
|
|
}
|
|
|
|
|
|
2012-08-30 04:58:21 +08:00
|
|
|
/**
|
|
|
|
|
* Return true if this GenomeLoc represents the UNMAPPED location
|
|
|
|
|
* @return
|
|
|
|
|
*/
|
|
|
|
|
public final boolean isUnmapped() {
|
|
|
|
|
return isUnmapped(this);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a new GenomeLoc that represents the entire span of this and that. Requires that
|
|
|
|
|
* this and that GenomeLoc are contiguous and both mapped
|
|
|
|
|
*/
|
|
|
|
|
@Requires({
|
|
|
|
|
"that != null",
|
2011-05-21 10:01:59 +08:00
|
|
|
"isUnmapped(this) == isUnmapped(that)"})
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Ensures("result != null")
|
2010-09-12 23:07:38 +08:00
|
|
|
public GenomeLoc merge( GenomeLoc that ) throws ReviewedStingException {
|
2011-01-18 05:23:09 +08:00
|
|
|
if(GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
|
|
|
|
|
if(! GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that))
|
2010-12-10 03:51:48 +08:00
|
|
|
throw new ReviewedStingException("Tried to merge a mapped and an unmapped genome loc");
|
|
|
|
|
return UNMAPPED;
|
|
|
|
|
}
|
|
|
|
|
|
2009-04-30 21:54:51 +08:00
|
|
|
if (!(this.contiguousP(that))) {
|
2012-08-24 22:48:41 +08:00
|
|
|
throw new ReviewedStingException("The two genome loc's need to be contiguous");
|
2009-04-30 21:54:51 +08:00
|
|
|
}
|
2009-03-29 04:37:27 +08:00
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
return new GenomeLoc(getContig(), this.contigIndex,
|
2012-01-05 06:03:21 +08:00
|
|
|
Math.min( getStart(), that.getStart() ),
|
2009-03-29 04:37:27 +08:00
|
|
|
Math.max( getStop(), that.getStop()) );
|
|
|
|
|
}
|
|
|
|
|
|
2011-06-23 06:53:37 +08:00
|
|
|
/**
|
|
|
|
|
* Returns a new GenomeLoc that represents the region between the endpoints of this and that. Requires that
|
|
|
|
|
* this and that GenomeLoc are both mapped.
|
|
|
|
|
*/
|
|
|
|
|
@Requires({"that != null", "isUnmapped(this) == isUnmapped(that)"})
|
|
|
|
|
@Ensures("result != null")
|
|
|
|
|
public GenomeLoc endpointSpan(GenomeLoc that) throws ReviewedStingException {
|
|
|
|
|
if(GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
|
|
|
|
|
throw new ReviewedStingException("Cannot get endpoint span for unmerged genome locs");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ( ! this.getContig().equals(that.getContig()) ) {
|
|
|
|
|
throw new ReviewedStingException("Cannot get endpoint span for genome locs on different contigs");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return new GenomeLoc(getContig(),this.contigIndex,Math.min(getStart(),that.getStart()),Math.max(getStop(),that.getStop()));
|
|
|
|
|
}
|
|
|
|
|
|
2011-03-08 07:00:17 +08:00
|
|
|
/**
|
|
|
|
|
* Splits the contig into to regions: [start,split point) and [split point, end].
|
|
|
|
|
* @param splitPoint The point at which to split the contig. Must be contained in the given interval.
|
|
|
|
|
* @return A two element array consisting of the genome loc before the split and the one after.
|
|
|
|
|
*/
|
|
|
|
|
public GenomeLoc[] split(final int splitPoint) {
|
|
|
|
|
if(splitPoint < getStart() || splitPoint > getStop())
|
|
|
|
|
throw new ReviewedStingException(String.format("Unable to split contig %s at split point %d; split point is not contained in region.",this,splitPoint));
|
|
|
|
|
return new GenomeLoc[] { new GenomeLoc(getContig(),contigIndex,getStart(),splitPoint-1), new GenomeLoc(getContig(),contigIndex,splitPoint,getStop()) };
|
|
|
|
|
}
|
|
|
|
|
|
2011-11-02 22:49:40 +08:00
|
|
|
/**
 * Alias for {@link #merge(GenomeLoc)}.
 *
 * @param that the location to combine with this one
 * @return whatever {@code merge(that)} returns
 */
public GenomeLoc union( GenomeLoc that ) {
    return this.merge(that);
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
|
|
|
|
@Ensures("result != null")
|
2010-09-12 23:07:38 +08:00
|
|
|
public GenomeLoc intersect( GenomeLoc that ) throws ReviewedStingException {
|
2011-01-18 05:23:09 +08:00
|
|
|
if(GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
|
|
|
|
|
if(! GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that))
|
2010-12-10 03:51:48 +08:00
|
|
|
throw new ReviewedStingException("Tried to intersect a mapped and an unmapped genome loc");
|
|
|
|
|
return UNMAPPED;
|
|
|
|
|
}
|
|
|
|
|
|
2010-07-23 00:00:30 +08:00
|
|
|
if (!(this.overlapsP(that))) {
|
2010-09-12 23:07:38 +08:00
|
|
|
throw new ReviewedStingException("GenomeLoc::intersect(): The two genome loc's need to overlap");
|
2010-07-23 00:00:30 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return new GenomeLoc(getContig(), this.contigIndex,
|
|
|
|
|
Math.max(getStart(), that.getStart()),
|
|
|
|
|
Math.min( getStop(), that.getStop()) );
|
|
|
|
|
}
|
|
|
|
|
|
2011-11-02 22:49:40 +08:00
|
|
|
@Requires("that != null")
|
|
|
|
|
public final List<GenomeLoc> subtract( final GenomeLoc that ) {
|
|
|
|
|
if(GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
|
|
|
|
|
if(! GenomeLoc.isUnmapped(this) || !GenomeLoc.isUnmapped(that))
|
|
|
|
|
throw new ReviewedStingException("Tried to intersect a mapped and an unmapped genome loc");
|
|
|
|
|
return Arrays.asList(UNMAPPED);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!(this.overlapsP(that))) {
|
|
|
|
|
throw new ReviewedStingException("GenomeLoc::minus(): The two genome loc's need to overlap");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (equals(that)) {
|
|
|
|
|
return Collections.emptyList();
|
|
|
|
|
} else if (containsP(that)) {
|
|
|
|
|
List<GenomeLoc> l = new ArrayList<GenomeLoc>(2);
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* we have to create two new region, one for the before part, one for the after
|
|
|
|
|
* The old region:
|
|
|
|
|
* |----------------- old region (g) -------------|
|
|
|
|
|
* |----- to delete (e) ------|
|
|
|
|
|
*
|
|
|
|
|
* product (two new regions):
|
|
|
|
|
* |------| + |--------|
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
int afterStop = this.getStop(), afterStart = that.getStop() + 1;
|
|
|
|
|
int beforeStop = that.getStart() - 1, beforeStart = this.getStart();
|
|
|
|
|
if (afterStop - afterStart >= 0) {
|
|
|
|
|
GenomeLoc after = new GenomeLoc(this.getContig(), getContigIndex(), afterStart, afterStop);
|
|
|
|
|
l.add(after);
|
|
|
|
|
}
|
|
|
|
|
if (beforeStop - beforeStart >= 0) {
|
|
|
|
|
GenomeLoc before = new GenomeLoc(this.getContig(), getContigIndex(), beforeStart, beforeStop);
|
|
|
|
|
l.add(before);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return l;
|
|
|
|
|
} else if (that.containsP(this)) {
|
|
|
|
|
/**
|
|
|
|
|
* e completely contains g, delete g, but keep looking, there may be more regions
|
|
|
|
|
* i.e.:
|
|
|
|
|
* |--------------------- e --------------------|
|
|
|
|
|
* |--- g ---| |---- others ----|
|
|
|
|
|
*/
|
|
|
|
|
return Collections.emptyList(); // don't need to do anything
|
|
|
|
|
} else {
|
|
|
|
|
/**
|
|
|
|
|
* otherwise e overlaps some part of g
|
|
|
|
|
*
|
|
|
|
|
* figure out which region occurs first on the genome. I.e., is it:
|
|
|
|
|
* |------------- g ----------|
|
|
|
|
|
* |------------- e ----------|
|
|
|
|
|
*
|
|
|
|
|
* or:
|
|
|
|
|
* |------------- g ----------|
|
|
|
|
|
* |------------ e -----------|
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
GenomeLoc n;
|
|
|
|
|
if (that.getStart() < this.getStart()) {
|
|
|
|
|
n = new GenomeLoc(this.getContig(), getContigIndex(), that.getStop() + 1, this.getStop());
|
|
|
|
|
} else {
|
|
|
|
|
n = new GenomeLoc(this.getContig(), getContigIndex(), this.getStart(), that.getStart() - 1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// replace g with the new region
|
|
|
|
|
return Arrays.asList(n);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2009-03-29 04:37:27 +08:00
|
|
|
public final boolean containsP(GenomeLoc that) {
|
2009-06-23 00:01:59 +08:00
|
|
|
return onSameContig(that) && getStart() <= that.getStart() && getStop() >= that.getStop();
|
2009-03-29 04:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2009-03-03 02:18:48 +08:00
|
|
|
public final boolean onSameContig(GenomeLoc that) {
|
2009-04-12 10:25:17 +08:00
|
|
|
return (this.contigIndex == that.contigIndex);
|
2009-03-03 02:18:48 +08:00
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2011-11-02 22:49:40 +08:00
|
|
|
@Ensures("result >= 0")
|
|
|
|
|
public final int distance( final GenomeLoc that ) {
|
2010-10-21 03:12:34 +08:00
|
|
|
if ( this.onSameContig(that) )
|
2011-11-02 22:49:40 +08:00
|
|
|
return Math.abs(this.getStart() - that.getStart());
|
2009-03-13 07:30:19 +08:00
|
|
|
else
|
|
|
|
|
return Integer.MAX_VALUE;
|
|
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires({"left != null", "right != null"})
|
2009-03-13 07:30:19 +08:00
|
|
|
public final boolean isBetween( final GenomeLoc left, final GenomeLoc right ) {
|
|
|
|
|
return this.compareTo(left) > -1 && this.compareTo(right) < 1;
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-11 01:59:50 +08:00
|
|
|
/**
|
|
|
|
|
* Tests whether this contig is completely before contig 'that'.
|
|
|
|
|
* @param that Contig to test against.
|
|
|
|
|
* @return true if this contig ends before 'that' starts; false if this is completely after or overlaps 'that'.
|
|
|
|
|
*/
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2009-05-14 02:51:16 +08:00
|
|
|
public final boolean isBefore( GenomeLoc that ) {
|
|
|
|
|
int comparison = this.compareContigs(that);
|
|
|
|
|
return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() ));
|
|
|
|
|
}
|
|
|
|
|
|
2011-03-08 07:00:17 +08:00
|
|
|
/**
|
|
|
|
|
* Tests whether any portion of this contig is before that contig.
|
|
|
|
|
* @param that Other contig to test.
|
|
|
|
|
* @return True if the start of this contig is before the start of the that contig.
|
|
|
|
|
*/
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2011-03-08 07:00:17 +08:00
|
|
|
public final boolean startsBefore(final GenomeLoc that) {
|
|
|
|
|
int comparison = this.compareContigs(that);
|
|
|
|
|
return ( comparison == -1 || ( comparison == 0 && this.getStart() < that.getStart() ));
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-11 01:59:50 +08:00
|
|
|
/**
|
|
|
|
|
* Tests whether this contig is completely after contig 'that'.
|
|
|
|
|
* @param that Contig to test against.
|
|
|
|
|
* @return true if this contig starts after 'that' ends; false if this is completely before or overlaps 'that'.
|
|
|
|
|
*/
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
2009-03-29 04:37:27 +08:00
|
|
|
public final boolean isPast( GenomeLoc that ) {
|
2009-04-04 04:05:24 +08:00
|
|
|
int comparison = this.compareContigs(that);
|
|
|
|
|
return ( comparison == 1 || ( comparison == 0 && this.getStart() > that.getStop() ));
|
2009-03-29 04:37:27 +08:00
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
/**
|
|
|
|
|
* Return the minimum distance between any pair of bases in this and that GenomeLocs:
|
|
|
|
|
*/
|
|
|
|
|
@Requires("that != null")
|
|
|
|
|
@Ensures("result >= 0")
|
2010-10-21 03:12:34 +08:00
|
|
|
public final int minDistance( final GenomeLoc that ) {
|
|
|
|
|
if (!this.onSameContig(that))
|
|
|
|
|
return Integer.MAX_VALUE;
|
|
|
|
|
|
|
|
|
|
int minDistance;
|
|
|
|
|
if (this.isBefore(that))
|
|
|
|
|
minDistance = distanceFirstStopToSecondStart(this, that);
|
|
|
|
|
else if (that.isBefore(this))
|
|
|
|
|
minDistance = distanceFirstStopToSecondStart(that, this);
|
|
|
|
|
else // this and that overlap [and possibly one contains the other]:
|
|
|
|
|
minDistance = 0;
|
|
|
|
|
|
|
|
|
|
return minDistance;
|
|
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires({
|
|
|
|
|
"locFirst != null",
|
|
|
|
|
"locSecond != null",
|
|
|
|
|
"locSecond.isPast(locFirst)"
|
|
|
|
|
})
|
|
|
|
|
@Ensures("result >= 0")
|
2010-10-21 03:12:34 +08:00
|
|
|
private static int distanceFirstStopToSecondStart(GenomeLoc locFirst, GenomeLoc locSecond) {
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
return locSecond.getStart() - locFirst.getStop();
|
2010-10-21 03:12:34 +08:00
|
|
|
}
|
|
|
|
|
|
2010-07-23 00:00:30 +08:00
|
|
|
|
2009-05-01 06:14:26 +08:00
|
|
|
|
2009-05-14 02:51:16 +08:00
|
|
|
/**
|
|
|
|
|
* Check to see whether two genomeLocs are equal.
|
|
|
|
|
* Note that this implementation ignores the contigInfo object.
|
|
|
|
|
* @param other Other contig to compare.
|
|
|
|
|
*/
|
|
|
|
|
@Override
|
|
|
|
|
public boolean equals(Object other) {
|
|
|
|
|
if(other == null)
|
|
|
|
|
return false;
|
|
|
|
|
if(other instanceof GenomeLoc) {
|
|
|
|
|
GenomeLoc otherGenomeLoc = (GenomeLoc)other;
|
2009-06-18 04:19:47 +08:00
|
|
|
return this.contigIndex == otherGenomeLoc.contigIndex &&
|
2009-05-14 02:51:16 +08:00
|
|
|
this.start == otherGenomeLoc.start &&
|
|
|
|
|
this.stop == otherGenomeLoc.stop;
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2009-06-09 00:52:02 +08:00
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public int hashCode() {
|
2011-09-10 02:23:13 +08:00
|
|
|
return start << 16 | stop << 4 | contigIndex;
|
2009-06-09 00:52:02 +08:00
|
|
|
}
|
|
|
|
|
|
2009-05-14 02:51:16 +08:00
|
|
|
|
2009-06-22 22:39:41 +08:00
|
|
|
/**
|
|
|
|
|
* conpare this genomeLoc's contig to another genome loc
|
2009-06-23 00:01:59 +08:00
|
|
|
* @param that the genome loc to compare contigs with
|
|
|
|
|
* @return 0 if equal, -1 if that.contig is greater, 1 if this.contig is greater
|
2009-06-22 22:39:41 +08:00
|
|
|
*/
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
|
|
|
|
@Ensures("result == 0 || result == 1 || result == -1")
|
2009-04-12 10:25:17 +08:00
|
|
|
public final int compareContigs( GenomeLoc that ) {
|
2009-06-18 04:19:47 +08:00
|
|
|
if (this.contigIndex == that.contigIndex)
|
|
|
|
|
return 0;
|
|
|
|
|
else if (this.contigIndex > that.contigIndex)
|
|
|
|
|
return 1;
|
|
|
|
|
return -1;
|
2009-03-03 02:18:48 +08:00
|
|
|
}
|
|
|
|
|
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Requires("that != null")
|
|
|
|
|
@Ensures("result == 0 || result == 1 || result == -1")
|
2009-03-03 02:18:48 +08:00
|
|
|
public int compareTo( GenomeLoc that ) {
|
2009-09-13 03:10:35 +08:00
|
|
|
int result = 0;
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
|
2009-09-13 03:10:35 +08:00
|
|
|
if ( this == that ) {
|
|
|
|
|
result = 0;
|
2010-12-10 03:51:48 +08:00
|
|
|
}
|
2011-01-18 05:23:09 +08:00
|
|
|
else if(GenomeLoc.isUnmapped(this))
|
2010-12-10 03:51:48 +08:00
|
|
|
result = 1;
|
2011-01-18 05:23:09 +08:00
|
|
|
else if(GenomeLoc.isUnmapped(that))
|
2010-12-10 03:51:48 +08:00
|
|
|
result = -1;
|
|
|
|
|
else {
|
2009-09-13 03:10:35 +08:00
|
|
|
final int cmpContig = compareContigs(that);
|
2009-03-03 02:18:48 +08:00
|
|
|
|
2009-09-13 03:10:35 +08:00
|
|
|
if ( cmpContig != 0 ) {
|
|
|
|
|
result = cmpContig;
|
|
|
|
|
} else {
|
|
|
|
|
if ( this.getStart() < that.getStart() ) result = -1;
|
2012-08-31 03:07:02 +08:00
|
|
|
else if ( this.getStart() > that.getStart() ) result = 1;
|
|
|
|
|
// these have the same start, so check the ends
|
|
|
|
|
else if ( this.getStop() < that.getStop() ) result = -1;
|
|
|
|
|
else if ( this.getStop() > that.getStop() ) result = 1;
|
2009-09-13 03:10:35 +08:00
|
|
|
}
|
|
|
|
|
}
|
2009-04-04 03:53:33 +08:00
|
|
|
|
2009-09-13 03:10:35 +08:00
|
|
|
return result;
|
2009-03-03 02:18:48 +08:00
|
|
|
}
|
2009-04-16 02:29:38 +08:00
|
|
|
|
2011-06-23 06:53:37 +08:00
|
|
|
@Requires("that != null")
|
|
|
|
|
public boolean endsAt(GenomeLoc that) {
|
|
|
|
|
return (this.compareContigs(that) == 0) && ( this.getStop() == that.getStop() );
|
|
|
|
|
}
|
|
|
|
|
|
2009-08-06 00:29:15 +08:00
|
|
|
/**
|
|
|
|
|
* How many BPs are covered by this locus?
|
|
|
|
|
* @return Number of BPs covered by this locus. According to the semantics of GenomeLoc, this should
|
|
|
|
|
* never be < 1.
|
|
|
|
|
*/
|
Contracts for Java now write for GenomeLoc and GenomeLocParser. The semantics of GenomeLoc are now much clearer. It is no longer allowed to create invalid GenomeLocs -- you can only create them with well formed start, end, and contigs, with respect to the mater dictionary. Where one previously created an invalid GenomeLoc, and asked is this valid, you must now provide the raw arguments to helper functions to assess this. Providing bad arguments to GenomeLoc generates UserExceptions now. Added utilty functions contigIsInDictionary and indexIsInDictionary to help with this.
Refactored several Interval utilties from GenomeLocParser to IntervalUtils, as one might expect they go
Removed GenomeLoc.clone() method, as this was not correctly implemented, and actually unnecessary, as GenomeLocs are immutable. Several iterator classes have changed to remove their use of clone()
Removed misc. unnecessary imports
Disabled, temporarily, the validating pileup integration test, as it uses reads mapped to an different reference sequence for ecoli, and this now does not satisfy the contracts for GenomeLoc
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5827 348d0f76-0448-11de-a6fe-93d51630548a
2011-05-20 23:43:27 +08:00
|
|
|
@Ensures("result > 0")
|
2012-02-02 08:34:39 +08:00
|
|
|
public int size() {
|
2009-08-06 00:29:15 +08:00
|
|
|
return stop - start + 1;
|
|
|
|
|
}
|
2009-06-03 02:14:46 +08:00
|
|
|
|
2011-12-01 06:05:16 +08:00
|
|
|
/**
|
|
|
|
|
* reciprocialOverlap: what is the min. percent of gl1 and gl2 covered by both
|
|
|
|
|
*
|
|
|
|
|
* gl1.s ---------- gk1.e
|
|
|
|
|
* gl2.s ---------- gl2.e
|
|
|
|
|
* 100%
|
|
|
|
|
*
|
|
|
|
|
* gl1.s ---------- gk1.e
|
|
|
|
|
* gl2.s ---------- gl2.e
|
|
|
|
|
* 50%
|
|
|
|
|
*
|
|
|
|
|
* gl1.s ---------- gk1.e
|
|
|
|
|
* gl2.s -------------------- gl2.e
|
|
|
|
|
* 25% (50% for gl1 but only 25% for gl2)
|
|
|
|
|
*/
|
|
|
|
|
public final double reciprocialOverlapFraction(final GenomeLoc o) {
|
|
|
|
|
if ( overlapsP(o) )
|
|
|
|
|
return Math.min(overlapPercent(this, o), overlapPercent(o, this));
|
|
|
|
|
else
|
|
|
|
|
return 0.0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private final static double overlapPercent(final GenomeLoc gl1, final GenomeLoc gl2) {
|
|
|
|
|
return (1.0 * gl1.intersect(gl2).size()) / gl1.size();
|
|
|
|
|
}
|
2012-01-05 06:03:21 +08:00
|
|
|
|
|
|
|
|
public long sizeOfOverlap( final GenomeLoc that ) {
|
2012-01-20 11:05:08 +08:00
|
|
|
return ( this.overlapsP(that) ? Math.min( getStop(), that.getStop() ) - Math.max( getStart(), that.getStart() ) + 1L : 0L );
|
2012-01-05 06:03:21 +08:00
|
|
|
}
|
2009-03-22 23:36:56 +08:00
|
|
|
}
|