added methods to validate an interval before you try to make a GenomeLoc: boolean validGenomeLoc().
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2846 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f02e94ab6f
commit
653f70efa2
|
|
@ -89,11 +89,12 @@ public class GenomeLocParser {
|
||||||
* Returns the contig index of a specified string version of the contig
|
* Returns the contig index of a specified string version of the contig
|
||||||
*
|
*
|
||||||
* @param contig the contig string
|
* @param contig the contig string
|
||||||
|
* @param exceptionOut if true, a contig missing from the sequence dictionary is reported via Utils.scareUser; if false, -1 is simply returned
|
||||||
*
|
*
|
||||||
* @return the contig index, -1 if not found
|
* @return the contig index, -1 if not found
|
||||||
*/
|
*/
|
||||||
public static int getContigIndex(final String contig) {
|
public static int getContigIndex(final String contig, boolean exceptionOut) {
|
||||||
if (contigInfo.getSequenceIndex(contig) == -1)
|
if (contigInfo.getSequenceIndex(contig) == -1 && exceptionOut)
|
||||||
Utils.scareUser(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
|
Utils.scareUser(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
|
||||||
|
|
||||||
return contigInfo.getSequenceIndex(contig);
|
return contigInfo.getSequenceIndex(contig);
|
||||||
|
|
@ -300,7 +301,7 @@ public class GenomeLocParser {
|
||||||
public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) {
|
public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) {
|
||||||
if (!isContigValid(contig))
|
if (!isContigValid(contig))
|
||||||
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
|
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
|
||||||
return new GenomeLoc(contig, getContigIndex(contig), start, stop);
|
return new GenomeLoc(contig, getContigIndex(contig,true), start, stop);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -339,7 +340,7 @@ public class GenomeLocParser {
|
||||||
// iterate through the list of merged intervals and add them as GenomeLocs
|
// iterate through the list of merged intervals and add them as GenomeLocs
|
||||||
ret = new ArrayList<GenomeLoc>();
|
ret = new ArrayList<GenomeLoc>();
|
||||||
for (Interval interval : il.getUniqueIntervals()) {
|
for (Interval interval : il.getUniqueIntervals()) {
|
||||||
ret.add(new GenomeLoc(interval.getSequence(), getContigIndex(interval.getSequence()), interval.getStart(), interval.getEnd()));
|
ret.add(new GenomeLoc(interval.getSequence(), getContigIndex(interval.getSequence(),true), interval.getStart(), interval.getEnd()));
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
|
@ -351,7 +352,7 @@ public class GenomeLocParser {
|
||||||
String locStr = Utils.join(";", lines);
|
String locStr = Utils.join(";", lines);
|
||||||
ret = parseGenomeLocs(locStr, rule);
|
ret = parseGenomeLocs(locStr, rule);
|
||||||
for(GenomeLoc locus: ret)
|
for(GenomeLoc locus: ret)
|
||||||
verifyGenomeLocBounds(locus);
|
exceptionOnInvalidGenomeLocBounds(locus);
|
||||||
return ret;
|
return ret;
|
||||||
} catch (Exception e2) {
|
} catch (Exception e2) {
|
||||||
logger.error("Attempt to parse interval file in GATK format failed: " + e2.getMessage());
|
logger.error("Attempt to parse interval file in GATK format failed: " + e2.getMessage());
|
||||||
|
|
@ -383,7 +384,7 @@ public class GenomeLocParser {
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) {
|
public static GenomeLoc createGenomeLoc(String contig, final long start, final long stop) {
|
||||||
checkSetup();
|
checkSetup();
|
||||||
return verifyGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig), start, stop));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), start, stop));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -417,7 +418,7 @@ public class GenomeLocParser {
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc createGenomeLoc(final SAMRecord read) {
|
public static GenomeLoc createGenomeLoc(final SAMRecord read) {
|
||||||
checkSetup();
|
checkSetup();
|
||||||
return verifyGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), read.getAlignmentEnd()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -431,7 +432,7 @@ public class GenomeLocParser {
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc createGenomeLoc(final int contig, final long pos) {
|
public static GenomeLoc createGenomeLoc(final int contig, final long pos) {
|
||||||
checkSetup();
|
checkSetup();
|
||||||
return verifyGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contig), contig, pos, pos));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -444,12 +445,12 @@ public class GenomeLocParser {
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc createGenomeLoc(final String contig, final long pos) {
|
public static GenomeLoc createGenomeLoc(final String contig, final long pos) {
|
||||||
checkSetup();
|
checkSetup();
|
||||||
return verifyGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig), pos, pos));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig,true), pos, pos));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) {
|
public static GenomeLoc createGenomeLoc(final GenomeLoc toCopy) {
|
||||||
checkSetup();
|
checkSetup();
|
||||||
return verifyGenomeLoc(new GenomeLoc(toCopy.getContig(), toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop()));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(toCopy.getContig(), toCopy.getContigIndex(), toCopy.getStart(), toCopy.getStop()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -460,7 +461,7 @@ public class GenomeLocParser {
|
||||||
*
|
*
|
||||||
* @return the genome loc if it's valid, otherwise we throw an exception
|
* @return the genome loc if it's valid, otherwise we throw an exception
|
||||||
*/
|
*/
|
||||||
private static GenomeLoc verifyGenomeLoc(GenomeLoc toReturn) {
|
private static GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) {
|
||||||
if (toReturn.getStart() < 0) {
|
if (toReturn.getStart() < 0) {
|
||||||
throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0");
|
throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0");
|
||||||
}
|
}
|
||||||
|
|
@ -482,8 +483,8 @@ public class GenomeLocParser {
|
||||||
* Verify the locus against the bounds of the contig.
|
* Verify the locus against the bounds of the contig.
|
||||||
* @param locus Locus to verify.
|
* @param locus Locus to verify.
|
||||||
*/
|
*/
|
||||||
private static void verifyGenomeLocBounds(GenomeLoc locus) {
|
private static void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) {
|
||||||
verifyGenomeLoc(locus);
|
exceptionOnInvalidGenomeLoc(locus);
|
||||||
|
|
||||||
int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength();
|
int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength();
|
||||||
if(locus.getStart() > contigSize)
|
if(locus.getStart() > contigSize)
|
||||||
|
|
@ -492,6 +493,60 @@ public class GenomeLocParser {
|
||||||
throw new StingException(String.format("GenomeLoc is invalid: locus stop %d is after the end of contig %s",locus.getStop(),locus.getContig()));
|
throw new StingException(String.format("GenomeLoc is invalid: locus stop %d is after the end of contig %s",locus.getStop(),locus.getContig()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether a GenomeLoc is valid: its contig index must be in the sequence dictionary and its start and stop must lie within that contig
|
||||||
|
*
|
||||||
|
* @param loc the location to validate
|
||||||
|
*
|
||||||
|
* @return true if the passed in GenomeLoc represents a valid location
|
||||||
|
*/
|
||||||
|
public static boolean validGenomeLoc(GenomeLoc loc) {
|
||||||
|
checkSetup();
|
||||||
|
// quick check before we get the contig size, is the contig number valid
|
||||||
|
if ((loc.getContigIndex() < 0) || // the contig index must not be negative
|
||||||
|
(loc.getContigIndex() >= contigInfo.getSequences().size())) // the contig must be in the integer range of contigs)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
int contigSize = contigInfo.getSequence(loc.getContigIndex()).getSequenceLength();
|
||||||
|
if ((loc.getStart() < 0) || // start must not be negative
|
||||||
|
((loc.getStop() != -1) && (loc.getStop() < 0)) || // the stop can be -1, but no other neg number
|
||||||
|
(loc.getStart() > contigSize) || // the start must be before or equal to the contig end
|
||||||
|
(loc.getStop() > contigSize)) // the stop must also be before or equal to the contig end
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// we passed
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether a position or interval on the genome, given by contig name, is valid
|
||||||
|
*
|
||||||
|
* @param contig the contig name
|
||||||
|
* @param start the start position
|
||||||
|
* @param stop the stop position
|
||||||
|
*
|
||||||
|
* @return true if it's valid, false otherwise
|
||||||
|
*/
|
||||||
|
public static boolean validGenomeLoc(String contig, long start, long stop) {
|
||||||
|
checkSetup();
|
||||||
|
return validGenomeLoc(new GenomeLoc(contig, GenomeLocParser.getContigIndex(contig, false), start, stop));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether a position or interval on the genome, given by contig index, is valid
|
||||||
|
*
|
||||||
|
* @param contigIndex the contig index
|
||||||
|
* @param start the start position
|
||||||
|
* @param stop the stop position
|
||||||
|
*
|
||||||
|
* @return true if it's valid, false otherwise
|
||||||
|
*/
|
||||||
|
public static boolean validGenomeLoc(int contigIndex, long start, long stop) {
|
||||||
|
checkSetup();
|
||||||
|
if (contigIndex < 0 || contigIndex >= contigInfo.size()) return false;
|
||||||
|
return validGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(contigIndex), contigIndex, start, stop));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Move this Genome loc to the next contig, with a start
|
* Move this Genome loc to the next contig, with a start
|
||||||
|
|
@ -504,7 +559,7 @@ public class GenomeLocParser {
|
||||||
if (current.getContigIndex() + 1 >= contigInfo.getSequences().size()) {
|
if (current.getContigIndex() + 1 >= contigInfo.getSequences().size()) {
|
||||||
return null;
|
return null;
|
||||||
} else
|
} else
|
||||||
return verifyGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(current.getContigIndex() + 1), current.getContigIndex() + 1, 1, 1));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(getSequenceNameFromIndex(current.getContigIndex() + 1), current.getContigIndex() + 1, 1, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -522,7 +577,7 @@ public class GenomeLocParser {
|
||||||
if ((index = contigInfo.getSequenceIndex(contig)) < 0) {
|
if ((index = contigInfo.getSequenceIndex(contig)) < 0) {
|
||||||
throw new StingException("Contig name ( " + contig + " ) not in the set sequence dictionary.");
|
throw new StingException("Contig name ( " + contig + " ) not in the set sequence dictionary.");
|
||||||
}
|
}
|
||||||
return verifyGenomeLoc(new GenomeLoc(contig, index, loc.start, loc.getStop()));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, index, loc.start, loc.getStop()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -535,7 +590,7 @@ public class GenomeLocParser {
|
||||||
if ((contig >= GenomeLocParser.contigInfo.getSequences().size()) || (contig < 0)) {
|
if ((contig >= GenomeLocParser.contigInfo.getSequences().size()) || (contig < 0)) {
|
||||||
throw new StingException("Contig index ( " + contig + " ) is not in the sequence dictionary set.");
|
throw new StingException("Contig index ( " + contig + " ) is not in the sequence dictionary set.");
|
||||||
}
|
}
|
||||||
return verifyGenomeLoc(new GenomeLoc(GenomeLocParser.contigInfo.getSequence(contig).getSequenceName(), contig, loc.start, loc.getStop()));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(GenomeLocParser.contigInfo.getSequence(contig).getSequenceName(), contig, loc.start, loc.getStop()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -556,7 +611,7 @@ public class GenomeLocParser {
|
||||||
if (start > length) {
|
if (start > length) {
|
||||||
throw new StingException("start value of " + start + " is greater than the contig length, and is not -1. (length = " + length + ").");
|
throw new StingException("start value of " + start + " is greater than the contig length, and is not -1. (length = " + length + ").");
|
||||||
}
|
}
|
||||||
return verifyGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop()));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -575,7 +630,7 @@ public class GenomeLocParser {
|
||||||
if ((stop != -1) && (stop > GenomeLocParser.contigInfo.getSequences().get(loc.getContigIndex()).getSequenceLength())) {
|
if ((stop != -1) && (stop > GenomeLocParser.contigInfo.getSequences().get(loc.getContigIndex()).getSequenceLength())) {
|
||||||
throw new StingException("stop value of " + stop + " is greater than the contig length, and is not -1.");
|
throw new StingException("stop value of " + stop + " is greater than the contig length, and is not -1.");
|
||||||
}
|
}
|
||||||
return verifyGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -598,7 +653,7 @@ public class GenomeLocParser {
|
||||||
* @return a new genome loc
|
* @return a new genome loc
|
||||||
*/
|
*/
|
||||||
public static GenomeLoc incPos(GenomeLoc loc, long by) {
|
public static GenomeLoc incPos(GenomeLoc loc, long by) {
|
||||||
return verifyGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by));
|
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -629,8 +684,8 @@ public class GenomeLocParser {
|
||||||
*/
|
*/
|
||||||
public static int compareContigs(String firstContig, String secondContig) {
|
public static int compareContigs(String firstContig, String secondContig) {
|
||||||
checkSetup();
|
checkSetup();
|
||||||
Integer ref1 = GenomeLocParser.getContigIndex(firstContig);
|
Integer ref1 = GenomeLocParser.getContigIndex(firstContig,true);
|
||||||
Integer ref2 = GenomeLocParser.getContigIndex(secondContig);
|
Integer ref2 = GenomeLocParser.getContigIndex(secondContig,true);
|
||||||
return ref1.compareTo(ref2);
|
return ref1.compareTo(ref2);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
package org.broadinstitute.sting.utils;
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
import static junit.framework.Assert.assertTrue;
|
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
|
@ -8,6 +7,7 @@ import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
@ -47,13 +47,13 @@ public class GenomeLocParserTest extends BaseTest {
|
||||||
|
|
||||||
@Test(expected = RuntimeException.class)
|
@Test(expected = RuntimeException.class)
|
||||||
public void testGetContigIndex() {
|
public void testGetContigIndex() {
|
||||||
assertEquals(-1, GenomeLocParser.getContigIndex("blah")); // should be in the reference
|
assertEquals(-1, GenomeLocParser.getContigIndex("blah",true)); // should not be in the reference
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetContigIndexValid() {
|
public void testGetContigIndexValid() {
|
||||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
|
||||||
assertEquals(0, GenomeLocParser.getContigIndex("chr1")); // should be in the reference
|
assertEquals(0, GenomeLocParser.getContigIndex("chr1",true)); // should be in the reference
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -219,4 +219,23 @@ public class GenomeLocParserTest extends BaseTest {
|
||||||
assertEquals(10, loc.getStop()); // the size
|
assertEquals(10, loc.getStop()); // the size
|
||||||
assertEquals(1, loc.getStart());
|
assertEquals(1, loc.getStart());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test out the validating methods
|
||||||
|
@Test
|
||||||
|
public void testValidationOfGenomeLocs() {
|
||||||
|
assertTrue(GenomeLocParser.validGenomeLoc("chr1",1,1));
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc("chr2",1,1)); // shouldn't have an entry
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc("chr1",1,11)); // past the end of the contig
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc("chr1",-1,10)); // bad start
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc("chr1",1,-2)); // bad stop
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc("chr1",10,11)); // bad start, past end
|
||||||
|
|
||||||
|
assertTrue(GenomeLocParser.validGenomeLoc(0,1,1));
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc(1,1,1)); // shouldn't have an entry
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc(0,1,11)); // past the end of the contig
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc(-1,0,10)); // bad contig index
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc(0,1,-2)); // bad stop
|
||||||
|
assertTrue(!GenomeLocParser.validGenomeLoc(0,10,11)); // bad start, past end
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue