Contracts for GenomeLocParser and GenomeLoc are now fully implemented.
GenomeLocs can officially have any start/stop values from -Inf - +Inf. Bounds w.r.t. the reference are enforced, optionally, by GenomeLocParser. General code cleanup throughout the subsystem. All validation code for GLs is now centralized, and all I/O systems now validate their inputs. Because of this, the Picard interval processing code has been changed to examine whether an interval is valid, and only keep the valid intervals. Note that the scatter/gather test was changed, because the original hg18 chr20 interval files as actually malformed (all records for some reason where on chr20). Many interval processing routines were moved to IntervalUtils, as this is their natural home. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5830 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3aa56037af
commit
e234589240
|
|
@ -655,10 +655,6 @@ public class GenomeAnalysisEngine {
|
|||
argCollection.unsafe == ValidationExclusion.TYPE.ALL);
|
||||
|
||||
List<GenomeLoc> nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList);
|
||||
|
||||
genomeLocParser.validateGenomeLocList(nonRODIntervals);
|
||||
genomeLocParser.validateGenomeLocList(rodIntervals);
|
||||
|
||||
List<GenomeLoc> allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule);
|
||||
|
||||
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
|
||||
|
|
|
|||
|
|
@ -795,7 +795,7 @@ public class VariantContextUtils {
|
|||
* @return the genomeLoc
|
||||
*/
|
||||
public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) {
|
||||
return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd());
|
||||
return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd(), true);
|
||||
}
|
||||
|
||||
public abstract static class AlleleMergeRule {
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<
|
|||
GenomeLoc loc;
|
||||
String chr = st.nextToken();
|
||||
if ( chr.indexOf(":") != -1 ) {
|
||||
loc = genomeLocParser.parseGenomeInterval(chr);
|
||||
loc = genomeLocParser.parseGenomeLoc(chr);
|
||||
} else {
|
||||
if ( st.countTokens() < 3 )
|
||||
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
|
||||
|
|
@ -123,7 +123,7 @@ public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<
|
|||
|
||||
GenomeLoc loc;
|
||||
if ( values.get(0).indexOf(":") != -1 )
|
||||
loc = genomeLocParser.parseGenomeInterval(values.get(0));
|
||||
loc = genomeLocParser.parseGenomeLoc(values.get(0));
|
||||
else
|
||||
loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
|
||||
|
||||
|
|
|
|||
|
|
@ -177,7 +177,7 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec<BeagleFeature
|
|||
|
||||
BeagleFeature bglFeature = new BeagleFeature();
|
||||
|
||||
final GenomeLoc loc = genomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this
|
||||
final GenomeLoc loc = genomeLocParser.parseGenomeLoc(tokens[markerPosition]); //GenomeLocParser.parseGenomeLoc(values.get(0)); - TODO switch to this
|
||||
|
||||
//parse the location: common to all readers
|
||||
bglFeature.setChr(loc.getContig());
|
||||
|
|
|
|||
|
|
@ -55,8 +55,8 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
|
|||
else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line);
|
||||
|
||||
|
||||
feature.setTranscript_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||
feature.setTranscript_coding_interval(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])));
|
||||
feature.setTranscript_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||
feature.setTranscript_coding_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])));
|
||||
feature.setGene_name(fields[12]);
|
||||
String[] exon_starts = fields[9].split(",");
|
||||
String[] exon_stops = fields[10].split(",");
|
||||
|
|
@ -71,7 +71,7 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
|
|||
ArrayList<Integer> exon_frames = new ArrayList<Integer>(eframes.length);
|
||||
|
||||
for ( int i = 0 ; i < exon_starts.length ; i++ ) {
|
||||
exons.add(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
||||
exons.add(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
|
||||
exon_frames.add(Integer.decode(eframes[i]));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public class StringToGenomeLocIteratorAdapter implements Iterator<GenomeLoc> {
|
|||
|
||||
public GenomeLoc next() {
|
||||
|
||||
if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeInterval( it.next() );
|
||||
if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeLoc(it.next());
|
||||
return BedParser.parseLocation( genomeLocParser,it.next() );
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -245,7 +245,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
|
|||
if (IntervalUtils.isIntervalFile(fileOrInterval)) {
|
||||
merger.add(new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) );
|
||||
} else {
|
||||
rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeInterval(fileOrInterval));
|
||||
rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeLoc(fileOrInterval));
|
||||
}
|
||||
}
|
||||
if ( ! rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator());
|
||||
|
|
|
|||
|
|
@ -275,7 +275,7 @@ public class RealignerTargetCreator extends RodWalker<RealignerTargetCreator.Eve
|
|||
}
|
||||
|
||||
public boolean isReportableEvent() {
|
||||
return getToolkit().getGenomeLocParser().validGenomeLoc(loc.getContig(), eventStartPos, eventStopPos) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
|
||||
return getToolkit().getGenomeLocParser().isValidGenomeLoc(loc.getContig(), eventStartPos, eventStopPos, true) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
|
|
|
|||
|
|
@ -101,7 +101,7 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
|
|||
if (waitingHaplotype == null) {// changed to a new contig:
|
||||
// Set the new haplotype to extend from [1, prevPosition]
|
||||
if (prevPosition >= 1) {
|
||||
GenomeLoc startInterval = getToolkit().getGenomeLocParser().parseGenomeLoc(curLocus.getContig(), 1, prevPosition);
|
||||
GenomeLoc startInterval = getToolkit().getGenomeLocParser().createGenomeLoc(curLocus.getContig(), 1, prevPosition);
|
||||
waitingHaplotype = new Haplotype(startInterval, sampleHapEntry.getKey());
|
||||
sampleHapEntry.setValue(waitingHaplotype);
|
||||
}
|
||||
|
|
@ -201,7 +201,7 @@ public class CalcFullHaplotypesWalker extends RodWalker<Integer, Integer> {
|
|||
|
||||
public void extend(int stop) {
|
||||
if (stop > interval.getStop())
|
||||
interval = getToolkit().getGenomeLocParser().parseGenomeLoc(interval.getContig(), interval.getStart(), stop);
|
||||
interval = getToolkit().getGenomeLocParser().createGenomeLoc(interval.getContig(), interval.getStart(), stop);
|
||||
}
|
||||
|
||||
public void incrementHetCount() {
|
||||
|
|
|
|||
|
|
@ -13,7 +13,9 @@ import java.io.Serializable;
|
|||
* Date: Mar 2, 2009
|
||||
* Time: 8:50:11 AM
|
||||
*
|
||||
* Genome location representation. It is *** 1 *** based closed.
|
||||
* Genome location representation. It is *** 1 *** based closed. Note that GenomeLocs start and stop values
|
||||
* can be any positive or negative number, by design. Bound validation is a feature of the GenomeLocParser,
|
||||
* and not a fundamental constraint of the GenomeLoc
|
||||
*/
|
||||
public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenomeLocation {
|
||||
/**
|
||||
|
|
@ -48,7 +50,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
@Requires({
|
||||
"contig != null",
|
||||
"contigIndex >= 0", // I believe we aren't allowed to create GenomeLocs without a valid contigIndex
|
||||
"start >= 0 && start <= stop"})
|
||||
"start <= stop"})
|
||||
protected GenomeLoc( final String contig, final int contigIndex, final int start, final int stop ) {
|
||||
this.contigName = contig;
|
||||
this.contigIndex = contigIndex;
|
||||
|
|
@ -122,8 +124,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
|
|||
*/
|
||||
@Requires({
|
||||
"that != null",
|
||||
"! isUnmapped(this)",
|
||||
"! isUnmapped(that)"})
|
||||
"isUnmapped(this) == isUnmapped(that)"})
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc merge( GenomeLoc that ) throws ReviewedStingException {
|
||||
if(GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) {
|
||||
|
|
|
|||
|
|
@ -25,8 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import com.google.java.contract.*;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
|
@ -178,6 +176,21 @@ public class GenomeLocParser {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the given contig is valid with respect to the sequence dictionary
|
||||
* already installed in the GenomeLoc.
|
||||
*
|
||||
* @return True if the contig is valid. False otherwise.
|
||||
*/
|
||||
public boolean contigIsInDictionary(String contig) {
|
||||
return contig != null && contigInfo.hasContig(contig);
|
||||
}
|
||||
|
||||
public boolean indexIsInDictionary(final int index) {
|
||||
return index >= 0 && contigInfo.hasContig(index);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the contig's SAMSequenceRecord
|
||||
*
|
||||
|
|
@ -185,27 +198,14 @@ public class GenomeLocParser {
|
|||
*
|
||||
* @return the sam sequence record
|
||||
*/
|
||||
@Requires({"contig != null", "contigIsInDictionary(contig)"})
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!contigIsInDictionary(contig) || contig == null"})
|
||||
public SAMSequenceRecord getContigInfo(final String contig) {
|
||||
if ( contig == null || ! contigIsInDictionary(contig) )
|
||||
throw new UserException.MalformedGenomeLoc(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
|
||||
return contigInfo.getSequence(contig);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the given contig is valid with respect to the sequence dictionary
|
||||
* already installed in the GenomeLoc.
|
||||
*
|
||||
* @return True if the contig is valid. False otherwise.
|
||||
*/
|
||||
@Requires({"contig != null"})
|
||||
public boolean contigIsInDictionary(String contig) {
|
||||
return contigInfo.hasContig(contig);
|
||||
}
|
||||
|
||||
public boolean indexIsInDictionary(final int index) {
|
||||
return contigInfo.hasContig(index);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the contig index of a specified string version of the contig
|
||||
*
|
||||
|
|
@ -213,47 +213,126 @@ public class GenomeLocParser {
|
|||
*
|
||||
* @return the contig index, -1 if not found
|
||||
*/
|
||||
@Requires("contig != null")
|
||||
@Ensures("result >= 0")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!contigIsInDictionary(contig) || contig == null"})
|
||||
public int getContigIndex(final String contig) {
|
||||
if (! contigInfo.hasContig(contig) )
|
||||
throw new UserException.MalformedGenomeLoc(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig));
|
||||
return contigInfo.getSequenceIndex(contig);
|
||||
return getContigInfo(contig).getSequenceIndex();
|
||||
}
|
||||
|
||||
@Requires("contig != null")
|
||||
protected int getContigIndexWithoutException(final String contig) {
|
||||
if (! contigInfo.hasContig(contig))
|
||||
if ( contig == null || ! contigInfo.hasContig(contig) )
|
||||
return -1;
|
||||
return contigInfo.getSequenceIndex(contig);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Low-level creation functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
/**
|
||||
* create a genome loc, given the contig name, start, and stop
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param start the starting position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return a new genome loc
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop)"})
|
||||
public GenomeLoc createGenomeLoc(String contig, final int start, final int stop) {
|
||||
return createGenomeLoc(contig, getContigIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
public GenomeLoc createGenomeLoc(String contig, final int start, final int stop, boolean mustBeOnReference) {
|
||||
return createGenomeLoc(contig, getContigIndex(contig), start, stop, mustBeOnReference);
|
||||
}
|
||||
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop, false)"})
|
||||
public GenomeLoc createGenomeLoc(String contig, int index, final int start, final int stop) {
|
||||
return createGenomeLoc(contig, index, start, stop, false);
|
||||
}
|
||||
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop,mustBeOnReference)"})
|
||||
public GenomeLoc createGenomeLoc(String contig, int index, final int start, final int stop, boolean mustBeOnReference) {
|
||||
validateGenomeLoc(contig, index, start, stop, mustBeOnReference, true);
|
||||
return new GenomeLoc(contig, index, start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* validate a position or interval on the genome as valid
|
||||
*
|
||||
* Requires that contig exist in the master sequence dictionary, and that contig index be valid as well. Requires
|
||||
* that start <= stop.
|
||||
*
|
||||
* if mustBeOnReference is true,
|
||||
* performs boundary validation for genome loc INTERVALS:
|
||||
* start and stop are on contig and start <= stop
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param start the start position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return true if it's valid, false otherwise. If exceptOnError, then throws a UserException if invalid
|
||||
*/
|
||||
private boolean validateGenomeLoc(String contig, int contigIndex, int start, int stop, boolean mustBeOnReference, boolean exceptOnError) {
|
||||
if ( ! contigInfo.hasContig(contig) )
|
||||
return vglHelper(exceptOnError, String.format("Unknown contig %s", contig));
|
||||
|
||||
if (stop < start)
|
||||
return vglHelper(exceptOnError, String.format("The stop position %d is less than start %d", stop, start));
|
||||
|
||||
if (contigIndex < 0)
|
||||
return vglHelper(exceptOnError, String.format("The contig index %d is less than 0", contigIndex));
|
||||
|
||||
if (contigIndex >= contigInfo.getNSequences())
|
||||
return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, contigInfo.getNSequences()));
|
||||
|
||||
if ( mustBeOnReference ) {
|
||||
if (start < 0)
|
||||
return vglHelper(exceptOnError, String.format("The start position %d is less than 0", start));
|
||||
|
||||
if (stop < 0)
|
||||
return vglHelper(exceptOnError, String.format("The stop position %d is less than 0", stop));
|
||||
|
||||
int contigSize = contigInfo.getSequence(contigIndex).getSequenceLength();
|
||||
if (start > contigSize || stop > contigSize)
|
||||
return vglHelper(exceptOnError, String.format("The genome loc coordinates %d-%d exceed the contig size (%d)", start, stop, contigSize));
|
||||
}
|
||||
|
||||
// we passed
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean isValidGenomeLoc(String contig, int start, int stop, boolean mustBeOnReference ) {
|
||||
return validateGenomeLoc(contig, getContigIndexWithoutException(contig), start, stop, mustBeOnReference, false);
|
||||
}
|
||||
|
||||
public boolean isValidGenomeLoc(String contig, int start, int stop ) {
|
||||
return validateGenomeLoc(contig, getContigIndexWithoutException(contig), start, stop, true, false);
|
||||
}
|
||||
|
||||
private boolean vglHelper(boolean exceptOnError, String msg) {
|
||||
if ( exceptOnError )
|
||||
throw new UserException.MalformedGenomeLoc("Parameters to GenomeLocParser are incorrect:" + msg);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Parsing genome locs
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* parse a genome interval, from a location string
|
||||
*
|
||||
* Performs interval-style validation:
|
||||
*
|
||||
* contig is valid; start and stop less than the end; start <= sto
|
||||
* @param str the string to parse
|
||||
*
|
||||
* @return a GenomeLoc representing the String
|
||||
*
|
||||
*/
|
||||
@Requires("str != null")
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc parseGenomeInterval(final String str) {
|
||||
GenomeLoc ret = parseGenomeLoc(str);
|
||||
exceptionOnInvalidGenomeLocBounds(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* parse a genome location, from a location string
|
||||
*
|
||||
* Performs read-style validation:
|
||||
* checks that start and stop are positive, start < stop, and the contig is valid
|
||||
* does not check that genomeLoc is actually on the contig
|
||||
*
|
||||
* contig is valid; start and stop less than the end; start <= stop, and start/stop are on the contig
|
||||
* @param str the string to parse
|
||||
*
|
||||
* @return a GenomeLoc representing the String
|
||||
|
|
@ -296,22 +375,15 @@ public class GenomeLocParser {
|
|||
|
||||
// is the contig valid?
|
||||
if (!contigIsInDictionary(contig))
|
||||
throw new UserException("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
||||
throw new UserException.MalformedGenomeLoc("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
||||
|
||||
if (stop == Integer.MAX_VALUE)
|
||||
// lookup the actually stop position!
|
||||
stop = getContigInfo(contig).getSequenceLength();
|
||||
|
||||
GenomeLoc locus = new GenomeLoc(contig, getContigIndex(contig), start, stop);
|
||||
exceptionOnInvalidGenomeLoc(locus);
|
||||
return locus;
|
||||
return createGenomeLoc(contig, getContigIndex(contig), start, stop, true);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Parsing string representations
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
/**
|
||||
* Parses a number like 1,000,000 into a long.
|
||||
* @param pos
|
||||
|
|
@ -342,49 +414,11 @@ public class GenomeLocParser {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this static constructor when the input data is under limited control (i.e. parsing user data).
|
||||
*
|
||||
* @param contig Contig to parse.
|
||||
* @param start Starting point.
|
||||
* @param stop Stop point.
|
||||
*
|
||||
* @return The genome location, or a MalformedGenomeLocException if unparseable.
|
||||
*
|
||||
* Validation: only checks that contig is valid
|
||||
* start/stop could be anything
|
||||
*/
|
||||
public GenomeLoc parseGenomeLoc(final String contig, int start, int stop) {
|
||||
if (!contigIsInDictionary(contig))
|
||||
throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?");
|
||||
return new GenomeLoc(contig, getContigIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the sequence name from a sequence index
|
||||
*
|
||||
* @param contigIndex get the contig index
|
||||
*
|
||||
* @return the string that represents that contig name
|
||||
*/
|
||||
@Requires("indexIsInDictionary(contigIndex)")
|
||||
@Ensures("result != null")
|
||||
private String getSequenceNameFromIndex(int contigIndex) {
|
||||
return contigInfo.getSequence(contigIndex).getSequenceName();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome loc, given the contig name, start, and stop
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param start the starting position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return a new genome loc
|
||||
*/
|
||||
public GenomeLoc createGenomeLoc(String contig, final int start, final int stop) {
|
||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig), start, stop));
|
||||
}
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Parsing string representations
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* create a genome loc, given a read. If the read is unmapped, *and* yet the read has a contig and start position,
|
||||
|
|
@ -394,183 +428,44 @@ public class GenomeLocParser {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
@Requires("read != null")
|
||||
@Ensures("result != null")
|
||||
public GenomeLoc createGenomeLoc(final SAMRecord read) {
|
||||
GenomeLoc loc;
|
||||
|
||||
if ( read.getReadUnmappedFlag() && read.getReferenceIndex() == -1 )
|
||||
// read is unmapped and not placed anywhere on the genome
|
||||
loc = GenomeLoc.UNMAPPED;
|
||||
return GenomeLoc.UNMAPPED;
|
||||
else {
|
||||
int end = read.getReadUnmappedFlag() ? read.getAlignmentStart() : read.getAlignmentEnd();
|
||||
loc = new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), end);
|
||||
return createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), end, false);
|
||||
}
|
||||
|
||||
return exceptionOnInvalidGenomeLoc(loc);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a new genome loc, given the contig name, and a single position
|
||||
* create a new genome loc, given the contig name, and a single position. Must be on the reference
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param pos the postion
|
||||
*
|
||||
* @return a genome loc representing a single base at the specified postion on the contig
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
@ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, pos, pos, true)"})
|
||||
public GenomeLoc createGenomeLoc(final String contig, final int pos) {
|
||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig), pos, pos));
|
||||
return createGenomeLoc(contig, getContigIndex(contig), pos, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new GenomeLoc without performing any validation on its contig or bounds.
|
||||
* FOR UNIT TESTING PURPOSES ONLY!
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param start start position of the interval
|
||||
* @param stop stop position of the interval
|
||||
* @return a new GenomeLoc representing the specified location
|
||||
*/
|
||||
public GenomeLoc createGenomeLocWithoutValidation( String contig, int start, int stop ) {
|
||||
return new GenomeLoc(contig, getContigIndexWithoutException(contig), start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* verify the specified genome loc is valid, if it's not, throw an exception
|
||||
* Will not verify the location against contig bounds.
|
||||
*
|
||||
*
|
||||
* Validation:
|
||||
* checks that start and stop are positive, start < stop, and the contig is valid
|
||||
* does not check that genomeLoc is actually on the contig, so start could be > end of contig
|
||||
*
|
||||
* @param toReturn the genome loc we're about to return
|
||||
*
|
||||
* @return the genome loc if it's valid, otherwise we throw an exception
|
||||
*
|
||||
*/
|
||||
private GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) {
|
||||
if ( GenomeLoc.isUnmapped(toReturn) ) {
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
if (toReturn.getStart() < 0) {
|
||||
throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0 " +
|
||||
"in interval: " + toReturn);
|
||||
}
|
||||
if ((toReturn.getStop() != -1) && (toReturn.getStop() < 0)) {
|
||||
throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the stop position is less than 0 " +
|
||||
"in interval: " + toReturn);
|
||||
}
|
||||
if (toReturn.getContigIndex() < 0) {
|
||||
throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the contig index is less than 0 " +
|
||||
"in interval: " + toReturn);
|
||||
}
|
||||
if (toReturn.getContigIndex() >= contigInfo.getNSequences()) {
|
||||
throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the contig index is greater than " +
|
||||
"the stored sequence count in interval: " + toReturn);
|
||||
|
||||
}
|
||||
return toReturn;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Verify the locus against the bounds of the contig.
|
||||
*
|
||||
* performs boundary validation for genome loc INTERVALS:
|
||||
* start and stop are on contig and start <= stop
|
||||
* does NOT check that start and stop > 0, or that contig is valid
|
||||
* for that reason, this function should only be called AFTER exceptionOnInvalidGenomeLoc()
|
||||
* exceptionOnInvalidGenomeLoc isn't included in this function to save time
|
||||
*
|
||||
* @param locus Locus to verify.
|
||||
*/
|
||||
private void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) {
|
||||
if ( GenomeLoc.isUnmapped(locus) ) {
|
||||
return;
|
||||
}
|
||||
|
||||
int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength();
|
||||
if(locus.getStart() > contigSize)
|
||||
throw new UserException.MalformedGenomeLoc("GenomeLoc is invalid: locus start is after the end of contig",locus);
|
||||
if(locus.getStop() > contigSize)
|
||||
throw new UserException.MalformedGenomeLoc("GenomeLoc is invalid: locus stop is after the end of contig",locus);
|
||||
if (locus.getStart() > locus.getStop()) {
|
||||
throw new UserException.MalformedGenomeLoc("Parameters to GenomeLocParser are incorrect: the start position is " +
|
||||
"greater than the end position", locus);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates each of the GenomeLocs in the list passed as an argument: each GenomeLoc must refer to a
|
||||
* valid contig, and its bounds must be within the bounds of its contig. Throws a MalformedGenomeLoc
|
||||
* exception if an invalid GenomeLoc is found.
|
||||
*
|
||||
* @param genomeLocList The list of GenomeLocs to validate
|
||||
*/
|
||||
public void validateGenomeLocList( List<GenomeLoc> genomeLocList ) {
|
||||
if ( genomeLocList == null ) {
|
||||
return;
|
||||
}
|
||||
|
||||
for ( GenomeLoc loc : genomeLocList ) {
|
||||
try {
|
||||
exceptionOnInvalidGenomeLoc(loc);
|
||||
exceptionOnInvalidGenomeLocBounds(loc);
|
||||
}
|
||||
catch ( UserException e ) {
|
||||
throw e;
|
||||
}
|
||||
catch ( ReviewedStingException e ) {
|
||||
throw new UserException.MalformedGenomeLoc(e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* validate a position or interval on the genome as valid
|
||||
*
|
||||
* @param contig the contig name
|
||||
* @param start the start position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return true if it's valid, false otherwise
|
||||
*
|
||||
* performs interval-style validation: contig is valid and atart and stop less than the end
|
||||
*/
|
||||
public boolean validGenomeLoc(String contig, int start, int stop) {
|
||||
if ( ! contigInfo.hasContig(contig) )
|
||||
return false;
|
||||
else
|
||||
return validGenomeLoc(getContigIndex(contig), start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
* validate a position or interval on the genome as valid
|
||||
*
|
||||
* @param contigIndex the contig name
|
||||
* @param start the start position
|
||||
* @param stop the stop position
|
||||
*
|
||||
* @return true if it's valid, false otherwise
|
||||
*
|
||||
* performs interval-style validation: contig is valid and atart and stop less than the end
|
||||
*/
|
||||
public boolean validGenomeLoc(int contigIndex, int start, int stop) {
|
||||
// quick check before we get the contig size, is the contig number valid
|
||||
if ((contigIndex < 0) || // the contig index has to be positive
|
||||
(contigIndex >= contigInfo.getNSequences())) // the contig must be in the integer range of contigs)
|
||||
return false;
|
||||
|
||||
int contigSize = contigInfo.getSequence(contigIndex).getSequenceLength();
|
||||
if ((start < 0) || // start must be greater than 0
|
||||
(stop < 0 ) || // the stop must be greater than 0
|
||||
(start > contigSize) || // the start must be before or equal to the contig end
|
||||
(stop > contigSize)) // the stop must also be before or equal to the contig end
|
||||
return false;
|
||||
|
||||
// we passed
|
||||
return true;
|
||||
}
|
||||
// /**
|
||||
// * Creates a new GenomeLoc without performing any validation on its contig or bounds.
|
||||
// * FOR UNIT TESTING PURPOSES ONLY!
|
||||
// *
|
||||
// * @param contig the contig name
|
||||
// * @param start start position of the interval
|
||||
// * @param stop stop position of the interval
|
||||
// * @return a new GenomeLoc representing the specified location
|
||||
// */
|
||||
// public GenomeLoc createGenomeLocWithoutValidation( String contig, int start, int stop ) {
|
||||
// return new GenomeLoc(contig, getContigIndexWithoutException(contig), start, stop);
|
||||
// }
|
||||
|
||||
/**
|
||||
* create a new genome loc from an existing loc, with a new start position
|
||||
|
|
@ -579,12 +474,11 @@ public class GenomeLocParser {
|
|||
*
|
||||
* @param loc the old location
|
||||
* @param start a new start position
|
||||
* TODO -- move to me GenomeLoc class itself
|
||||
*
|
||||
* @return the newly created genome loc
|
||||
*/
|
||||
public GenomeLoc setStart(GenomeLoc loc, int start) {
|
||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop()));
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -595,11 +489,10 @@ public class GenomeLocParser {
|
|||
* @param loc the old location
|
||||
* @param stop a new stop position
|
||||
*
|
||||
* TODO -- move to me GenomeLoc class itself
|
||||
* @return
|
||||
*/
|
||||
public GenomeLoc setStop(GenomeLoc loc, int stop) {
|
||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop));
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -607,7 +500,6 @@ public class GenomeLocParser {
|
|||
*
|
||||
* @param loc the old location
|
||||
*
|
||||
* TODO -- move to me GenomeLoc class itself
|
||||
* @return a new genome loc
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc) {
|
||||
|
|
@ -620,11 +512,10 @@ public class GenomeLocParser {
|
|||
* @param loc the old location
|
||||
* @param by how much to move the start and stop by
|
||||
*
|
||||
* TODO -- move to me GenomeLoc class itself
|
||||
* @return a new genome loc
|
||||
*/
|
||||
public GenomeLoc incPos(GenomeLoc loc, int by) {
|
||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by));
|
||||
return createGenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -636,6 +527,7 @@ public class GenomeLocParser {
|
|||
@Ensures("result != null")
|
||||
public GenomeLoc createOverEntireContig(String contigName) {
|
||||
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
|
||||
return exceptionOnInvalidGenomeLoc(new GenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength()));
|
||||
}
|
||||
return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -253,12 +253,14 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
* |------| + |--------|
|
||||
*
|
||||
*/
|
||||
GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1);
|
||||
GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop());
|
||||
if (after.getStop() - after.getStart() >= 0) {
|
||||
int afterStop = g.getStop(), afterStart = e.getStop() + 1;
|
||||
int beforeStop = e.getStart() - 1, beforeStart = g.getStart();
|
||||
if (afterStop - afterStart >= 0) {
|
||||
GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), afterStart, afterStop);
|
||||
l.add(after);
|
||||
}
|
||||
if (before.getStop() - before.getStart() >= 0) {
|
||||
if (beforeStop - beforeStart >= 0) {
|
||||
GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), beforeStart, beforeStop);
|
||||
l.add(before);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,39 +0,0 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
/**
|
||||
* User: hanna
|
||||
* Date: Jun 2, 2009
|
||||
* Time: 11:43:48 AM
|
||||
* BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT
|
||||
* Software and documentation are copyright 2005 by the Broad Institute.
|
||||
* All rights are reserved.
|
||||
*
|
||||
* Users acknowledge that this software is supplied without any warranty or support.
|
||||
* The Broad Institute is not responsible for its use, misuse, or
|
||||
* functionality.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Indicates that something was wrong with in the parameters passed to create a GenomeLoc...
|
||||
* bad sequence id out of bounds, etc.
|
||||
*/
|
||||
|
||||
public class MalformedGenomeLocException extends ReviewedStingException {
|
||||
/**
|
||||
* Create a new MalformedGenomeLocException with the given message. Does not preserve the existing stack trace.
|
||||
* @param message The message.
|
||||
*/
|
||||
public MalformedGenomeLocException( String message ) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new MalformedGenomeLocException with the given message and root cause.
|
||||
* @param message The message.
|
||||
* @param t The root cause.
|
||||
*/
|
||||
public MalformedGenomeLocException( String message, Throwable t ) {
|
||||
super(message,t);
|
||||
}
|
||||
}
|
||||
|
|
@ -88,7 +88,7 @@ public class BedParser {
|
|||
}
|
||||
|
||||
// we currently drop the rest of the bed record, which can contain names, scores, etc
|
||||
return genomeLocParser.createGenomeLoc(contig, start, stop);
|
||||
return genomeLocParser.createGenomeLoc(contig, start, stop, true);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.interval;
|
|||
import net.sf.picard.util.Interval;
|
||||
import net.sf.picard.util.IntervalList;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -24,6 +25,8 @@ import java.io.File;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class IntervalUtils {
|
||||
private static Logger logger = Logger.getLogger(IntervalUtils.class);
|
||||
|
||||
/**
|
||||
* Turns a set of strings describing intervals into a parsed set of intervals. Valid string elements can be files,
|
||||
* intervals in samtools notation (chrA:B-C), or some combination of the above separated by semicolons. Additionally,
|
||||
|
|
@ -70,7 +73,7 @@ public class IntervalUtils {
|
|||
|
||||
// otherwise treat as an interval -> parse and add to raw interval list
|
||||
else {
|
||||
rawIntervals.add(parser.parseGenomeInterval(fileOrInterval));
|
||||
rawIntervals.add(parser.parseGenomeLoc(fileOrInterval));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -103,28 +106,41 @@ public class IntervalUtils {
|
|||
* first try to read it as a Picard interval file since that's well structured
|
||||
* we'll fail quickly if it's not a valid file.
|
||||
*/
|
||||
boolean isPicardInterval = false;
|
||||
try {
|
||||
// Note: Picard will skip over intervals with contigs not in the sequence dictionary
|
||||
IntervalList il = IntervalList.fromFile(inputFile);
|
||||
isPicardInterval = true;
|
||||
|
||||
int nInvalidIntervals = 0;
|
||||
for (Interval interval : il.getIntervals()) {
|
||||
ret.add(glParser.createGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd()));
|
||||
if ( glParser.isValidGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true))
|
||||
ret.add(glParser.createGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true));
|
||||
else {
|
||||
nInvalidIntervals++;
|
||||
}
|
||||
}
|
||||
if ( nInvalidIntervals > 0 )
|
||||
logger.warn("Ignoring " + nInvalidIntervals + " invalid intervals from " + inputFile);
|
||||
}
|
||||
|
||||
// if that didn't work, try parsing file as a GATK interval file
|
||||
catch (Exception e) {
|
||||
try {
|
||||
XReadLines reader = new XReadLines(new File(file_name));
|
||||
for(String line: reader) {
|
||||
if ( line.trim().length() > 0 ) {
|
||||
ret.add(glParser.parseGenomeInterval(line));
|
||||
if ( isPicardInterval ) // definitely a picard file, but we failed to parse
|
||||
throw new UserException.CouldNotReadInputFile(inputFile, e);
|
||||
else {
|
||||
try {
|
||||
XReadLines reader = new XReadLines(new File(file_name));
|
||||
for(String line: reader) {
|
||||
if ( line.trim().length() > 0 ) {
|
||||
ret.add(glParser.parseGenomeLoc(line));
|
||||
}
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
catch (IOException e2) {
|
||||
throw new UserException.CouldNotReadInputFile(inputFile, e2);
|
||||
}
|
||||
reader.close();
|
||||
}
|
||||
catch (IOException e2) {
|
||||
throw new UserException.CouldNotReadInputFile(inputFile, e2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
String contig, int intervalStart, int intervalEnd ) throws Exception {
|
||||
|
||||
List<String> intervalArgs = new ArrayList<String>();
|
||||
List<GenomeLoc> rodIntervals = Arrays.asList(genomeLocParser.createGenomeLocWithoutValidation(contig, intervalStart, intervalEnd));
|
||||
List<GenomeLoc> rodIntervals = Arrays.asList(genomeLocParser.createGenomeLoc(contig, intervalStart, intervalEnd, true));
|
||||
|
||||
testEngine.loadIntervals(intervalArgs, rodIntervals);
|
||||
}
|
||||
|
|
@ -124,7 +124,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
// We need to adjust intervalStart, since BED intervals are 0-based. We don't need to adjust intervalEnd,
|
||||
// since the ending point is an open interval.
|
||||
File bedFile = createTempFile("testInvalidBedIntervalHandling", ".bed",
|
||||
String.format("%s %d %d", contig, intervalStart - 1, intervalEnd));
|
||||
String.format("%s %d %d", contig, intervalStart -1, intervalEnd));
|
||||
|
||||
List<String> intervalArgs = Arrays.asList(bedFile.getAbsolutePath());
|
||||
List<GenomeLoc> rodIntervals = new ArrayList<GenomeLoc>();
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ public class VariantJEXLContextUnitTest extends BaseTest {
|
|||
} catch (Exception e) {
|
||||
Assert.fail("Unable to create expression" + e.getMessage());
|
||||
}
|
||||
snpLoc = genomeLocParser.createGenomeLoc("chr1", 10, 10);
|
||||
snpLoc = genomeLocParser.createGenomeLoc("chr1", 10, 10, true);
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
|
|
|
|||
|
|
@ -275,7 +275,7 @@ public abstract class LocusViewTemplate extends BaseTest {
|
|||
private static ReferenceSequenceFile fakeReferenceSequenceFile() {
|
||||
return new ReferenceSequenceFile() {
|
||||
public SAMSequenceDictionary getSequenceDictionary() {
|
||||
SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("chr1");
|
||||
SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("chr1", 1000000);
|
||||
SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Collections.singletonList(sequenceRecord));
|
||||
return dictionary;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
|
||||
}
|
||||
|
||||
@Test(expectedExceptions=RuntimeException.class)
|
||||
@Test(expectedExceptions=UserException.MalformedGenomeLoc.class)
|
||||
public void testGetContigIndex() {
|
||||
assertEquals(genomeLocParser.getContigIndex("blah"), -1); // should not be in the reference
|
||||
}
|
||||
|
|
@ -78,9 +78,9 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testParseGoodString() {
|
||||
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-100");
|
||||
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10");
|
||||
assertEquals(0, loc.getContigIndex());
|
||||
assertEquals(loc.getStop(), 100);
|
||||
assertEquals(loc.getStop(), 10);
|
||||
assertEquals(loc.getStart(), 1);
|
||||
}
|
||||
|
||||
|
|
@ -165,7 +165,7 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
assertEquals(loc.getStart(), 1);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions=ReviewedStingException.class)
|
||||
@Test(expectedExceptions=UserException.class)
|
||||
public void testGenomeLocBad2() {
|
||||
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-500-0");
|
||||
assertEquals(loc.getContigIndex(), 0);
|
||||
|
|
@ -173,7 +173,7 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
assertEquals(loc.getStart(), 1);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions=ReviewedStingException.class)
|
||||
@Test(expectedExceptions=UserException.class)
|
||||
public void testGenomeLocBad3() {
|
||||
GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1--0");
|
||||
assertEquals(loc.getContigIndex(), 0);
|
||||
|
|
@ -184,19 +184,11 @@ public class GenomeLocParserUnitTest extends BaseTest {
|
|||
// test out the validating methods
|
||||
@Test
|
||||
public void testValidationOfGenomeLocs() {
|
||||
assertTrue(genomeLocParser.validGenomeLoc("chr1",1,1));
|
||||
assertTrue(!genomeLocParser.validGenomeLoc("chr2",1,1)); // shouldn't have an entry
|
||||
assertTrue(!genomeLocParser.validGenomeLoc("chr1",1,11)); // past the end of the contig
|
||||
assertTrue(!genomeLocParser.validGenomeLoc("chr1",-1,10)); // bad start
|
||||
assertTrue(!genomeLocParser.validGenomeLoc("chr1",1,-2)); // bad stop
|
||||
assertTrue(!genomeLocParser.validGenomeLoc("chr1",10,11)); // bad start, past end
|
||||
|
||||
assertTrue(genomeLocParser.validGenomeLoc(0,1,1));
|
||||
assertTrue(!genomeLocParser.validGenomeLoc(1,1,1)); // shouldn't have an entry
|
||||
assertTrue(!genomeLocParser.validGenomeLoc(0,1,11)); // past the end of the contig
|
||||
assertTrue(!genomeLocParser.validGenomeLoc(-1,0,10)); // bad start
|
||||
assertTrue(!genomeLocParser.validGenomeLoc(0,1,-2)); // bad stop
|
||||
assertTrue(!genomeLocParser.validGenomeLoc(0,10,11)); // bad start, past end
|
||||
|
||||
assertTrue(genomeLocParser.isValidGenomeLoc("chr1",1,1));
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr2",1,1)); // shouldn't have an entry
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
|
||||
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
return hg18ReferenceLocs;
|
||||
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
|
||||
for (String interval: intervals)
|
||||
locs.add(hg18GenomeLocParser.parseGenomeInterval(interval));
|
||||
locs.add(hg18GenomeLocParser.parseGenomeLoc(interval));
|
||||
return locs;
|
||||
}
|
||||
|
||||
|
|
@ -167,9 +167,9 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testFixedScatterIntervalsBasic() {
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
|
||||
|
||||
List<File> files = testFiles("basic.", 3, ".intervals");
|
||||
|
||||
|
|
@ -192,10 +192,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testScatterFixedIntervalsLessFiles() {
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3");
|
||||
GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeInterval("chr4");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
|
||||
GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeLoc("chr4");
|
||||
|
||||
List<File> files = testFiles("less.", 3, ".intervals");
|
||||
|
||||
|
|
@ -234,10 +234,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testScatterFixedIntervalsStart() {
|
||||
List<String> intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
|
||||
GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeInterval("chr1:1-2");
|
||||
GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeInterval("chr1:4-5");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
||||
GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeLoc("chr1:1-2");
|
||||
GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeLoc("chr1:4-5");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
|
||||
|
||||
List<File> files = testFiles("split.", 3, ".intervals");
|
||||
|
||||
|
|
@ -262,10 +262,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testScatterFixedIntervalsMiddle() {
|
||||
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
||||
GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeInterval("chr2:1-2");
|
||||
GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeInterval("chr2:4-5");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
|
||||
GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeLoc("chr2:1-2");
|
||||
GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeLoc("chr2:4-5");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
|
||||
|
||||
List<File> files = testFiles("split.", 3, ".intervals");
|
||||
|
||||
|
|
@ -290,10 +290,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testScatterFixedIntervalsEnd() {
|
||||
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:2-2");
|
||||
GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeInterval("chr3:1-2");
|
||||
GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeInterval("chr3:4-5");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-2");
|
||||
GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeLoc("chr3:1-2");
|
||||
GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeLoc("chr3:4-5");
|
||||
|
||||
List<File> files = testFiles("split.", 3, ".intervals");
|
||||
|
||||
|
|
@ -322,10 +322,13 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
List<Integer> splits = IntervalUtils.splitFixedIntervals(locs, files.size());
|
||||
|
||||
int[] counts = {
|
||||
5169, 5573, 10017, 10567, 10551,
|
||||
5087, 4908, 10120, 10435, 10399,
|
||||
5391, 4735, 10621, 10352, 10654,
|
||||
5227, 5256, 10151, 9649, 9825
|
||||
125, 138, 287, 291, 312, 105, 155, 324,
|
||||
295, 298, 141, 121, 285, 302, 282, 88,
|
||||
116, 274, 282, 248
|
||||
// 5169, 5573, 10017, 10567, 10551,
|
||||
// 5087, 4908, 10120, 10435, 10399,
|
||||
// 5391, 4735, 10621, 10352, 10654,
|
||||
// 5227, 5256, 10151, 9649, 9825
|
||||
};
|
||||
|
||||
//String splitCounts = "";
|
||||
|
|
@ -368,9 +371,9 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testScatterContigIntervalsOrder() {
|
||||
List<String> intervals = Arrays.asList("chr2:1-1", "chr1:1-1", "chr3:2-2");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
|
||||
|
||||
List<File> files = testFiles("split.", 3, ".intervals");
|
||||
|
||||
|
|
@ -391,9 +394,9 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testScatterContigIntervalsBasic() {
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
|
||||
|
||||
List<File> files = testFiles("contig_basic.", 3, ".intervals");
|
||||
|
||||
|
|
@ -414,10 +417,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testScatterContigIntervalsLessFiles() {
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3");
|
||||
GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeInterval("chr4");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3");
|
||||
GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeLoc("chr4");
|
||||
|
||||
List<File> files = testFiles("contig_less.", 3, ".intervals");
|
||||
|
||||
|
|
@ -446,10 +449,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testScatterContigIntervalsStart() {
|
||||
List<String> intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2");
|
||||
GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeInterval("chr1:1-2");
|
||||
GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeInterval("chr1:4-5");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
||||
GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeLoc("chr1:1-2");
|
||||
GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeLoc("chr1:4-5");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
|
||||
|
||||
List<File> files = testFiles("contig_split_start.", 3, ".intervals");
|
||||
|
||||
|
|
@ -472,10 +475,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testScatterContigIntervalsMiddle() {
|
||||
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
||||
GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeInterval("chr2:1-2");
|
||||
GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeInterval("chr2:4-5");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
|
||||
GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeLoc("chr2:1-2");
|
||||
GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeLoc("chr2:4-5");
|
||||
GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2");
|
||||
|
||||
List<File> files = testFiles("contig_split_middle.", 3, ".intervals");
|
||||
|
||||
|
|
@ -498,10 +501,10 @@ public class IntervalUtilsUnitTest extends BaseTest {
|
|||
@Test
|
||||
public void testScatterContigIntervalsEnd() {
|
||||
List<String> intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:2-2");
|
||||
GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeInterval("chr3:1-2");
|
||||
GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeInterval("chr3:4-5");
|
||||
GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1");
|
||||
GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-2");
|
||||
GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeLoc("chr3:1-2");
|
||||
GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeLoc("chr3:4-5");
|
||||
|
||||
List<File> files = testFiles("contig_split_end.", 3 ,".intervals");
|
||||
|
||||
|
|
|
|||
|
|
@ -121,7 +121,7 @@ class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, ip
|
|||
def parseGenomeInterval( s : String ) : GenomeLoc = {
|
||||
val sp = s.split("\\s+")
|
||||
// todo -- maybe specify whether the bed format [0,6) --> (1,2,3,4,5) is what's wanted
|
||||
if ( s.contains(":") ) parser.parseGenomeInterval(s) else parser.createGenomeLoc(sp(0),sp(1).toInt+offsetIn,sp(2).toInt-offsetIn)
|
||||
if ( s.contains(":") ) parser.parseGenomeLoc(s) else parser.createGenomeLoc(sp(0),sp(1).toInt+offsetIn,sp(2).toInt-offsetIn)
|
||||
}
|
||||
|
||||
object IntervalFormatType extends Enumeration("INTERVALS","BED","TDF") {
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ class VCFSimpleMerge extends InProcessFunction {
|
|||
|
||||
def genomeLoc(xrl : PeekableXRL, p : GenomeLocParser ) : GenomeLoc = {
|
||||
var slp = xrl.peek.split("\t",3)
|
||||
return p.parseGenomeLoc(slp(0),Integer.parseInt(slp(1)),Integer.parseInt(slp(1)))
|
||||
return p.createGenomeLoc(slp(0),Integer.parseInt(slp(1)),Integer.parseInt(slp(1)))
|
||||
}
|
||||
|
||||
def run = {
|
||||
|
|
|
|||
|
|
@ -46,9 +46,9 @@ class GATKIntervalsUnitTest {
|
|||
|
||||
@Test
|
||||
def testWithIntervals() {
|
||||
val chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1")
|
||||
val chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:2-3")
|
||||
val chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:3-5")
|
||||
val chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1")
|
||||
val chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-3")
|
||||
val chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:3-5")
|
||||
|
||||
val gi = new GATKIntervals(hg18Reference, List("chr1:1-1", "chr2:2-3", "chr3:3-5"))
|
||||
Assert.assertEquals(gi.locs.toList, List(chr1, chr2, chr3))
|
||||
|
|
|
|||
Loading…
Reference in New Issue