diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 15c375369..af355ab03 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -655,10 +655,6 @@ public class GenomeAnalysisEngine { argCollection.unsafe == ValidationExclusion.TYPE.ALL); List nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList); - - genomeLocParser.validateGenomeLocList(nonRODIntervals); - genomeLocParser.validateGenomeLocList(rodIntervals); - List allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule); return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging); diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 212a09937..fe6495b64 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -795,7 +795,7 @@ public class VariantContextUtils { * @return the genomeLoc */ public static final GenomeLoc getLocation(GenomeLocParser genomeLocParser,VariantContext vc) { - return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd()); + return genomeLocParser.createGenomeLoc(vc.getChr(),(int)vc.getStart(),(int)vc.getEnd(), true); } public abstract static class AlleleMergeRule { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java index 964d61be9..59cd14a22 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableCodec.java @@ -93,7 +93,7 @@ public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec< GenomeLoc loc; String chr = st.nextToken(); if ( chr.indexOf(":") != -1 ) { - loc = genomeLocParser.parseGenomeInterval(chr); + loc = genomeLocParser.parseGenomeLoc(chr); } else { if ( st.countTokens() < 3 ) throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line); @@ -123,7 +123,7 @@ public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec< GenomeLoc loc; if ( values.get(0).indexOf(":") != -1 ) - loc = genomeLocParser.parseGenomeInterval(values.get(0)); + loc = genomeLocParser.parseGenomeLoc(values.get(0)); else loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2))); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java index 580ed08a5..7f97451cf 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/beagle/BeagleCodec.java @@ -177,7 +177,7 @@ public class BeagleCodec implements ReferenceDependentFeatureCodec exon_frames = new ArrayList(eframes.length); for ( int i = 0 ; i < exon_starts.length ; i++ ) { - exons.add(genomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); + exons.add(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) ); exon_frames.add(Integer.decode(eframes[i])); } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java b/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java index 3405f6f92..101784d97 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/utils/StringToGenomeLocIteratorAdapter.java @@ -84,7 +84,7 @@ public class StringToGenomeLocIteratorAdapter implements Iterator { public GenomeLoc next() { - if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeInterval( it.next() ); + if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeLoc(it.next()); return BedParser.parseLocation( genomeLocParser,it.next() ); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 052e6363b..484981e44 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -245,7 +245,7 @@ public class IndelRealigner extends ReadWalker { if (IntervalUtils.isIntervalFile(fileOrInterval)) { merger.add(new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) ); } else { - rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeInterval(fileOrInterval)); + rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeLoc(fileOrInterval)); } } if ( ! rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator()); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java index 5f6a342ae..b536a0cb5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java @@ -275,7 +275,7 @@ public class RealignerTargetCreator extends RodWalker= 0 && eventStopPos - eventStartPos < maxIntervalSize; + return getToolkit().getGenomeLocParser().isValidGenomeLoc(loc.getContig(), eventStartPos, eventStopPos, true) && eventStopPos >= 0 && eventStopPos - eventStartPos < maxIntervalSize; } public String toString() { diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java index e9e333d2e..4f8d84965 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/phasing/CalcFullHaplotypesWalker.java @@ -101,7 +101,7 @@ public class CalcFullHaplotypesWalker extends RodWalker { if (waitingHaplotype == null) {// changed to a new contig: // Set the new haplotype to extend from [1, prevPosition] if (prevPosition >= 1) { - GenomeLoc startInterval = getToolkit().getGenomeLocParser().parseGenomeLoc(curLocus.getContig(), 1, prevPosition); + GenomeLoc startInterval = getToolkit().getGenomeLocParser().createGenomeLoc(curLocus.getContig(), 1, prevPosition); waitingHaplotype = new Haplotype(startInterval, sampleHapEntry.getKey()); sampleHapEntry.setValue(waitingHaplotype); } @@ -201,7 +201,7 @@ public class CalcFullHaplotypesWalker extends RodWalker { public void extend(int stop) { if (stop > interval.getStop()) - interval = getToolkit().getGenomeLocParser().parseGenomeLoc(interval.getContig(), interval.getStart(), stop); + interval = getToolkit().getGenomeLocParser().createGenomeLoc(interval.getContig(), interval.getStart(), stop); } public void incrementHetCount() { diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/java/src/org/broadinstitute/sting/utils/GenomeLoc.java index f94586183..a8b774986 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLoc.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLoc.java @@ -13,7 +13,9 @@ import java.io.Serializable; * Date: Mar 2, 2009 * Time: 8:50:11 AM * - * Genome location representation. It is *** 1 *** based closed. + * Genome location representation. It is *** 1 *** based closed. Note that GenomeLocs start and stop values + * can be any positive or negative number, by design. Bound validation is a feature of the GenomeLocParser, + * and not a fundamental constraint of the GenomeLoc */ public class GenomeLoc implements Comparable, Serializable, HasGenomeLocation { /** @@ -48,7 +50,7 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome @Requires({ "contig != null", "contigIndex >= 0", // I believe we aren't allowed to create GenomeLocs without a valid contigIndex - "start >= 0 && start <= stop"}) + "start <= stop"}) protected GenomeLoc( final String contig, final int contigIndex, final int start, final int stop ) { this.contigName = contig; this.contigIndex = contigIndex; @@ -122,8 +124,7 @@ public class GenomeLoc implements Comparable, Serializable, HasGenome */ @Requires({ "that != null", - "! isUnmapped(this)", - "! isUnmapped(that)"}) + "isUnmapped(this) == isUnmapped(that)"}) @Ensures("result != null") public GenomeLoc merge( GenomeLoc that ) throws ReviewedStingException { if(GenomeLoc.isUnmapped(this) || GenomeLoc.isUnmapped(that)) { diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java index 42c17f104..732d52b2b 100644 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocParser.java @@ -25,8 +25,6 @@ package org.broadinstitute.sting.utils; -import java.util.List; - import com.google.java.contract.*; import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMRecord; @@ -178,6 +176,21 @@ public class GenomeLocParser { } } + /** + * Determines whether the given contig is valid with respect to the sequence dictionary + * already installed in the GenomeLoc. + * + * @return True if the contig is valid. False otherwise. + */ + public boolean contigIsInDictionary(String contig) { + return contig != null && contigInfo.hasContig(contig); + } + + public boolean indexIsInDictionary(final int index) { + return index >= 0 && contigInfo.hasContig(index); + } + + /** * get the contig's SAMSequenceRecord * @@ -185,27 +198,14 @@ public class GenomeLocParser { * * @return the sam sequence record */ - @Requires({"contig != null", "contigIsInDictionary(contig)"}) @Ensures("result != null") + @ThrowEnsures({"UserException.MalformedGenomeLoc", "!contigIsInDictionary(contig) || contig == null"}) public SAMSequenceRecord getContigInfo(final String contig) { + if ( contig == null || ! contigIsInDictionary(contig) ) + throw new UserException.MalformedGenomeLoc(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig)); return contigInfo.getSequence(contig); } - /** - * Determines whether the given contig is valid with respect to the sequence dictionary - * already installed in the GenomeLoc. - * - * @return True if the contig is valid. False otherwise. - */ - @Requires({"contig != null"}) - public boolean contigIsInDictionary(String contig) { - return contigInfo.hasContig(contig); - } - - public boolean indexIsInDictionary(final int index) { - return contigInfo.hasContig(index); - } - /** * Returns the contig index of a specified string version of the contig * @@ -213,47 +213,126 @@ public class GenomeLocParser { * * @return the contig index, -1 if not found */ - @Requires("contig != null") @Ensures("result >= 0") + @ThrowEnsures({"UserException.MalformedGenomeLoc", "!contigIsInDictionary(contig) || contig == null"}) public int getContigIndex(final String contig) { - if (! contigInfo.hasContig(contig) ) - throw new UserException.MalformedGenomeLoc(String.format("Contig %s given as location, but this contig isn't present in the Fasta sequence dictionary", contig)); - return contigInfo.getSequenceIndex(contig); + return getContigInfo(contig).getSequenceIndex(); } @Requires("contig != null") protected int getContigIndexWithoutException(final String contig) { - if (! contigInfo.hasContig(contig)) + if ( contig == null || ! contigInfo.hasContig(contig) ) return -1; return contigInfo.getSequenceIndex(contig); } + // -------------------------------------------------------------------------------------------------------------- + // + // Low-level creation functions + // + // -------------------------------------------------------------------------------------------------------------- + /** + * create a genome loc, given the contig name, start, and stop + * + * @param contig the contig name + * @param start the starting position + * @param stop the stop position + * + * @return a new genome loc + */ + @Ensures("result != null") + @ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop)"}) + public GenomeLoc createGenomeLoc(String contig, final int start, final int stop) { + return createGenomeLoc(contig, getContigIndex(contig), start, stop); + } + + public GenomeLoc createGenomeLoc(String contig, final int start, final int stop, boolean mustBeOnReference) { + return createGenomeLoc(contig, getContigIndex(contig), start, stop, mustBeOnReference); + } + + @ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop, false)"}) + public GenomeLoc createGenomeLoc(String contig, int index, final int start, final int stop) { + return createGenomeLoc(contig, index, start, stop, false); + } + + @ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, start, stop,mustBeOnReference)"}) + public GenomeLoc createGenomeLoc(String contig, int index, final int start, final int stop, boolean mustBeOnReference) { + validateGenomeLoc(contig, index, start, stop, mustBeOnReference, true); + return new GenomeLoc(contig, index, start, stop); + } + + /** + * validate a position or interval on the genome as valid + * + * Requires that contig exist in the master sequence dictionary, and that contig index be valid as well. Requires + * that start <= stop. + * + * if mustBeOnReference is true, + * performs boundary validation for genome loc INTERVALS: + * start and stop are on contig and start <= stop + * + * @param contig the contig name + * @param start the start position + * @param stop the stop position + * + * @return true if it's valid, false otherwise. If exceptOnError, then throws a UserException if invalid + */ + private boolean validateGenomeLoc(String contig, int contigIndex, int start, int stop, boolean mustBeOnReference, boolean exceptOnError) { + if ( ! contigInfo.hasContig(contig) ) + return vglHelper(exceptOnError, String.format("Unknown contig %s", contig)); + + if (stop < start) + return vglHelper(exceptOnError, String.format("The stop position %d is less than start %d", stop, start)); + + if (contigIndex < 0) + return vglHelper(exceptOnError, String.format("The contig index %d is less than 0", contigIndex)); + + if (contigIndex >= contigInfo.getNSequences()) + return vglHelper(exceptOnError, String.format("The contig index %d is greater than the stored sequence count (%d)", contigIndex, contigInfo.getNSequences())); + + if ( mustBeOnReference ) { + if (start < 0) + return vglHelper(exceptOnError, String.format("The start position %d is less than 0", start)); + + if (stop < 0) + return vglHelper(exceptOnError, String.format("The stop position %d is less than 0", stop)); + + int contigSize = contigInfo.getSequence(contigIndex).getSequenceLength(); + if (start > contigSize || stop > contigSize) + return vglHelper(exceptOnError, String.format("The genome loc coordinates %d-%d exceed the contig size (%d)", start, stop, contigSize)); + } + + // we passed + return true; + } + + public boolean isValidGenomeLoc(String contig, int start, int stop, boolean mustBeOnReference ) { + return validateGenomeLoc(contig, getContigIndexWithoutException(contig), start, stop, mustBeOnReference, false); + } + + public boolean isValidGenomeLoc(String contig, int start, int stop ) { + return validateGenomeLoc(contig, getContigIndexWithoutException(contig), start, stop, true, false); + } + + private boolean vglHelper(boolean exceptOnError, String msg) { + if ( exceptOnError ) + throw new UserException.MalformedGenomeLoc("Parameters to GenomeLocParser are incorrect:" + msg); + else + return false; + } + + // -------------------------------------------------------------------------------------------------------------- + // + // Parsing genome locs + // + // -------------------------------------------------------------------------------------------------------------- + /** * parse a genome interval, from a location string * * Performs interval-style validation: * - * contig is valid; start and stop less than the end; start <= sto - * @param str the string to parse - * - * @return a GenomeLoc representing the String - * - */ - @Requires("str != null") - @Ensures("result != null") - public GenomeLoc parseGenomeInterval(final String str) { - GenomeLoc ret = parseGenomeLoc(str); - exceptionOnInvalidGenomeLocBounds(ret); - return ret; - } - - /** - * parse a genome location, from a location string - * - * Performs read-style validation: - * checks that start and stop are positive, start < stop, and the contig is valid - * does not check that genomeLoc is actually on the contig - * + * contig is valid; start and stop less than the end; start <= stop, and start/stop are on the contig * @param str the string to parse * * @return a GenomeLoc representing the String @@ -296,22 +375,15 @@ public class GenomeLocParser { // is the contig valid? if (!contigIsInDictionary(contig)) - throw new UserException("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?"); + throw new UserException.MalformedGenomeLoc("Contig '" + contig + "' does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?"); if (stop == Integer.MAX_VALUE) // lookup the actually stop position! stop = getContigInfo(contig).getSequenceLength(); - GenomeLoc locus = new GenomeLoc(contig, getContigIndex(contig), start, stop); - exceptionOnInvalidGenomeLoc(locus); - return locus; + return createGenomeLoc(contig, getContigIndex(contig), start, stop, true); } - // -------------------------------------------------------------------------------------------------------------- - // - // Parsing string representations - // - // -------------------------------------------------------------------------------------------------------------- /** * Parses a number like 1,000,000 into a long. * @param pos @@ -342,49 +414,11 @@ public class GenomeLocParser { } } - /** - * Use this static constructor when the input data is under limited control (i.e. parsing user data). - * - * @param contig Contig to parse. - * @param start Starting point. - * @param stop Stop point. - * - * @return The genome location, or a MalformedGenomeLocException if unparseable. - * - * Validation: only checks that contig is valid - * start/stop could be anything - */ - public GenomeLoc parseGenomeLoc(final String contig, int start, int stop) { - if (!contigIsInDictionary(contig)) - throw new MalformedGenomeLocException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference; are you sure you are using the correct reference fasta file?"); - return new GenomeLoc(contig, getContigIndex(contig), start, stop); - } - - /** - * get the sequence name from a sequence index - * - * @param contigIndex get the contig index - * - * @return the string that represents that contig name - */ - @Requires("indexIsInDictionary(contigIndex)") - @Ensures("result != null") - private String getSequenceNameFromIndex(int contigIndex) { - return contigInfo.getSequence(contigIndex).getSequenceName(); - } - - /** - * create a genome loc, given the contig name, start, and stop - * - * @param contig the contig name - * @param start the starting position - * @param stop the stop position - * - * @return a new genome loc - */ - public GenomeLoc createGenomeLoc(String contig, final int start, final int stop) { - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig), start, stop)); - } + // -------------------------------------------------------------------------------------------------------------- + // + // Parsing string representations + // + // -------------------------------------------------------------------------------------------------------------- /** * create a genome loc, given a read. If the read is unmapped, *and* yet the read has a contig and start position, @@ -394,183 +428,44 @@ public class GenomeLocParser { * * @return */ + @Requires("read != null") + @Ensures("result != null") public GenomeLoc createGenomeLoc(final SAMRecord read) { - GenomeLoc loc; - if ( read.getReadUnmappedFlag() && read.getReferenceIndex() == -1 ) // read is unmapped and not placed anywhere on the genome - loc = GenomeLoc.UNMAPPED; + return GenomeLoc.UNMAPPED; else { int end = read.getReadUnmappedFlag() ? read.getAlignmentStart() : read.getAlignmentEnd(); - loc = new GenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), end); + return createGenomeLoc(read.getReferenceName(), read.getReferenceIndex(), read.getAlignmentStart(), end, false); } - - return exceptionOnInvalidGenomeLoc(loc); } /** - * create a new genome loc, given the contig name, and a single position + * create a new genome loc, given the contig name, and a single position. Must be on the reference * * @param contig the contig name * @param pos the postion * * @return a genome loc representing a single base at the specified postion on the contig */ + @Ensures("result != null") + @ThrowEnsures({"UserException.MalformedGenomeLoc", "!isValidGenomeLoc(contig, pos, pos, true)"}) public GenomeLoc createGenomeLoc(final String contig, final int pos) { - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contig, getContigIndex(contig), pos, pos)); + return createGenomeLoc(contig, getContigIndex(contig), pos, pos); } - /** - * Creates a new GenomeLoc without performing any validation on its contig or bounds. - * FOR UNIT TESTING PURPOSES ONLY! - * - * @param contig the contig name - * @param start start position of the interval - * @param stop stop position of the interval - * @return a new GenomeLoc representing the specified location - */ - public GenomeLoc createGenomeLocWithoutValidation( String contig, int start, int stop ) { - return new GenomeLoc(contig, getContigIndexWithoutException(contig), start, stop); - } - - /** - * verify the specified genome loc is valid, if it's not, throw an exception - * Will not verify the location against contig bounds. - * - * - * Validation: - * checks that start and stop are positive, start < stop, and the contig is valid - * does not check that genomeLoc is actually on the contig, so start could be > end of contig - * - * @param toReturn the genome loc we're about to return - * - * @return the genome loc if it's valid, otherwise we throw an exception - * - */ - private GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) { - if ( GenomeLoc.isUnmapped(toReturn) ) { - return toReturn; - } - - if (toReturn.getStart() < 0) { - throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the start position is less than 0 " + - "in interval: " + toReturn); - } - if ((toReturn.getStop() != -1) && (toReturn.getStop() < 0)) { - throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the stop position is less than 0 " + - "in interval: " + toReturn); - } - if (toReturn.getContigIndex() < 0) { - throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the contig index is less than 0 " + - "in interval: " + toReturn); - } - if (toReturn.getContigIndex() >= contigInfo.getNSequences()) { - throw new ReviewedStingException("Parameters to GenomeLocParser are incorrect: the contig index is greater than " + - "the stored sequence count in interval: " + toReturn); - - } - return toReturn; - - } - - /** - * Verify the locus against the bounds of the contig. - * - * performs boundary validation for genome loc INTERVALS: - * start and stop are on contig and start <= stop - * does NOT check that start and stop > 0, or that contig is valid - * for that reason, this function should only be called AFTER exceptionOnInvalidGenomeLoc() - * exceptionOnInvalidGenomeLoc isn't included in this function to save time - * - * @param locus Locus to verify. - */ - private void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) { - if ( GenomeLoc.isUnmapped(locus) ) { - return; - } - - int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength(); - if(locus.getStart() > contigSize) - throw new UserException.MalformedGenomeLoc("GenomeLoc is invalid: locus start is after the end of contig",locus); - if(locus.getStop() > contigSize) - throw new UserException.MalformedGenomeLoc("GenomeLoc is invalid: locus stop is after the end of contig",locus); - if (locus.getStart() > locus.getStop()) { - throw new UserException.MalformedGenomeLoc("Parameters to GenomeLocParser are incorrect: the start position is " + - "greater than the end position", locus); - } - } - - /** - * Validates each of the GenomeLocs in the list passed as an argument: each GenomeLoc must refer to a - * valid contig, and its bounds must be within the bounds of its contig. Throws a MalformedGenomeLoc - * exception if an invalid GenomeLoc is found. - * - * @param genomeLocList The list of GenomeLocs to validate - */ - public void validateGenomeLocList( List genomeLocList ) { - if ( genomeLocList == null ) { - return; - } - - for ( GenomeLoc loc : genomeLocList ) { - try { - exceptionOnInvalidGenomeLoc(loc); - exceptionOnInvalidGenomeLocBounds(loc); - } - catch ( UserException e ) { - throw e; - } - catch ( ReviewedStingException e ) { - throw new UserException.MalformedGenomeLoc(e.getMessage()); - } - } - } - - /** - * validate a position or interval on the genome as valid - * - * @param contig the contig name - * @param start the start position - * @param stop the stop position - * - * @return true if it's valid, false otherwise - * - * performs interval-style validation: contig is valid and atart and stop less than the end - */ - public boolean validGenomeLoc(String contig, int start, int stop) { - if ( ! contigInfo.hasContig(contig) ) - return false; - else - return validGenomeLoc(getContigIndex(contig), start, stop); - } - - /** - * validate a position or interval on the genome as valid - * - * @param contigIndex the contig name - * @param start the start position - * @param stop the stop position - * - * @return true if it's valid, false otherwise - * - * performs interval-style validation: contig is valid and atart and stop less than the end - */ - public boolean validGenomeLoc(int contigIndex, int start, int stop) { - // quick check before we get the contig size, is the contig number valid - if ((contigIndex < 0) || // the contig index has to be positive - (contigIndex >= contigInfo.getNSequences())) // the contig must be in the integer range of contigs) - return false; - - int contigSize = contigInfo.getSequence(contigIndex).getSequenceLength(); - if ((start < 0) || // start must be greater than 0 - (stop < 0 ) || // the stop must be greater than 0 - (start > contigSize) || // the start must be before or equal to the contig end - (stop > contigSize)) // the stop must also be before or equal to the contig end - return false; - - // we passed - return true; - } +// /** +// * Creates a new GenomeLoc without performing any validation on its contig or bounds. +// * FOR UNIT TESTING PURPOSES ONLY! +// * +// * @param contig the contig name +// * @param start start position of the interval +// * @param stop stop position of the interval +// * @return a new GenomeLoc representing the specified location +// */ +// public GenomeLoc createGenomeLocWithoutValidation( String contig, int start, int stop ) { +// return new GenomeLoc(contig, getContigIndexWithoutException(contig), start, stop); +// } /** * create a new genome loc from an existing loc, with a new start position @@ -579,12 +474,11 @@ public class GenomeLocParser { * * @param loc the old location * @param start a new start position - * TODO -- move to me GenomeLoc class itself * * @return the newly created genome loc */ public GenomeLoc setStart(GenomeLoc loc, int start) { - return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop())); + return createGenomeLoc(loc.getContig(), loc.getContigIndex(), start, loc.getStop()); } /** @@ -595,11 +489,10 @@ public class GenomeLocParser { * @param loc the old location * @param stop a new stop position * - * TODO -- move to me GenomeLoc class itself * @return */ public GenomeLoc setStop(GenomeLoc loc, int stop) { - return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop)); + return createGenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start, stop); } /** @@ -607,7 +500,6 @@ public class GenomeLocParser { * * @param loc the old location * - * TODO -- move to me GenomeLoc class itself * @return a new genome loc */ public GenomeLoc incPos(GenomeLoc loc) { @@ -620,11 +512,10 @@ public class GenomeLocParser { * @param loc the old location * @param by how much to move the start and stop by * - * TODO -- move to me GenomeLoc class itself * @return a new genome loc */ public GenomeLoc incPos(GenomeLoc loc, int by) { - return exceptionOnInvalidGenomeLoc(new GenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by)); + return createGenomeLoc(loc.getContig(), loc.getContigIndex(), loc.start + by, loc.stop + by); } /** @@ -636,6 +527,7 @@ public class GenomeLocParser { @Ensures("result != null") public GenomeLoc createOverEntireContig(String contigName) { SAMSequenceRecord contig = contigInfo.getSequence(contigName); - return exceptionOnInvalidGenomeLoc(new GenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength())); - } + return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true); + } + } diff --git a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java index efcc27cf2..fd7a79f48 100755 --- a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java +++ b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java @@ -253,12 +253,14 @@ public class GenomeLocSortedSet extends AbstractSet { * |------| + |--------| * */ - GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), g.getStart(), e.getStart() - 1); - GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), e.getStop() + 1, g.getStop()); - if (after.getStop() - after.getStart() >= 0) { + int afterStop = g.getStop(), afterStart = e.getStop() + 1; + int beforeStop = e.getStart() - 1, beforeStart = g.getStart(); + if (afterStop - afterStart >= 0) { + GenomeLoc after = genomeLocParser.createGenomeLoc(g.getContig(), afterStart, afterStop); l.add(after); } - if (before.getStop() - before.getStart() >= 0) { + if (beforeStop - beforeStart >= 0) { + GenomeLoc before = genomeLocParser.createGenomeLoc(g.getContig(), beforeStart, beforeStop); l.add(before); } diff --git a/java/src/org/broadinstitute/sting/utils/MalformedGenomeLocException.java b/java/src/org/broadinstitute/sting/utils/MalformedGenomeLocException.java deleted file mode 100644 index ce554397c..000000000 --- a/java/src/org/broadinstitute/sting/utils/MalformedGenomeLocException.java +++ /dev/null @@ -1,39 +0,0 @@ -package org.broadinstitute.sting.utils; - -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -/** - * User: hanna - * Date: Jun 2, 2009 - * Time: 11:43:48 AM - * BROAD INSTITUTE SOFTWARE COPYRIGHT NOTICE AND AGREEMENT - * Software and documentation are copyright 2005 by the Broad Institute. - * All rights are reserved. - * - * Users acknowledge that this software is supplied without any warranty or support. - * The Broad Institute is not responsible for its use, misuse, or - * functionality. - */ - -/** - * Indicates that something was wrong with in the parameters passed to create a GenomeLoc... - * bad sequence id out of bounds, etc. - */ - -public class MalformedGenomeLocException extends ReviewedStingException { - /** - * Create a new MalformedGenomeLocException with the given message. Does not preserve the existing stack trace. - * @param message The message. - */ - public MalformedGenomeLocException( String message ) { - super(message); - } - - /** - * Create a new MalformedGenomeLocException with the given message and root cause. - * @param message The message. - * @param t The root cause. - */ - public MalformedGenomeLocException( String message, Throwable t ) { - super(message,t); - } -} diff --git a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java index b7ca11f9c..abcae066f 100644 --- a/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ b/java/src/org/broadinstitute/sting/utils/bed/BedParser.java @@ -88,7 +88,7 @@ public class BedParser { } // we currently drop the rest of the bed record, which can contain names, scores, etc - return genomeLocParser.createGenomeLoc(contig, start, stop); + return genomeLocParser.createGenomeLoc(contig, start, stop, true); } diff --git a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index fea7d08d8..80dc35455 100644 --- a/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.utils.interval; import net.sf.picard.util.Interval; import net.sf.picard.util.IntervalList; import net.sf.samtools.SAMFileHeader; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.GenomeLoc; @@ -24,6 +25,8 @@ import java.io.File; * @version 0.1 */ public class IntervalUtils { + private static Logger logger = Logger.getLogger(IntervalUtils.class); + /** * Turns a set of strings describing intervals into a parsed set of intervals. Valid string elements can be files, * intervals in samtools notation (chrA:B-C), or some combination of the above separated by semicolons. Additionally, @@ -70,7 +73,7 @@ public class IntervalUtils { // otherwise treat as an interval -> parse and add to raw interval list else { - rawIntervals.add(parser.parseGenomeInterval(fileOrInterval)); + rawIntervals.add(parser.parseGenomeLoc(fileOrInterval)); } } } @@ -103,28 +106,41 @@ public class IntervalUtils { * first try to read it as a Picard interval file since that's well structured * we'll fail quickly if it's not a valid file. */ + boolean isPicardInterval = false; try { // Note: Picard will skip over intervals with contigs not in the sequence dictionary IntervalList il = IntervalList.fromFile(inputFile); + isPicardInterval = true; + int nInvalidIntervals = 0; for (Interval interval : il.getIntervals()) { - ret.add(glParser.createGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd())); + if ( glParser.isValidGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true)) + ret.add(glParser.createGenomeLoc(interval.getSequence(), interval.getStart(), interval.getEnd(), true)); + else { + nInvalidIntervals++; + } } + if ( nInvalidIntervals > 0 ) + logger.warn("Ignoring " + nInvalidIntervals + " invalid intervals from " + inputFile); } // if that didn't work, try parsing file as a GATK interval file catch (Exception e) { - try { - XReadLines reader = new XReadLines(new File(file_name)); - for(String line: reader) { - if ( line.trim().length() > 0 ) { - ret.add(glParser.parseGenomeInterval(line)); + if ( isPicardInterval ) // definitely a picard file, but we failed to parse + throw new UserException.CouldNotReadInputFile(inputFile, e); + else { + try { + XReadLines reader = new XReadLines(new File(file_name)); + for(String line: reader) { + if ( line.trim().length() > 0 ) { + ret.add(glParser.parseGenomeLoc(line)); + } } + reader.close(); + } + catch (IOException e2) { + throw new UserException.CouldNotReadInputFile(inputFile, e2); } - reader.close(); - } - catch (IOException e2) { - throw new UserException.CouldNotReadInputFile(inputFile, e2); } } } diff --git a/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index e4a714e4a..a17c162f5 100644 --- a/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -113,7 +113,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { String contig, int intervalStart, int intervalEnd ) throws Exception { List intervalArgs = new ArrayList(); - List rodIntervals = Arrays.asList(genomeLocParser.createGenomeLocWithoutValidation(contig, intervalStart, intervalEnd)); + List rodIntervals = Arrays.asList(genomeLocParser.createGenomeLoc(contig, intervalStart, intervalEnd, true)); testEngine.loadIntervals(intervalArgs, rodIntervals); } @@ -124,7 +124,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { // We need to adjust intervalStart, since BED intervals are 0-based. We don't need to adjust intervalEnd, // since the ending point is an open interval. File bedFile = createTempFile("testInvalidBedIntervalHandling", ".bed", - String.format("%s %d %d", contig, intervalStart - 1, intervalEnd)); + String.format("%s %d %d", contig, intervalStart -1, intervalEnd)); List intervalArgs = Arrays.asList(bedFile.getAbsolutePath()); List rodIntervals = new ArrayList(); diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java index 6fce044c9..4de413334 100644 --- a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantJEXLContextUnitTest.java @@ -80,7 +80,7 @@ public class VariantJEXLContextUnitTest extends BaseTest { } catch (Exception e) { Assert.fail("Unable to create expression" + e.getMessage()); } - snpLoc = genomeLocParser.createGenomeLoc("chr1", 10, 10); + snpLoc = genomeLocParser.createGenomeLoc("chr1", 10, 10, true); } @BeforeMethod diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java index 372018365..acfefd627 100755 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/LocusViewTemplate.java @@ -275,7 +275,7 @@ public abstract class LocusViewTemplate extends BaseTest { private static ReferenceSequenceFile fakeReferenceSequenceFile() { return new ReferenceSequenceFile() { public SAMSequenceDictionary getSequenceDictionary() { - SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("chr1"); + SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("chr1", 1000000); SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Collections.singletonList(sequenceRecord)); return dictionary; } diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java index 99653094d..f1f849bf5 100644 --- a/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/GenomeLocParserUnitTest.java @@ -29,7 +29,7 @@ public class GenomeLocParserUnitTest extends BaseTest { genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); } - @Test(expectedExceptions=RuntimeException.class) + @Test(expectedExceptions=UserException.MalformedGenomeLoc.class) public void testGetContigIndex() { assertEquals(genomeLocParser.getContigIndex("blah"), -1); // should not be in the reference } @@ -78,9 +78,9 @@ public class GenomeLocParserUnitTest extends BaseTest { @Test public void testParseGoodString() { - GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-100"); + GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-10"); assertEquals(0, loc.getContigIndex()); - assertEquals(loc.getStop(), 100); + assertEquals(loc.getStop(), 10); assertEquals(loc.getStart(), 1); } @@ -165,7 +165,7 @@ public class GenomeLocParserUnitTest extends BaseTest { assertEquals(loc.getStart(), 1); } - @Test(expectedExceptions=ReviewedStingException.class) + @Test(expectedExceptions=UserException.class) public void testGenomeLocBad2() { GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1-500-0"); assertEquals(loc.getContigIndex(), 0); @@ -173,7 +173,7 @@ public class GenomeLocParserUnitTest extends BaseTest { assertEquals(loc.getStart(), 1); } - @Test(expectedExceptions=ReviewedStingException.class) + @Test(expectedExceptions=UserException.class) public void testGenomeLocBad3() { GenomeLoc loc = genomeLocParser.parseGenomeLoc("chr1:1--0"); assertEquals(loc.getContigIndex(), 0); @@ -184,19 +184,11 @@ public class GenomeLocParserUnitTest extends BaseTest { // test out the validating methods @Test public void testValidationOfGenomeLocs() { - assertTrue(genomeLocParser.validGenomeLoc("chr1",1,1)); - assertTrue(!genomeLocParser.validGenomeLoc("chr2",1,1)); // shouldn't have an entry - assertTrue(!genomeLocParser.validGenomeLoc("chr1",1,11)); // past the end of the contig - assertTrue(!genomeLocParser.validGenomeLoc("chr1",-1,10)); // bad start - assertTrue(!genomeLocParser.validGenomeLoc("chr1",1,-2)); // bad stop - assertTrue(!genomeLocParser.validGenomeLoc("chr1",10,11)); // bad start, past end - - assertTrue(genomeLocParser.validGenomeLoc(0,1,1)); - assertTrue(!genomeLocParser.validGenomeLoc(1,1,1)); // shouldn't have an entry - assertTrue(!genomeLocParser.validGenomeLoc(0,1,11)); // past the end of the contig - assertTrue(!genomeLocParser.validGenomeLoc(-1,0,10)); // bad start - assertTrue(!genomeLocParser.validGenomeLoc(0,1,-2)); // bad stop - assertTrue(!genomeLocParser.validGenomeLoc(0,10,11)); // bad start, past end - + assertTrue(genomeLocParser.isValidGenomeLoc("chr1",1,1)); + assertTrue(!genomeLocParser.isValidGenomeLoc("chr2",1,1)); // shouldn't have an entry + assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,11)); // past the end of the contig + assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",-1,10)); // bad start + assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop + assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end } } diff --git a/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java b/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java index bd2fbb357..bb892eec8 100644 --- a/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/interval/IntervalUtilsUnitTest.java @@ -138,7 +138,7 @@ public class IntervalUtilsUnitTest extends BaseTest { return hg18ReferenceLocs; List locs = new ArrayList(); for (String interval: intervals) - locs.add(hg18GenomeLocParser.parseGenomeInterval(interval)); + locs.add(hg18GenomeLocParser.parseGenomeLoc(interval)); return locs; } @@ -167,9 +167,9 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testFixedScatterIntervalsBasic() { - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3"); List files = testFiles("basic.", 3, ".intervals"); @@ -192,10 +192,10 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterFixedIntervalsLessFiles() { - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3"); - GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeInterval("chr4"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3"); + GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeLoc("chr4"); List files = testFiles("less.", 3, ".intervals"); @@ -234,10 +234,10 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterFixedIntervalsStart() { List intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2"); - GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeInterval("chr1:1-2"); - GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeInterval("chr1:4-5"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2"); + GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeLoc("chr1:1-2"); + GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeLoc("chr1:4-5"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2"); List files = testFiles("split.", 3, ".intervals"); @@ -262,10 +262,10 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterFixedIntervalsMiddle() { List intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2"); - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1"); - GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeInterval("chr2:1-2"); - GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeInterval("chr2:4-5"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1"); + GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeLoc("chr2:1-2"); + GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeLoc("chr2:4-5"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2"); List files = testFiles("split.", 3, ".intervals"); @@ -290,10 +290,10 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterFixedIntervalsEnd() { List intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5"); - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:2-2"); - GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeInterval("chr3:1-2"); - GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeInterval("chr3:4-5"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-2"); + GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeLoc("chr3:1-2"); + GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeLoc("chr3:4-5"); List files = testFiles("split.", 3, ".intervals"); @@ -322,10 +322,13 @@ public class IntervalUtilsUnitTest extends BaseTest { List splits = IntervalUtils.splitFixedIntervals(locs, files.size()); int[] counts = { - 5169, 5573, 10017, 10567, 10551, - 5087, 4908, 10120, 10435, 10399, - 5391, 4735, 10621, 10352, 10654, - 5227, 5256, 10151, 9649, 9825 + 125, 138, 287, 291, 312, 105, 155, 324, + 295, 298, 141, 121, 285, 302, 282, 88, + 116, 274, 282, 248 +// 5169, 5573, 10017, 10567, 10551, +// 5087, 4908, 10120, 10435, 10399, +// 5391, 4735, 10621, 10352, 10654, +// 5227, 5256, 10151, 9649, 9825 }; //String splitCounts = ""; @@ -368,9 +371,9 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterContigIntervalsOrder() { List intervals = Arrays.asList("chr2:1-1", "chr1:1-1", "chr3:2-2"); - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2"); List files = testFiles("split.", 3, ".intervals"); @@ -391,9 +394,9 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterContigIntervalsBasic() { - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3"); List files = testFiles("contig_basic.", 3, ".intervals"); @@ -414,10 +417,10 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterContigIntervalsLessFiles() { - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3"); - GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeInterval("chr4"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3"); + GenomeLoc chr4 = hg18GenomeLocParser.parseGenomeLoc("chr4"); List files = testFiles("contig_less.", 3, ".intervals"); @@ -446,10 +449,10 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterContigIntervalsStart() { List intervals = Arrays.asList("chr1:1-2", "chr1:4-5", "chr2:1-1", "chr3:2-2"); - GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeInterval("chr1:1-2"); - GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeInterval("chr1:4-5"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:1-1"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2"); + GenomeLoc chr1a = hg18GenomeLocParser.parseGenomeLoc("chr1:1-2"); + GenomeLoc chr1b = hg18GenomeLocParser.parseGenomeLoc("chr1:4-5"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:1-1"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2"); List files = testFiles("contig_split_start.", 3, ".intervals"); @@ -472,10 +475,10 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterContigIntervalsMiddle() { List intervals = Arrays.asList("chr1:1-1", "chr2:1-2", "chr2:4-5", "chr3:2-2"); - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1"); - GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeInterval("chr2:1-2"); - GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeInterval("chr2:4-5"); - GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:2-2"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1"); + GenomeLoc chr2a = hg18GenomeLocParser.parseGenomeLoc("chr2:1-2"); + GenomeLoc chr2b = hg18GenomeLocParser.parseGenomeLoc("chr2:4-5"); + GenomeLoc chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:2-2"); List files = testFiles("contig_split_middle.", 3, ".intervals"); @@ -498,10 +501,10 @@ public class IntervalUtilsUnitTest extends BaseTest { @Test public void testScatterContigIntervalsEnd() { List intervals = Arrays.asList("chr1:1-1", "chr2:2-2", "chr3:1-2", "chr3:4-5"); - GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1"); - GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:2-2"); - GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeInterval("chr3:1-2"); - GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeInterval("chr3:4-5"); + GenomeLoc chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1"); + GenomeLoc chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-2"); + GenomeLoc chr3a = hg18GenomeLocParser.parseGenomeLoc("chr3:1-2"); + GenomeLoc chr3b = hg18GenomeLocParser.parseGenomeLoc("chr3:4-5"); List files = testFiles("contig_split_end.", 3 ,".intervals"); diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala index 8ae8b7192..77eb3ccbc 100755 --- a/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala +++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/intervals/ExpandIntervals.scala @@ -121,7 +121,7 @@ class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, ip def parseGenomeInterval( s : String ) : GenomeLoc = { val sp = s.split("\\s+") // todo -- maybe specify whether the bed format [0,6) --> (1,2,3,4,5) is what's wanted - if ( s.contains(":") ) parser.parseGenomeInterval(s) else parser.createGenomeLoc(sp(0),sp(1).toInt+offsetIn,sp(2).toInt-offsetIn) + if ( s.contains(":") ) parser.parseGenomeLoc(s) else parser.createGenomeLoc(sp(0),sp(1).toInt+offsetIn,sp(2).toInt-offsetIn) } object IntervalFormatType extends Enumeration("INTERVALS","BED","TDF") { diff --git a/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFSimpleMerge.scala b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFSimpleMerge.scala index c193b63e8..a3c493179 100755 --- a/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFSimpleMerge.scala +++ b/scala/src/org/broadinstitute/sting/queue/library/ipf/vcf/VCFSimpleMerge.scala @@ -44,7 +44,7 @@ class VCFSimpleMerge extends InProcessFunction { def genomeLoc(xrl : PeekableXRL, p : GenomeLocParser ) : GenomeLoc = { var slp = xrl.peek.split("\t",3) - return p.parseGenomeLoc(slp(0),Integer.parseInt(slp(1)),Integer.parseInt(slp(1))) + return p.createGenomeLoc(slp(0),Integer.parseInt(slp(1)),Integer.parseInt(slp(1))) } def run = { diff --git a/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala b/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala index 52720b940..b3a2d23ae 100644 --- a/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala +++ b/scala/test/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervalsUnitTest.scala @@ -46,9 +46,9 @@ class GATKIntervalsUnitTest { @Test def testWithIntervals() { - val chr1 = hg18GenomeLocParser.parseGenomeInterval("chr1:1-1") - val chr2 = hg18GenomeLocParser.parseGenomeInterval("chr2:2-3") - val chr3 = hg18GenomeLocParser.parseGenomeInterval("chr3:3-5") + val chr1 = hg18GenomeLocParser.parseGenomeLoc("chr1:1-1") + val chr2 = hg18GenomeLocParser.parseGenomeLoc("chr2:2-3") + val chr3 = hg18GenomeLocParser.parseGenomeLoc("chr3:3-5") val gi = new GATKIntervals(hg18Reference, List("chr1:1-1", "chr2:2-3", "chr3:3-5")) Assert.assertEquals(gi.locs.toList, List(chr1, chr2, chr3))