Reorganizing the way interval arguments are processed

Most of the changes occur in GenomeAnalysisEngine.java and GenomeLocParser.java: 
-- parseIntervalRegion and parseGenomeLocs combined into parseIntervalArguments
-- initializeIntervals modified
-- some helper functions deprecated for cleanliness
Includes new set of unit tests, GenomeAnalysisEngineTest.java

New restrictions: 
-- all interval arguments are now checked to be on the reference contig
-- all interval files must have one of the following extensions: .picard, .bed, .list, .intervals, .interval_list



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3106 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
bthomas 2010-04-01 12:47:48 +00:00
parent c3c6e632d1
commit b4f6f54502
7 changed files with 239 additions and 226 deletions

View File

@ -141,7 +141,7 @@ public class GenomeAnalysisEngine {
throw new StingException("The GATKArgumentCollection passed to GenomeAnalysisEngine can not be null.");
}
// validate our parameters
// validate our parameters
if (my_walker == null)
throw new StingException("The walker passed to GenomeAnalysisEngine can not be null.");
@ -174,35 +174,105 @@ public class GenomeAnalysisEngine {
* Setup the intervals to be processed
*/
// Fills in the `intervals` field from the include (argCollection.intervals) and
// exclude (argCollection.excludeIntervals) arguments.
private void initializeIntervals() {
GenomeLocSortedSet excludeIntervals = null;
// exclusions are parsed with IntervalMergingRule.ALL, regardless of the user's merging rule
if (argCollection.excludeIntervals != null && argCollection.intervalMerging.check()) {
List<GenomeLoc> rawExcludeIntervals = parseIntervalRegion(argCollection.excludeIntervals, IntervalMergingRule.ALL);
excludeIntervals = GenomeLocSortedSet.createSetFromList(rawExcludeIntervals);
}
// inclusions: a null parse result is kept as null rather than turned into a set
if (argCollection.intervals != null && argCollection.intervalMerging.check()) {
List <GenomeLoc> parsedIntervals = parseIntervalRegion(argCollection.intervals);
intervals = (parsedIntervals == null) ? null: GenomeLocSortedSet.createSetFromList(parsedIntervals);
}
/*
if (argCollection.intervals != null && argCollection.intervalMerging.check()) {
intervals = GenomeLocSortedSet.createSetFromList(parseIntervalRegion(argCollection.intervals));
}
*/
if ( excludeIntervals != null ) {
// with no include set, the exclusions are carved out of the whole sequence dictionary
GenomeLocSortedSet toPrune = intervals == null ? GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getSequenceDictionary()) : intervals;
long toPruneSize = toPrune.coveredSize();
long toExcludeSize = excludeIntervals.coveredSize();
logger.info(String.format("Initial include intervals cover %d bases", toPruneSize));
logger.info(String.format("Initial exclude intervals cover %d bases", toExcludeSize));
intervals = toPrune.substractRegions( excludeIntervals );
long intervalSize = intervals.coveredSize();
// dividing by (0.01 * toPruneSize) expresses the removed bases as a percentage of the original
logger.info(String.format("Excluding %d bases from original intervals (%.2f%% reduction)",
toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
}
// nothing to do when neither include nor exclude arguments were given
// (the method is void, so `intervals` is simply left as it is)
if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null))
return;
if ( intervals != null )
logger.info(String.format("Processing %d bases in intervals", intervals.coveredSize()));
else {
// if include argument isn't given, create new set of all possible intervals
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getSequenceDictionary()) :
parseIntervalArguments(argCollection.intervals, argCollection.intervalMerging));
// if no exclude arguments, can return parseIntervalArguments directly
if (argCollection.excludeIntervals == null)
intervals = includeSortedSet;
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
else {
// NOTE(review): parseIntervalArguments returns null for "-L all" and when no intervals were
// collected, so includeSortedSet and excludeSortedSet can each be null here; the calls below
// would then throw a NullPointerException - confirm whether a guard is needed
GenomeLocSortedSet excludeSortedSet = parseIntervalArguments(argCollection.excludeIntervals, argCollection.intervalMerging);
intervals = includeSortedSet.substractRegions(excludeSortedSet);
// logging messages only printed when exclude (-XL) arguments are given
long toPruneSize = includeSortedSet.coveredSize();
long toExcludeSize = excludeSortedSet.coveredSize();
long intervalSize = intervals.coveredSize();
logger.info(String.format("Initial include intervals cover %d bases", toPruneSize));
// NOTE(review): the base count printed here is toExcludeSize (size of the exclude set), while the
// percentage is computed from toPruneSize - intervalSize (bases actually removed); the two differ
// whenever exclude regions fall outside the include set - confirm which number is intended
logger.info(String.format("Excluding %d bases from original intervals (%.2f%% reduction)",
toExcludeSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
}
}
}
/**
 * Parses one homogeneous group of interval arguments (all -L or all -XL), using the
 * interval merging rule taken from the arguments of the running engine instance.
 *
 * @param intervals the raw interval arguments
 * @return the result of the two-argument overload for these arguments and the engine's merging rule
 */
public static GenomeLocSortedSet parseIntervalArguments(final List<String> intervals) {
    final IntervalMergingRule engineRule = GenomeAnalysisEngine.instance.getArguments().intervalMerging;
    return parseIntervalArguments(intervals, engineRule);
}
/**
 * Creates a GenomeLocSortedSet from a set of LIKE arguments - either all -L or all -XL.
 * Each argument may hold several ';'-separated entries; every entry is either an interval
 * file (recognised by its extension, see isFile) or a single interval string.
 * The collected intervals are sorted and then merged with the given rule.
 *
 * @param argList     the raw interval arguments
 * @param mergingRule the rule used when merging the collected intervals
 * @return the sorted and merged set, or null when the only argument is "all" or when
 *         no intervals were collected at all
 * @throws StingException if "all" is given together with other interval arguments
 */
public static GenomeLocSortedSet parseIntervalArguments(List <String> argList, IntervalMergingRule mergingRule) {
    List<GenomeLoc> rawIntervals = new ArrayList<GenomeLoc>(); // running list of raw GenomeLocs

    for (String argument : argList) {
        // if any interval argument is '-L all', consider all loci by returning no intervals
        if (argument.equals("all")) {
            if (argList.size() != 1) {
                // '-L all' next to other intervals is a potentially conflicting command line
                throw new StingException("Conflicting arguments: Intervals given along with \"-L all\"");
            }
            return null;
        }

        // separate argument on semicolon first
        for (String piece : argument.split(";")) {
            // tolerate whitespace around the separators, e.g. "chr1:1-10; chr2:5-9"
            String fileOrInterval = piece.trim();

            if (isFile(fileOrInterval)) {
                // intervalFileToList reports an empty file as null, which addAll() cannot accept
                List<GenomeLoc> fromFile = GenomeLocParser.intervalFileToList(fileOrInterval, mergingRule);
                if (fromFile != null)
                    rawIntervals.addAll(fromFile);
            } else {
                // otherwise treat as an interval -> parse and add to raw interval list
                rawIntervals.add(GenomeLocParser.parseGenomeInterval(fileOrInterval));
            }
        }
    }

    // callers expect null, not an empty set, when nothing was collected
    if (rawIntervals.isEmpty())
        return null;

    Collections.sort(rawIntervals);
    rawIntervals = GenomeLocParser.mergeIntervalLocations(rawIntervals, mergingRule);
    return GenomeLocSortedSet.createSetFromList(rawIntervals);
}
/**
 * Check if a string argument was intended as an interval file, judging by its extension only.
 * Accepted file extensions (case-insensitive): .bed, .list, .picard, .interval_list, .intervals
 *
 * @param str the raw argument
 * @return true if the argument ends with one of the accepted extensions, false otherwise
 */
private static boolean isFile(String str) {
    // Upper-case once, with a fixed locale: the default-locale toUpperCase() maps 'i' to a
    // dotted capital under e.g. a Turkish locale, which would make ".list" miss ".LIST".
    final String upper = str.toUpperCase(java.util.Locale.ENGLISH);
    return upper.endsWith(".BED")
            || upper.endsWith(".LIST")
            || upper.endsWith(".PICARD")
            || upper.endsWith(".INTERVAL_LIST")
            || upper.endsWith(".INTERVALS");
}
/**
@ -319,53 +389,6 @@ public class GenomeAnalysisEngine {
return MicroScheduler.create(this,my_walker,readsDataSource,referenceDataSource,rodDataSources,argCollection.numberOfThreads);
}
/**
 * Convenience overload: parses interval files and/or genome region strings with the
 * merging rule taken from the arguments of the running engine instance.
 *
 * @param intervals the list of intervals to parse
 * @return the GenomeLocs produced by the two-argument overload
 */
public static List<GenomeLoc> parseIntervalRegion(final List<String> intervals) {
    final IntervalMergingRule engineRule = GenomeAnalysisEngine.instance.getArguments().intervalMerging;
    return parseIntervalRegion(intervals, engineRule);
}
/**
 * Turns interval arguments into GenomeLocs. Each argument is either the literal "all",
 * the path of an existing file (BED files get their own parser), or a genome region string.
 *
 * @param intervals   the list of intervals to parse
 * @param mergingRule the rule for merging intervals
 * @return the parsed GenomeLocs; an empty list when the only argument is "all"
 */
public static List<GenomeLoc> parseIntervalRegion(final List<String> intervals, IntervalMergingRule mergingRule) {
    final List<GenomeLoc> collected = new ArrayList<GenomeLoc>();

    for (final String spec : intervals) {
        // '-L all' means "consider every locus", signalled by returning no intervals
        if (spec.equals("all")) {
            if (intervals.size() == 1)
                return new ArrayList<GenomeLoc>();
            // '-L all' combined with anything else is a potentially conflicting command line
            throw new StingException(String.format("Conflicting arguments: Intervals given along with \"-L all\""));
        }

        final File candidate = new File(spec);

        // not a file on disk: treat the argument as a genome region string
        if (!candidate.exists()) {
            collected.addAll(GenomeLocParser.parseGenomeLocs(spec, mergingRule));
            continue;
        }

        // an existing file that is not BED goes through the generic interval-file reader
        if (!spec.toUpperCase().endsWith(".BED")) {
            collected.addAll(GenomeLocParser.intervalFileToList(spec, mergingRule));
            continue;
        }

        // support for the bed style interval format
        Utils.warnUser("Bed files are 0 based half-open intervals, which are converted to 1-based closed intervals in the GATK. " +
                "Be aware that all output information and intervals are 1-based closed intervals.");
        final BedParser bedParser = new BedParser(candidate);
        collected.addAll(bedParser.getSortedAndMergedLocations(mergingRule));
    }

    return collected;
}
/**
* Gets a unique identifier for the reader sourcing this read.
* @param read Read to examine.

View File

@ -5,9 +5,10 @@ import java.util.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
public class IntervalRodIterator implements Iterator<IntervalRod> {
private List<GenomeLoc> locations = null;
//private List<GenomeLoc> locations = null;
private Iterator<GenomeLoc> iter;
private String trackName = null;
@ -18,15 +19,15 @@ public class IntervalRodIterator implements Iterator<IntervalRod> {
public static IntervalRodIterator IntervalRodIteratorFromLocsFile(final String trackName, final File file) {
//System.out.printf("Parsing %s for intervals %s%n", file, trackName);
List<GenomeLoc> locs = GenomeAnalysisEngine.parseIntervalRegion(Collections.singletonList(file.getPath()));
GenomeLocSortedSet locs = GenomeAnalysisEngine.parseIntervalArguments(Collections.singletonList(file.getPath()));
//System.out.printf(" => got %d entries %n", locs.size());
return new IntervalRodIterator(trackName, locs);
}
public IntervalRodIterator(String trackName, List<GenomeLoc> locs) {
public IntervalRodIterator(String trackName, GenomeLocSortedSet locs) {
this.trackName = trackName;
locations = locs;
iter = locations.iterator();
//locations = locs;
iter = locs.iterator();
}
@Override

View File

@ -126,8 +126,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
throw new RuntimeException("Entropy threshold must be a fraction between 0 and 1");
// read in the intervals for cleaning
List<GenomeLoc> locs = GenomeAnalysisEngine.parseIntervalRegion(Arrays.asList(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY);
intervals = GenomeLocSortedSet.createSetFromList(locs).iterator();
GenomeLocSortedSet locs = GenomeAnalysisEngine.parseIntervalArguments(Arrays.asList(intervalsFile), IntervalMergingRule.OVERLAPPING_ONLY);
intervals = locs.iterator();
currentInterval = intervals.hasNext() ? intervals.next() : null;
// set up the output writer(s)

View File

@ -118,13 +118,11 @@ public class VariantConcordanceROCCurveWalker extends RodWalker<ExpandingArrayLi
for( final VariantContext vc : tracker.getAllVariantContexts(null, context.getLocation(), false, false) ) {
if( vc != null && vc.getName().toUpperCase().startsWith("TRUTH") ) {
if( vc.isSNP() && !vc.isFiltered() ) {
if( !vc.getGenotype(sampleName).isNoCall() ) {
isInTruthSet = true;
if( !vc.getGenotype(sampleName).isNoCall() ) {
isInTruthSet = true;
if( !vc.getGenotype(sampleName).isHomRef() ) {
isTrueVariant = true;
}
if( !vc.getGenotype(sampleName).isHomRef() ) {
isTrueVariant = true;
}
}
//if( vc.isPolymorphic() ) { //BUGBUG: I don't think this is the right thing to do here, there are many polymorphic sites in the truth data because there are many samples
@ -259,7 +257,7 @@ public class VariantConcordanceROCCurveWalker extends RodWalker<ExpandingArrayLi
final double sensitivity = ((double) truePos[curveIndex]) / ((double) truePos[curveIndex] + falseNegGlobal[curveIndex] + falseNeg[curveIndex]);
final double specificity = ((double) trueNegGlobal[curveIndex] + trueNeg[curveIndex]) /
((double) falsePos[curveIndex] + trueNegGlobal[curveIndex] + trueNeg[curveIndex]);
outputFile.print( String.format("%.8f,%.8f,%.8f,", qualCut[curveIndex], sensitivity, 1.0 - specificity) );
outputFile.print( String.format("%.4f,%.4f,%.4f,", qualCut[curveIndex], sensitivity, 1.0 - specificity) );
qualCut[curveIndex] += incrementQual[curveIndex];
curveIndex++;
}

View File

@ -10,6 +10,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.IntervalMergingRule;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.bed.BedParser;
import java.io.File;
import java.util.ArrayList;
@ -133,24 +134,35 @@ public class GenomeLocParser {
return true;
}
/**
* Load one or more intervals sources, sorting and merging overlapping intervals.
* @param intervalsSource Source of intervals.
* @param rule the merging rule we're using
* @return a list of sorted, merged intervals.
/**
* parse a genome interval, from a location string
*
* Performs interval-style validation:
*
* contig is valid; start and stop less than the end; start <= stop
* @param str the string to parse
*
* @return a GenomeLoc representing the String
*
*/
public static List<GenomeLoc> parseIntervals(List<String> intervalsSource, IntervalMergingRule rule) {
    // parse with the engine's own rule, order the result in place, then merge using the rule passed in
    final List<GenomeLoc> ordered = GenomeAnalysisEngine.parseIntervalRegion(intervalsSource);
    Collections.sort(ordered);
    final List<GenomeLoc> merged = GenomeLocParser.mergeIntervalLocations(ordered, rule);
    return merged;
}
public static GenomeLoc parseGenomeInterval(final String str) {
    // first the plain location parse, then the additional contig-bounds check
    final GenomeLoc parsed = parseGenomeLoc(str);
    exceptionOnInvalidGenomeLocBounds(parsed);
    return parsed;
}
/**
* parse a genome location, from a location string
*
* Performs read-style validation:
* checks that start and stop are positive, start < stop, and the contig is valid
* does not check that genomeLoc is actually on the contig
*
* @param str the string to parse
*
* @return a GenomeLoc representing the String
*
*/
public static GenomeLoc parseGenomeLoc(final String str) {
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
@ -183,53 +195,18 @@ public class GenomeLocParser {
if (bad)
throw new StingException("Failed to parse Genome Location string: " + str);
if (start < 0)
throw new StingException("Invalid Genome Location start < 0: " + str + ' ' + start);
if (stop < 0)
throw new StingException("Invalid Genome Location stop < 0: " + str + ' ' + stop);
if (contig == null)
throw new StingException("Invalid Genome Location contig == null : " + str);
if (start > stop)
throw new StingException("Invalid Genome Location string; start position comes after end position: " + str );
if (!isContigValid(contig))
// is the contig valid?
if (!isContigValid(contig))
throw new StingException("Contig " + contig + " does not match any contig in the GATK sequence dictionary derived from the reference.");
if (stop == Integer.MAX_VALUE && hasKnownContigOrdering())
// lookup the actually stop position!
stop = getContigInfo(contig).getSequenceLength();
GenomeLoc loc = parseGenomeLoc(contig, start, stop);
return loc;
}
/**
* Useful utility function that parses a location string into a coordinate-order sorted
* array of GenomeLoc objects
*
* @param str String representation of genome locs. Null string corresponds to no filter.
* @param rule the merging rule we're using
*
* @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order
*/
public static List<GenomeLoc> parseGenomeLocs(final String str, IntervalMergingRule rule) {
// Null string means no filter.
if (str == null) return null;
// Of the form: loc1;loc2;...
// Where each locN can be:
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'
try {
List<GenomeLoc> locs = new ArrayList<GenomeLoc>();
for (String loc : str.split(";"))
locs.add(parseGenomeLoc(loc.trim()));
Collections.sort(locs);
locs = mergeIntervalLocations(locs, rule);
return locs;
} catch (Exception e) { // TODO: fix this so that it passes the message from the exception, and doesn't print it out
throw new StingException(String.format("Invalid locations string: %s, format is loc1;loc2; where loc1 < loc2. Each locN can be 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'", str), e);
}
GenomeLoc locus = new GenomeLoc(contig, getContigIndex(contig,true), start, stop);
exceptionOnInvalidGenomeLoc(locus);
return locus;
}
// --------------------------------------------------------------------------------------------------------------
@ -252,7 +229,7 @@ public class GenomeLocParser {
* @return the list of merged locations
*/
public static List<GenomeLoc> mergeIntervalLocations(final List<GenomeLoc> raw, IntervalMergingRule rule) {
if (raw.size() <= 1 || rule == IntervalMergingRule.NONE)
if (raw.size() <= 1)
return raw;
else {
ArrayList<GenomeLoc> merged = new ArrayList<GenomeLoc>();
@ -282,19 +259,7 @@ public class GenomeLocParser {
*/
private static boolean isContigValid(String contig) {
int contigIndex = contigInfo.getSequenceIndex(contig);
return isSequenceIndexValid(contigIndex);
}
/**
* Determines whether the given sequence index is valid with respect to the sequence dictionary.
*
* @param sequenceIndex sequence index
*
* @return True if the sequence index is valid, false otherwise.
*/
private static boolean isSequenceIndexValid(int sequenceIndex) {
return sequenceIndex >= 0 && sequenceIndex < contigInfo.size();
return contigIndex >= 0 && contigIndex < contigInfo.size();
}
/**
@ -305,6 +270,9 @@ public class GenomeLocParser {
* @param stop Stop point.
*
* @return The genome location, or a MalformedGenomeLocException if unparseable.
*
* Validation: only checks that contig is valid
* start/stop could be anything
*/
public static GenomeLoc parseGenomeLoc(final String contig, long start, long stop) {
if (!isContigValid(contig))
@ -324,52 +292,70 @@ public class GenomeLocParser {
* @param rule also merge abutting intervals
*/
public static List<GenomeLoc> intervalFileToList(final String file_name, IntervalMergingRule rule) {
// try to open file
File inputFile = null;
try {
inputFile = new File(file_name);
}
catch (Exception e) {
throw new StingException("Could not open file", e);
}
// check if file is empty
if (inputFile.exists() && inputFile.length() < 1) {
if (GenomeAnalysisEngine.instance.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST)
return new ArrayList<GenomeLoc>();
else {
Utils.warnUser("The interval file " + file_name + " is empty. The GATK will continue processing but you " +
"may want to fix (or exclude) this file.");
return null;
}
}
// case: BED file
if (file_name.toUpperCase().endsWith(".BED")) {
BedParser parser = new BedParser(inputFile);
return parser.getSortedAndMergedLocations(rule);
}
/**
* IF not a BED file:
* first try to read it as an interval file since that's well structured
* we'll fail quickly if it's not a valid file. Then try to parse it as
* a location string file
*/
List<GenomeLoc> ret = null;
try {
File inputFile = new File(file_name);
// sometimes we see an empty file passed as a parameter, if so return an empty list
if (inputFile.exists() && inputFile.length() < 1) {
if (GenomeAnalysisEngine.instance.getArguments().unsafe != ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST)
return new ArrayList<GenomeLoc>();
else {
Utils.warnUser("The interval file " + file_name + " is empty. The GATK will continue processing but you " +
"may want to fix (or exclude) this file.");
return new ArrayList<GenomeLoc>();
}
}
IntervalList il = IntervalList.fromFile(inputFile);
// iterate through the list of merged intervals and add then as GenomeLocs
ret = new ArrayList<GenomeLoc>();
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
for (Interval interval : il.getUniqueIntervals()) {
ret.add(new GenomeLoc(interval.getSequence(), getContigIndex(interval.getSequence(),true), interval.getStart(), interval.getEnd()));
}
return ret;
// always return null instead of empty list
return ret.isEmpty() ? null : ret;
} catch (Exception e) {
}
// if that didn't work, try parsing file as an old fashioned string file
catch (Exception e) {
try {
ret = new ArrayList<GenomeLoc>();
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
xReadLines reader = new xReadLines(new File(file_name));
for(String line: reader) {
List<GenomeLoc> loci = parseGenomeLocs(line, rule);
if(loci != null)
ret.addAll(loci);
try {
ret.add(parseGenomeInterval(line));
}
catch (Exception e2) {
throw new StingException(String.format("Unable to parse interval: %s in file: %s", line, file_name));
}
}
reader.close();
if(ret.isEmpty())
return null;
for(GenomeLoc locus: ret)
exceptionOnInvalidGenomeLocBounds(locus);
return ret;
} catch (Exception e2) {
// always return null instead of empty list
return ret.isEmpty() ? null : ret;
}
catch (Exception e2) {
logger.error("Attempt to parse interval file in GATK format failed: " + e2.getMessage());
e2.printStackTrace();
throw new StingException("Unable to parse out interval file in either format", e);
@ -471,10 +457,16 @@ public class GenomeLocParser {
/**
* verify the specified genome loc is valid, if it's not, throw an exception
* Will not verify the location against contig bounds.
*
*
* Validation:
* checks that start and stop are positive, start < stop, and the contig is valid
* does not check that genomeLoc is actually on the contig, so start could be > end of contig
*
* @param toReturn the genome loc we're about to return
*
* @return the genome loc if it's valid, otherwise we throw an exception
*
*/
private static GenomeLoc exceptionOnInvalidGenomeLoc(GenomeLoc toReturn) {
if (toReturn.getStart() < 0) {
@ -496,16 +488,24 @@ public class GenomeLocParser {
/**
* Verify the locus against the bounds of the contig.
*
* performs boundary validation for genome loc INTERVALS:
* start and stop are on contig and start <= stop
* does NOT check that start and stop > 0, or that contig is valid
* for that reason, this function should only be called AFTER exceptionOnInvalidGenomeLoc()
* exceptionOnInvalidGenomeLoc isn't included in this function to save time
*
* @param locus Locus to verify.
*/
private static void exceptionOnInvalidGenomeLocBounds(GenomeLoc locus) {
// NOTE(review): the header comment on this method states that exceptionOnInvalidGenomeLoc
// is deliberately NOT called from here, yet this line calls it - confirm which is intended
exceptionOnInvalidGenomeLoc(locus);
// length of the contig the locus refers to, looked up through its contig index
int contigSize = contigInfo.getSequence(locus.getContigIndex()).getSequenceLength();
// both ends of the locus must lie within the contig
if(locus.getStart() > contigSize)
throw new StingException(String.format("GenomeLoc is invalid: locus start %d is after the end of contig %s",locus.getStart(),locus.getContig()));
if(locus.getStop() > contigSize)
throw new StingException(String.format("GenomeLoc is invalid: locus stop %d is after the end of contig %s",locus.getStop(),locus.getContig()));
// start == stop is accepted; only a start strictly after the stop is rejected
if (locus.getStart() > locus.getStop()) {
throw new StingException("Parameters to GenomeLocParser are incorrect: the start position is greater than the end position");
}
}
/**
@ -514,6 +514,8 @@ public class GenomeLocParser {
* @param loc the location to validate
*
* @return true if the passed in GenomeLoc represents a valid location
*
* performs interval-style validation: contig is valid and start and stop less than the end
*/
public static boolean validGenomeLoc(GenomeLoc loc) {
checkSetup();
@ -541,6 +543,8 @@ public class GenomeLocParser {
* @param stop the stop position
*
* @return true if it's valid, false otherwise
*
* performs interval-style validation: contig is valid and start and stop less than the end
*/
public static boolean validGenomeLoc(String contig, long start, long stop) {
checkSetup();
@ -556,6 +560,8 @@ public class GenomeLocParser {
* @param stop the stop position
*
* @return true if it's valid, false otherwise
*
* performs interval-style validation: contig is valid and start and stop less than the end
*/
public static boolean validGenomeLoc(int contigIndex, long start, long stop) {
checkSetup();

View File

@ -373,4 +373,31 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
return s.toString();
}
/**
* Check to see whether two genomeLocSortedSets are equal.
* Note that this implementation ignores the contigInfo object.
*
*/ /*
@Override
public boolean equals(Object other) {
if(other == null)
return false;
if(other instanceof GenomeLocSortedSet) {
// send to a list, so we can ensure order correct
List otherList = ((GenomeLocSortedSet)other).toList();
List thisList = this.toList();
if (otherList.size() != this.size())
return false;
for (Integer i=0;i<thisList.size();i++) {
if (otherList.get(i).equals(thisList.get(i)))
return false;
}
return true;
}
return false;
} */
}

View File

@ -48,7 +48,7 @@ public class GenomeLocParserTest extends BaseTest {
@Test(expected = RuntimeException.class)
public void testGetContigIndex() {
    // "blah" should not be in the reference; the test passes only if a RuntimeException is thrown
    final int missingIndex = GenomeLocParser.getContigIndex("blah", true);
    assertEquals(-1, missingIndex);
}
}
@Test
public void testGetContigIndexValid() {
@ -80,39 +80,7 @@ public class GenomeLocParserTest extends BaseTest {
assertEquals(100, loc.getStop());
assertEquals(1, loc.getStart());
}
@Test(expected = RuntimeException.class)
public void testParseBadLocations() {
    // second entry names the contig "badChr" and has start 1 after stop 0
    final String locations = "chr1:1-1;badChr:1-0";
    GenomeLocParser.parseGenomeLocs(locations, IntervalMergingRule.ALL);
}
@Test
public void testParseGoodLocations() {
    // two well-formed chr1 intervals; the test only requires that parsing does not throw
    final String locations = "chr1:1-1;chr1:5-9";
    GenomeLocParser.parseGenomeLocs(locations, IntervalMergingRule.ALL);
}
@Test(expected = RuntimeException.class)
public void testParseGoodLocationsTooManySemiColons() {
    // valid intervals, but with a doubled and a trailing ';' separator
    final String locations = "chr1:1-1;;chr1:5-9;";
    GenomeLocParser.parseGenomeLocs(locations, IntervalMergingRule.ALL);
}
@Test
public void testOverlappingGoodLocationsWithAbuttingFlag() {
    // chr1:1-8 and chr1:5-9 overlap, so exactly one location is expected back
    final String locations = "chr1:1-8;chr1:5-9";
    final List<GenomeLoc> merged = GenomeLocParser.parseGenomeLocs(locations, IntervalMergingRule.OVERLAPPING_ONLY);
    assertEquals(1, merged.size());
}
@Test
public void testAbuttingGoodLocationsWithAbuttingOffFlag() {
    // chr1:1-4 and chr1:5-9 touch but do not overlap, so both locations are expected back
    final String locations = "chr1:1-4;chr1:5-9";
    final List<GenomeLoc> parsed = GenomeLocParser.parseGenomeLocs(locations, IntervalMergingRule.OVERLAPPING_ONLY);
    assertEquals(2, parsed.size());
}
@Test
public void testAbuttingGoodLocationsWithNoneFlag() {
    // with IntervalMergingRule.NONE both input locations are expected back
    final String locations = "chr1:1-8;chr1:5-9";
    final List<GenomeLoc> parsed = GenomeLocParser.parseGenomeLocs(locations, IntervalMergingRule.NONE);
    assertEquals(2, parsed.size());
}
@Test
public void testCreateGenomeLoc1() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1, 100);
@ -162,16 +130,6 @@ public class GenomeLocParserTest extends BaseTest {
assertEquals(1, copy.getStart());
}
/*@Test // - uncomment if you want to test speed
public void testGenomeLocParserList() {
long start = System.currentTimeMillis();
List<GenomeLoc> parsedIntervals = GenomeAnalysisEngine.parseIntervalRegion(Arrays.asList(new String[]{"/humgen/gsa-scr1/GATK_Data/Validation_Data/bigChr1IntervalList.list"}));
Collections.sort(parsedIntervals);
LinkedList<GenomeLoc> loc = new LinkedList<GenomeLoc>(GenomeLocParser.mergeIntervalLocations(parsedIntervals));
long stop = System.currentTimeMillis();
logger.warn("Elapsed time = " + (stop - start));
}*/
@Test
public void testGenomeLocPlusSign() {
GenomeLoc loc = GenomeLocParser.parseGenomeLoc("chr1:1+");