diff --git a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java index e515227a6..53525d27a 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java +++ b/public/java/src/org/broadinstitute/sting/commandline/IntervalBinding.java @@ -25,14 +25,11 @@ package org.broadinstitute.sting.commandline; import com.google.java.contract.Requires; -import net.sf.samtools.util.CloseableIterator; import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index b99b3cf09..071a6c337 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -89,18 +89,18 @@ public class GATKArgumentCollection { * One may use samtools-style intervals either explicitly (e.g. -L chr1 or -L chr1:100-200) or listed in a file (e.g. -L myFile.intervals). * Additionally, one may specify a rod file to traverse over the positions for which there is a record in the file (e.g. -L file.vcf). */ - @ElementList(required = false) + //@ElementList(required = false) @Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file)", required = false) - public List> intervals = null; + public List> intervals = Collections.emptyList(); /** * Using this option one can instruct the GATK engine NOT to traverse over certain parts of the genome. This argument can be specified multiple times. * One may use samtools-style intervals either explicitly (e.g. -XL chr1 or -XL chr1:100-200) or listed in a file (e.g. -XL myFile.intervals). * Additionally, one may specify a rod file to skip over the positions for which there is a record in the file (e.g. -XL file.vcf). */ - @ElementList(required = false) + //@ElementList(required = false) @Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod file)", required = false) - public List> excludeIntervals = null; + public List> excludeIntervals = Collections.emptyList(); /** * How should the intervals specified by multiple -L or -XL arguments be combined? Using this argument one can, for example, traverse over all of the positions diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index a17956008..ddd029e5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -229,14 +229,6 @@ public class IndelRealigner extends ReadWalker { @Argument(fullName="noOriginalAlignmentTags", shortName="noTags", required=false, doc="Don't output the original cigar or alignment start tags for each realigned read in the output bam") protected boolean NO_ORIGINAL_ALIGNMENT_TAGS = false; - /** - * For expert users only! This tool assumes that the target interval list is sorted; if the list turns out to be unsorted, it will throw an exception. - * Use this argument when your interval list is not sorted to instruct the Realigner to first sort it in memory. - */ - @Advanced - @Argument(fullName="targetIntervalsAreNotSorted", shortName="targetNotSorted", required=false, doc="The target intervals are not sorted") - protected boolean TARGET_NOT_SORTED = false; - /** * Reads from all input files will be realigned together, but then each read will be saved in the output file corresponding to the input file that * the read came from. There are two ways to generate output bam file names: 1) if the value of this argument is a general string (e.g. '.cleaned.bam'), @@ -366,30 +358,24 @@ public class IndelRealigner extends ReadWalker { catch(FileNotFoundException ex) { throw new UserException.CouldNotReadInputFile(getToolkit().getArguments().referenceFile,ex); } - - if ( !TARGET_NOT_SORTED ) { - NwayIntervalMergingIterator merger = new NwayIntervalMergingIterator(IntervalMergingRule.OVERLAPPING_ONLY); - List rawIntervals = new ArrayList(); - // separate argument on semicolon first - for (String fileOrInterval : intervalsFile.split(";")) { - // if it's a file, add items to raw interval list - if (IntervalUtils.isIntervalFile(fileOrInterval)) { - merger.add(new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) ); - } else { - rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeLoc(fileOrInterval)); - } + NwayIntervalMergingIterator merger = new NwayIntervalMergingIterator(IntervalMergingRule.OVERLAPPING_ONLY); + List rawIntervals = new ArrayList(); + // separate argument on semicolon first + for (String fileOrInterval : intervalsFile.split(";")) { + // if it's a file, add items to raw interval list + if (IntervalUtils.isIntervalFile(fileOrInterval)) { + merger.add(new IntervalFileMergingIterator( getToolkit().getGenomeLocParser(), new java.io.File(fileOrInterval), IntervalMergingRule.OVERLAPPING_ONLY ) ); + } else { + rawIntervals.add(getToolkit().getGenomeLocParser().parseGenomeLoc(fileOrInterval)); } - if ( ! rawIntervals.isEmpty() ) merger.add(rawIntervals.iterator()); - // prepare to read intervals one-by-one, as needed (assuming they are sorted). - intervals = merger; - } else { - // read in the whole list of intervals for cleaning - GenomeLocSortedSet locs = IntervalUtils.sortAndMergeIntervals(getToolkit().getGenomeLocParser(), - IntervalUtils.parseIntervalArguments(getToolkit().getGenomeLocParser(),Arrays.asList(intervalsFile)), - IntervalMergingRule.OVERLAPPING_ONLY); - intervals = locs.iterator(); } + if ( ! rawIntervals.isEmpty() ) + merger.add(rawIntervals.iterator()); + + // prepare to read intervals one-by-one, as needed + intervals = merger; + currentInterval = intervals.hasNext() ? intervals.next() : null; writerToUse = writer; diff --git a/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java b/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java deleted file mode 100644 index b95165841..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/bed/BedParser.java +++ /dev/null @@ -1,104 +0,0 @@ -package org.broadinstitute.sting.utils.bed; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.exceptions.UserException; - -import java.io.*; -import java.util.ArrayList; -import java.util.List; - -/** - * Created by IntelliJ IDEA. - * User: aaron - * Date: Oct 5, 2009 - * Time: 5:46:45 PM - */ -public class BedParser { - // the GATk operates as a one based location, bed files are 0 based - static final int TO_ONE_BASED_ADDITION = 1; - - // the buffered reader input - private final BufferedReader mIn; - - private GenomeLocParser genomeLocParser; - - // our array of locations - private List mLocations; - - /** - * parse a bed file, given it's location - * - * @param fl - */ - public BedParser(GenomeLocParser genomeLocParser,File fl) { - this.genomeLocParser = genomeLocParser; - try { - mIn = new BufferedReader(new FileReader(fl)); - } catch (FileNotFoundException e) { - throw new UserException.CouldNotReadInputFile(fl, e); - } - mLocations = parseLocations(); - } - - /** - * parse a bed file, given an input reader - * - * @param fl the bed file - */ - public BedParser(BufferedReader fl) { - mIn = fl; - mLocations = parseLocations(); - } - - /** - * parse out the locations - * - * @return a list of GenomeLocs, sorted and merged - */ - private List parseLocations() { - String line = null; - List locArray = new ArrayList(); - try { - while ((line = mIn.readLine()) != null) { - locArray.add(parseLocation(genomeLocParser,line)); - } - } catch (IOException e) { - throw new UserException.MalformedFile("Unable to parse line in BED file."); - } - return locArray; - } - - /** - * parse a single location - * - * @param line the line, as a string - * @return a parsed genome loc - */ - public static GenomeLoc parseLocation(GenomeLocParser genomeLocParser,String line) { - String contig; - int start; - int stop; - try { - String parts[] = line.split("\\s+"); - contig = parts[0]; - start = Integer.valueOf(parts[1]) + TO_ONE_BASED_ADDITION; - stop = Integer.valueOf(parts[2]); // the ending point is an open interval - } catch (Exception e) { - throw new UserException.MalformedFile("Unable to process bed file line = " + line, e); - } - - // we currently drop the rest of the bed record, which can contain names, scores, etc - return genomeLocParser.createGenomeLoc(contig, start, stop, true); - - } - - /** - * return the sorted, and merged (for overlapping regions) - * - * @return an arraylist - */ - public List getLocations() { - return mLocations; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java index 2bc3fa284..063fef7d7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalFileMergingIterator.java @@ -61,14 +61,7 @@ public class IntervalFileMergingIterator implements Iterator { try { XReadLines reader = new XReadLines(f); - - if (f.getName().toUpperCase().endsWith(".BED")) { - it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(), - StringToGenomeLocIteratorAdapter.FORMAT.BED ) ) ; - } else { - it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator(), - StringToGenomeLocIteratorAdapter.FORMAT.GATK ) ) ; - } + it = new PushbackIterator( new StringToGenomeLocIteratorAdapter( genomeLocParser,reader.iterator() )); } catch ( FileNotFoundException e ) { throw new UserException.CouldNotReadInputFile(f, e); } diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java index c9fc39aa6..2e191511b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/IntervalUtils.java @@ -8,8 +8,8 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.bed.BedParser; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; @@ -94,9 +94,9 @@ public class IntervalUtils { List ret = new ArrayList(); // case: BED file - if (file_name.toUpperCase().endsWith(".BED")) { - BedParser parser = new BedParser(glParser,inputFile); - ret.addAll(parser.getLocations()); + if ( file_name.toUpperCase().endsWith(".BED") ) { + // this is now supported in Tribble + throw new ReviewedStingException("BED files must be parsed through Tribble; parsing them as intervals through the GATK engine is no longer supported"); } else { /** diff --git a/public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java b/public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java index 659260345..aa919b0ea 100644 --- a/public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java +++ b/public/java/src/org/broadinstitute/sting/utils/interval/StringToGenomeLocIteratorAdapter.java @@ -28,7 +28,6 @@ package org.broadinstitute.sting.utils.interval; import org.broadinstitute.sting.gatk.iterators.PushbackIterator; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.bed.BedParser; import java.util.Iterator; @@ -52,22 +51,13 @@ public class StringToGenomeLocIteratorAdapter implements Iterator { private PushbackIterator it = null; - public enum FORMAT { BED, GATK }; - - FORMAT myFormat = FORMAT.GATK; - - public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator it, FORMAT format) { + public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser, Iterator it) { this.genomeLocParser = genomeLocParser; this.it = new PushbackIterator(it); - myFormat = format; - } - - public StringToGenomeLocIteratorAdapter(GenomeLocParser genomeLocParser,Iterator it ) { - this(genomeLocParser,it,FORMAT.GATK); } public boolean hasNext() { - String s = null; + String s; boolean success = false; // skip empty lines: @@ -83,9 +73,7 @@ public class StringToGenomeLocIteratorAdapter implements Iterator { } public GenomeLoc next() { - - if ( myFormat == FORMAT.GATK ) return genomeLocParser.parseGenomeLoc(it.next()); - return BedParser.parseLocation( genomeLocParser,it.next() ); + return genomeLocParser.parseGenomeLoc(it.next()); } public void remove() { diff --git a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java index f37890ee3..8be849cf3 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollectionUnitTest.java @@ -87,7 +87,7 @@ public class GATKArgumentCollectionUnitTest extends BaseTest { collect.downsampleFraction = null; collect.downsampleCoverage = null; collect.intervals = new ArrayList>(); - collect.intervals.add(new IntervalBinding("intervals".toLowerCase())); + //collect.intervals.add(new IntervalBinding("intervals".toLowerCase())); collect.excludeIntervals = new ArrayList>(); collect.numberOfThreads = 1; } diff --git a/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java deleted file mode 100644 index 56bf66f53..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/bed/BedParserUnitTest.java +++ /dev/null @@ -1,68 +0,0 @@ -package org.broadinstitute.sting.utils.bed; - -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.testng.Assert; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.Test; - - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.List; - -import net.sf.picard.reference.IndexedFastaSequenceFile; - - -public class BedParserUnitTest extends BaseTest { - - private static IndexedFastaSequenceFile seq; - private GenomeLocParser genomeLocParser; - private File bedFile = new File("public/testdata/sampleBedFile.bed"); - - @BeforeClass - public void beforeTests() { - File referenceFile = new File(b36KGReference); - try { - seq = new CachingIndexedFastaSequenceFile(referenceFile); - } - catch(FileNotFoundException ex) { - throw new UserException.CouldNotReadInputFile(referenceFile,ex); - } - - genomeLocParser = new GenomeLocParser(seq); - } - - @Test - public void testLoadBedFile() { - BedParser parser = new BedParser(genomeLocParser,bedFile); - List location = parser.getLocations(); - Assert.assertEquals(location.size(), 4); - } - - @Test - public void testBedParsing() { - BedParser parser = new BedParser(genomeLocParser,bedFile); - List location = parser.getLocations(); - Assert.assertEquals(location.size(), 4); - Assert.assertTrue(location.get(0).getContig().equals("20")); - Assert.assertTrue(location.get(1).getContig().equals("20")); - Assert.assertTrue(location.get(2).getContig().equals("22")); - Assert.assertTrue(location.get(3).getContig().equals("22")); - - // now check the the start positions - Assert.assertEquals(location.get(0).getStart(), 1); - Assert.assertEquals(location.get(1).getStart(), 1002); - Assert.assertEquals(location.get(2).getStart(), 1001); - Assert.assertEquals(location.get(3).getStart(), 2001); - - // now check the the stop positions - Assert.assertEquals(location.get(0).getStop(), 999); - Assert.assertEquals(location.get(1).getStop(), 2000); - Assert.assertEquals(location.get(2).getStop(), 5000); - Assert.assertEquals(location.get(3).getStop(), 6000); - } -}