diff --git a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java
index 752fdaa3b..dc0ea8ab6 100755
--- a/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/AbstractGenomeAnalysisEngine.java
@@ -494,14 +494,9 @@ public abstract class AbstractGenomeAnalysisEngine {
         return method;
     }
 
-    protected void validateSuppliedReads() {
-    }
-
-    protected void validateSuppliedReference() {
-    }
-
-    protected void validateSuppliedReferenceOrderedData(List rods) {
-    }
+    protected abstract void validateSuppliedReads();
+    protected abstract void validateSuppliedReference();
+    protected abstract void validateSuppliedReferenceOrderedData(List rods);
 
     /**
      * Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available).
diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 2a006e7de..914cbcc05 100755
--- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -96,7 +96,9 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
         // create the output streams "
         initializeOutputStreams(microScheduler.getOutputTracker());
 
+        // initialize and validate the interval list
         initializeIntervals();
+        validateSuppliedIntervals();
 
         ShardStrategy shardStrategy = getShardStrategy(microScheduler.getReference());
 
@@ -254,6 +256,20 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
         }
     }
 
+    /**
+     * Verifies that the unmapped region is only requested when the walker is a read walker.
+     *
+     * @throws ArgumentException if the walker is not a ReadWalker and the interval list contains GenomeLoc.UNMAPPED.
+     */
+    protected void validateSuppliedIntervals() {
+        // Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped.
+        if(!(walker instanceof ReadWalker)) {
+            GenomeLocSortedSet intervals = getIntervals();
+            if(intervals != null && intervals.contains(GenomeLoc.UNMAPPED))
+                throw new ArgumentException("Interval list specifies unmapped region. Only read walkers may include the unmapped region.");
+        }
+    }
+
     /**
      * Get the sharding strategy given a driving data source.
      *
diff --git a/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java b/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java
index 60062583f..29c085b70 100644
--- a/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java
+++ b/java/test/org/broadinstitute/sting/utils/GenomeLocUnitTest.java
@@ -5,6 +5,8 @@ package org.broadinstitute.sting.utils;
 // the imports for unit testing.
 
 
+import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
+import org.broadinstitute.sting.utils.interval.IntervalUtils;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -13,6 +15,9 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
 
 import net.sf.picard.reference.ReferenceSequenceFile;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
@@ -83,5 +88,73 @@ public class GenomeLocUnitTest extends BaseTest {
     }
 
 
+    /**
+     * Sorting must place GenomeLoc.UNMAPPED after every mapped location, wherever it started in the list.
+     */
+    @Test
+    public void testUnmappedSort() {
+        GenomeLoc chr1 = genomeLocParser.createGenomeLoc("chr1",1,10000000);
+        GenomeLoc chr2 = genomeLocParser.createGenomeLoc("chr2",1,10000000);
+        GenomeLoc unmapped = GenomeLoc.UNMAPPED;
+        List<GenomeLoc> unmappedOnly = Arrays.asList(unmapped);
+        Collections.sort(unmappedOnly);
+        Assert.assertEquals(unmappedOnly.size(),1,"Wrong number of elements in unmapped-only list.");
+        Assert.assertEquals(unmappedOnly.get(0),unmapped,"List sorted in wrong order");
+
+        List<GenomeLoc> chr1Presorted = Arrays.asList(chr1,unmapped);
+        Collections.sort(chr1Presorted);
+        Assert.assertEquals(chr1Presorted.size(),2,"Wrong number of elements in chr1,unmapped list.");
+        Assert.assertEquals(chr1Presorted,Arrays.asList(chr1,unmapped),"List sorted in wrong order");
+
+        List<GenomeLoc> chr1Inverted = Arrays.asList(unmapped,chr1);
+        Collections.sort(chr1Inverted);
+        Assert.assertEquals(chr1Inverted.size(),2,"Wrong number of elements in unmapped,chr1 list.");
+        Assert.assertEquals(chr1Inverted,Arrays.asList(chr1,unmapped),"List sorted in wrong order");
+
+        List<GenomeLoc> chr1and2Presorted = Arrays.asList(chr1,chr2,unmapped);
+        Collections.sort(chr1and2Presorted);
+        Assert.assertEquals(chr1and2Presorted.size(),3,"Wrong number of elements in chr1,chr2,unmapped list.");
+        Assert.assertEquals(chr1and2Presorted,Arrays.asList(chr1,chr2,unmapped),"List sorted in wrong order");
+
+        List<GenomeLoc> chr1and2UnmappedInFront = Arrays.asList(unmapped,chr1,chr2);
+        Collections.sort(chr1and2UnmappedInFront);
+        Assert.assertEquals(chr1and2UnmappedInFront.size(),3,"Wrong number of elements in unmapped,chr1,chr2 list.");
+        Assert.assertEquals(chr1and2UnmappedInFront,Arrays.asList(chr1,chr2,unmapped),"List sorted in wrong order");
+
+        List<GenomeLoc> chr1and2UnmappedSandwiched = Arrays.asList(chr1,unmapped,chr2);
+        Collections.sort(chr1and2UnmappedSandwiched);
+        Assert.assertEquals(chr1and2UnmappedSandwiched.size(),3,"Wrong number of elements in chr1,unmapped,chr2 list.");
+        Assert.assertEquals(chr1and2UnmappedSandwiched,Arrays.asList(chr1,chr2,unmapped),"List sorted in wrong order");
+    }
+
+    /**
+     * Sorting and merging with OVERLAPPING_ONLY must collapse repeated GenomeLoc.UNMAPPED entries into a
+     * single entry that follows the mapped locations.
+     */
+    @Test
+    public void testUnmappedMerge() {
+        GenomeLoc chr1 = genomeLocParser.createGenomeLoc("chr1",1,10000000);
+        GenomeLoc unmapped = GenomeLoc.UNMAPPED;
+
+        List<GenomeLoc> oneUnmappedOnly = Arrays.asList(unmapped);
+        oneUnmappedOnly = IntervalUtils.sortAndMergeIntervals(genomeLocParser,oneUnmappedOnly, IntervalMergingRule.OVERLAPPING_ONLY).toList();
+        Assert.assertEquals(oneUnmappedOnly.size(),1,"Wrong number of elements in list.");
+        Assert.assertEquals(oneUnmappedOnly.get(0),unmapped,"List sorted in wrong order");
+
+        List<GenomeLoc> twoUnmapped = Arrays.asList(unmapped,unmapped);
+        twoUnmapped = IntervalUtils.sortAndMergeIntervals(genomeLocParser,twoUnmapped,IntervalMergingRule.OVERLAPPING_ONLY).toList();
+        Assert.assertEquals(twoUnmapped.size(),1,"Wrong number of elements in list.");
+        Assert.assertEquals(twoUnmapped.get(0),unmapped,"List sorted in wrong order");
+
+        List<GenomeLoc> twoUnmappedAtEnd = Arrays.asList(chr1,unmapped,unmapped);
+        twoUnmappedAtEnd = IntervalUtils.sortAndMergeIntervals(genomeLocParser,twoUnmappedAtEnd,IntervalMergingRule.OVERLAPPING_ONLY).toList();
+        Assert.assertEquals(twoUnmappedAtEnd.size(),2,"Wrong number of elements in list.");
+        Assert.assertEquals(twoUnmappedAtEnd,Arrays.asList(chr1,unmapped),"List sorted in wrong order");
+
+        List<GenomeLoc> twoUnmappedMixed = Arrays.asList(unmapped,chr1,unmapped);
+        twoUnmappedMixed = IntervalUtils.sortAndMergeIntervals(genomeLocParser,twoUnmappedMixed,IntervalMergingRule.OVERLAPPING_ONLY).toList();
+        Assert.assertEquals(twoUnmappedMixed.size(),2,"Wrong number of elements in list.");
+        Assert.assertEquals(twoUnmappedMixed,Arrays.asList(chr1,unmapped),"List sorted in wrong order");
+    }
 
 }