Getting back to '-L unmapped':

- basic unit tests for interval sorting and merging with a mix of mapped and unmapped intervals.
- validation to ensure that locus walkers (really all non-read walkers) blow up with a user error when -L unmapped is specified.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4837 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-12-14 18:24:18 +00:00
parent 4dbdf7a13d
commit 526ae92093
3 changed files with 80 additions and 8 deletions

View File

@ -494,14 +494,9 @@ public abstract class AbstractGenomeAnalysisEngine {
return method;
}
/** No-op implementation: the body is empty, so no checks are applied to the supplied reads. */
protected void validateSuppliedReads() {
}
/** No-op implementation: the body is empty, so no checks are applied to the supplied reference. */
protected void validateSuppliedReference() {
}
/**
 * No-op implementation: the body is empty, so the given tracks are not inspected.
 *
 * @param rods tracks passed in by the caller; unused here.
 */
protected void validateSuppliedReferenceOrderedData(List<RMDTrack> rods) {
}
/** Validation hook for the supplied reads; abstract, so every concrete subclass must supply the checks. */
protected abstract void validateSuppliedReads();
/** Validation hook for the supplied reference; abstract, so every concrete subclass must supply the checks. */
protected abstract void validateSuppliedReference();
/**
 * Validation hook for the supplied reference-ordered data; abstract, so every concrete subclass
 * must supply the checks.
 *
 * @param rods the tracks to validate (presumably the reference-ordered data bound for this run; confirm against callers).
 */
protected abstract void validateSuppliedReferenceOrderedData(List<RMDTrack> rods);
/**
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vs. the reference ordered data (if available).

View File

@ -96,7 +96,9 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
// create the output streams
initializeOutputStreams(microScheduler.getOutputTracker());
// initialize and validate the interval list
initializeIntervals();
validateSuppliedIntervals();
ShardStrategy shardStrategy = getShardStrategy(microScheduler.getReference());
@ -254,6 +256,15 @@ public class GenomeAnalysisEngine extends AbstractGenomeAnalysisEngine {
}
}
/**
 * Rejects interval lists that contain the unmapped region unless the active walker is a
 * {@code ReadWalker}.
 *
 * A {@code null} interval set passes validation untouched.
 *
 * @throws ArgumentException if the walker is not a read walker and the interval set contains
 *                           {@code GenomeLoc.UNMAPPED}.
 */
protected void validateSuppliedIntervals() {
    // Only read walkers support '-L unmapped' intervals, so they need no further checking.
    if (walker instanceof ReadWalker) {
        return;
    }

    // Fetch the interval set once so the null check and the membership test see the same object.
    final GenomeLocSortedSet intervals = getIntervals();
    if (intervals != null && intervals.contains(GenomeLoc.UNMAPPED)) {
        throw new ArgumentException("Interval list specifies unmapped region. Only read walkers may include the unmapped region.");
    }
}
/**
* Get the sharding strategy given a driving data source.
*

View File

@ -5,6 +5,8 @@ package org.broadinstitute.sting.utils;
// the imports for unit testing.
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -13,6 +15,9 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.IndexedFastaSequenceFile;
@ -83,5 +88,66 @@ public class GenomeLocUnitTest extends BaseTest {
}
/**
 * Checks the natural ordering of {@code GenomeLoc} when {@code GenomeLoc.UNMAPPED} is mixed with
 * mapped loci: whatever the input order, the sorted list is expected to place the mapped loci
 * first (chr1 before chr2) and the unmapped locus last.
 */
@Test
public void testUnmappedSort() {
    GenomeLoc chr1 = genomeLocParser.createGenomeLoc("chr1",1,10000000);
    GenomeLoc chr2 = genomeLocParser.createGenomeLoc("chr2",1,10000000);
    GenomeLoc unmapped = GenomeLoc.UNMAPPED;

    // Arrays.asList returns a fixed-size list that still supports set(), which is all
    // Collections.sort needs to reorder it in place.

    // Unmapped locus on its own.
    List<GenomeLoc> unmappedOnly = Arrays.asList(unmapped);
    Collections.sort(unmappedOnly);
    Assert.assertEquals(unmappedOnly.size(),1,"Wrong number of elements in unmapped-only list.");
    Assert.assertEquals(unmappedOnly.get(0),unmapped,"List sorted in wrong order");

    // One mapped locus, input already in the expected order.
    List<GenomeLoc> chr1Presorted = Arrays.asList(chr1,unmapped);
    Collections.sort(chr1Presorted);
    Assert.assertEquals(chr1Presorted.size(),2,"Wrong number of elements in chr1,unmapped list.");
    Assert.assertEquals(chr1Presorted,Arrays.asList(chr1,unmapped),"List sorted in wrong order");

    // One mapped locus, input reversed. The size message names the input order (unmapped,chr1),
    // matching the convention used by the other cases in this test.
    List<GenomeLoc> chr1Inverted = Arrays.asList(unmapped,chr1);
    Collections.sort(chr1Inverted);
    Assert.assertEquals(chr1Inverted.size(),2,"Wrong number of elements in unmapped,chr1 list.");
    Assert.assertEquals(chr1Inverted,Arrays.asList(chr1,unmapped),"List sorted in wrong order");

    // Two mapped loci, input already in the expected order.
    List<GenomeLoc> chr1and2Presorted = Arrays.asList(chr1,chr2,unmapped);
    Collections.sort(chr1and2Presorted);
    Assert.assertEquals(chr1and2Presorted.size(),3,"Wrong number of elements in chr1,chr2,unmapped list.");
    Assert.assertEquals(chr1and2Presorted,Arrays.asList(chr1,chr2,unmapped),"List sorted in wrong order");

    // Two mapped loci, unmapped locus first.
    List<GenomeLoc> chr1and2UnmappedInFront = Arrays.asList(unmapped,chr1,chr2);
    Collections.sort(chr1and2UnmappedInFront);
    Assert.assertEquals(chr1and2UnmappedInFront.size(),3,"Wrong number of elements in unmapped,chr1,chr2 list.");
    Assert.assertEquals(chr1and2UnmappedInFront,Arrays.asList(chr1,chr2,unmapped),"List sorted in wrong order");

    // Two mapped loci, unmapped locus in the middle.
    List<GenomeLoc> chr1and2UnmappedSandwiched = Arrays.asList(chr1,unmapped,chr2);
    Collections.sort(chr1and2UnmappedSandwiched);
    Assert.assertEquals(chr1and2UnmappedSandwiched.size(),3,"Wrong number of elements in chr1,unmapped,chr2 list.");
    Assert.assertEquals(chr1and2UnmappedSandwiched,Arrays.asList(chr1,chr2,unmapped),"List sorted in wrong order");
}
/**
 * Runs {@code IntervalUtils.sortAndMergeIntervals} with {@code IntervalMergingRule.OVERLAPPING_ONLY}
 * over lists that contain {@code GenomeLoc.UNMAPPED}, and checks that repeated unmapped entries
 * come back as a single entry positioned after the mapped locus.
 */
@Test
public void testUnmappedMerge() {
    final GenomeLoc mappedChr1 = genomeLocParser.createGenomeLoc("chr1",1,10000000);
    final GenomeLoc unmappedLoc = GenomeLoc.UNMAPPED;
    final IntervalMergingRule rule = IntervalMergingRule.OVERLAPPING_ONLY;
    // Expected outcome shared by both mixed mapped/unmapped cases below.
    final List<GenomeLoc> chr1ThenUnmapped = Arrays.asList(mappedChr1,unmappedLoc);

    // A single unmapped entry.
    final List<GenomeLoc> singleInput = Arrays.asList(unmappedLoc);
    final List<GenomeLoc> singleMerged = IntervalUtils.sortAndMergeIntervals(genomeLocParser,singleInput,rule).toList();
    Assert.assertEquals(singleMerged.size(),1,"Wrong number of elements in list.");
    Assert.assertEquals(singleMerged.get(0),unmappedLoc,"List sorted in wrong order");

    // Two unmapped entries and nothing else.
    final List<GenomeLoc> pairInput = Arrays.asList(unmappedLoc,unmappedLoc);
    final List<GenomeLoc> pairMerged = IntervalUtils.sortAndMergeIntervals(genomeLocParser,pairInput,rule).toList();
    Assert.assertEquals(pairMerged.size(),1,"Wrong number of elements in list.");
    Assert.assertEquals(pairMerged.get(0),unmappedLoc,"List sorted in wrong order");

    // Mapped locus followed by two unmapped entries.
    final List<GenomeLoc> trailingInput = Arrays.asList(mappedChr1,unmappedLoc,unmappedLoc);
    final List<GenomeLoc> trailingMerged = IntervalUtils.sortAndMergeIntervals(genomeLocParser,trailingInput,rule).toList();
    Assert.assertEquals(trailingMerged.size(),2,"Wrong number of elements in list.");
    Assert.assertEquals(trailingMerged,chr1ThenUnmapped,"List sorted in wrong order");

    // Mapped locus between two unmapped entries.
    final List<GenomeLoc> mixedInput = Arrays.asList(unmappedLoc,mappedChr1,unmappedLoc);
    final List<GenomeLoc> mixedMerged = IntervalUtils.sortAndMergeIntervals(genomeLocParser,mixedInput,rule).toList();
    Assert.assertEquals(mixedMerged.size(),2,"Wrong number of elements in list.");
    Assert.assertEquals(mixedMerged,chr1ThenUnmapped,"List sorted in wrong order");
}
}