providing a way to specify how you'd like -BTI combined with your -L options; set BTIMR to either UNION (default) or INTERSECTION.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3983 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f3883585d0
commit
30178c05c5
|
|
@ -227,16 +227,14 @@ public class GenomeAnalysisEngine {
|
|||
* @param additionalIntervals a list of additional intervals to add to the returned set. Can be null.
|
||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||
*/
|
||||
private GenomeLocSortedSet loadIntervals(List<String> argList, IntervalMergingRule mergingRule, List<GenomeLoc> additionalIntervals) {
|
||||
List<GenomeLoc> rawIntervals = (additionalIntervals == null) ? new ArrayList<GenomeLoc>() : additionalIntervals; // running list of raw GenomeLocs
|
||||
private GenomeLocSortedSet loadIntervals(List<String> argList,
|
||||
IntervalMergingRule mergingRule,
|
||||
List<GenomeLoc> additionalIntervals) {
|
||||
|
||||
rawIntervals.addAll(IntervalUtils.parseIntervalArguments(argList));
|
||||
|
||||
// redundant check => default no arguments is null, not empty list
|
||||
if (rawIntervals.size() == 0)
|
||||
return null;
|
||||
|
||||
return IntervalUtils.sortAndMergeIntervals(rawIntervals,mergingRule);
|
||||
return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals,
|
||||
IntervalUtils.parseIntervalArguments(argList),
|
||||
argCollection.BTIMergeRule),
|
||||
mergingRule);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.StingException;
|
|||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.simpleframework.xml.*;
|
||||
import org.simpleframework.xml.core.Persister;
|
||||
import org.simpleframework.xml.stream.Format;
|
||||
|
|
@ -92,7 +93,11 @@ public class GATKArgumentCollection {
|
|||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
|
||||
public String RODToInterval = null;
|
||||
public String RODToInterval = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
|
||||
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
|
||||
|
|
@ -351,6 +356,9 @@ public class GATKArgumentCollection {
|
|||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
|
||||
return false;
|
||||
}
|
||||
if (BTIMergeRule != other.BTIMergeRule)
|
||||
return false;
|
||||
|
||||
// if (other.enableRodWalkers != this.enableRodWalkers) {
|
||||
// return false;
|
||||
// }
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
|
|||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureSource;
|
||||
import org.broad.tribble.source.BasicFeatureSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||
|
|
@ -51,20 +52,23 @@ public class TribbleTrack extends RMDTrack implements QueryableTrack {
|
|||
// our sequence dictionary, which can be null
|
||||
private final SAMSequenceDictionary dictionary;
|
||||
|
||||
// our codec type
|
||||
private final FeatureCodec codec;
|
||||
/**
|
||||
* Create a track
|
||||
*
|
||||
* @param type the type of track, used for track lookup
|
||||
* @param recordType the type of record we produce
|
||||
* @param name the name of this specific track
|
||||
* @param file the associated file, for reference or recreating the reader
|
||||
* @param reader the feature reader to use as the underlying data source
|
||||
* @param dict the sam sequence dictionary
|
||||
* @param codec the feature codec we use to decode this type
|
||||
*/
|
||||
public TribbleTrack(Class type, Class recordType, String name, File file, BasicFeatureSource reader, SAMSequenceDictionary dict) {
|
||||
super(type, recordType, name, file);
|
||||
public TribbleTrack(Class type, String name, File file, BasicFeatureSource reader, SAMSequenceDictionary dict, FeatureCodec codec) {
|
||||
super(type, codec.getFeatureType(), name, file);
|
||||
this.reader = reader;
|
||||
this.dictionary = dict;
|
||||
this.codec = codec;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -135,4 +139,8 @@ public class TribbleTrack extends RMDTrack implements QueryableTrack {
|
|||
public Object getHeader() {
|
||||
return reader.getHeader();
|
||||
}
|
||||
|
||||
public FeatureCodec getCodec() {
|
||||
return codec;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
|||
// return a feature reader track
|
||||
Pair<BasicFeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
|
||||
if (pair == null) throw new StingException("Unable to make the feature reader for input file " + inputFile);
|
||||
return new TribbleTrack(targetClass, createCodec(targetClass, name).getFeatureType(), name, inputFile, pair.first, pair.second);
|
||||
return new TribbleTrack(targetClass, name, inputFile, pair.first, pair.second, createCodec(targetClass, name));
|
||||
}
|
||||
|
||||
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,9 @@
|
|||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
/**
|
||||
* set operators for combining lists of intervals
|
||||
*/
|
||||
public enum IntervalSetRule {
|
||||
UNION,
|
||||
INTERSECTION;
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
|
|
@ -69,6 +70,49 @@ public class IntervalUtils {
|
|||
return rawIntervals;
|
||||
}
|
||||
|
||||
/**
|
||||
* merge two interval lists, using an interval set rule
|
||||
* @param setOne a list of genomeLocs, in order (cannot be NULL)
|
||||
* @param setTwo a list of genomeLocs, also in order (cannot be NULL)
|
||||
* @param rule the rule to use for merging, i.e. union, intersection, etc
|
||||
* @return a list, correctly merged using the specified rule
|
||||
*/
|
||||
public static List<GenomeLoc> mergeListsBySetOperator(List<GenomeLoc> setOne, List<GenomeLoc> setTwo, IntervalSetRule rule) {
|
||||
// shortcut, if either set is zero, return the other set
|
||||
if (setOne.size() == 0 || setTwo.size() == 0) return (setOne.size() == 0) ? setTwo : setOne;
|
||||
|
||||
// if we're set to UNION, just add them all
|
||||
if (rule == IntervalSetRule.UNION) {
|
||||
setOne.addAll(setTwo);
|
||||
return setOne;
|
||||
}
|
||||
|
||||
// else we're INTERSECTION, create two indexes into the lists
|
||||
int iOne = 0;
|
||||
int iTwo = 0;
|
||||
|
||||
// our master list, since we can't guarantee removal time in a generic list
|
||||
LinkedList<GenomeLoc> retList = new LinkedList<GenomeLoc>();
|
||||
|
||||
// merge the second into the first using the rule
|
||||
while (iTwo < setTwo.size() && iOne < setOne.size())
|
||||
// if the first list is ahead, drop items off the second until we overlap
|
||||
if (setTwo.get(iTwo).isBefore(setOne.get(iOne)))
|
||||
iTwo++;
|
||||
// if the second is ahead, drop intervals off the first until we overlap
|
||||
else if (setOne.get(iOne).isBefore(setTwo.get(iTwo)))
|
||||
iOne++;
|
||||
// we overlap, intersect the two intervals and add the result. Then remove the interval that ends first.
|
||||
else {
|
||||
retList.add(setOne.get(iOne).intersect(setTwo.get(iTwo)));
|
||||
if (setOne.get(iOne).getStop() < setTwo.get(iTwo).getStop()) iOne++;
|
||||
else iTwo++;
|
||||
}
|
||||
|
||||
// we don't need to add the rest of remaining locations, since we know they don't overlap. return what we have
|
||||
return retList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sorts and merges an interval list. Multiple techniques are available for merging: ALL, which combines
|
||||
* all overlapping and abutting intervals into an interval that spans the union of all covered bases, and
|
||||
|
|
|
|||
|
|
@ -0,0 +1,91 @@
|
|||
package org.broadinstitute.sting.utils.interval;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* test out the interval utility methods
|
||||
*/
|
||||
public class IntervalUtilsTest extends BaseTest {
|
||||
// used to seed the genome loc parser with a sequence dictionary
|
||||
private static ReferenceSequenceFile seq;
|
||||
|
||||
|
||||
|
||||
@BeforeClass
|
||||
public static void init() throws FileNotFoundException {
|
||||
seq = new IndexedFastaSequenceFile(new File(hg18Reference));
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeListsBySetOperatorNoOverlap() {
|
||||
// a couple of lists we'll use for the testing
|
||||
List<GenomeLoc> listEveryTwoFromOne = new ArrayList<GenomeLoc>();
|
||||
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
||||
|
||||
// create the two lists we'll use
|
||||
for (int x = 1; x < 101; x++) {
|
||||
if (x % 2 == 0)
|
||||
listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||
else
|
||||
listEveryTwoFromOne.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||
}
|
||||
|
||||
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
|
||||
Assert.assertEquals(100,ret.size());
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
|
||||
Assert.assertEquals(0,ret.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeListsBySetOperatorAllOverlap() {
|
||||
// a couple of lists we'll use for the testing
|
||||
List<GenomeLoc> allSites = new ArrayList<GenomeLoc>();
|
||||
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
||||
|
||||
// create the two lists we'll use
|
||||
for (int x = 1; x < 101; x++) {
|
||||
if (x % 2 == 0)
|
||||
listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||
allSites.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||
}
|
||||
|
||||
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
||||
Assert.assertEquals(150,ret.size());
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
||||
Assert.assertEquals(50,ret.size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMergeListsBySetOperator() {
|
||||
// a couple of lists we'll use for the testing
|
||||
List<GenomeLoc> allSites = new ArrayList<GenomeLoc>();
|
||||
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
||||
|
||||
// create the two lists we'll use
|
||||
for (int x = 1; x < 101; x++) {
|
||||
if (x % 5 == 0) {
|
||||
listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||
allSites.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||
}
|
||||
}
|
||||
|
||||
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
||||
Assert.assertEquals(40,ret.size());
|
||||
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
||||
Assert.assertEquals(20,ret.size());
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue