providing a way to specify how you'd like -BTI combined with your -L options; set BTIMR to either UNION (default) or INTERSECTION.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3983 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f3883585d0
commit
30178c05c5
|
|
@ -227,16 +227,14 @@ public class GenomeAnalysisEngine {
|
||||||
* @param additionalIntervals a list of additional intervals to add to the returned set. Can be null.
|
* @param additionalIntervals a list of additional intervals to add to the returned set. Can be null.
|
||||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||||
*/
|
*/
|
||||||
private GenomeLocSortedSet loadIntervals(List<String> argList, IntervalMergingRule mergingRule, List<GenomeLoc> additionalIntervals) {
|
private GenomeLocSortedSet loadIntervals(List<String> argList,
|
||||||
List<GenomeLoc> rawIntervals = (additionalIntervals == null) ? new ArrayList<GenomeLoc>() : additionalIntervals; // running list of raw GenomeLocs
|
IntervalMergingRule mergingRule,
|
||||||
|
List<GenomeLoc> additionalIntervals) {
|
||||||
|
|
||||||
rawIntervals.addAll(IntervalUtils.parseIntervalArguments(argList));
|
return IntervalUtils.sortAndMergeIntervals(IntervalUtils.mergeListsBySetOperator(additionalIntervals,
|
||||||
|
IntervalUtils.parseIntervalArguments(argList),
|
||||||
// redundant check => default no arguments is null, not empty list
|
argCollection.BTIMergeRule),
|
||||||
if (rawIntervals.size() == 0)
|
mergingRule);
|
||||||
return null;
|
|
||||||
|
|
||||||
return IntervalUtils.sortAndMergeIntervals(rawIntervals,mergingRule);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||||
|
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||||
import org.simpleframework.xml.*;
|
import org.simpleframework.xml.*;
|
||||||
import org.simpleframework.xml.core.Persister;
|
import org.simpleframework.xml.core.Persister;
|
||||||
import org.simpleframework.xml.stream.Format;
|
import org.simpleframework.xml.stream.Format;
|
||||||
|
|
@ -94,6 +95,10 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
|
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
|
||||||
public String RODToInterval = null;
|
public String RODToInterval = null;
|
||||||
|
|
||||||
|
@Element(required = false)
|
||||||
|
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
|
||||||
|
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
|
||||||
|
|
||||||
@Element(required = false)
|
@Element(required = false)
|
||||||
@Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
|
@Argument(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
|
||||||
public String DBSNPFile = null;
|
public String DBSNPFile = null;
|
||||||
|
|
@ -351,6 +356,9 @@ public class GATKArgumentCollection {
|
||||||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
|
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (BTIMergeRule != other.BTIMergeRule)
|
||||||
|
return false;
|
||||||
|
|
||||||
// if (other.enableRodWalkers != this.enableRodWalkers) {
|
// if (other.enableRodWalkers != this.enableRodWalkers) {
|
||||||
// return false;
|
// return false;
|
||||||
// }
|
// }
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.FeatureSource;
|
import org.broad.tribble.FeatureSource;
|
||||||
import org.broad.tribble.source.BasicFeatureSource;
|
import org.broad.tribble.source.BasicFeatureSource;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||||
|
|
@ -51,20 +52,23 @@ public class TribbleTrack extends RMDTrack implements QueryableTrack {
|
||||||
// our sequence dictionary, which can be null
|
// our sequence dictionary, which can be null
|
||||||
private final SAMSequenceDictionary dictionary;
|
private final SAMSequenceDictionary dictionary;
|
||||||
|
|
||||||
|
// our codec type
|
||||||
|
private final FeatureCodec codec;
|
||||||
/**
|
/**
|
||||||
* Create a track
|
* Create a track
|
||||||
*
|
*
|
||||||
* @param type the type of track, used for track lookup
|
* @param type the type of track, used for track lookup
|
||||||
* @param recordType the type of record we produce
|
|
||||||
* @param name the name of this specific track
|
* @param name the name of this specific track
|
||||||
* @param file the associated file, for reference or recreating the reader
|
* @param file the associated file, for reference or recreating the reader
|
||||||
* @param reader the feature reader to use as the underlying data source
|
* @param reader the feature reader to use as the underlying data source
|
||||||
* @param dict the sam sequence dictionary
|
* @param dict the sam sequence dictionary
|
||||||
|
* @param codec the feature codec we use to decode this type
|
||||||
*/
|
*/
|
||||||
public TribbleTrack(Class type, Class recordType, String name, File file, BasicFeatureSource reader, SAMSequenceDictionary dict) {
|
public TribbleTrack(Class type, String name, File file, BasicFeatureSource reader, SAMSequenceDictionary dict, FeatureCodec codec) {
|
||||||
super(type, recordType, name, file);
|
super(type, codec.getFeatureType(), name, file);
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
this.dictionary = dict;
|
this.dictionary = dict;
|
||||||
|
this.codec = codec;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -135,4 +139,8 @@ public class TribbleTrack extends RMDTrack implements QueryableTrack {
|
||||||
public Object getHeader() {
|
public Object getHeader() {
|
||||||
return reader.getHeader();
|
return reader.getHeader();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public FeatureCodec getCodec() {
|
||||||
|
return codec;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -105,7 +105,7 @@ public class TribbleRMDTrackBuilder extends PluginManager<FeatureCodec> implemen
|
||||||
// return a feature reader track
|
// return a feature reader track
|
||||||
Pair<BasicFeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
|
Pair<BasicFeatureSource, SAMSequenceDictionary> pair = createFeatureReader(targetClass, name, inputFile);
|
||||||
if (pair == null) throw new StingException("Unable to make the feature reader for input file " + inputFile);
|
if (pair == null) throw new StingException("Unable to make the feature reader for input file " + inputFile);
|
||||||
return new TribbleTrack(targetClass, createCodec(targetClass, name).getFeatureType(), name, inputFile, pair.first, pair.second);
|
return new TribbleTrack(targetClass, name, inputFile, pair.first, pair.second, createCodec(targetClass, name));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
|
public Pair<BasicFeatureSource, SAMSequenceDictionary> createFeatureReader(Class targetClass, File inputFile) {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
package org.broadinstitute.sting.utils.interval;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* set operators for combining lists of intervals
|
||||||
|
*/
|
||||||
|
public enum IntervalSetRule {
|
||||||
|
UNION,
|
||||||
|
INTERSECTION;
|
||||||
|
}
|
||||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
|
import java.util.LinkedList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
@ -69,6 +70,49 @@ public class IntervalUtils {
|
||||||
return rawIntervals;
|
return rawIntervals;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* merge two interval lists, using an interval set rule
|
||||||
|
* @param setOne a list of genomeLocs, in order (cannot be NULL)
|
||||||
|
* @param setTwo a list of genomeLocs, also in order (cannot be NULL)
|
||||||
|
* @param rule the rule to use for merging, i.e. union, intersection, etc
|
||||||
|
* @return a list, correctly merged using the specified rule
|
||||||
|
*/
|
||||||
|
public static List<GenomeLoc> mergeListsBySetOperator(List<GenomeLoc> setOne, List<GenomeLoc> setTwo, IntervalSetRule rule) {
|
||||||
|
// shortcut, if either set is zero, return the other set
|
||||||
|
if (setOne.size() == 0 || setTwo.size() == 0) return (setOne.size() == 0) ? setTwo : setOne;
|
||||||
|
|
||||||
|
// if we're set to UNION, just add them all
|
||||||
|
if (rule == IntervalSetRule.UNION) {
|
||||||
|
setOne.addAll(setTwo);
|
||||||
|
return setOne;
|
||||||
|
}
|
||||||
|
|
||||||
|
// else we're INTERSECTION, create two indexes into the lists
|
||||||
|
int iOne = 0;
|
||||||
|
int iTwo = 0;
|
||||||
|
|
||||||
|
// our master list, since we can't guarantee removal time in a generic list
|
||||||
|
LinkedList<GenomeLoc> retList = new LinkedList<GenomeLoc>();
|
||||||
|
|
||||||
|
// merge the second into the first using the rule
|
||||||
|
while (iTwo < setTwo.size() && iOne < setOne.size())
|
||||||
|
// if the first list is ahead, drop items off the second until we overlap
|
||||||
|
if (setTwo.get(iTwo).isBefore(setOne.get(iOne)))
|
||||||
|
iTwo++;
|
||||||
|
// if the second is ahead, drop intervals off the first until we overlap
|
||||||
|
else if (setOne.get(iOne).isBefore(setTwo.get(iTwo)))
|
||||||
|
iOne++;
|
||||||
|
// we overlap, intersect the two intervals and add the result. Then remove the interval that ends first.
|
||||||
|
else {
|
||||||
|
retList.add(setOne.get(iOne).intersect(setTwo.get(iTwo)));
|
||||||
|
if (setOne.get(iOne).getStop() < setTwo.get(iTwo).getStop()) iOne++;
|
||||||
|
else iTwo++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// we don't need to add the rest of remaining locations, since we know they don't overlap. return what we have
|
||||||
|
return retList;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sorts and merges an interval list. Multiple techniques are available for merging: ALL, which combines
|
* Sorts and merges an interval list. Multiple techniques are available for merging: ALL, which combines
|
||||||
* all overlapping and abutting intervals into an interval that spans the union of all covered bases, and
|
* all overlapping and abutting intervals into an interval that spans the union of all covered bases, and
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
package org.broadinstitute.sting.utils.interval;
|
||||||
|
|
||||||
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* test out the interval utility methods
|
||||||
|
*/
|
||||||
|
public class IntervalUtilsTest extends BaseTest {
|
||||||
|
// used to seed the genome loc parser with a sequence dictionary
|
||||||
|
private static ReferenceSequenceFile seq;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void init() throws FileNotFoundException {
|
||||||
|
seq = new IndexedFastaSequenceFile(new File(hg18Reference));
|
||||||
|
GenomeLocParser.setupRefContigOrdering(seq);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMergeListsBySetOperatorNoOverlap() {
|
||||||
|
// a couple of lists we'll use for the testing
|
||||||
|
List<GenomeLoc> listEveryTwoFromOne = new ArrayList<GenomeLoc>();
|
||||||
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
|
// create the two lists we'll use
|
||||||
|
for (int x = 1; x < 101; x++) {
|
||||||
|
if (x % 2 == 0)
|
||||||
|
listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||||
|
else
|
||||||
|
listEveryTwoFromOne.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||||
|
}
|
||||||
|
|
||||||
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.UNION);
|
||||||
|
Assert.assertEquals(100,ret.size());
|
||||||
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, listEveryTwoFromOne, IntervalSetRule.INTERSECTION);
|
||||||
|
Assert.assertEquals(0,ret.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMergeListsBySetOperatorAllOverlap() {
|
||||||
|
// a couple of lists we'll use for the testing
|
||||||
|
List<GenomeLoc> allSites = new ArrayList<GenomeLoc>();
|
||||||
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
|
// create the two lists we'll use
|
||||||
|
for (int x = 1; x < 101; x++) {
|
||||||
|
if (x % 2 == 0)
|
||||||
|
listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||||
|
allSites.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||||
|
}
|
||||||
|
|
||||||
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
||||||
|
Assert.assertEquals(150,ret.size());
|
||||||
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
||||||
|
Assert.assertEquals(50,ret.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMergeListsBySetOperator() {
|
||||||
|
// a couple of lists we'll use for the testing
|
||||||
|
List<GenomeLoc> allSites = new ArrayList<GenomeLoc>();
|
||||||
|
List<GenomeLoc> listEveryTwoFromTwo = new ArrayList<GenomeLoc>();
|
||||||
|
|
||||||
|
// create the two lists we'll use
|
||||||
|
for (int x = 1; x < 101; x++) {
|
||||||
|
if (x % 5 == 0) {
|
||||||
|
listEveryTwoFromTwo.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||||
|
allSites.add(GenomeLocParser.createGenomeLoc("chr1",x,x));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
List<GenomeLoc> ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.UNION);
|
||||||
|
Assert.assertEquals(40,ret.size());
|
||||||
|
ret = IntervalUtils.mergeListsBySetOperator(listEveryTwoFromTwo, allSites, IntervalSetRule.INTERSECTION);
|
||||||
|
Assert.assertEquals(20,ret.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue