Merge pull request #46 from broadinstitute/md_active_regions_GSA-770
ActivityProfile and ActiveRegions respects engine interval boundaries
This commit is contained in:
commit
8eda0c50df
|
|
@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGA() {
|
||||
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
|
||||
"86ceec507e70d542decdae1d20ed6f82");
|
||||
"f751363288740c6fd9179a487be61fb4");
|
||||
}
|
||||
|
||||
private void HCTestComplexGGA(String bam, String args, String md5) {
|
||||
|
|
@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
|
||||
"76d4c4a112cf60080adf74c3e116d1fb");
|
||||
"262e4c9a55baf1936a65612cfb1f6f81");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"23a4bfa0300683d8cf2ec16ce96e89ad");
|
||||
"71ef8d0217c1a73dd360413dccd05f4d");
|
||||
}
|
||||
|
||||
private void HCTestComplexVariants(String bam, String args, String md5) {
|
||||
|
|
@ -153,7 +153,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void HCTestStructuralIndels() {
|
||||
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a77ac53d67937feebfba22a9336a5421"));
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a4e74226b16a7d8c5999620c2f6be1ba"));
|
||||
executeTest("HCTestStructuralIndels: ", spec);
|
||||
}
|
||||
|
||||
|
|
@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void testReducedBamWithReadsNotFullySpanningDeletion() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
|
||||
Arrays.asList("255947f39455c87c561be4aee4cab651"));
|
||||
Arrays.asList("87bd7ac2f7d65580838c7c956ccf52b7"));
|
||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,7 +109,8 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
|
|||
final double bandPassSigma = this.walker.bandPassSigma == null ? annotation.bandPassSigma() : this.walker.bandPassSigma;
|
||||
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
|
||||
|
||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
||||
|
||||
if ( walkerHasPresetRegions ) {
|
||||
// we load all of the preset locations into the
|
||||
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {
|
||||
|
|
|
|||
|
|
@ -29,28 +29,12 @@ import net.sf.samtools.SAMSequenceDictionary;
|
|||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
* User: aaron
|
||||
* Date: May 22, 2009
|
||||
* Time: 10:54:40 AM
|
||||
*
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
*
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenomeLocCollection
|
||||
* <p/>
|
||||
|
|
@ -59,6 +43,10 @@ import java.util.*;
|
|||
* will also remove a region from the list, if the region to remove is a
|
||||
* partial interval of a region in the collection it will remove the region from
|
||||
* that element.
|
||||
*
|
||||
* @author aaron
|
||||
* Date: May 22, 2009
|
||||
* Time: 10:54:40 AM
|
||||
*/
|
||||
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
||||
private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class);
|
||||
|
|
@ -66,26 +54,47 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
// our private storage for the GenomeLoc's
|
||||
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
||||
private final List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
|
||||
|
||||
// cache this to make overlap checking much more efficient
|
||||
private int previousOverlapSearchIndex = -1;
|
||||
|
||||
/** default constructor */
|
||||
public GenomeLocSortedSet(GenomeLocParser parser) {
|
||||
/**
|
||||
* Create a new, empty GenomeLocSortedSet
|
||||
*
|
||||
* @param parser a non-null the parser we use to create genome locs
|
||||
*/
|
||||
public GenomeLocSortedSet(final GenomeLocParser parser) {
|
||||
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
|
||||
this.genomeLocParser = parser;
|
||||
}
|
||||
|
||||
public GenomeLocSortedSet(GenomeLocParser parser,GenomeLoc e) {
|
||||
/**
|
||||
* Create a new GenomeLocSortedSet containing location e
|
||||
*
|
||||
* @param parser a non-null the parser we use to create genome locs
|
||||
* @param e a single genome locs to add to this set
|
||||
*/
|
||||
public GenomeLocSortedSet(final GenomeLocParser parser, final GenomeLoc e) {
|
||||
this(parser);
|
||||
add(e);
|
||||
}
|
||||
|
||||
public GenomeLocSortedSet(GenomeLocParser parser,Collection<GenomeLoc> l) {
|
||||
/**
|
||||
* Create a new GenomeLocSortedSet containing locations l
|
||||
*
|
||||
* The elements in l can be in any order, and can be overlapping. They will be sorted first and
|
||||
* overlapping (but not contiguous) elements will be merged
|
||||
*
|
||||
* @param parser a non-null the parser we use to create genome locs
|
||||
* @param l a collection of genome locs to add to this set
|
||||
*/
|
||||
public GenomeLocSortedSet(final GenomeLocParser parser, final Collection<GenomeLoc> l) {
|
||||
this(parser);
|
||||
|
||||
for ( GenomeLoc e : l )
|
||||
add(e);
|
||||
final ArrayList<GenomeLoc> sorted = new ArrayList<GenomeLoc>(l);
|
||||
Collections.sort(sorted);
|
||||
mArray.addAll(IntervalUtils.mergeIntervalLocations(sorted, IntervalMergingRule.OVERLAPPING_ONLY));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -198,9 +207,72 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
return returnValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a list of intervals overlapping loc
|
||||
*
|
||||
* @param loc the location we want overlapping intervals
|
||||
* @return a non-null list of locations that overlap loc
|
||||
*/
|
||||
public List<GenomeLoc> getOverlapping(final GenomeLoc loc) {
|
||||
// the max ensures that if loc would be the first element, that we start searching at the first element
|
||||
final int index = Collections.binarySearch(mArray, loc);
|
||||
if ( index >= 0 )
|
||||
// we can safely return a singleton because overlapping regions are merged and loc is exactly in
|
||||
// the set already
|
||||
return Collections.singletonList(loc);
|
||||
|
||||
// if loc isn't in the list index is (-(insertion point) - 1). The insertion point is defined as the point at
|
||||
// which the key would be inserted into the list: the index of the first element greater than the key, or list.size()
|
||||
// -ins - 1 = index => -ins = index + 1 => ins = -(index + 1)
|
||||
// Note that we look one before the index in this case, as loc might occur after the previous overlapping interval
|
||||
final int start = Math.max(-(index + 1) - 1, 0);
|
||||
final int size = mArray.size();
|
||||
|
||||
final List<GenomeLoc> overlapping = new LinkedList<GenomeLoc>();
|
||||
for ( int i = start; i < size; i++ ) {
|
||||
final GenomeLoc myLoc = mArray.get(i);
|
||||
if ( loc.overlapsP(myLoc) )
|
||||
overlapping.add(myLoc);
|
||||
else if ( myLoc.isPast(loc) )
|
||||
// since mArray is ordered, if myLoc is past loc that means all future
|
||||
// intervals cannot overlap loc either. So we can safely abort the search
|
||||
// note that we need to be a bit conservative on our tests since index needs to start
|
||||
// at -1 the position of index, so it's possible that myLoc and loc don't overlap but the next
|
||||
// position might
|
||||
break;
|
||||
}
|
||||
|
||||
return overlapping;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a list of intervals overlapping loc by enumerating all locs and testing for overlap
|
||||
*
|
||||
* Purely for testing purposes -- this is way to slow for any production code
|
||||
*
|
||||
* @param loc the location we want overlapping intervals
|
||||
* @return a non-null list of locations that overlap loc
|
||||
*/
|
||||
protected List<GenomeLoc> getOverlappingFullSearch(final GenomeLoc loc) {
|
||||
final List<GenomeLoc> overlapping = new LinkedList<GenomeLoc>();
|
||||
|
||||
// super slow, but definitely works
|
||||
for ( final GenomeLoc myLoc : mArray ) {
|
||||
if ( loc.overlapsP(myLoc) )
|
||||
overlapping.add(myLoc);
|
||||
}
|
||||
|
||||
return overlapping;
|
||||
}
|
||||
|
||||
/**
|
||||
* add a genomeLoc to the collection, simply inserting in order into the set
|
||||
*
|
||||
* TODO -- this may break the contract of the GenomeLocSortedSet if e overlaps or
|
||||
* TODO -- other locations already in the set. This code should check to see if
|
||||
* TODO -- e is overlapping with its nearby elements and merge them or alternatively
|
||||
* TODO -- throw an exception
|
||||
*
|
||||
* @param e the GenomeLoc to add
|
||||
*
|
||||
* @return true
|
||||
|
|
@ -225,6 +297,11 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
* Adds a GenomeLoc to the collection, merging it if it overlaps another region.
|
||||
* If it's not overlapping then we add it in sorted order.
|
||||
*
|
||||
* TODO TODO TODO -- this function is buggy and will not properly create a sorted
|
||||
* TODO TODO TODO -- genome loc is addRegion is called sequentially where the second
|
||||
* TODO TODO TODO -- loc added is actually before the first. So when creating
|
||||
* TODO TODO TODO -- sets make sure to sort the input locations first!
|
||||
*
|
||||
* @param e the GenomeLoc to add to the collection
|
||||
*
|
||||
* @return true, if the GenomeLoc could be added to the collection
|
||||
|
|
@ -380,31 +457,4 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
|
|||
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check to see whether two genomeLocSortedSets are equal.
|
||||
* Note that this implementation ignores the contigInfo object.
|
||||
*
|
||||
*/ /*
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if(other == null)
|
||||
return false;
|
||||
if(other instanceof GenomeLocSortedSet) {
|
||||
// send to a list, so we can ensure order correct
|
||||
List otherList = ((GenomeLocSortedSet)other).toList();
|
||||
List thisList = this.toList();
|
||||
if (otherList.size() != this.size())
|
||||
return false;
|
||||
|
||||
for (Integer i=0;i<thisList.size();i++) {
|
||||
if (otherList.get(i).equals(thisList.get(i)))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
} */
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,15 +30,13 @@ import com.google.java.contract.Invariant;
|
|||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Represents a single active region created by the Active Region Traversal for processing
|
||||
|
|
@ -382,4 +380,30 @@ public class ActiveRegion implements HasGenomeLocation {
|
|||
public boolean isActive() {
|
||||
return isActive;
|
||||
}
|
||||
|
||||
/**
|
||||
* Intersect this active region with the allowed intervals, returning a list of active regions
|
||||
* that only contain locations present in intervals
|
||||
*
|
||||
* Note that the returned list may be empty, if this active region doesn't overlap the set at all
|
||||
*
|
||||
* @param intervals a non-null set of intervals that are allowed
|
||||
* @return an ordered list of active region where each interval is contained within intervals
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
protected List<ActiveRegion> splitAndTrimToIntervals(final GenomeLocSortedSet intervals) {
|
||||
final List<GenomeLoc> allOverlapping = intervals.getOverlapping(getLocation());
|
||||
final List<ActiveRegion> clippedRegions = new LinkedList<ActiveRegion>();
|
||||
|
||||
for ( final GenomeLoc overlapping : allOverlapping ) {
|
||||
final GenomeLoc subLoc = getLocation().intersect(overlapping);
|
||||
final int subStart = subLoc.getStart() - getLocation().getStart();
|
||||
final int subEnd = subStart + subLoc.size();
|
||||
final List<ActivityProfileState> subStates = supportingStates.isEmpty() ? supportingStates : supportingStates.subList(subStart, subEnd);
|
||||
final ActiveRegion clipped = new ActiveRegion( subLoc, subStates, isActive, genomeLocParser, extension );
|
||||
clippedRegions.add(clipped);
|
||||
}
|
||||
|
||||
return clippedRegions;
|
||||
}
|
||||
}
|
||||
|
|
@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -45,6 +46,7 @@ public class ActivityProfile {
|
|||
|
||||
protected final List<ActivityProfileState> stateList;
|
||||
protected final GenomeLocParser parser;
|
||||
protected final GenomeLocSortedSet restrictToIntervals;
|
||||
|
||||
protected GenomeLoc regionStartLoc = null;
|
||||
protected GenomeLoc regionStopLoc = null;
|
||||
|
|
@ -60,10 +62,20 @@ public class ActivityProfile {
|
|||
* @param parser the parser we can use to create genome locs, cannot be null
|
||||
*/
|
||||
public ActivityProfile(final GenomeLocParser parser) {
|
||||
this(parser, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a empty ActivityProfile, restricting output to profiles overlapping intervals, if not null
|
||||
* @param parser the parser we can use to create genome locs, cannot be null
|
||||
* @param intervals only include states that are within these intervals, if not null
|
||||
*/
|
||||
public ActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet intervals) {
|
||||
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
|
||||
|
||||
this.parser = parser;
|
||||
this.stateList = new ArrayList<ActivityProfileState>();
|
||||
this.restrictToIntervals = intervals;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -224,7 +236,7 @@ public class ActivityProfile {
|
|||
|
||||
if ( position > size() )
|
||||
// should we allow this? probably not
|
||||
throw new IllegalArgumentException("Must add state contiguous to existing states");
|
||||
throw new IllegalArgumentException("Must add state contiguous to existing states: adding " + stateToAdd);
|
||||
|
||||
if ( position >= 0 ) {
|
||||
// ignore states starting before this regions start
|
||||
|
|
@ -313,7 +325,10 @@ public class ActivityProfile {
|
|||
if ( nextRegion == null )
|
||||
return regions;
|
||||
else {
|
||||
regions.add(nextRegion);
|
||||
if ( restrictToIntervals == null )
|
||||
regions.add(nextRegion);
|
||||
else
|
||||
regions.addAll(nextRegion.splitAndTrimToIntervals(restrictToIntervals));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.activeregion;
|
|||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -53,25 +54,34 @@ public class BandPassActivityProfile extends ActivityProfile {
|
|||
private final double[] GaussianKernel;
|
||||
|
||||
/**
|
||||
* Create a new BandPassActivityProfile with default sigma and file sizes
|
||||
* @param parser our genome loc parser
|
||||
* Create a new BandPassActivityProfile with default sigma and filter sizes
|
||||
*
|
||||
* @see #BandPassActivityProfile(org.broadinstitute.sting.utils.GenomeLocParser, org.broadinstitute.sting.utils.GenomeLocSortedSet, int, double, boolean)
|
||||
*/
|
||||
public BandPassActivityProfile(final GenomeLocParser parser) {
|
||||
this(parser, MAX_FILTER_SIZE, DEFAULT_SIGMA, true);
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals) {
|
||||
this(parser, restrictToIntervals, MAX_FILTER_SIZE, DEFAULT_SIGMA, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #BandPassActivityProfile(org.broadinstitute.sting.utils.GenomeLocParser, org.broadinstitute.sting.utils.GenomeLocSortedSet, int, double, boolean)
|
||||
*
|
||||
* sets adaptiveFilterSize to true
|
||||
*/
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma) {
|
||||
this(parser, restrictToIntervals, maxFilterSize, sigma, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an activity profile that implements a band pass filter on the states
|
||||
*
|
||||
* @param parser our genome loc parser
|
||||
* @param restrictToIntervals only include states that are within these intervals, if not null
|
||||
* @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma
|
||||
* @param sigma the variance of the Gaussian kernel for this band pass filter
|
||||
* @param adaptiveFilterSize if true, use the kernel itself to determine the best filter size
|
||||
*/
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma) {
|
||||
this(parser, maxFilterSize, sigma, true);
|
||||
}
|
||||
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
|
||||
super(parser);
|
||||
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
|
||||
super(parser, restrictToIntervals);
|
||||
|
||||
if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma);
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,9 @@ import org.broadinstitute.sting.commandline.ArgumentException;
|
|||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.gatk.walkers.readutils.PrintReads;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -70,10 +72,12 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
|
|||
|
||||
@Test
|
||||
public void testEmptyIntervalSetHandling() throws Exception {
|
||||
GenomeLocParser genomeLocParser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000).getSequenceDictionary());
|
||||
|
||||
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
|
||||
|
||||
testEngine.setWalker(new PrintReads());
|
||||
testEngine.setIntervals(new GenomeLocSortedSet(null));
|
||||
testEngine.setIntervals(new GenomeLocSortedSet(genomeLocParser));
|
||||
|
||||
testEngine.validateSuppliedIntervals();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,18 +28,21 @@ package org.broadinstitute.sting.utils;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
|
||||
import static org.testng.Assert.assertEquals;
|
||||
import static org.testng.Assert.assertFalse;
|
||||
import static org.testng.Assert.assertTrue;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Arrays;
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
@ -321,4 +324,62 @@ public class GenomeLocSortedSetUnitTest extends BaseTest {
|
|||
}
|
||||
assertTrue(seqNumber == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Test getOverlapping
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "GetOverlapping")
|
||||
public Object[][] makeGetOverlappingTest() throws Exception {
|
||||
final GenomeLocParser genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference)));
|
||||
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final GenomeLoc prev1 = genomeLocParser.createGenomeLoc("19", 1, 10);
|
||||
final GenomeLoc prev2 = genomeLocParser.createGenomeLoc("19", 20, 50);
|
||||
final GenomeLoc post1 = genomeLocParser.createGenomeLoc("21", 1, 10);
|
||||
final GenomeLoc post2 = genomeLocParser.createGenomeLoc("21", 20, 50);
|
||||
|
||||
final int chr20Length = genomeLocParser.getContigs().getSequence("20").getSequenceLength();
|
||||
for ( final int regionStart : Arrays.asList(1, 10, chr20Length - 10, chr20Length) ) {
|
||||
for ( final int regionSize : Arrays.asList(1, 10, 100) ) {
|
||||
final GenomeLoc region = genomeLocParser.createGenomeLocOnContig("20", regionStart, regionStart + regionSize);
|
||||
final GenomeLoc spanning = genomeLocParser.createGenomeLocOnContig("20", regionStart - 10, region.getStop() + 10);
|
||||
final GenomeLoc before_into = genomeLocParser.createGenomeLocOnContig("20", regionStart - 10, regionStart + 1);
|
||||
final GenomeLoc middle = genomeLocParser.createGenomeLocOnContig("20", regionStart + 1, regionStart + 2);
|
||||
final GenomeLoc middle_past = genomeLocParser.createGenomeLocOnContig("20", region.getStop()-1, region.getStop()+10);
|
||||
|
||||
final List<GenomeLoc> potentials = new LinkedList<GenomeLoc>();
|
||||
potentials.add(region);
|
||||
if ( spanning != null ) potentials.add(spanning);
|
||||
if ( before_into != null ) potentials.add(before_into);
|
||||
if ( middle != null ) potentials.add(middle);
|
||||
if ( middle_past != null ) potentials.add(middle_past);
|
||||
|
||||
for ( final int n : Arrays.asList(1, 2, 3) ) {
|
||||
for ( final List<GenomeLoc> regions : Utils.makePermutations(potentials, n, false) ) {
|
||||
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, regions), region});
|
||||
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1)), region});
|
||||
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, prev2)), region});
|
||||
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, post1)), region});
|
||||
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, post1, post2)), region});
|
||||
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, post1)), region});
|
||||
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, prev2, post1, post2)), region});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "GetOverlapping")
|
||||
public void testGetOverlapping(final GenomeLocSortedSet intervals, final GenomeLoc region) {
|
||||
final List<GenomeLoc> expectedOverlapping = intervals.getOverlappingFullSearch(region);
|
||||
final List<GenomeLoc> actualOverlapping = intervals.getOverlapping(region);
|
||||
Assert.assertEquals(actualOverlapping, expectedOverlapping);
|
||||
Assert.assertEquals(intervals.overlaps(region), ! expectedOverlapping.isEmpty(), "GenomeLocSortedSet.overlaps didn't return expected result");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import net.sf.samtools.SAMFileHeader;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
|
@ -48,6 +49,7 @@ import java.util.*;
|
|||
|
||||
|
||||
public class ActiveRegionUnitTest extends BaseTest {
|
||||
private final static boolean DEBUG = true;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
private IndexedFastaSequenceFile seq;
|
||||
private String contig;
|
||||
|
|
@ -88,7 +90,7 @@ public class ActiveRegionUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "ActionRegionCreationTest")
|
||||
@Test(enabled = !DEBUG, dataProvider = "ActionRegionCreationTest")
|
||||
public void testCreatingActiveRegions(final GenomeLoc loc, final List<ActivityProfileState> supportingStates, final boolean isActive, final int extension) {
|
||||
final ActiveRegion region = new ActiveRegion(loc, supportingStates, isActive, genomeLocParser, extension);
|
||||
Assert.assertEquals(region.getLocation(), loc);
|
||||
|
|
@ -141,7 +143,7 @@ public class ActiveRegionUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ActiveRegionReads")
|
||||
@Test(enabled = !DEBUG, dataProvider = "ActiveRegionReads")
|
||||
public void testActiveRegionReads(final GenomeLoc loc, final GATKSAMRecord read) {
|
||||
final GenomeLoc expectedSpan = loc.union(genomeLocParser.createGenomeLoc(read));
|
||||
|
||||
|
|
@ -197,6 +199,12 @@ public class ActiveRegionUnitTest extends BaseTest {
|
|||
Assert.assertTrue(region.getReads().get(0).getAlignmentEnd() <= region.getExtendedLoc().getStop());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Make sure bad inputs are properly detected
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "BadReadsTest")
|
||||
public Object[][] makeBadReadsTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
|
@ -213,11 +221,100 @@ public class ActiveRegionUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class)
|
||||
@Test(enabled = !DEBUG, dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class)
|
||||
public void testBadReads(final GATKSAMRecord read1, final GATKSAMRecord read2) {
|
||||
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read1);
|
||||
final ActiveRegion region = new ActiveRegion(loc, null, true, genomeLocParser, 0);
|
||||
region.add(read1);
|
||||
region.add(read2);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Make sure we can properly cut up an active region based on engine intervals
|
||||
//
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "SplitActiveRegion")
|
||||
public Object[][] makeSplitActiveRegion() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final GenomeLoc whole_span = genomeLocParser.createGenomeLoc("20", 1, 500);
|
||||
final GenomeLoc gl_before = genomeLocParser.createGenomeLoc("20", 1, 9);
|
||||
final GenomeLoc gl_after = genomeLocParser.createGenomeLoc("20", 250, 500);
|
||||
final GenomeLoc gl_diff_contig = genomeLocParser.createGenomeLoc("19", 40, 50);
|
||||
|
||||
final int regionStart = 10;
|
||||
final int regionStop = 100;
|
||||
final GenomeLoc region = genomeLocParser.createGenomeLoc("20", regionStart, regionStop);
|
||||
|
||||
for ( final GenomeLoc noEffect : Arrays.asList(whole_span) )
|
||||
tests.add(new Object[]{
|
||||
region,
|
||||
Arrays.asList(noEffect),
|
||||
Arrays.asList(region)});
|
||||
|
||||
for ( final GenomeLoc noOverlap : Arrays.asList(gl_before, gl_after, gl_diff_contig) )
|
||||
tests.add(new Object[]{
|
||||
region,
|
||||
Arrays.asList(noOverlap),
|
||||
Arrays.asList()});
|
||||
|
||||
tests.add(new Object[]{region,
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 5, 50)),
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 50))});
|
||||
|
||||
tests.add(new Object[]{region,
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, 200)),
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, regionStop))});
|
||||
|
||||
tests.add(new Object[]{region,
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50)),
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50))});
|
||||
|
||||
tests.add(new Object[]{region,
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50)),
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50))});
|
||||
|
||||
tests.add(new Object[]{region,
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50)),
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50))});
|
||||
|
||||
tests.add(new Object[]{region,
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 70, 200)),
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 70, regionStop))});
|
||||
|
||||
tests.add(new Object[]{region,
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, 200)),
|
||||
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, regionStop))});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SplitActiveRegion")
|
||||
public void testSplitActiveRegion(final GenomeLoc regionLoc, final List<GenomeLoc> intervalLocs, final List<GenomeLoc> expectedRegionLocs) {
|
||||
for ( final boolean addSubstates : Arrays.asList(true, false) ) {
|
||||
final List<ActivityProfileState> states;
|
||||
if ( addSubstates ) {
|
||||
states = new LinkedList<ActivityProfileState>();
|
||||
for ( int i = 0; i < regionLoc.size(); i++ )
|
||||
states.add(new ActivityProfileState(genomeLocParser.createGenomeLoc(regionLoc.getContig(), regionLoc.getStart() + i), 1.0));
|
||||
} else {
|
||||
states = null;
|
||||
}
|
||||
|
||||
final ActiveRegion region = new ActiveRegion(regionLoc, states, true, genomeLocParser, 0);
|
||||
final GenomeLocSortedSet intervals = new GenomeLocSortedSet(genomeLocParser, intervalLocs);
|
||||
final List<ActiveRegion> regions = region.splitAndTrimToIntervals(intervals);
|
||||
|
||||
Assert.assertEquals(regions.size(), expectedRegionLocs.size(), "Wrong number of split locations");
|
||||
for ( int i = 0; i < expectedRegionLocs.size(); i++ ) {
|
||||
final GenomeLoc expected = expectedRegionLocs.get(i);
|
||||
final ActiveRegion actual = regions.get(i);
|
||||
Assert.assertEquals(actual.getLocation(), expected, "Bad region after split");
|
||||
Assert.assertEquals(actual.isActive(), region.isActive());
|
||||
Assert.assertEquals(actual.getExtension(), region.getExtension());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -89,7 +89,7 @@ public class ActivityProfileUnitTest extends BaseTest {
|
|||
case Base: return new ActivityProfile(genomeLocParser);
|
||||
case BandPass:
|
||||
// zero size => equivalent to ActivityProfile
|
||||
return new BandPassActivityProfile(genomeLocParser, 0, 0.01, false);
|
||||
return new BandPassActivityProfile(genomeLocParser, null, 0, 0.01, false);
|
||||
default: throw new IllegalStateException(type.toString());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
@Test(enabled = ! DEBUG, dataProvider = "BandPassBasicTest")
|
||||
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) {
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma, false);
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, sigma, false);
|
||||
|
||||
final int expectedBandSize = bandPassSize * 2 + 1;
|
||||
Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size");
|
||||
|
|
@ -142,7 +142,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
@Test( enabled = ! DEBUG, dataProvider = "BandPassComposition")
|
||||
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
|
||||
final int start = 1;
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
|
||||
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
|
||||
|
||||
// add a buffer so that we can get all of the band pass values
|
||||
|
|
@ -215,7 +215,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
|
||||
@Test( enabled = ! DEBUG, dataProvider = "KernelCreation")
|
||||
public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) {
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, maxSize, sigma, true);
|
||||
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, maxSize, sigma, true);
|
||||
|
||||
final double[] kernel = profile.getKernel();
|
||||
Assert.assertEquals(kernel.length, expectedKernel.length);
|
||||
|
|
@ -255,8 +255,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
|
|||
final Pair<VCFHeader, GATKVCFUtils.VCIterable> reader = GATKVCFUtils.readAllVCs(file, codec);
|
||||
|
||||
final List<ActiveRegion> incRegions = new ArrayList<ActiveRegion>();
|
||||
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser);
|
||||
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser);
|
||||
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser, null);
|
||||
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser, null);
|
||||
int pos = start;
|
||||
for ( final VariantContext vc : reader.getSecond() ) {
|
||||
if ( vc == null ) continue;
|
||||
|
|
|
|||
Loading…
Reference in New Issue