ActivityProfile and ActiveRegions respects engine interval boundaries

-- Active regions are created as normal, but they are split and trimmed to the engine intervals when added to the traversal, if there are intervals present.
-- UnitTests for ActiveRegion.splitAndTrimToIntervals
-- GenomeLocSortedSet.getOverlapping uses binary search to efficiently in ~ log N time find overlapping intervals
-- UnitTesting overlap function in GenomeLocSortedSet
-- Discovered fundamental implementation bug in that adding genome locs out of order (elements on 20 then on 19) produces an invalid GenomeLocSortedSet.  Created a JIRA to address this: https://jira.broadinstitute.org/browse/GSA-775
-- Constructor that takes a collection of genome locs now sorts its input and merges overlapping intervals
-- Added docs for the constructors in GLSS
-- Update HaplotypeCaller MD5s, which change because ActiveRegions are now restricted to the engine intervals, which changes slightly the regions in the tests and so the reads in the regions, and thus the md5s
-- GenomeAnalysisEngineUnitTest needs to provide non-null genome loc parser
This commit is contained in:
Mark DePristo 2013-02-14 09:36:06 -08:00
parent e919d62156
commit be45edeff2
11 changed files with 348 additions and 86 deletions

View File

@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"86ceec507e70d542decdae1d20ed6f82");
"f751363288740c6fd9179a487be61fb4");
}
private void HCTestComplexGGA(String bam, String args, String md5) {
@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"76d4c4a112cf60080adf74c3e116d1fb");
"262e4c9a55baf1936a65612cfb1f6f81");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"23a4bfa0300683d8cf2ec16ce96e89ad");
"71ef8d0217c1a73dd360413dccd05f4d");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -153,7 +153,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a77ac53d67937feebfba22a9336a5421"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a4e74226b16a7d8c5999620c2f6be1ba"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("255947f39455c87c561be4aee4cab651"));
Arrays.asList("87bd7ac2f7d65580838c7c956ccf52b7"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
}
}

View File

@ -109,7 +109,8 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
final double bandPassSigma = this.walker.bandPassSigma == null ? annotation.bandPassSigma() : this.walker.bandPassSigma;
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
if ( walkerHasPresetRegions ) {
// we load all of the preset locations into the
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {

View File

@ -29,28 +29,12 @@ import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.util.*;
/**
*
* User: aaron
* Date: May 22, 2009
* Time: 10:54:40 AM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* <p/>
* Class GenomeLocCollection
* <p/>
@ -59,6 +43,10 @@ import java.util.*;
* will also remove a region from the list, if the region to remove is a
* partial interval of a region in the collection it will remove the region from
* that element.
*
* @author aaron
* Date: May 22, 2009
* Time: 10:54:40 AM
*/
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class);
@ -66,26 +54,47 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
private GenomeLocParser genomeLocParser;
// our private storage for the GenomeLoc's
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
private final List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
// cache this to make overlap checking much more efficient
private int previousOverlapSearchIndex = -1;
/** default constructor */
public GenomeLocSortedSet(GenomeLocParser parser) {
/**
* Create a new, empty GenomeLocSortedSet
*
* @param parser a non-null the parser we use to create genome locs
*/
public GenomeLocSortedSet(final GenomeLocParser parser) {
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
this.genomeLocParser = parser;
}
public GenomeLocSortedSet(GenomeLocParser parser,GenomeLoc e) {
/**
* Create a new GenomeLocSortedSet containing location e
*
* @param parser a non-null the parser we use to create genome locs
* @param e a single genome locs to add to this set
*/
public GenomeLocSortedSet(final GenomeLocParser parser, final GenomeLoc e) {
this(parser);
add(e);
}
public GenomeLocSortedSet(GenomeLocParser parser,Collection<GenomeLoc> l) {
/**
* Create a new GenomeLocSortedSet containing locations l
*
* The elements in l can be in any order, and can be overlapping. They will be sorted first and
* overlapping (but not contiguous) elements will be merged
*
* @param parser a non-null the parser we use to create genome locs
* @param l a collection of genome locs to add to this set
*/
public GenomeLocSortedSet(final GenomeLocParser parser, final Collection<GenomeLoc> l) {
this(parser);
for ( GenomeLoc e : l )
add(e);
final ArrayList<GenomeLoc> sorted = new ArrayList<GenomeLoc>(l);
Collections.sort(sorted);
mArray.addAll(IntervalUtils.mergeIntervalLocations(sorted, IntervalMergingRule.OVERLAPPING_ONLY));
}
/**
@ -198,9 +207,72 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
return returnValue;
}
/**
* Return a list of intervals overlapping loc
*
* @param loc the location we want overlapping intervals
* @return a non-null list of locations that overlap loc
*/
public List<GenomeLoc> getOverlapping(final GenomeLoc loc) {
// the max ensures that if loc would be the first element, that we start searching at the first element
final int index = Collections.binarySearch(mArray, loc);
if ( index >= 0 )
// we can safely return a singleton because overlapping regions are merged and loc is exactly in
// the set already
return Collections.singletonList(loc);
// if loc isn't in the list index is (-(insertion point) - 1). The insertion point is defined as the point at
// which the key would be inserted into the list: the index of the first element greater than the key, or list.size()
// -ins - 1 = index => -ins = index + 1 => ins = -(index + 1)
// Note that we look one before the index in this case, as loc might occur after the previous overlapping interval
final int start = Math.max(-(index + 1) - 1, 0);
final int size = mArray.size();
final List<GenomeLoc> overlapping = new LinkedList<GenomeLoc>();
for ( int i = start; i < size; i++ ) {
final GenomeLoc myLoc = mArray.get(i);
if ( loc.overlapsP(myLoc) )
overlapping.add(myLoc);
else if ( myLoc.isPast(loc) )
// since mArray is ordered, if myLoc is past loc that means all future
// intervals cannot overlap loc either. So we can safely abort the search
// note that we need to be a bit conservative on our tests since index needs to start
// at -1 the position of index, so it's possible that myLoc and loc don't overlap but the next
// position might
break;
}
return overlapping;
}
/**
* Return a list of intervals overlapping loc by enumerating all locs and testing for overlap
*
* Purely for testing purposes -- this is way to slow for any production code
*
* @param loc the location we want overlapping intervals
* @return a non-null list of locations that overlap loc
*/
protected List<GenomeLoc> getOverlappingFullSearch(final GenomeLoc loc) {
final List<GenomeLoc> overlapping = new LinkedList<GenomeLoc>();
// super slow, but definitely works
for ( final GenomeLoc myLoc : mArray ) {
if ( loc.overlapsP(myLoc) )
overlapping.add(myLoc);
}
return overlapping;
}
/**
* add a genomeLoc to the collection, simply inserting in order into the set
*
* TODO -- this may break the contract of the GenomeLocSortedSet if e overlaps or
* TODO -- other locations already in the set. This code should check to see if
* TODO -- e is overlapping with its nearby elements and merge them or alternatively
* TODO -- throw an exception
*
* @param e the GenomeLoc to add
*
* @return true
@ -225,6 +297,11 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
* Adds a GenomeLoc to the collection, merging it if it overlaps another region.
* If it's not overlapping then we add it in sorted order.
*
* TODO TODO TODO -- this function is buggy and will not properly create a sorted
* TODO TODO TODO -- genome loc is addRegion is called sequentially where the second
* TODO TODO TODO -- loc added is actually before the first. So when creating
* TODO TODO TODO -- sets make sure to sort the input locations first!
*
* @param e the GenomeLoc to add to the collection
*
* @return true, if the GenomeLoc could be added to the collection
@ -380,31 +457,4 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
return s.toString();
}
/**
* Check to see whether two genomeLocSortedSets are equal.
* Note that this implementation ignores the contigInfo object.
*
*/ /*
@Override
public boolean equals(Object other) {
if(other == null)
return false;
if(other instanceof GenomeLocSortedSet) {
// send to a list, so we can ensure order correct
List otherList = ((GenomeLocSortedSet)other).toList();
List thisList = this.toList();
if (otherList.size() != this.size())
return false;
for (Integer i=0;i<thisList.size();i++) {
if (otherList.get(i).equals(thisList.get(i)))
return false;
}
return true;
}
return false;
} */
}

View File

@ -30,15 +30,13 @@ import com.google.java.contract.Invariant;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.*;
/**
* Represents a single active region created by the Active Region Traversal for processing
@ -382,4 +380,30 @@ public class ActiveRegion implements HasGenomeLocation {
public boolean isActive() {
return isActive;
}
/**
* Intersect this active region with the allowed intervals, returning a list of active regions
* that only contain locations present in intervals
*
* Note that the returned list may be empty, if this active region doesn't overlap the set at all
*
* @param intervals a non-null set of intervals that are allowed
* @return an ordered list of active region where each interval is contained within intervals
*/
@Ensures("result != null")
protected List<ActiveRegion> splitAndTrimToIntervals(final GenomeLocSortedSet intervals) {
final List<GenomeLoc> allOverlapping = intervals.getOverlapping(getLocation());
final List<ActiveRegion> clippedRegions = new LinkedList<ActiveRegion>();
for ( final GenomeLoc overlapping : allOverlapping ) {
final GenomeLoc subLoc = getLocation().intersect(overlapping);
final int subStart = subLoc.getStart() - getLocation().getStart();
final int subEnd = subStart + subLoc.size();
final List<ActivityProfileState> subStates = supportingStates.isEmpty() ? supportingStates : supportingStates.subList(subStart, subEnd);
final ActiveRegion clipped = new ActiveRegion( subLoc, subStates, isActive, genomeLocParser, extension );
clippedRegions.add(clipped);
}
return clippedRegions;
}
}

View File

@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.*;
@ -45,6 +46,7 @@ public class ActivityProfile {
protected final List<ActivityProfileState> stateList;
protected final GenomeLocParser parser;
protected final GenomeLocSortedSet restrictToIntervals;
protected GenomeLoc regionStartLoc = null;
protected GenomeLoc regionStopLoc = null;
@ -60,10 +62,20 @@ public class ActivityProfile {
* @param parser the parser we can use to create genome locs, cannot be null
*/
public ActivityProfile(final GenomeLocParser parser) {
this(parser, null);
}
/**
* Create a empty ActivityProfile, restricting output to profiles overlapping intervals, if not null
* @param parser the parser we can use to create genome locs, cannot be null
* @param intervals only include states that are within these intervals, if not null
*/
public ActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet intervals) {
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
this.parser = parser;
this.stateList = new ArrayList<ActivityProfileState>();
this.restrictToIntervals = intervals;
}
@Override
@ -224,7 +236,7 @@ public class ActivityProfile {
if ( position > size() )
// should we allow this? probably not
throw new IllegalArgumentException("Must add state contiguous to existing states");
throw new IllegalArgumentException("Must add state contiguous to existing states: adding " + stateToAdd);
if ( position >= 0 ) {
// ignore states starting before this regions start
@ -313,7 +325,10 @@ public class ActivityProfile {
if ( nextRegion == null )
return regions;
else {
regions.add(nextRegion);
if ( restrictToIntervals == null )
regions.add(nextRegion);
else
regions.addAll(nextRegion.splitAndTrimToIntervals(restrictToIntervals));
}
}
}

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.activeregion;
import com.google.java.contract.Ensures;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.ArrayList;
@ -53,25 +54,34 @@ public class BandPassActivityProfile extends ActivityProfile {
private final double[] GaussianKernel;
/**
* Create a new BandPassActivityProfile with default sigma and file sizes
* @param parser our genome loc parser
* Create a new BandPassActivityProfile with default sigma and filter sizes
*
* @see #BandPassActivityProfile(org.broadinstitute.sting.utils.GenomeLocParser, org.broadinstitute.sting.utils.GenomeLocSortedSet, int, double, boolean)
*/
public BandPassActivityProfile(final GenomeLocParser parser) {
this(parser, MAX_FILTER_SIZE, DEFAULT_SIGMA, true);
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals) {
this(parser, restrictToIntervals, MAX_FILTER_SIZE, DEFAULT_SIGMA, true);
}
/**
* @see #BandPassActivityProfile(org.broadinstitute.sting.utils.GenomeLocParser, org.broadinstitute.sting.utils.GenomeLocSortedSet, int, double, boolean)
*
* sets adaptiveFilterSize to true
*/
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma) {
this(parser, restrictToIntervals, maxFilterSize, sigma, true);
}
/**
* Create an activity profile that implements a band pass filter on the states
*
* @param parser our genome loc parser
* @param restrictToIntervals only include states that are within these intervals, if not null
* @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma
* @param sigma the variance of the Gaussian kernel for this band pass filter
* @param adaptiveFilterSize if true, use the kernel itself to determine the best filter size
*/
public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma) {
this(parser, maxFilterSize, sigma, true);
}
public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
super(parser);
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
super(parser, restrictToIntervals);
if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma);

View File

@ -30,7 +30,9 @@ import org.broadinstitute.sting.commandline.ArgumentException;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.walkers.readutils.PrintReads;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.Test;
import java.io.File;
@ -70,10 +72,12 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
@Test
public void testEmptyIntervalSetHandling() throws Exception {
GenomeLocParser genomeLocParser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000).getSequenceDictionary());
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
testEngine.setWalker(new PrintReads());
testEngine.setIntervals(new GenomeLocSortedSet(null));
testEngine.setIntervals(new GenomeLocSortedSet(genomeLocParser));
testEngine.validateSuppliedIntervals();
}

View File

@ -28,18 +28,21 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Iterator;
import java.util.Arrays;
import java.io.File;
import java.util.*;
/**
*
@ -321,4 +324,62 @@ public class GenomeLocSortedSetUnitTest extends BaseTest {
}
assertTrue(seqNumber == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);
}
// -----------------------------------------------------------------------------------------------
//
// Test getOverlapping
//
// -----------------------------------------------------------------------------------------------
@DataProvider(name = "GetOverlapping")
public Object[][] makeGetOverlappingTest() throws Exception {
final GenomeLocParser genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference)));
List<Object[]> tests = new ArrayList<Object[]>();
final GenomeLoc prev1 = genomeLocParser.createGenomeLoc("19", 1, 10);
final GenomeLoc prev2 = genomeLocParser.createGenomeLoc("19", 20, 50);
final GenomeLoc post1 = genomeLocParser.createGenomeLoc("21", 1, 10);
final GenomeLoc post2 = genomeLocParser.createGenomeLoc("21", 20, 50);
final int chr20Length = genomeLocParser.getContigs().getSequence("20").getSequenceLength();
for ( final int regionStart : Arrays.asList(1, 10, chr20Length - 10, chr20Length) ) {
for ( final int regionSize : Arrays.asList(1, 10, 100) ) {
final GenomeLoc region = genomeLocParser.createGenomeLocOnContig("20", regionStart, regionStart + regionSize);
final GenomeLoc spanning = genomeLocParser.createGenomeLocOnContig("20", regionStart - 10, region.getStop() + 10);
final GenomeLoc before_into = genomeLocParser.createGenomeLocOnContig("20", regionStart - 10, regionStart + 1);
final GenomeLoc middle = genomeLocParser.createGenomeLocOnContig("20", regionStart + 1, regionStart + 2);
final GenomeLoc middle_past = genomeLocParser.createGenomeLocOnContig("20", region.getStop()-1, region.getStop()+10);
final List<GenomeLoc> potentials = new LinkedList<GenomeLoc>();
potentials.add(region);
if ( spanning != null ) potentials.add(spanning);
if ( before_into != null ) potentials.add(before_into);
if ( middle != null ) potentials.add(middle);
if ( middle_past != null ) potentials.add(middle_past);
for ( final int n : Arrays.asList(1, 2, 3) ) {
for ( final List<GenomeLoc> regions : Utils.makePermutations(potentials, n, false) ) {
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, regions), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, prev2)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, post1)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, post1, post2)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, post1)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, prev2, post1, post2)), region});
}
}
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "GetOverlapping")
public void testGetOverlapping(final GenomeLocSortedSet intervals, final GenomeLoc region) {
final List<GenomeLoc> expectedOverlapping = intervals.getOverlappingFullSearch(region);
final List<GenomeLoc> actualOverlapping = intervals.getOverlapping(region);
Assert.assertEquals(actualOverlapping, expectedOverlapping);
Assert.assertEquals(intervals.overlaps(region), ! expectedOverlapping.isEmpty(), "GenomeLocSortedSet.overlaps didn't return expected result");
}
}

View File

@ -34,6 +34,7 @@ import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -48,6 +49,7 @@ import java.util.*;
public class ActiveRegionUnitTest extends BaseTest {
private final static boolean DEBUG = true;
private GenomeLocParser genomeLocParser;
private IndexedFastaSequenceFile seq;
private String contig;
@ -88,7 +90,7 @@ public class ActiveRegionUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@Test(enabled = true, dataProvider = "ActionRegionCreationTest")
@Test(enabled = !DEBUG, dataProvider = "ActionRegionCreationTest")
public void testCreatingActiveRegions(final GenomeLoc loc, final List<ActivityProfileState> supportingStates, final boolean isActive, final int extension) {
final ActiveRegion region = new ActiveRegion(loc, supportingStates, isActive, genomeLocParser, extension);
Assert.assertEquals(region.getLocation(), loc);
@ -141,7 +143,7 @@ public class ActiveRegionUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "ActiveRegionReads")
@Test(enabled = !DEBUG, dataProvider = "ActiveRegionReads")
public void testActiveRegionReads(final GenomeLoc loc, final GATKSAMRecord read) {
final GenomeLoc expectedSpan = loc.union(genomeLocParser.createGenomeLoc(read));
@ -197,6 +199,12 @@ public class ActiveRegionUnitTest extends BaseTest {
Assert.assertTrue(region.getReads().get(0).getAlignmentEnd() <= region.getExtendedLoc().getStop());
}
// -----------------------------------------------------------------------------------------------
//
// Make sure bad inputs are properly detected
//
// -----------------------------------------------------------------------------------------------
@DataProvider(name = "BadReadsTest")
public Object[][] makeBadReadsTest() {
List<Object[]> tests = new ArrayList<Object[]>();
@ -213,11 +221,100 @@ public class ActiveRegionUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class)
@Test(enabled = !DEBUG, dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class)
public void testBadReads(final GATKSAMRecord read1, final GATKSAMRecord read2) {
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read1);
final ActiveRegion region = new ActiveRegion(loc, null, true, genomeLocParser, 0);
region.add(read1);
region.add(read2);
}
// -----------------------------------------------------------------------------------------------
//
// Make sure we can properly cut up an active region based on engine intervals
//
// -----------------------------------------------------------------------------------------------
@DataProvider(name = "SplitActiveRegion")
public Object[][] makeSplitActiveRegion() {
List<Object[]> tests = new ArrayList<Object[]>();
final GenomeLoc whole_span = genomeLocParser.createGenomeLoc("20", 1, 500);
final GenomeLoc gl_before = genomeLocParser.createGenomeLoc("20", 1, 9);
final GenomeLoc gl_after = genomeLocParser.createGenomeLoc("20", 250, 500);
final GenomeLoc gl_diff_contig = genomeLocParser.createGenomeLoc("19", 40, 50);
final int regionStart = 10;
final int regionStop = 100;
final GenomeLoc region = genomeLocParser.createGenomeLoc("20", regionStart, regionStop);
for ( final GenomeLoc noEffect : Arrays.asList(whole_span) )
tests.add(new Object[]{
region,
Arrays.asList(noEffect),
Arrays.asList(region)});
for ( final GenomeLoc noOverlap : Arrays.asList(gl_before, gl_after, gl_diff_contig) )
tests.add(new Object[]{
region,
Arrays.asList(noOverlap),
Arrays.asList()});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 5, 50)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 50))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, 200)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, regionStop))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 70, 200)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 70, regionStop))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, 200)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, regionStop))});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "SplitActiveRegion")
public void testSplitActiveRegion(final GenomeLoc regionLoc, final List<GenomeLoc> intervalLocs, final List<GenomeLoc> expectedRegionLocs) {
for ( final boolean addSubstates : Arrays.asList(true, false) ) {
final List<ActivityProfileState> states;
if ( addSubstates ) {
states = new LinkedList<ActivityProfileState>();
for ( int i = 0; i < regionLoc.size(); i++ )
states.add(new ActivityProfileState(genomeLocParser.createGenomeLoc(regionLoc.getContig(), regionLoc.getStart() + i), 1.0));
} else {
states = null;
}
final ActiveRegion region = new ActiveRegion(regionLoc, states, true, genomeLocParser, 0);
final GenomeLocSortedSet intervals = new GenomeLocSortedSet(genomeLocParser, intervalLocs);
final List<ActiveRegion> regions = region.splitAndTrimToIntervals(intervals);
Assert.assertEquals(regions.size(), expectedRegionLocs.size(), "Wrong number of split locations");
for ( int i = 0; i < expectedRegionLocs.size(); i++ ) {
final GenomeLoc expected = expectedRegionLocs.get(i);
final ActiveRegion actual = regions.get(i);
Assert.assertEquals(actual.getLocation(), expected, "Bad region after split");
Assert.assertEquals(actual.isActive(), region.isActive());
Assert.assertEquals(actual.getExtension(), region.getExtension());
}
}
}
}

View File

@ -89,7 +89,7 @@ public class ActivityProfileUnitTest extends BaseTest {
case Base: return new ActivityProfile(genomeLocParser);
case BandPass:
// zero size => equivalent to ActivityProfile
return new BandPassActivityProfile(genomeLocParser, 0, 0.01, false);
return new BandPassActivityProfile(genomeLocParser, null, 0, 0.01, false);
default: throw new IllegalStateException(type.toString());
}
}

View File

@ -87,7 +87,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
@Test(enabled = ! DEBUG, dataProvider = "BandPassBasicTest")
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) {
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma, false);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, sigma, false);
final int expectedBandSize = bandPassSize * 2 + 1;
Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size");
@ -142,7 +142,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
@Test( enabled = ! DEBUG, dataProvider = "BandPassComposition")
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
final int start = 1;
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
// add a buffer so that we can get all of the band pass values
@ -215,7 +215,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
@Test( enabled = ! DEBUG, dataProvider = "KernelCreation")
public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) {
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, maxSize, sigma, true);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, maxSize, sigma, true);
final double[] kernel = profile.getKernel();
Assert.assertEquals(kernel.length, expectedKernel.length);
@ -255,8 +255,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
final Pair<VCFHeader, GATKVCFUtils.VCIterable> reader = GATKVCFUtils.readAllVCs(file, codec);
final List<ActiveRegion> incRegions = new ArrayList<ActiveRegion>();
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser);
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser);
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser, null);
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser, null);
int pos = start;
for ( final VariantContext vc : reader.getSecond() ) {
if ( vc == null ) continue;