Merge pull request #46 from broadinstitute/md_active_regions_GSA-770

ActivityProfile and ActiveRegions respects engine interval boundaries
This commit is contained in:
Eric Banks 2013-02-19 09:49:47 -08:00
commit 8eda0c50df
11 changed files with 348 additions and 86 deletions

View File

@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGA() {
HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
"86ceec507e70d542decdae1d20ed6f82");
"f751363288740c6fd9179a487be61fb4");
}
private void HCTestComplexGGA(String bam, String args, String md5) {
@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void testHaplotypeCallerMultiSampleGGAComplex() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538",
"76d4c4a112cf60080adf74c3e116d1fb");
"262e4c9a55baf1936a65612cfb1f6f81");
}
@Test
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
"23a4bfa0300683d8cf2ec16ce96e89ad");
"71ef8d0217c1a73dd360413dccd05f4d");
}
private void HCTestComplexVariants(String bam, String args, String md5) {
@ -153,7 +153,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
@Test
public void HCTestStructuralIndels() {
final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a77ac53d67937feebfba22a9336a5421"));
final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a4e74226b16a7d8c5999620c2f6be1ba"));
executeTest("HCTestStructuralIndels: ", spec);
}
@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
public void testReducedBamWithReadsNotFullySpanningDeletion() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1,
Arrays.asList("255947f39455c87c561be4aee4cab651"));
Arrays.asList("87bd7ac2f7d65580838c7c956ccf52b7"));
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
}
}

View File

@ -109,7 +109,8 @@ public class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegio
final double bandPassSigma = this.walker.bandPassSigma == null ? annotation.bandPassSigma() : this.walker.bandPassSigma;
walkerHasPresetRegions = this.walker.hasPresetActiveRegions();
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
if ( walkerHasPresetRegions ) {
// we load all of the preset locations into the
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {

View File

@ -29,28 +29,12 @@ import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.util.*;
/**
*
* User: aaron
* Date: May 22, 2009
* Time: 10:54:40 AM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* <p/>
* Class GenomeLocCollection
* <p/>
@ -59,6 +43,10 @@ import java.util.*;
* will also remove a region from the list, if the region to remove is a
* partial interval of a region in the collection it will remove the region from
* that element.
*
* @author aaron
* Date: May 22, 2009
* Time: 10:54:40 AM
*/
public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
private static Logger logger = Logger.getLogger(GenomeLocSortedSet.class);
@ -66,26 +54,47 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
private GenomeLocParser genomeLocParser;
// our private storage for the GenomeLoc's
private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
private final List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
// cache this to make overlap checking much more efficient
private int previousOverlapSearchIndex = -1;
/** default constructor */
public GenomeLocSortedSet(GenomeLocParser parser) {
/**
* Create a new, empty GenomeLocSortedSet
*
* @param parser a non-null the parser we use to create genome locs
*/
public GenomeLocSortedSet(final GenomeLocParser parser) {
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
this.genomeLocParser = parser;
}
public GenomeLocSortedSet(GenomeLocParser parser,GenomeLoc e) {
/**
* Create a new GenomeLocSortedSet containing location e
*
* @param parser a non-null the parser we use to create genome locs
* @param e a single genome locs to add to this set
*/
public GenomeLocSortedSet(final GenomeLocParser parser, final GenomeLoc e) {
this(parser);
add(e);
}
public GenomeLocSortedSet(GenomeLocParser parser,Collection<GenomeLoc> l) {
/**
* Create a new GenomeLocSortedSet containing locations l
*
* The elements in l can be in any order, and can be overlapping. They will be sorted first and
* overlapping (but not contiguous) elements will be merged
*
* @param parser a non-null the parser we use to create genome locs
* @param l a collection of genome locs to add to this set
*/
public GenomeLocSortedSet(final GenomeLocParser parser, final Collection<GenomeLoc> l) {
this(parser);
for ( GenomeLoc e : l )
add(e);
final ArrayList<GenomeLoc> sorted = new ArrayList<GenomeLoc>(l);
Collections.sort(sorted);
mArray.addAll(IntervalUtils.mergeIntervalLocations(sorted, IntervalMergingRule.OVERLAPPING_ONLY));
}
/**
@ -198,9 +207,72 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
return returnValue;
}
/**
* Return a list of intervals overlapping loc
*
* @param loc the location we want overlapping intervals
* @return a non-null list of locations that overlap loc
*/
public List<GenomeLoc> getOverlapping(final GenomeLoc loc) {
// the max ensures that if loc would be the first element, that we start searching at the first element
final int index = Collections.binarySearch(mArray, loc);
if ( index >= 0 )
// we can safely return a singleton because overlapping regions are merged and loc is exactly in
// the set already
return Collections.singletonList(loc);
// if loc isn't in the list index is (-(insertion point) - 1). The insertion point is defined as the point at
// which the key would be inserted into the list: the index of the first element greater than the key, or list.size()
// -ins - 1 = index => -ins = index + 1 => ins = -(index + 1)
// Note that we look one before the index in this case, as loc might occur after the previous overlapping interval
final int start = Math.max(-(index + 1) - 1, 0);
final int size = mArray.size();
final List<GenomeLoc> overlapping = new LinkedList<GenomeLoc>();
for ( int i = start; i < size; i++ ) {
final GenomeLoc myLoc = mArray.get(i);
if ( loc.overlapsP(myLoc) )
overlapping.add(myLoc);
else if ( myLoc.isPast(loc) )
// since mArray is ordered, if myLoc is past loc that means all future
// intervals cannot overlap loc either. So we can safely abort the search
// note that we need to be a bit conservative on our tests since index needs to start
// at -1 the position of index, so it's possible that myLoc and loc don't overlap but the next
// position might
break;
}
return overlapping;
}
/**
* Return a list of intervals overlapping loc by enumerating all locs and testing for overlap
*
* Purely for testing purposes -- this is way to slow for any production code
*
* @param loc the location we want overlapping intervals
* @return a non-null list of locations that overlap loc
*/
protected List<GenomeLoc> getOverlappingFullSearch(final GenomeLoc loc) {
final List<GenomeLoc> overlapping = new LinkedList<GenomeLoc>();
// super slow, but definitely works
for ( final GenomeLoc myLoc : mArray ) {
if ( loc.overlapsP(myLoc) )
overlapping.add(myLoc);
}
return overlapping;
}
/**
* add a genomeLoc to the collection, simply inserting in order into the set
*
* TODO -- this may break the contract of the GenomeLocSortedSet if e overlaps or
* TODO -- other locations already in the set. This code should check to see if
* TODO -- e is overlapping with its nearby elements and merge them or alternatively
* TODO -- throw an exception
*
* @param e the GenomeLoc to add
*
* @return true
@ -225,6 +297,11 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
* Adds a GenomeLoc to the collection, merging it if it overlaps another region.
* If it's not overlapping then we add it in sorted order.
*
* TODO TODO TODO -- this function is buggy and will not properly create a sorted
* TODO TODO TODO -- genome loc is addRegion is called sequentially where the second
* TODO TODO TODO -- loc added is actually before the first. So when creating
* TODO TODO TODO -- sets make sure to sort the input locations first!
*
* @param e the GenomeLoc to add to the collection
*
* @return true, if the GenomeLoc could be added to the collection
@ -380,31 +457,4 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
return s.toString();
}
/**
* Check to see whether two genomeLocSortedSets are equal.
* Note that this implementation ignores the contigInfo object.
*
*/ /*
@Override
public boolean equals(Object other) {
if(other == null)
return false;
if(other instanceof GenomeLocSortedSet) {
// send to a list, so we can ensure order correct
List otherList = ((GenomeLocSortedSet)other).toList();
List thisList = this.toList();
if (otherList.size() != this.size())
return false;
for (Integer i=0;i<thisList.size();i++) {
if (otherList.get(i).equals(thisList.get(i)))
return false;
}
return true;
}
return false;
} */
}

View File

@ -30,15 +30,13 @@ import com.google.java.contract.Invariant;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.*;
/**
* Represents a single active region created by the Active Region Traversal for processing
@ -382,4 +380,30 @@ public class ActiveRegion implements HasGenomeLocation {
public boolean isActive() {
return isActive;
}
/**
* Intersect this active region with the allowed intervals, returning a list of active regions
* that only contain locations present in intervals
*
* Note that the returned list may be empty, if this active region doesn't overlap the set at all
*
* @param intervals a non-null set of intervals that are allowed
* @return an ordered list of active region where each interval is contained within intervals
*/
@Ensures("result != null")
protected List<ActiveRegion> splitAndTrimToIntervals(final GenomeLocSortedSet intervals) {
final List<GenomeLoc> allOverlapping = intervals.getOverlapping(getLocation());
final List<ActiveRegion> clippedRegions = new LinkedList<ActiveRegion>();
for ( final GenomeLoc overlapping : allOverlapping ) {
final GenomeLoc subLoc = getLocation().intersect(overlapping);
final int subStart = subLoc.getStart() - getLocation().getStart();
final int subEnd = subStart + subLoc.size();
final List<ActivityProfileState> subStates = supportingStates.isEmpty() ? supportingStates : supportingStates.subList(subStart, subEnd);
final ActiveRegion clipped = new ActiveRegion( subLoc, subStates, isActive, genomeLocParser, extension );
clippedRegions.add(clipped);
}
return clippedRegions;
}
}

View File

@ -29,6 +29,7 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.*;
@ -45,6 +46,7 @@ public class ActivityProfile {
protected final List<ActivityProfileState> stateList;
protected final GenomeLocParser parser;
protected final GenomeLocSortedSet restrictToIntervals;
protected GenomeLoc regionStartLoc = null;
protected GenomeLoc regionStopLoc = null;
@ -60,10 +62,20 @@ public class ActivityProfile {
* @param parser the parser we can use to create genome locs, cannot be null
*/
public ActivityProfile(final GenomeLocParser parser) {
this(parser, null);
}
/**
* Create a empty ActivityProfile, restricting output to profiles overlapping intervals, if not null
* @param parser the parser we can use to create genome locs, cannot be null
* @param intervals only include states that are within these intervals, if not null
*/
public ActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet intervals) {
if ( parser == null ) throw new IllegalArgumentException("parser cannot be null");
this.parser = parser;
this.stateList = new ArrayList<ActivityProfileState>();
this.restrictToIntervals = intervals;
}
@Override
@ -224,7 +236,7 @@ public class ActivityProfile {
if ( position > size() )
// should we allow this? probably not
throw new IllegalArgumentException("Must add state contiguous to existing states");
throw new IllegalArgumentException("Must add state contiguous to existing states: adding " + stateToAdd);
if ( position >= 0 ) {
// ignore states starting before this regions start
@ -313,7 +325,10 @@ public class ActivityProfile {
if ( nextRegion == null )
return regions;
else {
regions.add(nextRegion);
if ( restrictToIntervals == null )
regions.add(nextRegion);
else
regions.addAll(nextRegion.splitAndTrimToIntervals(restrictToIntervals));
}
}
}

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.activeregion;
import com.google.java.contract.Ensures;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.MathUtils;
import java.util.ArrayList;
@ -53,25 +54,34 @@ public class BandPassActivityProfile extends ActivityProfile {
private final double[] GaussianKernel;
/**
* Create a new BandPassActivityProfile with default sigma and file sizes
* @param parser our genome loc parser
* Create a new BandPassActivityProfile with default sigma and filter sizes
*
* @see #BandPassActivityProfile(org.broadinstitute.sting.utils.GenomeLocParser, org.broadinstitute.sting.utils.GenomeLocSortedSet, int, double, boolean)
*/
public BandPassActivityProfile(final GenomeLocParser parser) {
this(parser, MAX_FILTER_SIZE, DEFAULT_SIGMA, true);
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals) {
this(parser, restrictToIntervals, MAX_FILTER_SIZE, DEFAULT_SIGMA, true);
}
/**
* @see #BandPassActivityProfile(org.broadinstitute.sting.utils.GenomeLocParser, org.broadinstitute.sting.utils.GenomeLocSortedSet, int, double, boolean)
*
* sets adaptiveFilterSize to true
*/
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma) {
this(parser, restrictToIntervals, maxFilterSize, sigma, true);
}
/**
* Create an activity profile that implements a band pass filter on the states
*
* @param parser our genome loc parser
* @param restrictToIntervals only include states that are within these intervals, if not null
* @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma
* @param sigma the variance of the Gaussian kernel for this band pass filter
* @param adaptiveFilterSize if true, use the kernel itself to determine the best filter size
*/
public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma) {
this(parser, maxFilterSize, sigma, true);
}
public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
super(parser);
public BandPassActivityProfile(final GenomeLocParser parser, final GenomeLocSortedSet restrictToIntervals, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) {
super(parser, restrictToIntervals);
if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma);

View File

@ -30,7 +30,9 @@ import org.broadinstitute.sting.commandline.ArgumentException;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.walkers.readutils.PrintReads;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.Test;
import java.io.File;
@ -70,10 +72,12 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest {
@Test
public void testEmptyIntervalSetHandling() throws Exception {
GenomeLocParser genomeLocParser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000).getSequenceDictionary());
GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine();
testEngine.setWalker(new PrintReads());
testEngine.setIntervals(new GenomeLocSortedSet(null));
testEngine.setIntervals(new GenomeLocSortedSet(genomeLocParser));
testEngine.validateSuppliedIntervals();
}

View File

@ -28,18 +28,21 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Iterator;
import java.util.Arrays;
import java.io.File;
import java.util.*;
/**
*
@ -321,4 +324,62 @@ public class GenomeLocSortedSetUnitTest extends BaseTest {
}
assertTrue(seqNumber == GenomeLocSortedSetUnitTest.NUMBER_OF_CHROMOSOMES);
}
// -----------------------------------------------------------------------------------------------
//
// Test getOverlapping
//
// -----------------------------------------------------------------------------------------------
@DataProvider(name = "GetOverlapping")
public Object[][] makeGetOverlappingTest() throws Exception {
final GenomeLocParser genomeLocParser = new GenomeLocParser(new CachingIndexedFastaSequenceFile(new File(b37KGReference)));
List<Object[]> tests = new ArrayList<Object[]>();
final GenomeLoc prev1 = genomeLocParser.createGenomeLoc("19", 1, 10);
final GenomeLoc prev2 = genomeLocParser.createGenomeLoc("19", 20, 50);
final GenomeLoc post1 = genomeLocParser.createGenomeLoc("21", 1, 10);
final GenomeLoc post2 = genomeLocParser.createGenomeLoc("21", 20, 50);
final int chr20Length = genomeLocParser.getContigs().getSequence("20").getSequenceLength();
for ( final int regionStart : Arrays.asList(1, 10, chr20Length - 10, chr20Length) ) {
for ( final int regionSize : Arrays.asList(1, 10, 100) ) {
final GenomeLoc region = genomeLocParser.createGenomeLocOnContig("20", regionStart, regionStart + regionSize);
final GenomeLoc spanning = genomeLocParser.createGenomeLocOnContig("20", regionStart - 10, region.getStop() + 10);
final GenomeLoc before_into = genomeLocParser.createGenomeLocOnContig("20", regionStart - 10, regionStart + 1);
final GenomeLoc middle = genomeLocParser.createGenomeLocOnContig("20", regionStart + 1, regionStart + 2);
final GenomeLoc middle_past = genomeLocParser.createGenomeLocOnContig("20", region.getStop()-1, region.getStop()+10);
final List<GenomeLoc> potentials = new LinkedList<GenomeLoc>();
potentials.add(region);
if ( spanning != null ) potentials.add(spanning);
if ( before_into != null ) potentials.add(before_into);
if ( middle != null ) potentials.add(middle);
if ( middle_past != null ) potentials.add(middle_past);
for ( final int n : Arrays.asList(1, 2, 3) ) {
for ( final List<GenomeLoc> regions : Utils.makePermutations(potentials, n, false) ) {
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, regions), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, prev2)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, post1)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, post1, post2)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, post1)), region});
tests.add(new Object[]{new GenomeLocSortedSet(genomeLocParser, Utils.append(regions, prev1, prev2, post1, post2)), region});
}
}
}
}
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "GetOverlapping")
public void testGetOverlapping(final GenomeLocSortedSet intervals, final GenomeLoc region) {
final List<GenomeLoc> expectedOverlapping = intervals.getOverlappingFullSearch(region);
final List<GenomeLoc> actualOverlapping = intervals.getOverlapping(region);
Assert.assertEquals(actualOverlapping, expectedOverlapping);
Assert.assertEquals(intervals.overlaps(region), ! expectedOverlapping.isEmpty(), "GenomeLocSortedSet.overlaps didn't return expected result");
}
}

View File

@ -34,6 +34,7 @@ import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -48,6 +49,7 @@ import java.util.*;
public class ActiveRegionUnitTest extends BaseTest {
private final static boolean DEBUG = true;
private GenomeLocParser genomeLocParser;
private IndexedFastaSequenceFile seq;
private String contig;
@ -88,7 +90,7 @@ public class ActiveRegionUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@Test(enabled = true, dataProvider = "ActionRegionCreationTest")
@Test(enabled = !DEBUG, dataProvider = "ActionRegionCreationTest")
public void testCreatingActiveRegions(final GenomeLoc loc, final List<ActivityProfileState> supportingStates, final boolean isActive, final int extension) {
final ActiveRegion region = new ActiveRegion(loc, supportingStates, isActive, genomeLocParser, extension);
Assert.assertEquals(region.getLocation(), loc);
@ -141,7 +143,7 @@ public class ActiveRegionUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "ActiveRegionReads")
@Test(enabled = !DEBUG, dataProvider = "ActiveRegionReads")
public void testActiveRegionReads(final GenomeLoc loc, final GATKSAMRecord read) {
final GenomeLoc expectedSpan = loc.union(genomeLocParser.createGenomeLoc(read));
@ -197,6 +199,12 @@ public class ActiveRegionUnitTest extends BaseTest {
Assert.assertTrue(region.getReads().get(0).getAlignmentEnd() <= region.getExtendedLoc().getStop());
}
// -----------------------------------------------------------------------------------------------
//
// Make sure bad inputs are properly detected
//
// -----------------------------------------------------------------------------------------------
@DataProvider(name = "BadReadsTest")
public Object[][] makeBadReadsTest() {
List<Object[]> tests = new ArrayList<Object[]>();
@ -213,11 +221,100 @@ public class ActiveRegionUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class)
@Test(enabled = !DEBUG, dataProvider = "BadReadsTest", expectedExceptions = IllegalArgumentException.class)
public void testBadReads(final GATKSAMRecord read1, final GATKSAMRecord read2) {
final GenomeLoc loc = genomeLocParser.createGenomeLoc(read1);
final ActiveRegion region = new ActiveRegion(loc, null, true, genomeLocParser, 0);
region.add(read1);
region.add(read2);
}
// -----------------------------------------------------------------------------------------------
//
// Make sure we can properly cut up an active region based on engine intervals
//
// -----------------------------------------------------------------------------------------------
@DataProvider(name = "SplitActiveRegion")
public Object[][] makeSplitActiveRegion() {
List<Object[]> tests = new ArrayList<Object[]>();
final GenomeLoc whole_span = genomeLocParser.createGenomeLoc("20", 1, 500);
final GenomeLoc gl_before = genomeLocParser.createGenomeLoc("20", 1, 9);
final GenomeLoc gl_after = genomeLocParser.createGenomeLoc("20", 250, 500);
final GenomeLoc gl_diff_contig = genomeLocParser.createGenomeLoc("19", 40, 50);
final int regionStart = 10;
final int regionStop = 100;
final GenomeLoc region = genomeLocParser.createGenomeLoc("20", regionStart, regionStop);
for ( final GenomeLoc noEffect : Arrays.asList(whole_span) )
tests.add(new Object[]{
region,
Arrays.asList(noEffect),
Arrays.asList(region)});
for ( final GenomeLoc noOverlap : Arrays.asList(gl_before, gl_after, gl_diff_contig) )
tests.add(new Object[]{
region,
Arrays.asList(noOverlap),
Arrays.asList()});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 5, 50)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 50))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, 200)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", 50, regionStop))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", 40, 50))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", 20, 30), genomeLocParser.createGenomeLoc("20", 40, 50))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 70, 200)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 70, regionStop))});
tests.add(new Object[]{region,
Arrays.asList(genomeLocParser.createGenomeLoc("20", 1, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, 200)),
Arrays.asList(genomeLocParser.createGenomeLoc("20", regionStart, 30), genomeLocParser.createGenomeLoc("20", 40, 50), genomeLocParser.createGenomeLoc("20", 70, regionStop))});
return tests.toArray(new Object[][]{});
}
@Test(dataProvider = "SplitActiveRegion")
public void testSplitActiveRegion(final GenomeLoc regionLoc, final List<GenomeLoc> intervalLocs, final List<GenomeLoc> expectedRegionLocs) {
for ( final boolean addSubstates : Arrays.asList(true, false) ) {
final List<ActivityProfileState> states;
if ( addSubstates ) {
states = new LinkedList<ActivityProfileState>();
for ( int i = 0; i < regionLoc.size(); i++ )
states.add(new ActivityProfileState(genomeLocParser.createGenomeLoc(regionLoc.getContig(), regionLoc.getStart() + i), 1.0));
} else {
states = null;
}
final ActiveRegion region = new ActiveRegion(regionLoc, states, true, genomeLocParser, 0);
final GenomeLocSortedSet intervals = new GenomeLocSortedSet(genomeLocParser, intervalLocs);
final List<ActiveRegion> regions = region.splitAndTrimToIntervals(intervals);
Assert.assertEquals(regions.size(), expectedRegionLocs.size(), "Wrong number of split locations");
for ( int i = 0; i < expectedRegionLocs.size(); i++ ) {
final GenomeLoc expected = expectedRegionLocs.get(i);
final ActiveRegion actual = regions.get(i);
Assert.assertEquals(actual.getLocation(), expected, "Bad region after split");
Assert.assertEquals(actual.isActive(), region.isActive());
Assert.assertEquals(actual.getExtension(), region.getExtension());
}
}
}
}

View File

@ -89,7 +89,7 @@ public class ActivityProfileUnitTest extends BaseTest {
case Base: return new ActivityProfile(genomeLocParser);
case BandPass:
// zero size => equivalent to ActivityProfile
return new BandPassActivityProfile(genomeLocParser, 0, 0.01, false);
return new BandPassActivityProfile(genomeLocParser, null, 0, 0.01, false);
default: throw new IllegalStateException(type.toString());
}
}

View File

@ -87,7 +87,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
@Test(enabled = ! DEBUG, dataProvider = "BandPassBasicTest")
public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) {
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma, false);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, sigma, false);
final int expectedBandSize = bandPassSize * 2 + 1;
Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size");
@ -142,7 +142,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
@Test( enabled = ! DEBUG, dataProvider = "BandPassComposition")
public void testBandPassComposition(final int bandPassSize, final int integrationLength) {
final int start = 1;
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA);
final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2];
// add a buffer so that we can get all of the band pass values
@ -215,7 +215,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
@Test( enabled = ! DEBUG, dataProvider = "KernelCreation")
public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) {
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, maxSize, sigma, true);
final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, null, maxSize, sigma, true);
final double[] kernel = profile.getKernel();
Assert.assertEquals(kernel.length, expectedKernel.length);
@ -255,8 +255,8 @@ public class BandPassActivityProfileUnitTest extends BaseTest {
final Pair<VCFHeader, GATKVCFUtils.VCIterable> reader = GATKVCFUtils.readAllVCs(file, codec);
final List<ActiveRegion> incRegions = new ArrayList<ActiveRegion>();
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser);
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser);
final BandPassActivityProfile incProfile = new BandPassActivityProfile(genomeLocParser, null);
final BandPassActivityProfile fullProfile = new BandPassActivityProfile(genomeLocParser, null);
int pos = start;
for ( final VariantContext vc : reader.getSecond() ) {
if ( vc == null ) continue;