Cleanup ART
-- Initialize routine captures essential information for running the traversal
This commit is contained in:
parent
9b2be795a7
commit
02130dfde7
|
|
@ -213,7 +213,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
|
||||
// Now that we have a progress meter, go through and initialize the traversal engines
|
||||
for ( final TraversalEngine traversalEngine : allCreatedTraversalEngines )
|
||||
traversalEngine.initialize(engine, progressMeter);
|
||||
traversalEngine.initialize(engine, walker, progressMeter);
|
||||
|
||||
// JMX does not allow multiple instances with the same ObjectName to be registered with the same platform MXBean.
|
||||
// To get around this limitation and since we have no job identifier at this point, register a simple counter that
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
* @param engine GenomeAnalysisEngine for this traversal
|
||||
* @param progressMeter An optional (null == optional) meter to track our progress
|
||||
*/
|
||||
public void initialize(final GenomeAnalysisEngine engine, final ProgressMeter progressMeter) {
|
||||
public void initialize(final GenomeAnalysisEngine engine, final Walker walker, final ProgressMeter progressMeter) {
|
||||
if ( engine == null )
|
||||
throw new ReviewedStingException("BUG: GenomeAnalysisEngine cannot be null!");
|
||||
|
||||
|
|
@ -87,8 +87,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
*
|
||||
* @param engine
|
||||
*/
|
||||
protected void initialize(final GenomeAnalysisEngine engine) {
|
||||
initialize(engine, null);
|
||||
protected void initialize(final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
initialize(engine, walker, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -39,6 +40,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.LinkedList;
|
||||
|
|
@ -52,9 +54,11 @@ import java.util.List;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public abstract class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,LocusShardDataProvider> {
|
||||
protected final static boolean DEBUG = false;
|
||||
|
||||
// set by the traversal
|
||||
protected int activeRegionExtension = -1;
|
||||
protected int maxRegionSize = -1;
|
||||
private int activeRegionExtension = -1;
|
||||
private int maxRegionSize = -1;
|
||||
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
|
|
@ -64,11 +68,32 @@ public abstract class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Ac
|
|||
|
||||
abstract protected T processActiveRegion(final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M, T> walker);
|
||||
|
||||
protected int getActiveRegionExtension() {
|
||||
return activeRegionExtension;
|
||||
}
|
||||
|
||||
protected int getMaxRegionSize() {
|
||||
return maxRegionSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void initialize(GenomeAnalysisEngine engine, Walker walker, ProgressMeter progressMeter) {
|
||||
super.initialize(engine, walker, progressMeter);
|
||||
activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
final ActiveRegionWalker arWalker = (ActiveRegionWalker)walker;
|
||||
if ( arWalker.wantsExtendedReads() && ! arWalker.wantsNonPrimaryReads() ) {
|
||||
throw new IllegalArgumentException("Active region walker " + arWalker + " requested extended events but not " +
|
||||
"non-primary reads, an inconsistent state. Please modify the walker");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the loc outside of the intervals being requested for processing by the GATK?
|
||||
* @param loc
|
||||
|
|
@ -85,19 +110,15 @@ public abstract class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Ac
|
|||
*
|
||||
* @param profile
|
||||
* @param activeRegions
|
||||
* @param activeRegionExtension
|
||||
* @param maxRegionSize
|
||||
* @return
|
||||
*/
|
||||
protected ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
|
||||
final List<ActiveRegion> activeRegions,
|
||||
final int activeRegionExtension,
|
||||
final int maxRegionSize) {
|
||||
final List<ActiveRegion> activeRegions) {
|
||||
if ( profile.isEmpty() )
|
||||
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
|
||||
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( getActiveRegionExtension(), getMaxRegionSize() ));
|
||||
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
|
||||
}
|
||||
|
||||
|
|
@ -161,7 +182,7 @@ public abstract class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Ac
|
|||
}
|
||||
|
||||
protected boolean regionCompletelyWithinDeadZone(final GenomeLoc region, final boolean includeExtension) {
|
||||
return (region.getStop() < (getStartOfLiveRegion().getStart() - (includeExtension ? activeRegionExtension : 0)))
|
||||
return (region.getStop() < (getStartOfLiveRegion().getStart() - (includeExtension ? getActiveRegionExtension() : 0)))
|
||||
|| ! region.onSameContig(getStartOfLiveRegion());
|
||||
}
|
||||
|
||||
|
|
@ -172,7 +193,7 @@ public abstract class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Ac
|
|||
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
|
||||
if ( forceRegionsToBeActive || regionCompletelyWithinDeadZone(extendedLoc, false) ) {
|
||||
final ActiveRegion activeRegion = workQueue.remove();
|
||||
logger.warn("Processing active region " + activeRegion + " dead zone " + getStartOfLiveRegion());
|
||||
if ( DEBUG ) logger.warn("Processing active region " + activeRegion + " dead zone " + getStartOfLiveRegion());
|
||||
sum = processActiveRegion( activeRegion, sum, walker );
|
||||
} else {
|
||||
break;
|
||||
|
|
@ -190,15 +211,18 @@ public abstract class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Ac
|
|||
return processActiveRegions((ActiveRegionWalker<M, T>)walker, sum, true);
|
||||
}
|
||||
|
||||
// todo -- remove me
|
||||
protected ActiveRegion getBestRegion(final ActiveRegion activeRegion, final GenomeLoc readLoc) {
|
||||
long minStart = activeRegion.getLocation().getStart();
|
||||
ActiveRegion bestRegion = activeRegion;
|
||||
long maxOverlap = activeRegion.getLocation().sizeOfOverlap( readLoc );
|
||||
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( otherRegionToTest.getLocation().sizeOfOverlap(readLoc) >= maxOverlap ) {
|
||||
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( readLoc );
|
||||
if( otherRegionToTest.getLocation().getStart() < minStart ) {
|
||||
minStart = otherRegionToTest.getLocation().getStart();
|
||||
bestRegion = otherRegionToTest;
|
||||
}
|
||||
}
|
||||
|
||||
return bestRegion;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import net.sf.samtools.SAMRecord;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
|
|
@ -47,18 +48,26 @@ import java.util.*;
|
|||
|
||||
public class TraverseActiveRegionsOptimized<M,T> extends TraverseActiveRegions<M,T> {
|
||||
private LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
private Shard lastShard = null;
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
if ( DEBUG ) logger.warn(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
final HashSet<GATKSAMRecord> maybeDuplicatedReads = new HashSet<GATKSAMRecord>();
|
||||
// TODO -- there's got to be a better way to know this
|
||||
if ( lastShard != dataProvider.getShard() ) {
|
||||
maybeDuplicatedReads.addAll(myReads);
|
||||
logger.info("Crossing shard boundary requires us to check for duplicates against " + maybeDuplicatedReads.size() + " reads");
|
||||
if ( DEBUG ) logger.warn("Clearing myReads");
|
||||
}
|
||||
lastShard = dataProvider.getShard();
|
||||
|
||||
final LocusView locusView = new AllLocusView(dataProvider);
|
||||
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
|
||||
|
|
@ -77,7 +86,15 @@ public class TraverseActiveRegionsOptimized<M,T> extends TraverseActiveRegions<M
|
|||
final Collection<SAMRecord> reads = locusView.getLIBS().transferReadsFromAllPreviousPileups();
|
||||
for( final SAMRecord read : reads ) {
|
||||
notifyOfCurrentPosition((GATKSAMRecord)read);
|
||||
myReads.add((GATKSAMRecord)read);
|
||||
// most of the time maybeDuplicatedReads is empty
|
||||
// TODO -- I believe that because of the ordering of reads that as soon as we don't find a read in the
|
||||
// TODO -- potential list of duplicates we can clear the hashset
|
||||
if ( ! maybeDuplicatedReads.isEmpty() && maybeDuplicatedReads.contains(read) ) {
|
||||
if ( DEBUG ) logger.warn("Skipping duplicated " + read.getReadName());
|
||||
} else {
|
||||
if ( DEBUG ) logger.warn("Adding read " + read.getReadName() + " at " + engine.getGenomeLocParser().createGenomeLoc(read) + " from provider " + dataProvider);
|
||||
myReads.add((GATKSAMRecord)read);
|
||||
}
|
||||
}
|
||||
|
||||
// skip this location -- it's not part of our engine intervals
|
||||
|
|
@ -86,7 +103,7 @@ public class TraverseActiveRegionsOptimized<M,T> extends TraverseActiveRegions<M
|
|||
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've moved across some interval boundary, restart profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
profile = incorporateActiveRegions(profile, activeRegions);
|
||||
}
|
||||
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
|
@ -109,7 +126,7 @@ public class TraverseActiveRegionsOptimized<M,T> extends TraverseActiveRegions<M
|
|||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
incorporateActiveRegions(profile, activeRegions);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
|
|
@ -117,10 +134,10 @@ public class TraverseActiveRegionsOptimized<M,T> extends TraverseActiveRegions<M
|
|||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= getMaxRegionSize() ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
|
||||
workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), getActiveRegionExtension()) );
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
|
|
@ -139,50 +156,32 @@ public class TraverseActiveRegionsOptimized<M,T> extends TraverseActiveRegions<M
|
|||
return "TraverseActiveRegionsOptimized";
|
||||
}
|
||||
|
||||
// TODO -- remove me when we fix the traversal
|
||||
private final void addToRegion(final ActiveRegion region, final GATKSAMRecord read) {
|
||||
if ( ! region.getReads().contains(read) )
|
||||
region.add(read);
|
||||
private boolean readIsDead(final GATKSAMRecord read, final GenomeLoc readLoc, final ActiveRegion activeRegion) {
|
||||
return readLoc.getStop() < activeRegion.getLocation().getStart() && regionCompletelyWithinDeadZone(readLoc, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected T processActiveRegion(final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M, T> walker) {
|
||||
final Iterator<GATKSAMRecord> liveReads = myReads.iterator();
|
||||
while ( liveReads.hasNext() ) {
|
||||
boolean killed = false;
|
||||
final GATKSAMRecord read = liveReads.next();
|
||||
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
|
||||
|
||||
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
|
||||
// TODO -- this test assumes that we've successfully defined all regions that might be
|
||||
// TODO -- the primary home for read. Doesn't seem safe to me
|
||||
// The region with the highest amount of overlap is chosen as the primary region for the read (tie breaking is done as rightmost region)
|
||||
final ActiveRegion bestRegion = getBestRegion(activeRegion, readLoc);
|
||||
addToRegion(bestRegion, read);
|
||||
activeRegion.add(read);
|
||||
|
||||
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
||||
|
||||
if( walker.wantsNonPrimaryReads() ) {
|
||||
if( !bestRegion.equals(activeRegion) ) {
|
||||
addToRegion(activeRegion, read);
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) ) {
|
||||
// check for non-primary vs. extended
|
||||
if ( otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
|
||||
addToRegion(otherRegionToTest, read);
|
||||
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
|
||||
addToRegion(otherRegionToTest, read);
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( ! walker.wantsNonPrimaryReads() ) {
|
||||
if ( DEBUG ) logger.warn("Removing read " + read.getReadName() + " at " + readLoc + " with dead zone start " + getStartOfLiveRegion());
|
||||
liveReads.remove();
|
||||
killed = true;
|
||||
}
|
||||
// check for non-primary vs. extended
|
||||
} else if( walker.wantsExtendedReads() && activeRegion.getExtendedLoc().overlapsP( readLoc )) {
|
||||
activeRegion.add( read );
|
||||
}
|
||||
|
||||
if ( regionCompletelyWithinDeadZone(readLoc, true) ) {
|
||||
logger.info("Removing read " + read.getReadName() + " at " + readLoc + " with dead zone start " + getStartOfLiveRegion());
|
||||
if ( ! killed && readIsDead(read, readLoc, activeRegion) ) {
|
||||
if ( DEBUG ) logger.warn("Removing read " + read.getReadName() + " at " + readLoc + " with dead zone start " + getStartOfLiveRegion());
|
||||
liveReads.remove();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,8 +40,6 @@ public class TraverseActiveRegionsOriginal<M,T> extends TraverseActiveRegions<M,
|
|||
final LocusView locusView = new AllLocusView(dataProvider);
|
||||
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
int minStart = Integer.MAX_VALUE;
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
|
|
@ -77,7 +75,7 @@ public class TraverseActiveRegionsOriginal<M,T> extends TraverseActiveRegions<M,
|
|||
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've moved across some interval boundary, restart profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
profile = incorporateActiveRegions(profile, activeRegions);
|
||||
}
|
||||
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
|
@ -100,7 +98,7 @@ public class TraverseActiveRegionsOriginal<M,T> extends TraverseActiveRegions<M,
|
|||
updateCumulativeMetrics(dataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
incorporateActiveRegions(profile, activeRegions);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
|
|
@ -108,10 +106,10 @@ public class TraverseActiveRegionsOriginal<M,T> extends TraverseActiveRegions<M,
|
|||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= getMaxRegionSize() ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
|
||||
workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), getActiveRegionExtension()) );
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
|
|||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
|
|
@ -148,6 +149,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
|
||||
@BeforeClass
|
||||
private void init() throws FileNotFoundException {
|
||||
//reference = new CachingIndexedFastaSequenceFile(new File("/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta")); // hg19Reference));
|
||||
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
|
||||
dictionary = reference.getSequenceDictionary();
|
||||
genomeLocParser = new GenomeLocParser(dictionary);
|
||||
|
|
@ -213,7 +215,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
|
||||
private List<GenomeLoc> getIsActiveIntervals(final TraverseActiveRegions t, DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(t, intervals, testBAM)) {
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(t, walker, intervals, testBAM)) {
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
activeIntervals.addAll(walker.isActiveCalls);
|
||||
}
|
||||
|
|
@ -308,40 +310,40 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
// simple: Primary in 1:1-999
|
||||
// overlap_equal: Primary in 1:1-999
|
||||
// overlap_unequal: Primary in 1:1-999
|
||||
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// boundary_1_pre: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// boundary_1_post: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_and_np: Primary in 1:1-999, Non-Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(t, walker, intervals);
|
||||
ActiveRegion region;
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
|
||||
verifyReadMapping(region, "simple", "overlap_equal", "overlap_unequal");
|
||||
verifyReadMapping(region, "simple", "overlap_equal", "overlap_unequal", "extended_and_np");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
|
||||
verifyReadMapping(region, "boundary_unequal", "extended_and_np", "boundary_1_pre");
|
||||
verifyReadMapping(region, "boundary_unequal", "boundary_1_pre", "boundary_equal", "boundary_1_post");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
|
||||
verifyReadMapping(region, "boundary_equal", "boundary_1_post");
|
||||
verifyReadMapping(region);
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
|
||||
verifyReadMapping(region, "shard_boundary_1_pre");
|
||||
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
|
||||
verifyReadMapping(region, "shard_boundary_1_post", "shard_boundary_equal");
|
||||
verifyReadMapping(region);
|
||||
|
||||
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
|
||||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "TraversalEngineProvider")
|
||||
@Test(enabled = true, dataProvider = "TraversalEngineProvider")
|
||||
public void testNonPrimaryReadMapping(TraverseActiveRegions t) {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
|
||||
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
|
||||
|
|
@ -354,15 +356,15 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
// simple: Primary in 1:1-999
|
||||
// overlap_equal: Primary in 1:1-999
|
||||
// overlap_unequal: Primary in 1:1-999
|
||||
// boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// boundary_1_pre: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
|
||||
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// boundary_1_post: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
|
||||
// extended_and_np: Primary in 1:1-999, Non-Primary in 1:1000-1999, Extended in 1:2000-2999
|
||||
// outside_intervals: none
|
||||
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
|
||||
// shard_boundary_1_post: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// shard_boundary_equal: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
|
||||
// simple20: Primary in 20:10000-10100
|
||||
|
||||
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(t, walker, intervals);
|
||||
|
|
@ -387,7 +389,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
verifyReadMapping(region, "simple20");
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "TraversalEngineProvider")
|
||||
@Test(enabled = true && ! DEBUG, dataProvider = "TraversalEngineProvider")
|
||||
public void testExtendedReadMapping(TraverseActiveRegions t) {
|
||||
DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
|
||||
EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED));
|
||||
|
|
@ -457,7 +459,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private Map<GenomeLoc, ActiveRegion> getActiveRegions(TraverseActiveRegions t, DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(t, intervals, testBAM))
|
||||
for (LocusShardDataProvider dataProvider : createDataProviders(t, walker, intervals, testBAM))
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
|
||||
t.endTraversal(walker, 0);
|
||||
|
|
@ -521,10 +523,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
return record;
|
||||
}
|
||||
|
||||
private List<LocusShardDataProvider> createDataProviders(TraverseActiveRegions t, List<GenomeLoc> intervals, String bamFile) {
|
||||
private List<LocusShardDataProvider> createDataProviders(TraverseActiveRegions t, final Walker walker, List<GenomeLoc> intervals, String bamFile) {
|
||||
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
t.initialize(engine);
|
||||
t.initialize(engine, walker);
|
||||
|
||||
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
|
||||
SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags());
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public class TraverseDuplicatesUnitTest extends BaseTest {
|
|||
engine.setReferenceDataSource(refFile);
|
||||
engine.setGenomeLocParser(genomeLocParser);
|
||||
|
||||
obj.initialize(engine);
|
||||
obj.initialize(engine, null);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
countReadWalker = new CountReads();
|
||||
|
||||
traversalEngine = new TraverseReadsNano(1);
|
||||
traversalEngine.initialize(engine);
|
||||
traversalEngine.initialize(engine, countReadWalker);
|
||||
}
|
||||
|
||||
/** Test out that we can shard the file and iterate over every read */
|
||||
|
|
|
|||
Loading…
Reference in New Issue