Implement ActiveRegionTraversal RefMetaDataTracker for map call; HaplotypeCaller now annotates ID from dbSNP
-- Reuse infrastructure for RODs for reads to implement general IntervalReferenceOrderedView so that both TraverseReads and TraverseActiveRegions can use the same underlying infrastructure -- TraverseActiveRegions now provides a meaningful RefMetaDataTracker to ActiveRegionWalker.map -- Cleanup misc. code as it came up -- Resolves GSA-808: Write general utility code to do rsID allele matching, hook up to UG and HC
This commit is contained in:
parent
0d593cff70
commit
1c03ebc82d
|
|
@ -49,6 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||
|
|
@ -146,6 +147,7 @@ public class GenotypingEngine {
|
|||
final GenomeLoc refLoc,
|
||||
final GenomeLoc activeRegionWindow,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final RefMetaDataTracker tracker,
|
||||
final List<VariantContext> activeAllelesToGenotype ) {
|
||||
// sanity check input arguments
|
||||
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
||||
|
|
@ -204,7 +206,7 @@ public class GenotypingEngine {
|
|||
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) );
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
|
||||
|
||||
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
|
||||
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
|
||||
|
||||
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||
|
|
|
|||
|
|
@ -441,7 +441,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
private final static int MIN_READ_LENGTH = 10;
|
||||
|
||||
private List<String> samplesList = new ArrayList<String>();
|
||||
private final List<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();
|
||||
|
||||
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
|
||||
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
|
||||
|
|
@ -596,7 +595,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
final VariantContext vcFromAllelesRod = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), false, logger, UG_engine.getUAC().alleles);
|
||||
if( vcFromAllelesRod != null ) {
|
||||
allelesToGenotype.add(vcFromAllelesRod); // save for later for processing during the ActiveRegion's map call. Should be folded into a RefMetaDataTracker object
|
||||
return new ActivityProfileState(ref.getLocus(), 1.0);
|
||||
}
|
||||
}
|
||||
|
|
@ -664,12 +662,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
|
||||
final List<VariantContext> activeAllelesToGenotype = new ArrayList<>();
|
||||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||
for( final VariantContext vc : allelesToGenotype ) {
|
||||
if( originalActiveRegion.getLocation().overlapsP( getToolkit().getGenomeLocParser().createGenomeLoc(vc) ) ) {
|
||||
for ( final VariantContext vc : metaDataTracker.getValues(UG_engine.getUAC().alleles) ) {
|
||||
if ( vc.isNotFiltered() ) {
|
||||
activeAllelesToGenotype.add(vc); // do something with these VCs during GGA mode
|
||||
}
|
||||
}
|
||||
allelesToGenotype.removeAll( activeAllelesToGenotype );
|
||||
// No alleles found in this region so nothing to do!
|
||||
if ( activeAllelesToGenotype.isEmpty() ) { return NO_CALLS; }
|
||||
} else {
|
||||
|
|
@ -704,6 +701,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
assemblyResult.paddedReferenceLoc,
|
||||
assemblyResult.regionForGenotyping.getLocation(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
metaDataTracker,
|
||||
activeAllelesToGenotype );
|
||||
|
||||
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
|
||||
|
|
@ -890,8 +888,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
@Override
|
||||
public Integer reduce(List<VariantContext> callsInRegion, Integer numCalledRegions) {
|
||||
for( final VariantContext call : callsInRegion ) {
|
||||
// TODO -- uncomment this line once ART-based walkers have a proper RefMetaDataTracker.
|
||||
// annotationEngine.annotateDBs(metaDataTracker, getToolkit().getGenomeLocParser().createGenomeLoc(call), call);
|
||||
vcfWriter.add( call );
|
||||
}
|
||||
return (callsInRegion.isEmpty() ? 0 : 1) + numCalledRegions;
|
||||
|
|
|
|||
|
|
@ -47,15 +47,12 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.broad.tribble.TribbleIndexedFeatureReader;
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -69,6 +66,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||
final static String NA12878_PCRFREE = privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam";
|
||||
final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam";
|
||||
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||
|
||||
|
|
@ -199,4 +197,27 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
Arrays.asList("86bdd07a3ac4f6ce239c30efea8bf5ba"));
|
||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// test dbSNP annotation
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("7b23a288a31cafca3946f14f2381e7cb"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void HCTestDBSNPAnnotationWEx() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
|
||||
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1,
|
||||
Arrays.asList("9587029b702bb59bd4dfec69eac4c210"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,8 +72,6 @@ class IntervalOverlappingRODsFromStream {
|
|||
/**
|
||||
* Get the list of RODs overlapping loc from this stream of RODs.
|
||||
*
|
||||
* Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart
|
||||
*
|
||||
* @param loc the interval to query
|
||||
* @return a non-null RODRecordList containing the overlapping RODs, which may be empty
|
||||
*/
|
||||
|
|
@ -84,7 +82,6 @@ class IntervalOverlappingRODsFromStream {
|
|||
if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
|
||||
throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));
|
||||
|
||||
trimCurrentFeaturesToLoc(loc);
|
||||
readOverlappingFutureFeatures(loc);
|
||||
return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
|
||||
}
|
||||
|
|
@ -128,11 +125,14 @@ class IntervalOverlappingRODsFromStream {
|
|||
/**
|
||||
* Update function. Remove all elements of currentFeatures that end before loc
|
||||
*
|
||||
* Must be called by clients periodically when they know they they will never ask for data before
|
||||
* loc, so that the running cache of RODs doesn't grow out of control.
|
||||
*
|
||||
* @param loc the location to use
|
||||
*/
|
||||
@Requires("loc != null")
|
||||
@Ensures("currentFeatures.size() <= old(currentFeatures.size())")
|
||||
private void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
|
||||
public void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
|
||||
final ListIterator<GATKFeature> it = currentFeatures.listIterator();
|
||||
while ( it.hasNext() ) {
|
||||
final GATKFeature feature = it.next();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.picard.util.PeekableIterator;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* a ROD view that allows for requests for RODs that overlap intervals on the genome to produce a RefMetaDataTracker
|
||||
*/
|
||||
public class IntervalReferenceOrderedView implements ReferenceOrderedView {
|
||||
/** a list of the RMDDataState (location->iterators) */
|
||||
private final List<RMDDataState> states = new ArrayList<>(1);
|
||||
|
||||
/**
|
||||
* Used to get genome locs for reads
|
||||
*/
|
||||
protected final GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* The total extent of all reads in this span. We create iterators from our RODs
|
||||
* from the start of this span, to the end.
|
||||
*/
|
||||
private final GenomeLoc shardSpan;
|
||||
|
||||
/**
|
||||
* Create a new IntervalReferenceOrderedView taking data from provider and capable of
|
||||
* servicing ROD overlap requests within the genomic interval span
|
||||
*
|
||||
* @param provider a ShardDataProvider to give us data
|
||||
* @param span a GenomeLoc span, or null indicating take the entire genome
|
||||
*/
|
||||
public IntervalReferenceOrderedView(final ShardDataProvider provider, final GenomeLoc span) {
|
||||
if ( provider == null ) throw new IllegalArgumentException("provider cannot be null");
|
||||
if ( provider.hasReferenceOrderedData() && span == null ) throw new IllegalArgumentException("span cannot be null when provider has reference ordered data");
|
||||
|
||||
this.genomeLocParser = provider.getGenomeLocParser();
|
||||
this.shardSpan = span;
|
||||
provider.register(this);
|
||||
|
||||
// conditional to optimize the case where we don't have any ROD data
|
||||
if ( provider.hasReferenceOrderedData() && ! shardSpan.isUnmapped() ) {
|
||||
for (final ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan)));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Testing constructor
|
||||
*/
|
||||
protected IntervalReferenceOrderedView(final GenomeLocParser genomeLocParser,
|
||||
final GenomeLoc shardSpan,
|
||||
final List<String> names,
|
||||
final List<PeekableIterator<RODRecordList>> featureSources) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.shardSpan = shardSpan;
|
||||
for ( int i = 0; i < names.size(); i++ )
|
||||
states.add(new RMDDataState(names.get(i), featureSources.get(i)));
|
||||
}
|
||||
|
||||
public Collection<Class<? extends View>> getConflictingViews() {
|
||||
List<Class<? extends View>> classes = new ArrayList<>();
|
||||
classes.add(ManagingReferenceOrderedView.class);
|
||||
return classes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a RefMetaDataTracker containing bindings for all RODs overlapping the start position of loc
|
||||
* @param loc a GenomeLoc of size == 1
|
||||
* @return a non-null RefMetaDataTracker
|
||||
*/
|
||||
@Override
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus(GenomeLoc loc) {
|
||||
if ( loc == null ) throw new IllegalArgumentException("loc cannot be null");
|
||||
if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc);
|
||||
return getReferenceOrderedDataForInterval(loc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a RefMetaDataTracker containing bindings for all RODs overlapping interval
|
||||
*
|
||||
* @param interval a non=null interval
|
||||
* @return a non-null RefMetaDataTracker
|
||||
*/
|
||||
public RefMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) {
|
||||
if ( interval == null ) throw new IllegalArgumentException("Interval cannot be null");
|
||||
|
||||
if ( states.isEmpty() || shardSpan.isUnmapped() ) // optimization for no bindings (common for read walkers)
|
||||
return RefMetaDataTracker.EMPTY_TRACKER;
|
||||
else {
|
||||
final List<RODRecordList> bindings = new ArrayList<>(states.size());
|
||||
for ( final RMDDataState state : states )
|
||||
bindings.add(state.stream.getOverlapping(interval));
|
||||
return new RefMetaDataTracker(bindings);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Trim down all of the ROD managers so that they only hold ROD bindings wit start >= startOfDataToKeep.getStart()
|
||||
*
|
||||
* @param startOfDataToKeep a non-null genome loc
|
||||
*/
|
||||
public void trimCurrentFeaturesToLoc(final GenomeLoc startOfDataToKeep) {
|
||||
if ( startOfDataToKeep == null ) throw new IllegalArgumentException("startOfDataToKeep cannot be null");
|
||||
|
||||
for ( final RMDDataState state : states )
|
||||
state.stream.trimCurrentFeaturesToLoc(startOfDataToKeep);
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the current view.
|
||||
*/
|
||||
public void close() {
|
||||
for (final RMDDataState state : states)
|
||||
state.close();
|
||||
|
||||
// Clear out the existing data so that post-close() accesses to this data will fail-fast.
|
||||
states.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* Models the traversal state of a given ROD lane.
|
||||
*/
|
||||
private static class RMDDataState {
|
||||
public final ReferenceOrderedDataSource dataSource;
|
||||
public final IntervalOverlappingRODsFromStream stream;
|
||||
private final LocationAwareSeekableRODIterator iterator;
|
||||
|
||||
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
|
||||
this.dataSource = dataSource;
|
||||
this.iterator = iterator;
|
||||
this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<>(iterator));
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing
|
||||
*/
|
||||
public RMDDataState(final String name, final PeekableIterator<RODRecordList> iterator) {
|
||||
this.dataSource = null;
|
||||
this.iterator = null;
|
||||
this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<>(iterator));
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if ( dataSource != null )
|
||||
dataSource.close( iterator );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +76,8 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
|||
* @param loc Locus at which to track.
|
||||
* @return A tracker containing information about this locus.
|
||||
*/
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
@Override
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||
if ( states.isEmpty() )
|
||||
return RefMetaDataTracker.EMPTY_TRACKER;
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -42,52 +42,9 @@ import java.util.Collection;
|
|||
import java.util.List;
|
||||
|
||||
/** a ROD view for reads. This provides the Read traversals a way of getting a RefMetaDataTracker */
|
||||
public class ReadBasedReferenceOrderedView implements View {
|
||||
// a list of the RMDDataState (location->iterators)
|
||||
private final List<RMDDataState> states = new ArrayList<RMDDataState>(1);
|
||||
private final static RefMetaDataTracker EMPTY_TRACKER = new RefMetaDataTracker();
|
||||
|
||||
/**
|
||||
* Used to get genome locs for reads
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* The total extent of all reads in this span. We create iterators from our RODs
|
||||
* from the start of this span, to the end.
|
||||
*/
|
||||
private final GenomeLoc shardSpan;
|
||||
|
||||
public class ReadBasedReferenceOrderedView extends IntervalReferenceOrderedView {
|
||||
public ReadBasedReferenceOrderedView(final ShardDataProvider provider) {
|
||||
this.genomeLocParser = provider.getGenomeLocParser();
|
||||
// conditional to optimize the case where we don't have any ROD data
|
||||
this.shardSpan = provider.getReferenceOrderedData() != null ? ((ReadShard)provider.getShard()).getReadsSpan() : null;
|
||||
provider.register(this);
|
||||
|
||||
if ( provider.getReferenceOrderedData() != null && ! shardSpan.isUnmapped() ) {
|
||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan)));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Testing constructor
|
||||
*/
|
||||
protected ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser,
|
||||
final GenomeLoc shardSpan,
|
||||
final List<String> names,
|
||||
final List<PeekableIterator<RODRecordList>> featureSources) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.shardSpan = shardSpan;
|
||||
for ( int i = 0; i < names.size(); i++ )
|
||||
states.add(new RMDDataState(names.get(i), featureSources.get(i)));
|
||||
}
|
||||
|
||||
public Collection<Class<? extends View>> getConflictingViews() {
|
||||
List<Class<? extends View>> classes = new ArrayList<Class<? extends View>>();
|
||||
classes.add(ManagingReferenceOrderedView.class);
|
||||
return classes;
|
||||
super(provider, provider.hasReferenceOrderedData() ? ((ReadShard)provider.getShard()).getReadsSpan() : null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -101,60 +58,11 @@ public class ReadBasedReferenceOrderedView implements View {
|
|||
@Ensures("result != null")
|
||||
public RefMetaDataTracker getReferenceOrderedDataForRead(final SAMRecord rec) {
|
||||
if ( rec.getReadUnmappedFlag() )
|
||||
// empty RODs for unmapped reads
|
||||
return new RefMetaDataTracker();
|
||||
else
|
||||
return getReferenceOrderedDataForInterval(genomeLocParser.createGenomeLoc(rec));
|
||||
}
|
||||
|
||||
@Requires({"interval != null", "shardSpan == null || shardSpan.isUnmapped() || shardSpan.containsP(interval)"})
|
||||
@Ensures("result != null")
|
||||
public RefMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) {
|
||||
if ( states.isEmpty() || shardSpan.isUnmapped() ) // optimization for no bindings (common for read walkers)
|
||||
return EMPTY_TRACKER;
|
||||
return RefMetaDataTracker.EMPTY_TRACKER;
|
||||
else {
|
||||
final List<RODRecordList> bindings = new ArrayList<RODRecordList>(states.size());
|
||||
for ( final RMDDataState state : states )
|
||||
bindings.add(state.stream.getOverlapping(interval));
|
||||
return new RefMetaDataTracker(bindings);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the current view.
|
||||
*/
|
||||
public void close() {
|
||||
for (final RMDDataState state : states)
|
||||
state.close();
|
||||
|
||||
// Clear out the existing data so that post-close() accesses to this data will fail-fast.
|
||||
states.clear();
|
||||
}
|
||||
|
||||
/** Models the traversal state of a given ROD lane. */
|
||||
private static class RMDDataState {
|
||||
public final ReferenceOrderedDataSource dataSource;
|
||||
public final IntervalOverlappingRODsFromStream stream;
|
||||
private final LocationAwareSeekableRODIterator iterator;
|
||||
|
||||
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
|
||||
this.dataSource = dataSource;
|
||||
this.iterator = iterator;
|
||||
this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<RODRecordList>(iterator));
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing
|
||||
*/
|
||||
public RMDDataState(final String name, final PeekableIterator<RODRecordList> iterator) {
|
||||
this.dataSource = null;
|
||||
this.iterator = null;
|
||||
this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<RODRecordList>(iterator));
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if ( dataSource != null )
|
||||
dataSource.close( iterator );
|
||||
final GenomeLoc readSpan = genomeLocParser.createGenomeLoc(rec);
|
||||
trimCurrentFeaturesToLoc(readSpan);
|
||||
return getReferenceOrderedDataForInterval(readSpan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,10 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
public interface ReferenceOrderedView extends View {
|
||||
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext );
|
||||
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -98,7 +98,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
rodQueue = new RODMergingIterator(iterators);
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
@Override
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||
// special case the interval again -- add it into the ROD
|
||||
if ( interval != null ) { allTracksHere.add(interval); }
|
||||
return new RefMetaDataTracker(allTracksHere);
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
|||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
||||
|
|
@ -114,12 +113,6 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
done = walker.isDone();
|
||||
}
|
||||
|
||||
// Special function call to empty out the work queue. Ugly for now but will be cleaned up when we eventually push this functionality more into the engine
|
||||
if( traversalEngine instanceof TraverseActiveRegions) {
|
||||
final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
||||
}
|
||||
|
||||
Object result = accumulator.finishTraversal();
|
||||
|
||||
outputTracker.close();
|
||||
|
|
|
|||
|
|
@ -29,14 +29,12 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionTraversalParameters;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
|
@ -92,12 +90,26 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
|
||||
private TAROrderedReadCache myReads = null;
|
||||
|
||||
private GenomeLoc lastRegionProcessed = null;
|
||||
private GenomeLoc spanOfLastReadSeen = null;
|
||||
private ActivityProfile activityProfile = null;
|
||||
int maxReadsInMemory = 0;
|
||||
ActiveRegionWalker<M, T> walker;
|
||||
|
||||
final NanoScheduler<ActiveRegion, M, T> nanoScheduler;
|
||||
final NanoScheduler<MapData, M, T> nanoScheduler;
|
||||
|
||||
/**
|
||||
* Data to use in the ActiveRegionWalker.map function produced by the NanoScheduler input iterator
|
||||
*/
|
||||
private static class MapData {
|
||||
public ActiveRegion activeRegion;
|
||||
public RefMetaDataTracker tracker;
|
||||
|
||||
private MapData(ActiveRegion activeRegion, RefMetaDataTracker tracker) {
|
||||
this.activeRegion = activeRegion;
|
||||
this.tracker = tracker;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a single threaded active region traverser
|
||||
|
|
@ -112,12 +124,12 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
*/
|
||||
public TraverseActiveRegions(final int nThreads) {
|
||||
nanoScheduler = new NanoScheduler<>(nThreads);
|
||||
nanoScheduler.setProgressFunction(new NSProgressFunction<ActiveRegion>() {
|
||||
nanoScheduler.setProgressFunction(new NSProgressFunction<MapData>() {
|
||||
@Override
|
||||
public void progress(ActiveRegion lastActiveRegion) {
|
||||
public void progress(MapData lastActiveRegion) {
|
||||
if ( lastActiveRegion != null )
|
||||
// note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon
|
||||
printProgress(lastActiveRegion.getLocation().getStopLocation());
|
||||
printProgress(lastActiveRegion.activeRegion.getLocation().getStopLocation());
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
@ -149,13 +161,6 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
|
||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
||||
|
||||
if ( walkerHasPresetRegions ) {
|
||||
// we load all of the preset locations into the workQueue
|
||||
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {
|
||||
workQueue.add(new ActiveRegion(loc, null, true, engine.getGenomeLocParser(), getActiveRegionExtension()));
|
||||
}
|
||||
}
|
||||
|
||||
final int maxReadsAcrossSamples = annotation.maxReadsToHoldInMemoryPerSample() * SampleUtils.getSAMFileSamples(engine).size();
|
||||
final int maxReadsToHoldInMemory = Math.min(maxReadsAcrossSamples, annotation.maxReadsToHoldTotal());
|
||||
myReads = new TAROrderedReadCache(maxReadsToHoldInMemory);
|
||||
|
|
@ -167,6 +172,24 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Load in the preset regions for contig into workQueue
|
||||
*
|
||||
* Should be called before starting to process work on contig
|
||||
*
|
||||
* Can only be called when walkerHasPresetRegions is true or an IllegalStateException will be thrown
|
||||
*
|
||||
* @param contig the contig we are about to process
|
||||
*/
|
||||
protected void loadPresetRegionsForContigToWorkQueue(final String contig) {
|
||||
if ( ! walkerHasPresetRegions ) throw new IllegalStateException("only appropriate to call when walker has preset regions");
|
||||
|
||||
final GenomeLoc contigSpan = engine.getGenomeLocParser().createOverEntireContig(contig);
|
||||
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions().getOverlapping(contigSpan) ) {
|
||||
workQueue.add(new ActiveRegion(loc, null, true, engine.getGenomeLocParser(), getActiveRegionExtension()));
|
||||
}
|
||||
}
|
||||
|
||||
protected int getActiveRegionExtension() {
|
||||
return activeRegionExtension;
|
||||
}
|
||||
|
|
@ -198,16 +221,6 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
return engine.getIntervals() != null && ! engine.getIntervals().overlaps(loc);
|
||||
}
|
||||
|
||||
protected ReferenceOrderedView getReferenceOrderedView(final ActiveRegionWalker<M, T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
final LocusView locusView) {
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
return (RodLocusView)locusView;
|
||||
}
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Actual traverse function
|
||||
|
|
@ -254,7 +267,7 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
logger.info(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
nanoScheduler.setDebug(false);
|
||||
final Iterator<ActiveRegion> activeRegionIterator = new ActiveRegionIterator(dataProvider);
|
||||
final Iterator<MapData> activeRegionIterator = new ActiveRegionIterator(dataProvider);
|
||||
final TraverseActiveRegionMap myMap = new TraverseActiveRegionMap();
|
||||
final TraverseActiveRegionReduce myReduce = new TraverseActiveRegionReduce();
|
||||
final T result = nanoScheduler.execute(activeRegionIterator, myMap, sum, myReduce);
|
||||
|
|
@ -262,29 +275,53 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
return result;
|
||||
}
|
||||
|
||||
private class ActiveRegionIterator implements Iterator<ActiveRegion> {
|
||||
private class ActiveRegionIterator implements Iterator<MapData> {
|
||||
private final LocusShardDataProvider dataProvider;
|
||||
private LinkedList<ActiveRegion> readyActiveRegions = new LinkedList<ActiveRegion>();
|
||||
private LinkedList<MapData> readyActiveRegions = new LinkedList<>();
|
||||
private boolean done = false;
|
||||
private final LocusView locusView;
|
||||
private final LocusReferenceView referenceView;
|
||||
private final ReferenceOrderedView referenceOrderedDataView;
|
||||
private final GenomeLoc locOfLastReadAtTraversalStart;
|
||||
private final IntervalReferenceOrderedView referenceOrderedDataView;
|
||||
private final GenomeLoc currentWindow;
|
||||
private final boolean processRemainingActiveRegions;
|
||||
|
||||
public ActiveRegionIterator( final LocusShardDataProvider dataProvider ) {
|
||||
this.dataProvider = dataProvider;
|
||||
locusView = new AllLocusView(dataProvider);
|
||||
referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
||||
|
||||
// The data shard may carry a number of locations to process (due to being indexed together).
|
||||
// This value is just the interval we are processing within the entire provider
|
||||
currentWindow = dataProvider.getLocus();
|
||||
final int currentWindowPos = dataProvider.getShard().getGenomeLocs().indexOf(currentWindow);
|
||||
if ( currentWindowPos == -1 ) throw new IllegalStateException("Data provider " + dataProvider + " didn't have our current window in it " + currentWindow);
|
||||
processRemainingActiveRegions = currentWindowPos == dataProvider.getShard().getGenomeLocs().size() - 1;
|
||||
|
||||
// the rodSpan covers all of the bases in the activity profile, including all of the bases
|
||||
// through the current window interval. This is because we may issue a query to get data for an
|
||||
// active region spanning before the current interval as far back as the start of the current profile,
|
||||
// if we have pending work to do that finalizes in this interval.
|
||||
final GenomeLoc rodSpan = activityProfile.getSpan() == null ? currentWindow : activityProfile.getSpan().endpointSpan(currentWindow);
|
||||
if ( ! dataProvider.getShard().getLocation().containsP(rodSpan) ) throw new IllegalStateException("Rod span " + rodSpan + " isn't contained within the data shard " + dataProvider.getShard().getLocation() + ", meaning we wouldn't get all of the data we need");
|
||||
referenceOrderedDataView = new IntervalReferenceOrderedView( dataProvider, rodSpan );
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
locOfLastReadAtTraversalStart = spanOfLastSeenRead();
|
||||
|
||||
// load in the workQueue the present regions that span the current contig, if it's different from the last one
|
||||
if ( walkerHasPresetRegions && ( lastRegionProcessed == null || ! currentWindow.onSameContig(lastRegionProcessed)) ) {
|
||||
loadPresetRegionsForContigToWorkQueue(currentWindow.getContig());
|
||||
}
|
||||
|
||||
// remember the last region we processed for sanity checking later
|
||||
lastRegionProcessed = currentWindow;
|
||||
}
|
||||
|
||||
@Override public void remove() { throw new UnsupportedOperationException("Cannot remove from ActiveRegionIterator"); }
|
||||
|
||||
@Override
|
||||
public ActiveRegion next() {
|
||||
public MapData next() {
|
||||
return readyActiveRegions.pop();
|
||||
}
|
||||
@Override
|
||||
|
|
@ -326,7 +363,7 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
final boolean flushProfile = ! activityProfile.isEmpty()
|
||||
&& ( activityProfile.getContigIndex() != location.getContigIndex()
|
||||
|| location.getStart() != activityProfile.getStop() + 1);
|
||||
final List<ActiveRegion> newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false);
|
||||
final List<MapData> newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false, referenceOrderedDataView);
|
||||
|
||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
|
|
@ -335,7 +372,7 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
addIsActiveResult(walker, tracker, refContext, locus);
|
||||
|
|
@ -346,29 +383,25 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
if ( ! newActiveRegions.isEmpty() ) {
|
||||
readyActiveRegions.addAll(newActiveRegions);
|
||||
if ( DEBUG )
|
||||
for ( final ActiveRegion region : newActiveRegions )
|
||||
logger.info("Adding region to queue for processing " + region);
|
||||
for ( final MapData region : newActiveRegions )
|
||||
logger.info("Adding region to queue for processing " + region.activeRegion);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
if ( processRemainingActiveRegions ) {
|
||||
// we've run out of stuff to process, and since shards now span entire contig boundaries
|
||||
// we should finalized our regions. This allows us to continue to use our referenceOrderedDataView
|
||||
// which would otherwise be shutdown. Only followed when the microschedule says that we're
|
||||
// inside of the last window in the current shard
|
||||
readyActiveRegions.addAll(prepActiveRegionsForProcessing(walker, true, true, referenceOrderedDataView));
|
||||
}
|
||||
|
||||
return ! readyActiveRegions.isEmpty();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
*/
|
||||
public T endTraversal(final Walker<M, T> walker, T sum) {
|
||||
for ( final ActiveRegion region : prepActiveRegionsForProcessing((ActiveRegionWalker<M, T>)walker, true, true) ) {
|
||||
final M x = ((ActiveRegionWalker<M, T>) walker).map(region, null);
|
||||
sum = walker.reduce( x, sum );
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions to manage and interact with the live / dead zone
|
||||
|
|
@ -594,7 +627,10 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
* add these blocks of work to the work queue
|
||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||
*/
|
||||
private List<ActiveRegion> prepActiveRegionsForProcessing(final ActiveRegionWalker<M, T> walker, final boolean flushActivityProfile, final boolean forceAllRegionsToBeActive) {
|
||||
private List<MapData> prepActiveRegionsForProcessing(final ActiveRegionWalker<M, T> walker,
|
||||
final boolean flushActivityProfile,
|
||||
final boolean forceAllRegionsToBeActive,
|
||||
final IntervalReferenceOrderedView referenceOrderedDataView) {
|
||||
if ( ! walkerHasPresetRegions ) {
|
||||
// We don't have preset regions, so we get our regions from the activity profile
|
||||
final Collection<ActiveRegion> activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMinRegionSize(), getMaxRegionSize(), flushActivityProfile);
|
||||
|
|
@ -603,13 +639,13 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
}
|
||||
|
||||
// Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||
final LinkedList<ActiveRegion> readyRegions = new LinkedList<ActiveRegion>();
|
||||
final LinkedList<MapData> readyRegions = new LinkedList<>();
|
||||
while( workQueue.peek() != null ) {
|
||||
final ActiveRegion activeRegion = workQueue.peek();
|
||||
if ( forceAllRegionsToBeActive || regionCompletelyWithinDeadZone(activeRegion) ) {
|
||||
writeActivityProfile(activeRegion.getSupportingStates());
|
||||
writeActiveRegion(activeRegion);
|
||||
readyRegions.add(prepActiveRegionForProcessing(workQueue.remove(), walker));
|
||||
readyRegions.add(prepActiveRegionForProcessing(workQueue.remove(), walker, referenceOrderedDataView));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
|
@ -619,8 +655,10 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
|
||||
}
|
||||
|
||||
private ActiveRegion prepActiveRegionForProcessing(final ActiveRegion activeRegion, final ActiveRegionWalker<M, T> walker) {
|
||||
final List<GATKSAMRecord> stillLive = new LinkedList<GATKSAMRecord>();
|
||||
private MapData prepActiveRegionForProcessing(final ActiveRegion activeRegion,
|
||||
final ActiveRegionWalker<M, T> walker,
|
||||
final IntervalReferenceOrderedView referenceOrderedDataView) {
|
||||
final List<GATKSAMRecord> stillLive = new LinkedList<>();
|
||||
for ( final GATKSAMRecord read : myReads.popCurrentReads() ) {
|
||||
boolean killed = false;
|
||||
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
|
||||
|
|
@ -653,14 +691,21 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
|||
logger.info(String.format("Processing region %20s span=%3d active?=%5b with %4d reads. Overall max reads carried is %s",
|
||||
activeRegion.getLocation(), activeRegion.getLocation().size(), activeRegion.isActive(), activeRegion.size(), maxReadsInMemory));
|
||||
|
||||
return activeRegion;
|
||||
// prepare the RefMetaDataTracker information
|
||||
final GenomeLoc loc = activeRegion.getLocation();
|
||||
// get all of the RODs that cover the active region (without extension)
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataForInterval(loc);
|
||||
// trim away all of the features that occurred before this location, as we will not need them in the future
|
||||
referenceOrderedDataView.trimCurrentFeaturesToLoc(loc);
|
||||
|
||||
return new MapData(activeRegion, tracker);
|
||||
}
|
||||
|
||||
private class TraverseActiveRegionMap implements NSMapFunction<ActiveRegion, M> {
|
||||
private class TraverseActiveRegionMap implements NSMapFunction<MapData, M> {
|
||||
@Override
|
||||
public M apply(final ActiveRegion activeRegion) {
|
||||
if ( DEBUG ) logger.info("Executing walker.map for " + activeRegion + " in thread " + Thread.currentThread().getName());
|
||||
return walker.map(activeRegion, null);
|
||||
public M apply(final MapData mapData) {
|
||||
if ( DEBUG ) logger.info("Executing walker.map for " + mapData.activeRegion + " in thread " + Thread.currentThread().getName());
|
||||
return walker.map(mapData.activeRegion, mapData.tracker);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -179,7 +179,7 @@ public class TraverseLociNano<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,
|
|||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location, refContext);
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location);
|
||||
|
||||
numIterations++;
|
||||
return new MapData(locus, refContext, tracker);
|
||||
|
|
|
|||
|
|
@ -204,8 +204,10 @@ public class VariantAnnotatorEngine {
|
|||
return annotateDBs(tracker, annotated);
|
||||
}
|
||||
|
||||
public VariantContext annotateContext(final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap, VariantContext vc) {
|
||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
||||
public VariantContext annotateContextForActiveRegion(final RefMetaDataTracker tracker,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap,
|
||||
final VariantContext vc) {
|
||||
final Map<String, Object> infoAnnotations = new LinkedHashMap<>(vc.getAttributes());
|
||||
|
||||
// go through all the requested info annotationTypes
|
||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||
|
|
@ -219,10 +221,13 @@ public class VariantAnnotatorEngine {
|
|||
}
|
||||
|
||||
// generate a new annotated VC
|
||||
VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
||||
final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
||||
|
||||
// annotate genotypes, creating another new VC in the process
|
||||
return builder.genotypes(annotateGenotypes(null, null, null, vc, perReadAlleleLikelihoodMap)).make();
|
||||
final VariantContext annotated = builder.genotypes(annotateGenotypes(null, null, null, vc, perReadAlleleLikelihoodMap)).make();
|
||||
|
||||
// annotate db occurrences
|
||||
return annotateDBs(tracker, annotated);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ public final class VariantOverlapAnnotator {
|
|||
/**
|
||||
* Add overlap attributes to vcToAnnotate against all overlapBindings in tracker
|
||||
*
|
||||
* @see #annotateOverlap(java.util.List, , String, org.broadinstitute.variant.variantcontext.VariantContext)
|
||||
* @see #annotateOverlap(java.util.List, String, org.broadinstitute.variant.variantcontext.VariantContext)
|
||||
* for more information
|
||||
*
|
||||
* @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate
|
||||
|
|
@ -130,12 +130,12 @@ public final class VariantOverlapAnnotator {
|
|||
* @param vcToAnnotate a variant context to annotate
|
||||
* @return a VariantContext (may be == to vcToAnnotate) with updated overlaps update fields value
|
||||
*/
|
||||
public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, VariantContext vcToAnnotate) {
|
||||
public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, final VariantContext vcToAnnotate) {
|
||||
if ( overlapBindings.isEmpty() ) return vcToAnnotate;
|
||||
|
||||
VariantContext annotated = vcToAnnotate;
|
||||
final GenomeLoc loc = getLoc(vcToAnnotate);
|
||||
for ( Map.Entry<RodBinding<VariantContext>, String> overlapBinding : overlapBindings.entrySet() ) {
|
||||
for ( final Map.Entry<RodBinding<VariantContext>, String> overlapBinding : overlapBindings.entrySet() ) {
|
||||
annotated = annotateOverlap(tracker.getValues(overlapBinding.getKey(), loc), overlapBinding.getValue(), vcToAnnotate);
|
||||
}
|
||||
|
||||
|
|
@ -186,7 +186,7 @@ public final class VariantOverlapAnnotator {
|
|||
if ( rsIDSourceVCs == null ) throw new IllegalArgumentException("rsIDSourceVCs cannot be null");
|
||||
if ( vcToAnnotate == null ) throw new IllegalArgumentException("vcToAnnotate cannot be null");
|
||||
|
||||
for ( VariantContext vcComp : rsIDSourceVCs ) {
|
||||
for ( final VariantContext vcComp : rsIDSourceVCs ) {
|
||||
if ( vcComp.isFiltered() ) continue; // don't process any failed VCs
|
||||
|
||||
if ( ! vcComp.getChr().equals(vcToAnnotate.getChr()) || vcComp.getStart() != vcToAnnotate.getStart() )
|
||||
|
|
|
|||
|
|
@ -256,7 +256,6 @@ public class ReadMetricsUnitTest extends BaseTest {
|
|||
}
|
||||
windowMaker.close();
|
||||
}
|
||||
traverseActiveRegions.endTraversal(walker, 0);
|
||||
|
||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
|
||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ import java.util.*;
|
|||
/**
|
||||
* @author depristo
|
||||
*/
|
||||
public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
|
||||
public class IntervalReferenceOrderedViewUnitTest extends BaseTest {
|
||||
private static int startingChr = 1;
|
||||
private static int endingChr = 2;
|
||||
private static int readCount = 100;
|
||||
|
|
@ -285,7 +285,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
|
|||
|
||||
Collections.sort(intervals);
|
||||
final GenomeLoc span = span(intervals);
|
||||
final ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(genomeLocParser, span, names, iterators);
|
||||
final IntervalReferenceOrderedView view = new IntervalReferenceOrderedView(genomeLocParser, span, names, iterators);
|
||||
|
||||
if ( testStateless ) {
|
||||
// test each tracker is well formed, as each is created
|
||||
|
|
@ -97,7 +97,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
|
||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10), null);
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10));
|
||||
Assert.assertEquals(tracker.getValues(Feature.class).size(), 0, "The tracker should not have produced any data");
|
||||
}
|
||||
|
||||
|
|
@ -115,7 +115,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
|
||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20));
|
||||
TableFeature datum = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest"));
|
||||
|
||||
Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect");
|
||||
|
|
@ -141,7 +141,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
|||
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
|
||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
|
||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20));
|
||||
TableFeature datum1 = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest1"));
|
||||
|
||||
Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect");
|
||||
|
|
|
|||
|
|
@ -405,8 +405,6 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
|||
for (LocusShardDataProvider dataProvider : createDataProviders(t, walker, intervals, bam))
|
||||
t.traverse(walker, dataProvider, 0);
|
||||
|
||||
t.endTraversal(walker, 0);
|
||||
|
||||
return walker.mappedActiveRegions;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Tags;
|
|||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReads;
|
||||
|
|
@ -47,6 +48,7 @@ import java.io.FileNotFoundException;
|
|||
import java.io.FileOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import static org.testng.Assert.fail;
|
||||
|
|
@ -146,7 +148,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
|||
fail("Shard == null");
|
||||
}
|
||||
|
||||
ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard,genomeLocParser,dataSource.seek(shard),null,null);
|
||||
ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard,genomeLocParser,dataSource.seek(shard),null, Collections.<ReferenceOrderedDataSource>emptyList());
|
||||
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
|
||||
dataProvider.close();
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue