Implement ActiveRegionTraversal RefMetaDataTracker for map call; HaplotypeCaller now annotates ID from dbSNP
-- Reuse infrastructure for RODs for reads to implement general IntervalReferenceOrderedView so that both TraverseReads and TraverseActiveRegions can use the same underlying infrastructure -- TraverseActiveRegions now provides a meaningful RefMetaDataTracker to ActiveRegionWalker.map -- Cleanup misc. code as it came up -- Resolves GSA-808: Write general utility code to do rsID allele matching, hook up to UG and HC
This commit is contained in:
parent
0d593cff70
commit
1c03ebc82d
|
|
@ -49,6 +49,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||||
import com.google.java.contract.Ensures;
|
import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
||||||
|
|
@ -146,6 +147,7 @@ public class GenotypingEngine {
|
||||||
final GenomeLoc refLoc,
|
final GenomeLoc refLoc,
|
||||||
final GenomeLoc activeRegionWindow,
|
final GenomeLoc activeRegionWindow,
|
||||||
final GenomeLocParser genomeLocParser,
|
final GenomeLocParser genomeLocParser,
|
||||||
|
final RefMetaDataTracker tracker,
|
||||||
final List<VariantContext> activeAllelesToGenotype ) {
|
final List<VariantContext> activeAllelesToGenotype ) {
|
||||||
// sanity check input arguments
|
// sanity check input arguments
|
||||||
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
if (UG_engine == null) throw new IllegalArgumentException("UG_Engine input can't be null, got "+UG_engine);
|
||||||
|
|
@ -204,7 +206,7 @@ public class GenotypingEngine {
|
||||||
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) );
|
convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) );
|
||||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
|
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call );
|
||||||
|
|
||||||
VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
|
VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call);
|
||||||
|
|
||||||
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
|
||||||
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
annotatedCall = GATKVariantContextUtils.reverseTrimAlleles(annotatedCall);
|
||||||
|
|
|
||||||
|
|
@ -441,7 +441,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
private final static int MIN_READ_LENGTH = 10;
|
private final static int MIN_READ_LENGTH = 10;
|
||||||
|
|
||||||
private List<String> samplesList = new ArrayList<String>();
|
private List<String> samplesList = new ArrayList<String>();
|
||||||
private final List<VariantContext> allelesToGenotype = new ArrayList<VariantContext>();
|
|
||||||
|
|
||||||
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
|
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
|
||||||
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
|
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
|
||||||
|
|
@ -596,7 +595,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
final VariantContext vcFromAllelesRod = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), false, logger, UG_engine.getUAC().alleles);
|
final VariantContext vcFromAllelesRod = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), false, logger, UG_engine.getUAC().alleles);
|
||||||
if( vcFromAllelesRod != null ) {
|
if( vcFromAllelesRod != null ) {
|
||||||
allelesToGenotype.add(vcFromAllelesRod); // save for later for processing during the ActiveRegion's map call. Should be folded into a RefMetaDataTracker object
|
|
||||||
return new ActivityProfileState(ref.getLocus(), 1.0);
|
return new ActivityProfileState(ref.getLocus(), 1.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -664,12 +662,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
|
|
||||||
final List<VariantContext> activeAllelesToGenotype = new ArrayList<>();
|
final List<VariantContext> activeAllelesToGenotype = new ArrayList<>();
|
||||||
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
for( final VariantContext vc : allelesToGenotype ) {
|
for ( final VariantContext vc : metaDataTracker.getValues(UG_engine.getUAC().alleles) ) {
|
||||||
if( originalActiveRegion.getLocation().overlapsP( getToolkit().getGenomeLocParser().createGenomeLoc(vc) ) ) {
|
if ( vc.isNotFiltered() ) {
|
||||||
activeAllelesToGenotype.add(vc); // do something with these VCs during GGA mode
|
activeAllelesToGenotype.add(vc); // do something with these VCs during GGA mode
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
allelesToGenotype.removeAll( activeAllelesToGenotype );
|
|
||||||
// No alleles found in this region so nothing to do!
|
// No alleles found in this region so nothing to do!
|
||||||
if ( activeAllelesToGenotype.isEmpty() ) { return NO_CALLS; }
|
if ( activeAllelesToGenotype.isEmpty() ) { return NO_CALLS; }
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -704,6 +701,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
assemblyResult.paddedReferenceLoc,
|
assemblyResult.paddedReferenceLoc,
|
||||||
assemblyResult.regionForGenotyping.getLocation(),
|
assemblyResult.regionForGenotyping.getLocation(),
|
||||||
getToolkit().getGenomeLocParser(),
|
getToolkit().getGenomeLocParser(),
|
||||||
|
metaDataTracker,
|
||||||
activeAllelesToGenotype );
|
activeAllelesToGenotype );
|
||||||
|
|
||||||
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
|
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
|
||||||
|
|
@ -890,8 +888,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
@Override
|
@Override
|
||||||
public Integer reduce(List<VariantContext> callsInRegion, Integer numCalledRegions) {
|
public Integer reduce(List<VariantContext> callsInRegion, Integer numCalledRegions) {
|
||||||
for( final VariantContext call : callsInRegion ) {
|
for( final VariantContext call : callsInRegion ) {
|
||||||
// TODO -- uncomment this line once ART-based walkers have a proper RefMetaDataTracker.
|
|
||||||
// annotationEngine.annotateDBs(metaDataTracker, getToolkit().getGenomeLocParser().createGenomeLoc(call), call);
|
|
||||||
vcfWriter.add( call );
|
vcfWriter.add( call );
|
||||||
}
|
}
|
||||||
return (callsInRegion.isEmpty() ? 0 : 1) + numCalledRegions;
|
return (callsInRegion.isEmpty() ? 0 : 1) + numCalledRegions;
|
||||||
|
|
|
||||||
|
|
@ -47,15 +47,12 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
import org.broad.tribble.TribbleIndexedFeatureReader;
|
|
||||||
import org.broadinstitute.sting.WalkerTest;
|
import org.broadinstitute.sting.WalkerTest;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
@ -69,6 +66,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
final static String NA12878_CHR20_BAM = validationDataLocation + "NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||||
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
final static String CEUTRIO_BAM = validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam";
|
||||||
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
final static String NA12878_RECALIBRATED_BAM = privateTestDir + "NA12878.100kb.BQSRv2.example.bam";
|
||||||
|
final static String NA12878_PCRFREE = privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam";
|
||||||
final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam";
|
final static String CEUTRIO_MT_TEST_BAM = privateTestDir + "CEUTrio.HiSeq.b37.MT.1_50.bam";
|
||||||
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals";
|
||||||
|
|
||||||
|
|
@ -199,4 +197,27 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
||||||
Arrays.asList("86bdd07a3ac4f6ce239c30efea8bf5ba"));
|
Arrays.asList("86bdd07a3ac4f6ce239c30efea8bf5ba"));
|
||||||
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// test dbSNP annotation
|
||||||
|
//
|
||||||
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void HCTestDBSNPAnnotationWGS() {
|
||||||
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||||
|
Arrays.asList("7b23a288a31cafca3946f14f2381e7cb"));
|
||||||
|
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void HCTestDBSNPAnnotationWEx() {
|
||||||
|
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||||
|
"-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132
|
||||||
|
+ " -L " + hg19Intervals + " -isr INTERSECTION", 1,
|
||||||
|
Arrays.asList("9587029b702bb59bd4dfec69eac4c210"));
|
||||||
|
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -72,8 +72,6 @@ class IntervalOverlappingRODsFromStream {
|
||||||
/**
|
/**
|
||||||
* Get the list of RODs overlapping loc from this stream of RODs.
|
* Get the list of RODs overlapping loc from this stream of RODs.
|
||||||
*
|
*
|
||||||
* Sequential calls to this function must obey the rule that loc2.getStart >= loc1.getStart
|
|
||||||
*
|
|
||||||
* @param loc the interval to query
|
* @param loc the interval to query
|
||||||
* @return a non-null RODRecordList containing the overlapping RODs, which may be empty
|
* @return a non-null RODRecordList containing the overlapping RODs, which may be empty
|
||||||
*/
|
*/
|
||||||
|
|
@ -84,7 +82,6 @@ class IntervalOverlappingRODsFromStream {
|
||||||
if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
|
if ( lastQuery != null && loc.getStart() < lastQuery.getStart() )
|
||||||
throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));
|
throw new IllegalArgumentException(String.format("BUG: query interval (%s) starts before the previous interval %s", loc, lastQuery));
|
||||||
|
|
||||||
trimCurrentFeaturesToLoc(loc);
|
|
||||||
readOverlappingFutureFeatures(loc);
|
readOverlappingFutureFeatures(loc);
|
||||||
return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
|
return new RODRecordListImpl(name, subsetToOverlapping(loc, currentFeatures), loc);
|
||||||
}
|
}
|
||||||
|
|
@ -128,11 +125,14 @@ class IntervalOverlappingRODsFromStream {
|
||||||
/**
|
/**
|
||||||
* Update function. Remove all elements of currentFeatures that end before loc
|
* Update function. Remove all elements of currentFeatures that end before loc
|
||||||
*
|
*
|
||||||
|
* Must be called by clients periodically when they know they they will never ask for data before
|
||||||
|
* loc, so that the running cache of RODs doesn't grow out of control.
|
||||||
|
*
|
||||||
* @param loc the location to use
|
* @param loc the location to use
|
||||||
*/
|
*/
|
||||||
@Requires("loc != null")
|
@Requires("loc != null")
|
||||||
@Ensures("currentFeatures.size() <= old(currentFeatures.size())")
|
@Ensures("currentFeatures.size() <= old(currentFeatures.size())")
|
||||||
private void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
|
public void trimCurrentFeaturesToLoc(final GenomeLoc loc) {
|
||||||
final ListIterator<GATKFeature> it = currentFeatures.listIterator();
|
final ListIterator<GATKFeature> it = currentFeatures.listIterator();
|
||||||
while ( it.hasNext() ) {
|
while ( it.hasNext() ) {
|
||||||
final GATKFeature feature = it.next();
|
final GATKFeature feature = it.next();
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,184 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
|
import net.sf.picard.util.PeekableIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a ROD view that allows for requests for RODs that overlap intervals on the genome to produce a RefMetaDataTracker
|
||||||
|
*/
|
||||||
|
public class IntervalReferenceOrderedView implements ReferenceOrderedView {
|
||||||
|
/** a list of the RMDDataState (location->iterators) */
|
||||||
|
private final List<RMDDataState> states = new ArrayList<>(1);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used to get genome locs for reads
|
||||||
|
*/
|
||||||
|
protected final GenomeLocParser genomeLocParser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The total extent of all reads in this span. We create iterators from our RODs
|
||||||
|
* from the start of this span, to the end.
|
||||||
|
*/
|
||||||
|
private final GenomeLoc shardSpan;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new IntervalReferenceOrderedView taking data from provider and capable of
|
||||||
|
* servicing ROD overlap requests within the genomic interval span
|
||||||
|
*
|
||||||
|
* @param provider a ShardDataProvider to give us data
|
||||||
|
* @param span a GenomeLoc span, or null indicating take the entire genome
|
||||||
|
*/
|
||||||
|
public IntervalReferenceOrderedView(final ShardDataProvider provider, final GenomeLoc span) {
|
||||||
|
if ( provider == null ) throw new IllegalArgumentException("provider cannot be null");
|
||||||
|
if ( provider.hasReferenceOrderedData() && span == null ) throw new IllegalArgumentException("span cannot be null when provider has reference ordered data");
|
||||||
|
|
||||||
|
this.genomeLocParser = provider.getGenomeLocParser();
|
||||||
|
this.shardSpan = span;
|
||||||
|
provider.register(this);
|
||||||
|
|
||||||
|
// conditional to optimize the case where we don't have any ROD data
|
||||||
|
if ( provider.hasReferenceOrderedData() && ! shardSpan.isUnmapped() ) {
|
||||||
|
for (final ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||||
|
states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testing constructor
|
||||||
|
*/
|
||||||
|
protected IntervalReferenceOrderedView(final GenomeLocParser genomeLocParser,
|
||||||
|
final GenomeLoc shardSpan,
|
||||||
|
final List<String> names,
|
||||||
|
final List<PeekableIterator<RODRecordList>> featureSources) {
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
|
this.shardSpan = shardSpan;
|
||||||
|
for ( int i = 0; i < names.size(); i++ )
|
||||||
|
states.add(new RMDDataState(names.get(i), featureSources.get(i)));
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection<Class<? extends View>> getConflictingViews() {
|
||||||
|
List<Class<? extends View>> classes = new ArrayList<>();
|
||||||
|
classes.add(ManagingReferenceOrderedView.class);
|
||||||
|
return classes;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a RefMetaDataTracker containing bindings for all RODs overlapping the start position of loc
|
||||||
|
* @param loc a GenomeLoc of size == 1
|
||||||
|
* @return a non-null RefMetaDataTracker
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public RefMetaDataTracker getReferenceOrderedDataAtLocus(GenomeLoc loc) {
|
||||||
|
if ( loc == null ) throw new IllegalArgumentException("loc cannot be null");
|
||||||
|
if ( loc.size() != 1 ) throw new IllegalArgumentException("GenomeLoc must have size == 1 but got " + loc);
|
||||||
|
return getReferenceOrderedDataForInterval(loc);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a RefMetaDataTracker containing bindings for all RODs overlapping interval
|
||||||
|
*
|
||||||
|
* @param interval a non=null interval
|
||||||
|
* @return a non-null RefMetaDataTracker
|
||||||
|
*/
|
||||||
|
public RefMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) {
|
||||||
|
if ( interval == null ) throw new IllegalArgumentException("Interval cannot be null");
|
||||||
|
|
||||||
|
if ( states.isEmpty() || shardSpan.isUnmapped() ) // optimization for no bindings (common for read walkers)
|
||||||
|
return RefMetaDataTracker.EMPTY_TRACKER;
|
||||||
|
else {
|
||||||
|
final List<RODRecordList> bindings = new ArrayList<>(states.size());
|
||||||
|
for ( final RMDDataState state : states )
|
||||||
|
bindings.add(state.stream.getOverlapping(interval));
|
||||||
|
return new RefMetaDataTracker(bindings);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trim down all of the ROD managers so that they only hold ROD bindings wit start >= startOfDataToKeep.getStart()
|
||||||
|
*
|
||||||
|
* @param startOfDataToKeep a non-null genome loc
|
||||||
|
*/
|
||||||
|
public void trimCurrentFeaturesToLoc(final GenomeLoc startOfDataToKeep) {
|
||||||
|
if ( startOfDataToKeep == null ) throw new IllegalArgumentException("startOfDataToKeep cannot be null");
|
||||||
|
|
||||||
|
for ( final RMDDataState state : states )
|
||||||
|
state.stream.trimCurrentFeaturesToLoc(startOfDataToKeep);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Closes the current view.
|
||||||
|
*/
|
||||||
|
public void close() {
|
||||||
|
for (final RMDDataState state : states)
|
||||||
|
state.close();
|
||||||
|
|
||||||
|
// Clear out the existing data so that post-close() accesses to this data will fail-fast.
|
||||||
|
states.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Models the traversal state of a given ROD lane.
|
||||||
|
*/
|
||||||
|
private static class RMDDataState {
|
||||||
|
public final ReferenceOrderedDataSource dataSource;
|
||||||
|
public final IntervalOverlappingRODsFromStream stream;
|
||||||
|
private final LocationAwareSeekableRODIterator iterator;
|
||||||
|
|
||||||
|
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
this.iterator = iterator;
|
||||||
|
this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<>(iterator));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For testing
|
||||||
|
*/
|
||||||
|
public RMDDataState(final String name, final PeekableIterator<RODRecordList> iterator) {
|
||||||
|
this.dataSource = null;
|
||||||
|
this.iterator = null;
|
||||||
|
this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<>(iterator));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
if ( dataSource != null )
|
||||||
|
dataSource.close( iterator );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -76,7 +76,8 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
||||||
* @param loc Locus at which to track.
|
* @param loc Locus at which to track.
|
||||||
* @return A tracker containing information about this locus.
|
* @return A tracker containing information about this locus.
|
||||||
*/
|
*/
|
||||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
@Override
|
||||||
|
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||||
if ( states.isEmpty() )
|
if ( states.isEmpty() )
|
||||||
return RefMetaDataTracker.EMPTY_TRACKER;
|
return RefMetaDataTracker.EMPTY_TRACKER;
|
||||||
else {
|
else {
|
||||||
|
|
|
||||||
|
|
@ -42,52 +42,9 @@ import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/** a ROD view for reads. This provides the Read traversals a way of getting a RefMetaDataTracker */
|
/** a ROD view for reads. This provides the Read traversals a way of getting a RefMetaDataTracker */
|
||||||
public class ReadBasedReferenceOrderedView implements View {
|
public class ReadBasedReferenceOrderedView extends IntervalReferenceOrderedView {
|
||||||
// a list of the RMDDataState (location->iterators)
|
|
||||||
private final List<RMDDataState> states = new ArrayList<RMDDataState>(1);
|
|
||||||
private final static RefMetaDataTracker EMPTY_TRACKER = new RefMetaDataTracker();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Used to get genome locs for reads
|
|
||||||
*/
|
|
||||||
private final GenomeLocParser genomeLocParser;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The total extent of all reads in this span. We create iterators from our RODs
|
|
||||||
* from the start of this span, to the end.
|
|
||||||
*/
|
|
||||||
private final GenomeLoc shardSpan;
|
|
||||||
|
|
||||||
public ReadBasedReferenceOrderedView(final ShardDataProvider provider) {
|
public ReadBasedReferenceOrderedView(final ShardDataProvider provider) {
|
||||||
this.genomeLocParser = provider.getGenomeLocParser();
|
super(provider, provider.hasReferenceOrderedData() ? ((ReadShard)provider.getShard()).getReadsSpan() : null);
|
||||||
// conditional to optimize the case where we don't have any ROD data
|
|
||||||
this.shardSpan = provider.getReferenceOrderedData() != null ? ((ReadShard)provider.getShard()).getReadsSpan() : null;
|
|
||||||
provider.register(this);
|
|
||||||
|
|
||||||
if ( provider.getReferenceOrderedData() != null && ! shardSpan.isUnmapped() ) {
|
|
||||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
|
||||||
states.add(new RMDDataState(dataSource, dataSource.seek(shardSpan)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Testing constructor
|
|
||||||
*/
|
|
||||||
protected ReadBasedReferenceOrderedView(final GenomeLocParser genomeLocParser,
|
|
||||||
final GenomeLoc shardSpan,
|
|
||||||
final List<String> names,
|
|
||||||
final List<PeekableIterator<RODRecordList>> featureSources) {
|
|
||||||
this.genomeLocParser = genomeLocParser;
|
|
||||||
this.shardSpan = shardSpan;
|
|
||||||
for ( int i = 0; i < names.size(); i++ )
|
|
||||||
states.add(new RMDDataState(names.get(i), featureSources.get(i)));
|
|
||||||
}
|
|
||||||
|
|
||||||
public Collection<Class<? extends View>> getConflictingViews() {
|
|
||||||
List<Class<? extends View>> classes = new ArrayList<Class<? extends View>>();
|
|
||||||
classes.add(ManagingReferenceOrderedView.class);
|
|
||||||
return classes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -101,60 +58,11 @@ public class ReadBasedReferenceOrderedView implements View {
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public RefMetaDataTracker getReferenceOrderedDataForRead(final SAMRecord rec) {
|
public RefMetaDataTracker getReferenceOrderedDataForRead(final SAMRecord rec) {
|
||||||
if ( rec.getReadUnmappedFlag() )
|
if ( rec.getReadUnmappedFlag() )
|
||||||
// empty RODs for unmapped reads
|
return RefMetaDataTracker.EMPTY_TRACKER;
|
||||||
return new RefMetaDataTracker();
|
|
||||||
else
|
|
||||||
return getReferenceOrderedDataForInterval(genomeLocParser.createGenomeLoc(rec));
|
|
||||||
}
|
|
||||||
|
|
||||||
@Requires({"interval != null", "shardSpan == null || shardSpan.isUnmapped() || shardSpan.containsP(interval)"})
|
|
||||||
@Ensures("result != null")
|
|
||||||
public RefMetaDataTracker getReferenceOrderedDataForInterval(final GenomeLoc interval) {
|
|
||||||
if ( states.isEmpty() || shardSpan.isUnmapped() ) // optimization for no bindings (common for read walkers)
|
|
||||||
return EMPTY_TRACKER;
|
|
||||||
else {
|
else {
|
||||||
final List<RODRecordList> bindings = new ArrayList<RODRecordList>(states.size());
|
final GenomeLoc readSpan = genomeLocParser.createGenomeLoc(rec);
|
||||||
for ( final RMDDataState state : states )
|
trimCurrentFeaturesToLoc(readSpan);
|
||||||
bindings.add(state.stream.getOverlapping(interval));
|
return getReferenceOrderedDataForInterval(readSpan);
|
||||||
return new RefMetaDataTracker(bindings);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Closes the current view.
|
|
||||||
*/
|
|
||||||
public void close() {
|
|
||||||
for (final RMDDataState state : states)
|
|
||||||
state.close();
|
|
||||||
|
|
||||||
// Clear out the existing data so that post-close() accesses to this data will fail-fast.
|
|
||||||
states.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Models the traversal state of a given ROD lane. */
|
|
||||||
private static class RMDDataState {
|
|
||||||
public final ReferenceOrderedDataSource dataSource;
|
|
||||||
public final IntervalOverlappingRODsFromStream stream;
|
|
||||||
private final LocationAwareSeekableRODIterator iterator;
|
|
||||||
|
|
||||||
public RMDDataState(ReferenceOrderedDataSource dataSource, LocationAwareSeekableRODIterator iterator) {
|
|
||||||
this.dataSource = dataSource;
|
|
||||||
this.iterator = iterator;
|
|
||||||
this.stream = new IntervalOverlappingRODsFromStream(dataSource.getName(), new PeekableIterator<RODRecordList>(iterator));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* For testing
|
|
||||||
*/
|
|
||||||
public RMDDataState(final String name, final PeekableIterator<RODRecordList> iterator) {
|
|
||||||
this.dataSource = null;
|
|
||||||
this.iterator = null;
|
|
||||||
this.stream = new IntervalOverlappingRODsFromStream(name, new PeekableIterator<RODRecordList>(iterator));
|
|
||||||
}
|
|
||||||
|
|
||||||
public void close() {
|
|
||||||
if ( dataSource != null )
|
|
||||||
dataSource.close( iterator );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,10 +25,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
public interface ReferenceOrderedView extends View {
|
public interface ReferenceOrderedView extends View {
|
||||||
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext );
|
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -98,7 +98,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
rodQueue = new RODMergingIterator(iterators);
|
rodQueue = new RODMergingIterator(iterators);
|
||||||
}
|
}
|
||||||
|
|
||||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
@Override
|
||||||
|
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||||
// special case the interval again -- add it into the ROD
|
// special case the interval again -- add it into the ROD
|
||||||
if ( interval != null ) { allTracksHere.add(interval); }
|
if ( interval != null ) { allTracksHere.add(interval); }
|
||||||
return new RefMetaDataTracker(allTracksHere);
|
return new RefMetaDataTracker(allTracksHere);
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,6 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
||||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
|
||||||
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
|
||||||
|
|
@ -114,12 +113,6 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
done = walker.isDone();
|
done = walker.isDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Special function call to empty out the work queue. Ugly for now but will be cleaned up when we eventually push this functionality more into the engine
|
|
||||||
if( traversalEngine instanceof TraverseActiveRegions) {
|
|
||||||
final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
|
|
||||||
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
|
|
||||||
}
|
|
||||||
|
|
||||||
Object result = accumulator.finishTraversal();
|
Object result = accumulator.finishTraversal();
|
||||||
|
|
||||||
outputTracker.close();
|
outputTracker.close();
|
||||||
|
|
|
||||||
|
|
@ -29,14 +29,12 @@ import com.google.java.contract.Ensures;
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionTraversalParameters;
|
import org.broadinstitute.sting.gatk.walkers.ActiveRegionTraversalParameters;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
|
@ -92,12 +90,26 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
|
|
||||||
private TAROrderedReadCache myReads = null;
|
private TAROrderedReadCache myReads = null;
|
||||||
|
|
||||||
|
private GenomeLoc lastRegionProcessed = null;
|
||||||
private GenomeLoc spanOfLastReadSeen = null;
|
private GenomeLoc spanOfLastReadSeen = null;
|
||||||
private ActivityProfile activityProfile = null;
|
private ActivityProfile activityProfile = null;
|
||||||
int maxReadsInMemory = 0;
|
int maxReadsInMemory = 0;
|
||||||
ActiveRegionWalker<M, T> walker;
|
ActiveRegionWalker<M, T> walker;
|
||||||
|
|
||||||
final NanoScheduler<ActiveRegion, M, T> nanoScheduler;
|
final NanoScheduler<MapData, M, T> nanoScheduler;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Data to use in the ActiveRegionWalker.map function produced by the NanoScheduler input iterator
|
||||||
|
*/
|
||||||
|
private static class MapData {
|
||||||
|
public ActiveRegion activeRegion;
|
||||||
|
public RefMetaDataTracker tracker;
|
||||||
|
|
||||||
|
private MapData(ActiveRegion activeRegion, RefMetaDataTracker tracker) {
|
||||||
|
this.activeRegion = activeRegion;
|
||||||
|
this.tracker = tracker;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a single threaded active region traverser
|
* Create a single threaded active region traverser
|
||||||
|
|
@ -112,12 +124,12 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
*/
|
*/
|
||||||
public TraverseActiveRegions(final int nThreads) {
|
public TraverseActiveRegions(final int nThreads) {
|
||||||
nanoScheduler = new NanoScheduler<>(nThreads);
|
nanoScheduler = new NanoScheduler<>(nThreads);
|
||||||
nanoScheduler.setProgressFunction(new NSProgressFunction<ActiveRegion>() {
|
nanoScheduler.setProgressFunction(new NSProgressFunction<MapData>() {
|
||||||
@Override
|
@Override
|
||||||
public void progress(ActiveRegion lastActiveRegion) {
|
public void progress(MapData lastActiveRegion) {
|
||||||
if ( lastActiveRegion != null )
|
if ( lastActiveRegion != null )
|
||||||
// note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon
|
// note, need to use getStopLocation so we don't give an interval to ProgressMeterDaemon
|
||||||
printProgress(lastActiveRegion.getLocation().getStopLocation());
|
printProgress(lastActiveRegion.activeRegion.getLocation().getStopLocation());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
@ -149,13 +161,6 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
|
|
||||||
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
activityProfile = new BandPassActivityProfile(engine.getGenomeLocParser(), engine.getIntervals(), BandPassActivityProfile.MAX_FILTER_SIZE, bandPassSigma);
|
||||||
|
|
||||||
if ( walkerHasPresetRegions ) {
|
|
||||||
// we load all of the preset locations into the workQueue
|
|
||||||
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions()) {
|
|
||||||
workQueue.add(new ActiveRegion(loc, null, true, engine.getGenomeLocParser(), getActiveRegionExtension()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final int maxReadsAcrossSamples = annotation.maxReadsToHoldInMemoryPerSample() * SampleUtils.getSAMFileSamples(engine).size();
|
final int maxReadsAcrossSamples = annotation.maxReadsToHoldInMemoryPerSample() * SampleUtils.getSAMFileSamples(engine).size();
|
||||||
final int maxReadsToHoldInMemory = Math.min(maxReadsAcrossSamples, annotation.maxReadsToHoldTotal());
|
final int maxReadsToHoldInMemory = Math.min(maxReadsAcrossSamples, annotation.maxReadsToHoldTotal());
|
||||||
myReads = new TAROrderedReadCache(maxReadsToHoldInMemory);
|
myReads = new TAROrderedReadCache(maxReadsToHoldInMemory);
|
||||||
|
|
@ -167,6 +172,24 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
//
|
//
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load in the preset regions for contig into workQueue
|
||||||
|
*
|
||||||
|
* Should be called before starting to process work on contig
|
||||||
|
*
|
||||||
|
* Can only be called when walkerHasPresetRegions is true or an IllegalStateException will be thrown
|
||||||
|
*
|
||||||
|
* @param contig the contig we are about to process
|
||||||
|
*/
|
||||||
|
protected void loadPresetRegionsForContigToWorkQueue(final String contig) {
|
||||||
|
if ( ! walkerHasPresetRegions ) throw new IllegalStateException("only appropriate to call when walker has preset regions");
|
||||||
|
|
||||||
|
final GenomeLoc contigSpan = engine.getGenomeLocParser().createOverEntireContig(contig);
|
||||||
|
for ( final GenomeLoc loc : this.walker.getPresetActiveRegions().getOverlapping(contigSpan) ) {
|
||||||
|
workQueue.add(new ActiveRegion(loc, null, true, engine.getGenomeLocParser(), getActiveRegionExtension()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected int getActiveRegionExtension() {
|
protected int getActiveRegionExtension() {
|
||||||
return activeRegionExtension;
|
return activeRegionExtension;
|
||||||
}
|
}
|
||||||
|
|
@ -198,16 +221,6 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
return engine.getIntervals() != null && ! engine.getIntervals().overlaps(loc);
|
return engine.getIntervals() != null && ! engine.getIntervals().overlaps(loc);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected ReferenceOrderedView getReferenceOrderedView(final ActiveRegionWalker<M, T> walker,
|
|
||||||
final LocusShardDataProvider dataProvider,
|
|
||||||
final LocusView locusView) {
|
|
||||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
|
||||||
return new ManagingReferenceOrderedView( dataProvider );
|
|
||||||
else
|
|
||||||
return (RodLocusView)locusView;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Actual traverse function
|
// Actual traverse function
|
||||||
|
|
@ -254,7 +267,7 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
logger.info(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
logger.info(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||||
|
|
||||||
nanoScheduler.setDebug(false);
|
nanoScheduler.setDebug(false);
|
||||||
final Iterator<ActiveRegion> activeRegionIterator = new ActiveRegionIterator(dataProvider);
|
final Iterator<MapData> activeRegionIterator = new ActiveRegionIterator(dataProvider);
|
||||||
final TraverseActiveRegionMap myMap = new TraverseActiveRegionMap();
|
final TraverseActiveRegionMap myMap = new TraverseActiveRegionMap();
|
||||||
final TraverseActiveRegionReduce myReduce = new TraverseActiveRegionReduce();
|
final TraverseActiveRegionReduce myReduce = new TraverseActiveRegionReduce();
|
||||||
final T result = nanoScheduler.execute(activeRegionIterator, myMap, sum, myReduce);
|
final T result = nanoScheduler.execute(activeRegionIterator, myMap, sum, myReduce);
|
||||||
|
|
@ -262,29 +275,53 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private class ActiveRegionIterator implements Iterator<ActiveRegion> {
|
private class ActiveRegionIterator implements Iterator<MapData> {
|
||||||
private final LocusShardDataProvider dataProvider;
|
private final LocusShardDataProvider dataProvider;
|
||||||
private LinkedList<ActiveRegion> readyActiveRegions = new LinkedList<ActiveRegion>();
|
private LinkedList<MapData> readyActiveRegions = new LinkedList<>();
|
||||||
private boolean done = false;
|
private boolean done = false;
|
||||||
private final LocusView locusView;
|
private final LocusView locusView;
|
||||||
private final LocusReferenceView referenceView;
|
private final LocusReferenceView referenceView;
|
||||||
private final ReferenceOrderedView referenceOrderedDataView;
|
|
||||||
private final GenomeLoc locOfLastReadAtTraversalStart;
|
private final GenomeLoc locOfLastReadAtTraversalStart;
|
||||||
|
private final IntervalReferenceOrderedView referenceOrderedDataView;
|
||||||
|
private final GenomeLoc currentWindow;
|
||||||
|
private final boolean processRemainingActiveRegions;
|
||||||
|
|
||||||
public ActiveRegionIterator( final LocusShardDataProvider dataProvider ) {
|
public ActiveRegionIterator( final LocusShardDataProvider dataProvider ) {
|
||||||
this.dataProvider = dataProvider;
|
this.dataProvider = dataProvider;
|
||||||
locusView = new AllLocusView(dataProvider);
|
locusView = new AllLocusView(dataProvider);
|
||||||
referenceView = new LocusReferenceView( walker, dataProvider );
|
referenceView = new LocusReferenceView( walker, dataProvider );
|
||||||
referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
|
|
||||||
|
// The data shard may carry a number of locations to process (due to being indexed together).
|
||||||
|
// This value is just the interval we are processing within the entire provider
|
||||||
|
currentWindow = dataProvider.getLocus();
|
||||||
|
final int currentWindowPos = dataProvider.getShard().getGenomeLocs().indexOf(currentWindow);
|
||||||
|
if ( currentWindowPos == -1 ) throw new IllegalStateException("Data provider " + dataProvider + " didn't have our current window in it " + currentWindow);
|
||||||
|
processRemainingActiveRegions = currentWindowPos == dataProvider.getShard().getGenomeLocs().size() - 1;
|
||||||
|
|
||||||
|
// the rodSpan covers all of the bases in the activity profile, including all of the bases
|
||||||
|
// through the current window interval. This is because we may issue a query to get data for an
|
||||||
|
// active region spanning before the current interval as far back as the start of the current profile,
|
||||||
|
// if we have pending work to do that finalizes in this interval.
|
||||||
|
final GenomeLoc rodSpan = activityProfile.getSpan() == null ? currentWindow : activityProfile.getSpan().endpointSpan(currentWindow);
|
||||||
|
if ( ! dataProvider.getShard().getLocation().containsP(rodSpan) ) throw new IllegalStateException("Rod span " + rodSpan + " isn't contained within the data shard " + dataProvider.getShard().getLocation() + ", meaning we wouldn't get all of the data we need");
|
||||||
|
referenceOrderedDataView = new IntervalReferenceOrderedView( dataProvider, rodSpan );
|
||||||
|
|
||||||
// We keep processing while the next reference location is within the interval
|
// We keep processing while the next reference location is within the interval
|
||||||
locOfLastReadAtTraversalStart = spanOfLastSeenRead();
|
locOfLastReadAtTraversalStart = spanOfLastSeenRead();
|
||||||
|
|
||||||
|
// load in the workQueue the present regions that span the current contig, if it's different from the last one
|
||||||
|
if ( walkerHasPresetRegions && ( lastRegionProcessed == null || ! currentWindow.onSameContig(lastRegionProcessed)) ) {
|
||||||
|
loadPresetRegionsForContigToWorkQueue(currentWindow.getContig());
|
||||||
|
}
|
||||||
|
|
||||||
|
// remember the last region we processed for sanity checking later
|
||||||
|
lastRegionProcessed = currentWindow;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override public void remove() { throw new UnsupportedOperationException("Cannot remove from ActiveRegionIterator"); }
|
@Override public void remove() { throw new UnsupportedOperationException("Cannot remove from ActiveRegionIterator"); }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public ActiveRegion next() {
|
public MapData next() {
|
||||||
return readyActiveRegions.pop();
|
return readyActiveRegions.pop();
|
||||||
}
|
}
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -326,7 +363,7 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
final boolean flushProfile = ! activityProfile.isEmpty()
|
final boolean flushProfile = ! activityProfile.isEmpty()
|
||||||
&& ( activityProfile.getContigIndex() != location.getContigIndex()
|
&& ( activityProfile.getContigIndex() != location.getContigIndex()
|
||||||
|| location.getStart() != activityProfile.getStop() + 1);
|
|| location.getStart() != activityProfile.getStop() + 1);
|
||||||
final List<ActiveRegion> newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false);
|
final List<MapData> newActiveRegions = prepActiveRegionsForProcessing(walker, flushProfile, false, referenceOrderedDataView);
|
||||||
|
|
||||||
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
dataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||||
|
|
||||||
|
|
@ -335,7 +372,7 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||||
|
|
||||||
// Iterate forward to get all reference ordered data covering this location
|
// Iterate forward to get all reference ordered data covering this location
|
||||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||||
|
|
||||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||||
addIsActiveResult(walker, tracker, refContext, locus);
|
addIsActiveResult(walker, tracker, refContext, locus);
|
||||||
|
|
@ -346,27 +383,23 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
if ( ! newActiveRegions.isEmpty() ) {
|
if ( ! newActiveRegions.isEmpty() ) {
|
||||||
readyActiveRegions.addAll(newActiveRegions);
|
readyActiveRegions.addAll(newActiveRegions);
|
||||||
if ( DEBUG )
|
if ( DEBUG )
|
||||||
for ( final ActiveRegion region : newActiveRegions )
|
for ( final MapData region : newActiveRegions )
|
||||||
logger.info("Adding region to queue for processing " + region);
|
logger.info("Adding region to queue for processing " + region.activeRegion);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
if ( processRemainingActiveRegions ) {
|
||||||
}
|
// we've run out of stuff to process, and since shards now span entire contig boundaries
|
||||||
}
|
// we should finalized our regions. This allows us to continue to use our referenceOrderedDataView
|
||||||
|
// which would otherwise be shutdown. Only followed when the microschedule says that we're
|
||||||
|
// inside of the last window in the current shard
|
||||||
|
readyActiveRegions.addAll(prepActiveRegionsForProcessing(walker, true, true, referenceOrderedDataView));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
return ! readyActiveRegions.isEmpty();
|
||||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
}
|
||||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
|
||||||
*/
|
|
||||||
public T endTraversal(final Walker<M, T> walker, T sum) {
|
|
||||||
for ( final ActiveRegion region : prepActiveRegionsForProcessing((ActiveRegionWalker<M, T>)walker, true, true) ) {
|
|
||||||
final M x = ((ActiveRegionWalker<M, T>) walker).map(region, null);
|
|
||||||
sum = walker.reduce( x, sum );
|
|
||||||
}
|
}
|
||||||
return sum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
|
|
@ -594,7 +627,10 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
* add these blocks of work to the work queue
|
* add these blocks of work to the work queue
|
||||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||||
*/
|
*/
|
||||||
private List<ActiveRegion> prepActiveRegionsForProcessing(final ActiveRegionWalker<M, T> walker, final boolean flushActivityProfile, final boolean forceAllRegionsToBeActive) {
|
private List<MapData> prepActiveRegionsForProcessing(final ActiveRegionWalker<M, T> walker,
|
||||||
|
final boolean flushActivityProfile,
|
||||||
|
final boolean forceAllRegionsToBeActive,
|
||||||
|
final IntervalReferenceOrderedView referenceOrderedDataView) {
|
||||||
if ( ! walkerHasPresetRegions ) {
|
if ( ! walkerHasPresetRegions ) {
|
||||||
// We don't have preset regions, so we get our regions from the activity profile
|
// We don't have preset regions, so we get our regions from the activity profile
|
||||||
final Collection<ActiveRegion> activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMinRegionSize(), getMaxRegionSize(), flushActivityProfile);
|
final Collection<ActiveRegion> activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMinRegionSize(), getMaxRegionSize(), flushActivityProfile);
|
||||||
|
|
@ -603,13 +639,13 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
}
|
}
|
||||||
|
|
||||||
// Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
// Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them
|
||||||
final LinkedList<ActiveRegion> readyRegions = new LinkedList<ActiveRegion>();
|
final LinkedList<MapData> readyRegions = new LinkedList<>();
|
||||||
while( workQueue.peek() != null ) {
|
while( workQueue.peek() != null ) {
|
||||||
final ActiveRegion activeRegion = workQueue.peek();
|
final ActiveRegion activeRegion = workQueue.peek();
|
||||||
if ( forceAllRegionsToBeActive || regionCompletelyWithinDeadZone(activeRegion) ) {
|
if ( forceAllRegionsToBeActive || regionCompletelyWithinDeadZone(activeRegion) ) {
|
||||||
writeActivityProfile(activeRegion.getSupportingStates());
|
writeActivityProfile(activeRegion.getSupportingStates());
|
||||||
writeActiveRegion(activeRegion);
|
writeActiveRegion(activeRegion);
|
||||||
readyRegions.add(prepActiveRegionForProcessing(workQueue.remove(), walker));
|
readyRegions.add(prepActiveRegionForProcessing(workQueue.remove(), walker, referenceOrderedDataView));
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -619,8 +655,10 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private ActiveRegion prepActiveRegionForProcessing(final ActiveRegion activeRegion, final ActiveRegionWalker<M, T> walker) {
|
private MapData prepActiveRegionForProcessing(final ActiveRegion activeRegion,
|
||||||
final List<GATKSAMRecord> stillLive = new LinkedList<GATKSAMRecord>();
|
final ActiveRegionWalker<M, T> walker,
|
||||||
|
final IntervalReferenceOrderedView referenceOrderedDataView) {
|
||||||
|
final List<GATKSAMRecord> stillLive = new LinkedList<>();
|
||||||
for ( final GATKSAMRecord read : myReads.popCurrentReads() ) {
|
for ( final GATKSAMRecord read : myReads.popCurrentReads() ) {
|
||||||
boolean killed = false;
|
boolean killed = false;
|
||||||
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
|
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
|
||||||
|
|
@ -653,14 +691,21 @@ public final class TraverseActiveRegions<M, T> extends TraversalEngine<M,T,Activ
|
||||||
logger.info(String.format("Processing region %20s span=%3d active?=%5b with %4d reads. Overall max reads carried is %s",
|
logger.info(String.format("Processing region %20s span=%3d active?=%5b with %4d reads. Overall max reads carried is %s",
|
||||||
activeRegion.getLocation(), activeRegion.getLocation().size(), activeRegion.isActive(), activeRegion.size(), maxReadsInMemory));
|
activeRegion.getLocation(), activeRegion.getLocation().size(), activeRegion.isActive(), activeRegion.size(), maxReadsInMemory));
|
||||||
|
|
||||||
return activeRegion;
|
// prepare the RefMetaDataTracker information
|
||||||
|
final GenomeLoc loc = activeRegion.getLocation();
|
||||||
|
// get all of the RODs that cover the active region (without extension)
|
||||||
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataForInterval(loc);
|
||||||
|
// trim away all of the features that occurred before this location, as we will not need them in the future
|
||||||
|
referenceOrderedDataView.trimCurrentFeaturesToLoc(loc);
|
||||||
|
|
||||||
|
return new MapData(activeRegion, tracker);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class TraverseActiveRegionMap implements NSMapFunction<ActiveRegion, M> {
|
private class TraverseActiveRegionMap implements NSMapFunction<MapData, M> {
|
||||||
@Override
|
@Override
|
||||||
public M apply(final ActiveRegion activeRegion) {
|
public M apply(final MapData mapData) {
|
||||||
if ( DEBUG ) logger.info("Executing walker.map for " + activeRegion + " in thread " + Thread.currentThread().getName());
|
if ( DEBUG ) logger.info("Executing walker.map for " + mapData.activeRegion + " in thread " + Thread.currentThread().getName());
|
||||||
return walker.map(activeRegion, null);
|
return walker.map(mapData.activeRegion, mapData.tracker);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -179,7 +179,7 @@ public class TraverseLociNano<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,
|
||||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||||
|
|
||||||
// Iterate forward to get all reference ordered data covering this location
|
// Iterate forward to get all reference ordered data covering this location
|
||||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location, refContext);
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location);
|
||||||
|
|
||||||
numIterations++;
|
numIterations++;
|
||||||
return new MapData(locus, refContext, tracker);
|
return new MapData(locus, refContext, tracker);
|
||||||
|
|
|
||||||
|
|
@ -204,8 +204,10 @@ public class VariantAnnotatorEngine {
|
||||||
return annotateDBs(tracker, annotated);
|
return annotateDBs(tracker, annotated);
|
||||||
}
|
}
|
||||||
|
|
||||||
public VariantContext annotateContext(final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap, VariantContext vc) {
|
public VariantContext annotateContextForActiveRegion(final RefMetaDataTracker tracker,
|
||||||
Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
|
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap,
|
||||||
|
final VariantContext vc) {
|
||||||
|
final Map<String, Object> infoAnnotations = new LinkedHashMap<>(vc.getAttributes());
|
||||||
|
|
||||||
// go through all the requested info annotationTypes
|
// go through all the requested info annotationTypes
|
||||||
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations ) {
|
||||||
|
|
@ -219,10 +221,13 @@ public class VariantAnnotatorEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
// generate a new annotated VC
|
// generate a new annotated VC
|
||||||
VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
final VariantContextBuilder builder = new VariantContextBuilder(vc).attributes(infoAnnotations);
|
||||||
|
|
||||||
// annotate genotypes, creating another new VC in the process
|
// annotate genotypes, creating another new VC in the process
|
||||||
return builder.genotypes(annotateGenotypes(null, null, null, vc, perReadAlleleLikelihoodMap)).make();
|
final VariantContext annotated = builder.genotypes(annotateGenotypes(null, null, null, vc, perReadAlleleLikelihoodMap)).make();
|
||||||
|
|
||||||
|
// annotate db occurrences
|
||||||
|
return annotateDBs(tracker, annotated);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -122,7 +122,7 @@ public final class VariantOverlapAnnotator {
|
||||||
/**
|
/**
|
||||||
* Add overlap attributes to vcToAnnotate against all overlapBindings in tracker
|
* Add overlap attributes to vcToAnnotate against all overlapBindings in tracker
|
||||||
*
|
*
|
||||||
* @see #annotateOverlap(java.util.List, , String, org.broadinstitute.variant.variantcontext.VariantContext)
|
* @see #annotateOverlap(java.util.List, String, org.broadinstitute.variant.variantcontext.VariantContext)
|
||||||
* for more information
|
* for more information
|
||||||
*
|
*
|
||||||
* @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate
|
* @param tracker non-null tracker, which we will use to update the rsID of vcToAnnotate
|
||||||
|
|
@ -130,12 +130,12 @@ public final class VariantOverlapAnnotator {
|
||||||
* @param vcToAnnotate a variant context to annotate
|
* @param vcToAnnotate a variant context to annotate
|
||||||
* @return a VariantContext (may be == to vcToAnnotate) with updated overlaps update fields value
|
* @return a VariantContext (may be == to vcToAnnotate) with updated overlaps update fields value
|
||||||
*/
|
*/
|
||||||
public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, VariantContext vcToAnnotate) {
|
public VariantContext annotateOverlaps(final RefMetaDataTracker tracker, final VariantContext vcToAnnotate) {
|
||||||
if ( overlapBindings.isEmpty() ) return vcToAnnotate;
|
if ( overlapBindings.isEmpty() ) return vcToAnnotate;
|
||||||
|
|
||||||
VariantContext annotated = vcToAnnotate;
|
VariantContext annotated = vcToAnnotate;
|
||||||
final GenomeLoc loc = getLoc(vcToAnnotate);
|
final GenomeLoc loc = getLoc(vcToAnnotate);
|
||||||
for ( Map.Entry<RodBinding<VariantContext>, String> overlapBinding : overlapBindings.entrySet() ) {
|
for ( final Map.Entry<RodBinding<VariantContext>, String> overlapBinding : overlapBindings.entrySet() ) {
|
||||||
annotated = annotateOverlap(tracker.getValues(overlapBinding.getKey(), loc), overlapBinding.getValue(), vcToAnnotate);
|
annotated = annotateOverlap(tracker.getValues(overlapBinding.getKey(), loc), overlapBinding.getValue(), vcToAnnotate);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -186,7 +186,7 @@ public final class VariantOverlapAnnotator {
|
||||||
if ( rsIDSourceVCs == null ) throw new IllegalArgumentException("rsIDSourceVCs cannot be null");
|
if ( rsIDSourceVCs == null ) throw new IllegalArgumentException("rsIDSourceVCs cannot be null");
|
||||||
if ( vcToAnnotate == null ) throw new IllegalArgumentException("vcToAnnotate cannot be null");
|
if ( vcToAnnotate == null ) throw new IllegalArgumentException("vcToAnnotate cannot be null");
|
||||||
|
|
||||||
for ( VariantContext vcComp : rsIDSourceVCs ) {
|
for ( final VariantContext vcComp : rsIDSourceVCs ) {
|
||||||
if ( vcComp.isFiltered() ) continue; // don't process any failed VCs
|
if ( vcComp.isFiltered() ) continue; // don't process any failed VCs
|
||||||
|
|
||||||
if ( ! vcComp.getChr().equals(vcToAnnotate.getChr()) || vcComp.getStart() != vcToAnnotate.getStart() )
|
if ( ! vcComp.getChr().equals(vcToAnnotate.getChr()) || vcComp.getStart() != vcToAnnotate.getStart() )
|
||||||
|
|
|
||||||
|
|
@ -256,7 +256,6 @@ public class ReadMetricsUnitTest extends BaseTest {
|
||||||
}
|
}
|
||||||
windowMaker.close();
|
windowMaker.close();
|
||||||
}
|
}
|
||||||
traverseActiveRegions.endTraversal(walker, 0);
|
|
||||||
|
|
||||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
|
Assert.assertEquals(engine.getCumulativeMetrics().getNumReadsSeen(), contigs.size() * numReadsPerContig);
|
||||||
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
|
Assert.assertEquals(engine.getCumulativeMetrics().getNumIterations(), contigs.size() * numReadsPerContig);
|
||||||
|
|
|
||||||
|
|
@ -49,7 +49,7 @@ import java.util.*;
|
||||||
/**
|
/**
|
||||||
* @author depristo
|
* @author depristo
|
||||||
*/
|
*/
|
||||||
public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
|
public class IntervalReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
private static int startingChr = 1;
|
private static int startingChr = 1;
|
||||||
private static int endingChr = 2;
|
private static int endingChr = 2;
|
||||||
private static int readCount = 100;
|
private static int readCount = 100;
|
||||||
|
|
@ -285,7 +285,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
|
|
||||||
Collections.sort(intervals);
|
Collections.sort(intervals);
|
||||||
final GenomeLoc span = span(intervals);
|
final GenomeLoc span = span(intervals);
|
||||||
final ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(genomeLocParser, span, names, iterators);
|
final IntervalReferenceOrderedView view = new IntervalReferenceOrderedView(genomeLocParser, span, names, iterators);
|
||||||
|
|
||||||
if ( testStateless ) {
|
if ( testStateless ) {
|
||||||
// test each tracker is well formed, as each is created
|
// test each tracker is well formed, as each is created
|
||||||
|
|
@ -97,7 +97,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
|
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.<ReferenceOrderedDataSource>emptyList());
|
||||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||||
|
|
||||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10), null);
|
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",10));
|
||||||
Assert.assertEquals(tracker.getValues(Feature.class).size(), 0, "The tracker should not have produced any data");
|
Assert.assertEquals(tracker.getValues(Feature.class).size(), 0, "The tracker should not have produced any data");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -115,7 +115,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
|
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Collections.singletonList(dataSource));
|
||||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||||
|
|
||||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
|
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20));
|
||||||
TableFeature datum = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest"));
|
TableFeature datum = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest"));
|
||||||
|
|
||||||
Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect");
|
Assert.assertEquals(datum.get("COL1"),"C","datum parameter for COL1 is incorrect");
|
||||||
|
|
@ -141,7 +141,7 @@ public class ReferenceOrderedViewUnitTest extends BaseTest {
|
||||||
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
|
LocusShardDataProvider provider = new LocusShardDataProvider(shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, seq, Arrays.asList(dataSource1,dataSource2));
|
||||||
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
ReferenceOrderedView view = new ManagingReferenceOrderedView( provider );
|
||||||
|
|
||||||
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20), null);
|
RefMetaDataTracker tracker = view.getReferenceOrderedDataAtLocus(genomeLocParser.createGenomeLoc("chrM",20));
|
||||||
TableFeature datum1 = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest1"));
|
TableFeature datum1 = tracker.getFirstValue(new RodBinding<TableFeature>(TableFeature.class, "tableTest1"));
|
||||||
|
|
||||||
Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect");
|
Assert.assertEquals(datum1.get("COL1"),"C","datum1 parameter for COL1 is incorrect");
|
||||||
|
|
|
||||||
|
|
@ -405,8 +405,6 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
|
||||||
for (LocusShardDataProvider dataProvider : createDataProviders(t, walker, intervals, bam))
|
for (LocusShardDataProvider dataProvider : createDataProviders(t, walker, intervals, bam))
|
||||||
t.traverse(walker, dataProvider, 0);
|
t.traverse(walker, dataProvider, 0);
|
||||||
|
|
||||||
t.endTraversal(walker, 0);
|
|
||||||
|
|
||||||
return walker.mappedActiveRegions;
|
return walker.mappedActiveRegions;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Tags;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||||
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReads;
|
import org.broadinstitute.sting.gatk.walkers.qc.CountReads;
|
||||||
|
|
@ -47,6 +48,7 @@ import java.io.FileNotFoundException;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import static org.testng.Assert.fail;
|
import static org.testng.Assert.fail;
|
||||||
|
|
@ -146,7 +148,7 @@ public class TraverseReadsUnitTest extends BaseTest {
|
||||||
fail("Shard == null");
|
fail("Shard == null");
|
||||||
}
|
}
|
||||||
|
|
||||||
ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard,genomeLocParser,dataSource.seek(shard),null,null);
|
ReadShardDataProvider dataProvider = new ReadShardDataProvider(shard,genomeLocParser,dataSource.seek(shard),null, Collections.<ReferenceOrderedDataSource>emptyList());
|
||||||
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
|
accumulator = traversalEngine.traverse(countReadWalker, dataProvider, accumulator);
|
||||||
dataProvider.close();
|
dataProvider.close();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue