Merge branch 'master' of github.com:broadinstitute/gsa-unstable
commit 27722a15e5
@@ -570,11 +570,32 @@ public class GenomeAnalysisEngine {
else if(walker instanceof ActiveRegionWalker) {
    if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
        throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Active region walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
    if(intervals == null)
        return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
    else
        return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
}

    switch(argCollection.activeRegionShardType) {
        case LOCUSSHARD:
            if(intervals == null)
                return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
            else
                return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
        case READSHARD:
            // Use the legacy ReadShardBalancer if legacy downsampling is enabled
            ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
                    new LegacyReadShardBalancer() :
                    new ReadShardBalancer();

            if(intervals == null)
                return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(), readShardBalancer);
            else
                return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), readShardBalancer);
        case ACTIVEREGIONSHARD:
            if(intervals == null)
                return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new ActiveRegionShardBalancer());
            else
                return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new ActiveRegionShardBalancer());
        default:
            throw new UserException.CommandLineException("Invalid active region shard type.");
    }
}
else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) {
    // Apply special validation to read pair walkers.
    if(walker instanceof ReadPairWalker) {
@@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;

@@ -448,5 +449,14 @@ public class GATKArgumentCollection {
@Hidden
public boolean generateShadowBCF = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed

// --------------------------------------------------------------------------------------------------------------
//
// Experimental Active Region Traversal modes
//
// --------------------------------------------------------------------------------------------------------------

@Argument(fullName = "active_region_traversal_shard_type", shortName = "active_region_traversal_shard_type", doc = "Choose an experimental shard type for active region traversal, instead of the default LocusShard", required = false)
public ExperimentalActiveRegionShardType activeRegionShardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
}
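The ExperimentalActiveRegionShardType enum imported above is referenced but not shown in this commit; a minimal sketch of what it presumably declares, inferred from the LOCUSSHARD/READSHARD/ACTIVEREGIONSHARD cases used elsewhere in the diff:

    package org.broadinstitute.sting.utils.activeregion;

    // Sketch only -- the real source file is not part of this excerpt; the constant
    // names are taken from the switch statements in GenomeAnalysisEngine and MicroScheduler.
    public enum ExperimentalActiveRegionShardType {
        LOCUSSHARD,        // default: existing locus-shard based active region traversal
        READSHARD,         // experimental read-shard based traversal
        ACTIVEREGIONSHARD  // experimental active-region-shard based traversal
    }

On the command line, passing -active_region_traversal_shard_type READSHARD (or ACTIVEREGIONSHARD) to an ActiveRegionWalker run selects one of the experimental traversals added below; leaving it unset keeps the default LOCUSSHARD behavior.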
@@ -0,0 +1,58 @@
/*
 * Copyright (c) 2012, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.datasources.providers;

import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;

import java.util.Collection;

/**
 * @author Joel Thibault
 */
public class ActiveRegionShardDataProvider extends ShardDataProvider {
    final private ReadShardDataProvider readProvider;
    final private LocusShardDataProvider locusProvider;

    public ActiveRegionShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, StingSAMIterator reads, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
        super(shard, genomeLocParser, reference, rods); // TODO: necessary?
        readProvider = new ReadShardDataProvider(shard, genomeLocParser, reads, reference, rods);
        locusProvider = new LocusShardDataProvider(shard, sourceInfo, genomeLocParser, locus, locusIterator, reference, rods);
    }

    public ReadShardDataProvider getReadShardDataProvider() {
        return readProvider;
    }

    public LocusShardDataProvider getLocusShardDataProvider(LocusIterator iterator) {
        return locusProvider;
    }
}
@@ -22,11 +22,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
     */
    private final ReadProperties sourceInfo;

    /**
     * The parser, used to create and build new GenomeLocs.
     */
    private final GenomeLocParser genomeLocParser;

    /**
     * The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
     */

@@ -45,7 +40,22 @@ public class LocusShardDataProvider extends ShardDataProvider {
    public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
        super(shard,genomeLocParser,reference,rods);
        this.sourceInfo = sourceInfo;
        this.genomeLocParser = genomeLocParser;
        this.locus = locus;
        this.locusIterator = locusIterator;
    }

    /**
     * Create a data provider based on an input provider
     * Used only by ExperimentalReadShardTraverseActiveRegions
     * @param dataProvider
     * @param sourceInfo
     * @param genomeLocParser
     * @param locus
     * @param locusIterator
     */
    public LocusShardDataProvider(ShardDataProvider dataProvider, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator) {
        super(dataProvider.getShard(),genomeLocParser,dataProvider.getReference(),dataProvider.getReferenceOrderedData());
        this.sourceInfo = sourceInfo;
        this.locus = locus;
        this.locusIterator = locusIterator;
    }
@@ -0,0 +1,41 @@
/*
 * Copyright (c) 2012, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.datasources.reads;

import net.sf.samtools.SAMFileSpan;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;

import java.util.List;
import java.util.Map;

/**
 * @author Joel Thibault
 */
public class ActiveRegionShard extends ReadShard {
    public ActiveRegionShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
        super(parser, readsDataSource, fileSpans, loci, isUnmapped);
    }
}
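The LinearMicroScheduler changes later in this commit dispatch on Shard.ShardType.ACTIVEREGION, the enum constant added to Shard below. ActiveRegionShard as shown above does not override getShardType(), so the following is only a hypothetical sketch of how the class could advertise the new type, assuming ReadShard otherwise reports ShardType.READ:

    // Hypothetical addition (not part of this commit as shown): lets
    // shard.getShardType() == Shard.ShardType.ACTIVEREGION match for ActiveRegionShard instances.
    @Override
    public ShardType getShardType() {
        return ShardType.ACTIVEREGION;
    }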
@@ -0,0 +1,32 @@
/*
 * Copyright (c) 2012, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.broadinstitute.sting.gatk.datasources.reads;

/**
 * @author Joel Thibault
 */
public class ActiveRegionShardBalancer extends ReadShardBalancer {
    // TODO ?
}
@@ -40,7 +40,9 @@ import java.util.Map;
 */
public abstract class Shard implements HasGenomeLocation {
    public enum ShardType {
        READ, LOCUS
        READ,
        LOCUS,
        ACTIVEREGION // Used only by ExperimentalActiveRegionShardTraverseActiveRegions
    }

    protected final GenomeLocParser parser; // incredibly annoying!
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.executive;

import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;

@@ -11,6 +12,8 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.traversals.ExperimentalActiveRegionShardTraverseActiveRegions;
import org.broadinstitute.sting.gatk.traversals.ExperimentalReadShardTraverseActiveRegions;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
import org.broadinstitute.sting.gatk.walkers.Walker;

@@ -78,6 +81,18 @@ public class LinearMicroScheduler extends MicroScheduler {
    }
    windowMaker.close();
}
else if(shard.getShardType() == Shard.ShardType.ACTIVEREGION) {
    WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
            getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
    for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
        ShardDataProvider dataProvider = new ActiveRegionShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),getReadIterator(shard),iterator.getLocus(),iterator,reference,rods);
        Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
        accumulator.accumulate(dataProvider,result);
        dataProvider.close();
        if ( walker.isDone() ) break;
    }
    windowMaker.close();
}
else {
    ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods);
    Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());

@@ -93,6 +108,14 @@ public class LinearMicroScheduler extends MicroScheduler {
    final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
    accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}
else if( traversalEngine instanceof ExperimentalReadShardTraverseActiveRegions ) {
    final Object result = ((ExperimentalReadShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
    accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}
else if( traversalEngine instanceof ExperimentalActiveRegionShardTraverseActiveRegions) {
    final Object result = ((ExperimentalActiveRegionShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
    accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}

Object result = accumulator.finishTraversal();
@@ -41,6 +41,7 @@ import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;

@@ -245,7 +246,12 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
} else if (walker instanceof ReadPairWalker) {
    return new TraverseReadPairs();
} else if (walker instanceof ActiveRegionWalker) {
    return new TraverseActiveRegions();
    switch (engine.getArguments().activeRegionShardType) {
        case LOCUSSHARD: return new TraverseActiveRegions();
        case READSHARD: return new ExperimentalReadShardTraverseActiveRegions();
        case ACTIVEREGIONSHARD: return new ExperimentalActiveRegionShardTraverseActiveRegions();
        default: throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type of ActiveRegionWalker.");
    }
} else {
    throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
}
@@ -117,7 +117,7 @@ public class GATKReport {
 * @param numColumns the number of columns in this table
 */
public void addTable(final String tableName, final String tableDescription, final int numColumns) {
    addTable(tableName, tableDescription, numColumns, false, false);
    addTable(tableName, tableDescription, numColumns, GATKReportTable.TableSortingWay.DO_NOT_SORT);
}

/**

@@ -126,11 +126,10 @@ public class GATKReport {
 * @param tableName the name of the table
 * @param tableDescription the description of the table
 * @param numColumns the number of columns in this table
 * @param sortByRowID whether to sort the rows by the row ID
 * @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column
 * @param sortingWay how the rows of the table should be sorted
 */
public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
    GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns);
public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.TableSortingWay sortingWay) {
    GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay);
    tables.put(tableName, table);
}
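A short usage sketch of the reworked table-creation API (illustrative only; the no-argument GATKReport constructor and the table names here are assumptions, not part of this commit):

    // The boolean pair (sortByRowID, sortByAllColumns) is replaced by a single TableSortingWay value.
    final GATKReport report = new GATKReport();
    report.addTable("ExampleTable", "Rows sorted by row ID", 3, GATKReportTable.TableSortingWay.SORT_BY_ROW);
    report.addTable("UnsortedTable", "Rows kept in insertion order", 2); // the three-argument overload now delegates to DO_NOT_SORT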
@@ -46,8 +46,7 @@ public class GATKReportTable {
|
|||
private final String tableName;
|
||||
private final String tableDescription;
|
||||
|
||||
private final boolean sortByRowID;
|
||||
private final boolean sortByAllColumns;
|
||||
private final TableSortingWay sortingWay;
|
||||
|
||||
private List<Object[]> underlyingData;
|
||||
private final List<GATKReportColumn> columnInfo;
|
||||
|
|
@@ -73,6 +72,12 @@ public class GATKReportTable {
|
|||
public int index() { return index; }
|
||||
}
|
||||
|
||||
public enum TableSortingWay {
|
||||
SORT_BY_ROW,
|
||||
SORT_BY_COLUMN,
|
||||
DO_NOT_SORT
|
||||
}
|
||||
|
||||
protected enum TableNameHeaderFields {
|
||||
NAME(2),
|
||||
DESCRIPTION(3);
|
||||
|
|
@@ -107,10 +112,7 @@ public class GATKReportTable {
|
|||
tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! (and that's okay)
|
||||
|
||||
// when reading from a file, we do not re-sort the rows
|
||||
sortByRowID = false;
|
||||
|
||||
// when reading from a file, we do not re-sort the rows
|
||||
sortByAllColumns = false;
|
||||
sortingWay = TableSortingWay.DO_NOT_SORT;
|
||||
|
||||
// initialize the data
|
||||
final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]);
|
||||
|
|
@@ -181,7 +183,7 @@ public class GATKReportTable {
|
|||
* @param numColumns the number of columns in this table
|
||||
*/
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) {
|
||||
this(tableName, tableDescription, numColumns, true, false);
|
||||
this(tableName, tableDescription, numColumns, TableSortingWay.SORT_BY_ROW);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@@ -190,10 +192,9 @@ public class GATKReportTable {
|
|||
* @param tableName the name of the table
|
||||
* @param tableDescription the description of the table
|
||||
* @param numColumns the number of columns in this table
|
||||
* @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added)
|
||||
* @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added)
|
||||
* @param sortingWay in what way to sort rows (instead of the order in which they were added)
|
||||
*/
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
|
||||
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final TableSortingWay sortingWay) {
|
||||
if ( !isValidName(tableName) ) {
|
||||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||
}
|
||||
|
|
@@ -204,8 +205,7 @@ public class GATKReportTable {
|
|||
|
||||
this.tableName = tableName;
|
||||
this.tableDescription = tableDescription;
|
||||
this.sortByRowID = sortByRowID;
|
||||
this.sortByAllColumns = sortByAllColumns;
|
||||
this.sortingWay = sortingWay;
|
||||
|
||||
underlyingData = new ArrayList<Object[]>(INITITAL_ARRAY_SIZE);
|
||||
columnInfo = new ArrayList<GATKReportColumn>(numColumns);
|
||||
|
|
@@ -218,7 +218,7 @@ public class GATKReportTable {
|
|||
* @param tableToCopy
|
||||
*/
|
||||
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
|
||||
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns);
|
||||
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortingWay);
|
||||
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
|
||||
addColumn(column.getColumnName(), column.getFormat());
|
||||
if ( copyData )
|
||||
|
|
@@ -569,56 +569,53 @@ public class GATKReportTable {
|
|||
out.println();
|
||||
|
||||
// write the table body
|
||||
if ( sortByAllColumns ) {
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
switch (sortingWay) {
|
||||
case SORT_BY_COLUMN:
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
|
||||
int result = EQUAL;
|
||||
int result = EQUAL;
|
||||
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
}
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
});
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
} else if ( sortByRowID ) {
|
||||
// make sure that there are exactly the correct number of ID mappings
|
||||
if ( rowIdToIndex.size() != underlyingData.size() )
|
||||
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
|
||||
});
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
break;
|
||||
case SORT_BY_ROW:
|
||||
// make sure that there are exactly the correct number of ID mappings
|
||||
if ( rowIdToIndex.size() != underlyingData.size() )
|
||||
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
|
||||
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
|
||||
}
|
||||
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
|
||||
writeRow(out, underlyingData.get(rowKey.getValue()));
|
||||
} else {
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
}
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
|
||||
}
|
||||
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
|
||||
writeRow(out, underlyingData.get(rowKey.getValue()));
|
||||
break;
|
||||
case DO_NOT_SORT:
|
||||
for ( final Object[] row : underlyingData )
|
||||
writeRow(out, row);
|
||||
}
|
||||
out.println();
|
||||
}
|
||||
|
||||
|
|
@@ -735,53 +732,47 @@ public class GATKReportTable {
|
|||
}
|
||||
|
||||
private List<Object[]> getOrderedRows() {
|
||||
if ( sortByAllColumns ) {
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
|
||||
int result = EQUAL;
|
||||
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
switch (sortingWay) {
|
||||
case SORT_BY_COLUMN:
|
||||
Collections.sort(underlyingData, new Comparator<Object[]>() {
|
||||
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
|
||||
@Override
|
||||
public int compare(Object[] objectArr1, Object[] objectArr2) {
|
||||
final int EQUAL = 0;
|
||||
int result = EQUAL;
|
||||
int l = objectArr1.length;
|
||||
for (int x = 0; x < l; x++) {
|
||||
if (objectArr1[x] instanceof Integer) {
|
||||
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
}
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
} else if (objectArr1[x] instanceof Double) {
|
||||
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
} else { // default uses String comparison
|
||||
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
|
||||
if( result != EQUAL) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
return result;
|
||||
});
|
||||
return underlyingData;
|
||||
case SORT_BY_ROW:
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
return underlyingData;
|
||||
}
|
||||
});
|
||||
return underlyingData;
|
||||
} else if ( !sortByRowID ) {
|
||||
return underlyingData;
|
||||
|
||||
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
|
||||
for ( final int rowKey : sortedMap.values() )
|
||||
orderedData.add(underlyingData.get(rowKey));
|
||||
|
||||
return orderedData;
|
||||
default:
|
||||
return underlyingData;
|
||||
}
|
||||
|
||||
final TreeMap<Object, Integer> sortedMap;
|
||||
try {
|
||||
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
|
||||
} catch (ClassCastException e) {
|
||||
return underlyingData;
|
||||
}
|
||||
|
||||
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
|
||||
for ( final int rowKey : sortedMap.values() )
|
||||
orderedData.add(underlyingData.get(rowKey));
|
||||
|
||||
return orderedData;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -0,0 +1,309 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class ExperimentalActiveRegionShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ActiveRegionShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final ActiveRegionShardDataProvider dataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("ExperimentalActiveRegionShardTraverseActiveRegions.traverse: Shard is %s", dataProvider));
|
||||
|
||||
ReadShardDataProvider readDataProvider = dataProvider.getReadShardDataProvider();
|
||||
|
||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
final ReadView readView = new ReadView(readDataProvider);
|
||||
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());
|
||||
|
||||
Shard readShard = readDataProvider.getShard();
|
||||
SAMFileHeader header = readShard.getReadProperties().getHeader();
|
||||
WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
|
||||
readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));
|
||||
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
LocusShardDataProvider locusDataProvider = dataProvider.getLocusShardDataProvider(iterator);
|
||||
final LocusView locusView = new AllLocusView(locusDataProvider);
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've moved across some interval boundary, restart the profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
}
|
||||
|
||||
readDataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
|
||||
prevLoc = location;
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
locusDataProvider.close();
|
||||
}
|
||||
|
||||
windowMaker.close();
|
||||
|
||||
updateCumulativeMetrics(readDataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
if( !activeRegions.isEmpty() ) {
|
||||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
}
|
||||
|
||||
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
|
||||
// now process the active regions, where possible
|
||||
boolean emptyQueue = false;
|
||||
sum = processActiveRegions(walker, sum, emptyQueue);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
* add these blocks of work to the work queue
|
||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||
*
|
||||
* @param profile
|
||||
* @param activeRegions
|
||||
* @param activeRegionExtension
|
||||
* @param maxRegionSize
|
||||
* @return
|
||||
*/
|
||||
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
|
||||
final List<ActiveRegion> activeRegions,
|
||||
final int activeRegionExtension,
|
||||
final int maxRegionSize) {
|
||||
if ( profile.isEmpty() )
|
||||
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
|
||||
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
|
||||
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// simple utility functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
}
|
||||
|
||||
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
final LocusView locusView) {
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
return (RodLocusView)locusView;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// code to handle processing active regions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
if( walker.activeRegionOutStream != null ) {
|
||||
writeActiveRegionsToStream(walker);
|
||||
return sum;
|
||||
} else {
|
||||
return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out each active region to the walker activeRegionOutStream
|
||||
*
|
||||
* @param walker
|
||||
*/
|
||||
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
|
||||
// Just want to output the active regions to a file, not actually process them
|
||||
for( final ActiveRegion activeRegion : workQueue ) {
|
||||
if( activeRegion.isActive ) {
|
||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
final int lastRegionStart = workQueue.getLast().getLocation().getStart();
|
||||
final String lastRegionContig = workQueue.getLast().getLocation().getContig();
|
||||
|
||||
// If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
|
||||
// TODO can implement parallel traversal here
|
||||
while( workQueue.peekFirst() != null ) {
|
||||
ActiveRegion firstRegion = workQueue.getFirst();
|
||||
final String firstRegionContig = firstRegion.getLocation().getContig();
|
||||
if (emptyQueue || !firstRegionContig.equals(lastRegionContig)) {
|
||||
sum = processFirstActiveRegion(sum, walker);
|
||||
}
|
||||
else {
|
||||
final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop();
|
||||
if (lastRegionStart > firstRegionMaxReadStop) {
|
||||
sum = processFirstActiveRegion( sum, walker );
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Process the first active region and all remaining reads which overlap
|
||||
*
|
||||
* Remove the first active region from the queue
|
||||
* (NB: some reads associated with this active region may have already been processed)
|
||||
*
|
||||
* Remove all of these reads from the queue
|
||||
* (NB: some may be associated with other active regions)
|
||||
*
|
||||
* @param sum
|
||||
* @param walker
|
||||
* @return
|
||||
*/
|
||||
private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
|
||||
final ActiveRegion firstRegion = workQueue.removeFirst();
|
||||
|
||||
GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
|
||||
GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
|
||||
while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
|
||||
(walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {
|
||||
if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
// The region with the highest amount of overlap is chosen as the primary region for the read (ties are broken by taking the right-most region)
|
||||
long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
ActiveRegion bestRegion = firstRegion;
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
|
||||
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
|
||||
bestRegion = otherRegionToTest;
|
||||
}
|
||||
}
|
||||
bestRegion.add( firstRead );
|
||||
|
||||
// The read is also added to all other regions in which it overlaps but marked as non-primary
|
||||
if( walker.wantsNonPrimaryReads() ) {
|
||||
if( !bestRegion.equals(firstRegion) ) {
|
||||
firstRegion.add(firstRead);
|
||||
}
|
||||
for( final ActiveRegion otherRegionToTest : workQueue ) {
|
||||
if( !bestRegion.equals(otherRegionToTest) ) {
|
||||
// check for non-primary vs. extended
|
||||
if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
|
||||
otherRegionToTest.add( firstRead );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check for non-primary vs. extended
|
||||
} else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
|
||||
if ( walker.wantsNonPrimaryReads() ) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
} else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
|
||||
firstRegion.add( firstRead );
|
||||
}
|
||||
|
||||
myReads.removeFirst();
|
||||
firstRead = myReads.peekFirst();
|
||||
firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
|
||||
}
|
||||
|
||||
logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
|
||||
final M x = walker.map( firstRegion, null );
|
||||
return walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
/**
|
||||
* Special function called in LinearMicroScheduler to empty out the work queue.
|
||||
* Ugly for now but will be cleaned up when we push this functionality more into the engine
|
||||
*/
|
||||
public T endTraversal( final Walker<M,T> walker, T sum) {
|
||||
boolean emptyQueue = true;
|
||||
return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
|
@@ -0,0 +1,309 @@
|
|||
package org.broadinstitute.sting.gatk.traversals;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.WalkerManager;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.providers.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.executive.WindowMaker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class ExperimentalReadShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ReadShardDataProvider> {
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
|
||||
|
||||
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
|
||||
private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
@Override
|
||||
public String getTraversalUnits() {
|
||||
return "active regions";
|
||||
}
|
||||
|
||||
@Override
|
||||
public T traverse( final ActiveRegionWalker<M,T> walker,
|
||||
final ReadShardDataProvider readDataProvider,
|
||||
T sum) {
|
||||
logger.debug(String.format("ExperimentalReadShardTraverseActiveRegions.traverse: Read Shard is %s", readDataProvider));
|
||||
|
||||
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
|
||||
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
|
||||
|
||||
final ReadView readView = new ReadView(readDataProvider);
|
||||
|
||||
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
|
||||
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());
|
||||
|
||||
Shard readShard = readDataProvider.getShard();
|
||||
SAMFileHeader header = readShard.getReadProperties().getHeader();
|
||||
WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
|
||||
readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));
|
||||
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
LocusShardDataProvider locusDataProvider = new LocusShardDataProvider(readDataProvider,
|
||||
iterator.getSourceInfo(), engine.getGenomeLocParser(), iterator.getLocus(), iterator);
|
||||
|
||||
final LocusView locusView = new AllLocusView(locusDataProvider);
|
||||
final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
|
||||
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
GenomeLoc prevLoc = null;
|
||||
while( locusView.hasNext() ) {
|
||||
final AlignmentContext locus = locusView.next();
|
||||
final GenomeLoc location = locus.getLocation();
|
||||
|
||||
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
|
||||
// we've moved across some interval boundary, restart the profile
|
||||
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
}
|
||||
|
||||
readDataProvider.getShard().getReadMetrics().incrementNumIterations();
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
final ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
// Call the walkers isActive function for this locus and add them to the list to be integrated later
|
||||
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
|
||||
|
||||
prevLoc = location;
|
||||
|
||||
printProgress(locus.getLocation());
|
||||
}
|
||||
|
||||
locusDataProvider.close();
|
||||
}
|
||||
|
||||
windowMaker.close();
|
||||
|
||||
updateCumulativeMetrics(readDataProvider.getShard());
|
||||
|
||||
if ( ! profile.isEmpty() )
|
||||
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
|
||||
|
||||
// add active regions to queue of regions to process
|
||||
// first check if can merge active regions over shard boundaries
|
||||
if( !activeRegions.isEmpty() ) {
|
||||
if( !workQueue.isEmpty() ) {
|
||||
final ActiveRegion last = workQueue.getLast();
|
||||
final ActiveRegion first = activeRegions.get(0);
|
||||
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
|
||||
workQueue.removeLast();
|
||||
activeRegions.remove(first);
|
||||
workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
|
||||
}
|
||||
}
|
||||
workQueue.addAll( activeRegions );
|
||||
}
|
||||
|
||||
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
|
||||
|
||||
// now process the active regions, where possible
|
||||
boolean emptyQueue = false;
|
||||
sum = processActiveRegions(walker, sum, emptyQueue);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Take the individual isActive calls and integrate them into contiguous active regions and
|
||||
* add these blocks of work to the work queue
|
||||
* band-pass filter the list of isActive probabilities and turn into active regions
|
||||
*
|
||||
* @param profile
|
||||
* @param activeRegions
|
||||
* @param activeRegionExtension
|
||||
* @param maxRegionSize
|
||||
* @return
|
||||
*/
|
||||
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
|
||||
final List<ActiveRegion> activeRegions,
|
||||
final int activeRegionExtension,
|
||||
final int maxRegionSize) {
|
||||
if ( profile.isEmpty() )
|
||||
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
|
||||
|
||||
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
|
||||
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
|
||||
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
|
||||
}
|
||||
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// simple utility functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
|
||||
final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext locus, final GenomeLoc location) {
|
||||
if ( walker.hasPresetActiveRegions() ) {
|
||||
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
|
||||
} else {
|
||||
return walker.isActive( tracker, refContext, locus );
|
||||
}
|
||||
}
|
||||
|
||||
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
|
||||
final LocusShardDataProvider dataProvider,
|
||||
final LocusView locusView) {
|
||||
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
|
||||
return new ManagingReferenceOrderedView( dataProvider );
|
||||
else
|
||||
return (RodLocusView)locusView;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// code to handle processing active regions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
if( walker.activeRegionOutStream != null ) {
|
||||
writeActiveRegionsToStream(walker);
|
||||
return sum;
|
||||
} else {
|
||||
return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out each active region to the walker activeRegionOutStream
|
||||
*
|
||||
* @param walker
|
||||
*/
|
||||
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
|
||||
// Just want to output the active regions to a file, not actually process them
|
||||
for( final ActiveRegion activeRegion : workQueue ) {
|
||||
if( activeRegion.isActive ) {
|
||||
walker.activeRegionOutStream.println( activeRegion.getLocation() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
|
||||
final int lastRegionStart = workQueue.getLast().getLocation().getStart();
|
||||
final String lastRegionContig = workQueue.getLast().getLocation().getContig();
|
||||
|
||||
// If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
|
||||
// TODO can implement parallel traversal here
|
||||
while( workQueue.peekFirst() != null ) {
|
||||
ActiveRegion firstRegion = workQueue.getFirst();
|
||||
final String firstRegionContig = firstRegion.getLocation().getContig();
|
||||
if (emptyQueue || !firstRegionContig.equals(lastRegionContig)) {
|
||||
sum = processFirstActiveRegion(sum, walker);
|
||||
}
|
||||
else {
|
||||
final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getMaxReadStop() : firstRegion.getExtendedMaxReadStop();
|
||||
if (lastRegionStart > firstRegionMaxReadStop) {
|
||||
sum = processFirstActiveRegion( sum, walker );
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
    /**
     * Process the first active region and all remaining reads which overlap it.
     *
     * Remove the first active region from the queue
     * (NB: some reads associated with this active region may have already been processed)
     *
     * Remove all of these reads from the queue
     * (NB: some may be associated with other active regions)
     *
     * @param sum    the accumulated reduce result so far
     * @param walker the active region walker whose map/reduce is being driven
     * @return the reduce result after mapping the first queued region
     */
    private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
        final ActiveRegion firstRegion = workQueue.removeFirst();

        GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
        GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );

        while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
                (walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {
            if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
                // The region with the highest amount of overlap is chosen as the primary region for the read (ties are broken by taking the right-most region)
                long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
                ActiveRegion bestRegion = firstRegion;
                for( final ActiveRegion otherRegionToTest : workQueue ) {
                    if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
                        maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
                        bestRegion = otherRegionToTest;
                    }
                }
                bestRegion.add( firstRead );

                // The read is also added to all other regions it overlaps, but marked as non-primary
                if( walker.wantsNonPrimaryReads() ) {
                    if( !bestRegion.equals(firstRegion) ) {
                        firstRegion.add(firstRead);
                    }
                    for( final ActiveRegion otherRegionToTest : workQueue ) {
                        if( !bestRegion.equals(otherRegionToTest) ) {
                            // check for non-primary vs. extended
                            if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
                                otherRegionToTest.add( firstRead );
                            } else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
                                otherRegionToTest.add( firstRead );
                            }
                        }
                    }
                }

            // check for non-primary vs. extended
            } else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
                if ( walker.wantsNonPrimaryReads() ) {
                    firstRegion.add( firstRead );
                }
            } else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
                firstRegion.add( firstRead );
            }

            myReads.removeFirst();
            firstRead = myReads.peekFirst();
            firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
        }

        logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
        final M x = walker.map( firstRegion, null );
        return walker.reduce(x, sum);
    }
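The primary-region choice above reduces to picking the overlapping region with the largest overlap, with ">=" keeping the right-most candidate on ties because regions are scanned left to right. A minimal sketch with plain intervals on a single contig follows; the names are illustrative, not from the commit.

// Sketch only: the primary-region selection rule with simple intervals.
import java.util.Arrays;
import java.util.List;

final class PrimaryRegionSketch {
    record Interval(int start, int stop) {
        long overlap(Interval other) {
            return Math.max(0, Math.min(stop, other.stop) - Math.max(start, other.start) + 1);
        }
    }

    static Interval primaryRegion(Interval read, List<Interval> regionsLeftToRight) {
        Interval best = regionsLeftToRight.get(0);
        long maxOverlap = best.overlap(read);
        for (Interval candidate : regionsLeftToRight) {
            if (candidate.overlap(read) >= maxOverlap) {   // ">=" means the right-most region wins ties
                maxOverlap = candidate.overlap(read);
                best = candidate;
            }
        }
        return best;
    }

    public static void main(String[] args) {
        List<Interval> regions = Arrays.asList(new Interval(1000, 1999), new Interval(2000, 2999));
        // 50 bases of overlap with each region: the right-most region (2000-2999) is chosen.
        System.out.println(primaryRegion(new Interval(1950, 2049), regions));
    }
}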
    /**
     * Special function called in LinearMicroScheduler to empty out the work queue.
     * Ugly for now but will be cleaned up when we push this functionality more into the engine
     */
    public T endTraversal( final Walker<M,T> walker, T sum) {
        boolean emptyQueue = true;
        return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
    }
}

@@ -43,7 +43,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
    public T traverse( final ActiveRegionWalker<M,T> walker,
                       final LocusShardDataProvider dataProvider,
                       T sum) {
        logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
        logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));

        final LocusView locusView = new AllLocusView(dataProvider);

@@ -227,7 +227,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
            final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
            if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
                final ActiveRegion activeRegion = workQueue.remove();
                sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
                sum = processActiveRegion( activeRegion, sum, walker );
            } else {
                break;
            }
@@ -236,9 +236,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
        return sum;
    }

    private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
    private T processActiveRegion( final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M,T> walker ) {
        final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
        for( final GATKSAMRecord read : reads ) {
        for( final GATKSAMRecord read : myReads ) {
            final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
            if( activeRegion.getLocation().overlapsP( readLoc ) ) {
                // The region with the highest amount of overlap is chosen as the primary region for the read (ties are broken by taking the right-most region)
@@ -278,7 +278,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                activeRegion.add( read );
            }
        }
        reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
        myReads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
        // WARNING: This hashset relies on reads being exactly equal when they are placed in the list as when they are removed. So the ActiveRegionWalker can't modify the reads in any way.

        logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());

@@ -207,7 +207,7 @@ public class RecalibrationArgumentCollection {
    public GATKReportTable generateReportTable(final String covariateNames) {
        GATKReportTable argumentsTable;
        if(SORT_BY_ALL_COLUMNS) {
            argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true);
            argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
        } else {
            argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
        }
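This hunk, and several similar ones below, replace the GATKReportTable constructor's trailing boolean flags with the GATKReportTable.TableSortingWay enum. A minimal sketch of the same "boolean trap" refactor pattern follows; ReportTable and SortingWay are hypothetical stand-ins, not the actual GATK classes.

// Sketch only: why a sorting enum reads better than positional boolean flags.
final class SortingWaySketch {
    enum SortingWay { SORT_BY_ROW, SORT_BY_COLUMN, DO_NOT_SORT }

    static final class ReportTable {
        final String name;
        final SortingWay sortingWay;

        // Old style: new ReportTable("Arguments", /*sortByRows*/ false, /*sortByColumns*/ true)
        // New style: the caller states the intent directly with one enum value.
        ReportTable(String name, SortingWay sortingWay) {
            this.name = name;
            this.sortingWay = sortingWay;
        }
    }

    public static void main(String[] args) {
        ReportTable table = new ReportTable("Arguments", SortingWay.SORT_BY_COLUMN);
        System.out.println(table.name + " sorted via " + table.sortingWay);
    }
}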
@@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {

    public void initialize() {
        report = new GATKReport();
        report.addTable(reportName, reportDescription, 6, true, false);
        report.addTable(reportName, reportDescription, 6, GATKReportTable.TableSortingWay.SORT_BY_ROW);
        table = report.getTable(reportName);
        table.addColumn("readgroup");
        table.addColumn("cycle");

@@ -162,7 +162,7 @@ public class VariantEvalReportWriter {
        // create the table
        final String tableName = ve.getSimpleName();
        final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
        report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true, false);
        report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), GATKReportTable.TableSortingWay.SORT_BY_ROW);

        // grab the table, and add the columns we need to it
        final GATKReportTable table = report.getTable(tableName);

@@ -26,6 +26,11 @@ public class ActiveRegion implements HasGenomeLocation {
    private final GenomeLocParser genomeLocParser;
    public final boolean isActive;

    // maximum stop position of all reads with start position in this active region
    // Used only by ExperimentalReadShardTraverseActiveRegions
    // NB: these reads may not be associated with this active region!
    private int maxReadStop;

    public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) {
        this.activeRegionLoc = activeRegionLoc;
        this.isActive = isActive;

@@ -33,6 +38,7 @@ public class ActiveRegion implements HasGenomeLocation {
        this.extension = extension;
        extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
        fullExtentReferenceLoc = extendedLoc;
        maxReadStop = activeRegionLoc.getStart();
    }

    @Override

@@ -93,6 +99,18 @@ public class ActiveRegion implements HasGenomeLocation {
    public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
    public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }

    public void setMaxReadStop(int maxReadStop) {
        this.maxReadStop = maxReadStop;
    }

    public int getMaxReadStop() {
        return maxReadStop;
    }

    public int getExtendedMaxReadStop() {
        return maxReadStop + extension;
    }

    public boolean equalExceptReads(final ActiveRegion other) {
        if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false;
        if ( isActive != other.isActive ) return false;
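The new maxReadStop field is described above as the maximum stop of all reads whose start falls inside the region. A small self-contained sketch of how a traversal might maintain that value while streaming coordinate-sorted reads; Region, Read and updateMaxReadStop are hypothetical stand-ins, not the commit's code.

// Sketch only: maintaining maxReadStop for a region from coordinate-sorted reads.
import java.util.List;

final class MaxReadStopSketch {
    static final class Region {
        final int start, stop;
        int maxReadStop;
        Region(int start, int stop) { this.start = start; this.stop = stop; this.maxReadStop = start; }
    }

    record Read(int start, int stop) {}

    // For every read whose start lies inside the region, remember the furthest stop seen so far.
    static void updateMaxReadStop(Region region, List<Read> readsInCoordinateOrder) {
        for (Read read : readsInCoordinateOrder) {
            if (read.start() >= region.start && read.start() <= region.stop) {
                region.maxReadStop = Math.max(region.maxReadStop, read.stop());
            }
        }
    }

    public static void main(String[] args) {
        Region region = new Region(1000, 1999);
        updateMaxReadStop(region, List.of(new Read(990, 1090), new Read(1500, 2100)));
        System.out.println(region.maxReadStop); // 2100: a read starting in the region ends past it
    }
}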
@@ -0,0 +1,14 @@
package org.broadinstitute.sting.utils.activeregion;

/**
 * Created with IntelliJ IDEA.
 * User: thibault
 * Date: 1/2/13
 * Time: 4:59 PM
 * To change this template use File | Settings | File Templates.
 */
public enum ExperimentalActiveRegionShardType {
    LOCUSSHARD, // default/legacy type
    READSHARD,
    ACTIVEREGIONSHARD
}
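A small, self-contained sketch of how a user-supplied string might be mapped onto this enum and dispatched to a shard balancer, mirroring the switch used in the unit test further below; ShardTypeSketch and chooseBalancerName are illustrative helpers, not part of the commit.

// Sketch only: parsing a shard type and dispatching on it.
final class ShardTypeSketch {
    enum ShardType { LOCUSSHARD, READSHARD, ACTIVEREGIONSHARD }

    static String chooseBalancerName(ShardType type) {
        switch (type) {
            case LOCUSSHARD:        return "LocusShardBalancer";
            case READSHARD:         return "ReadShardBalancer";
            case ACTIVEREGIONSHARD: return "ActiveRegionShardBalancer";
            default:                throw new IllegalArgumentException("Invalid shard type: " + type);
        }
    }

    public static void main(String[] args) {
        ShardType type = ShardType.valueOf("READSHARD"); // e.g. taken from a command-line argument
        System.out.println(chooseBalancerName(type));
    }
}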
@@ -70,7 +70,7 @@ public class QuantizationInfo {
    public GATKReportTable generateReportTable(boolean sortBycols) {
        GATKReportTable quantizedTable;
        if(sortBycols) {
            quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true);
            quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
        } else {
            quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
        }

@@ -287,7 +287,7 @@ public class RecalUtils {
        final GATKReportTable reportTable;
        if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
            if(sortByCols) {
                reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true);
                reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
            } else {
                reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
            }

@@ -32,6 +32,13 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Random;
import java.io.FileInputStream;
import java.io.DataInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;


public class GATKReportUnitTest extends BaseTest {
    @Test

@@ -77,6 +84,85 @@ public class GATKReportUnitTest extends BaseTest {
        Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
    }

    private GATKReportTable getTableWithRandomValues() {
        Random number = new Random(123L);
        final int VALUESRANGE = 10;

        GATKReport report = GATKReport.newSimpleReport("TableName", "col1", "col2", "col3");
        GATKReportTable table = new GATKReportTable("testSortingTable", "table with random values sorted by columns", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN );

        final int NUMROWS = 100;
        for (int x = 0; x < NUMROWS; x++) {
            report.addRow(number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE), number.nextInt(VALUESRANGE));
        }
        return table;
    }

    @Test(enabled = true)
    public void testSortingByColumn() {
        Assert.assertEquals(isSorted(getTableWithRandomValues()), true);
    }

    private boolean isSorted(GATKReportTable table) {
        boolean result = true;
        File testingSortingTableFile = new File("myFile.txt");

        try {
            // Connect print stream to the output stream
            PrintStream ps = new PrintStream(testingSortingTableFile);
            table.write(ps);
            ps.close();
        }
        catch (Exception e){
            System.err.println ("Error: " + e.getMessage());
        }

        ArrayList<int[]> rows = new ArrayList<int[]>();
        try {
            // Open the file
            FileInputStream fStream = new FileInputStream(testingSortingTableFile);
            // Get the object of DataInputStream
            DataInputStream in = new DataInputStream(fStream);
            BufferedReader br = new BufferedReader(new InputStreamReader(in));
            String strLine;
            //Read File Line By Line
            while ((strLine = br.readLine()) != null) {

                String[] parts = strLine.split(" ");
                int l = parts.length;
                int[] row = new int[l];
                for(int n = 0; n < l; n++) {
                    row[n] = Integer.parseInt(parts[n]);
                }
                rows.add(row);
            }
            //Close the input stream
            in.close();
        } catch (Exception e){//Catch exception if any
            System.err.println("Error: " + e.getMessage());
        }
        for (int x = 1; x < rows.size() && result; x++) {
            result = checkRowOrder(rows.get(x - 1), rows.get(x));
        }
        return result;
    }

    private boolean checkRowOrder(int[] row1, int[] row2) {
        int l = row1.length;
        final int EQUAL = 0;

        int result = EQUAL;

        for(int x = 0; x < l && ( result <= EQUAL); x++) {
            result = ((Integer)row1[x]).compareTo(row2[x]);
        }
        if (result <= EQUAL) {
            return true;
        } else {
            return false;
        }
    }

    private GATKReportTable makeBasicTable() {
        GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value");
        GATKReportTable table = report.getTable("TableName");
@@ -168,7 +254,7 @@ public class GATKReportUnitTest extends BaseTest {
        table.set("RZ", "SomeFloat", 535646345.657453464576);
        table.set("RZ", "TrueFalse", true);

        report1.addTable("Table3", "blah", 1, true, false);
        report1.addTable("Table3", "blah", 1, GATKReportTable.TableSortingWay.SORT_BY_ROW);
        report1.getTable("Table3").addColumn("a");
        report1.getTable("Table3").addRowIDMapping("q", 2);
        report1.getTable("Table3").addRowIDMapping("5", 3);

@@ -3,10 +3,16 @@ package org.broadinstitute.sting.gatk.traversals;
import com.google.java.contract.PreconditionError;
import net.sf.samtools.*;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

@@ -15,7 +21,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;

@@ -28,6 +33,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.TestException;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

@@ -95,7 +101,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        }
    }

    private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
    private final TraverseActiveRegions<Integer, Integer> traverse = new TraverseActiveRegions<Integer, Integer>();
    private final ExperimentalReadShardTraverseActiveRegions<Integer, Integer> readShardTraverse = new ExperimentalReadShardTraverseActiveRegions<Integer, Integer>();
    private final ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer> activeRegionShardTraverse = new ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer>();

    private IndexedFastaSequenceFile reference;
    private SAMSequenceDictionary dictionary;

@@ -106,13 +114,14 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
    private static final String testBAM = "TraverseActiveRegionsUnitTest.bam";
    private static final String testBAI = "TraverseActiveRegionsUnitTest.bai";

    private static final ExperimentalActiveRegionShardType shardType = ExperimentalActiveRegionShardType.LOCUSSHARD;

    @BeforeClass
    private void init() throws FileNotFoundException {
        reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
        dictionary = reference.getSequenceDictionary();
        genomeLocParser = new GenomeLocParser(dictionary);

        // TODO: test shard boundaries
        // TODO: reads with indels
        // TODO: reads which span many regions
        // TODO: reads which are partially between intervals (in/outside extension)

@@ -142,6 +151,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        reads.add(buildSAMRecord("boundary_1_post", "1", 1999, 2050));
        reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
        reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
        reads.add(buildSAMRecord("shard_boundary_1_pre", "1", 16300, 16385));
        reads.add(buildSAMRecord("shard_boundary_1_post", "1", 16384, 16400));
        reads.add(buildSAMRecord("shard_boundary_equal", "1", 16355, 16414));
        reads.add(buildSAMRecord("simple20", "20", 10025, 10075));

        createBAM(reads);

@@ -153,7 +165,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        File indexFile = new File(testBAI);
        indexFile.deleteOnExit();

        SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile);
        SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, outFile);
        for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) {
            out.addAlignment(read);
        }

@@ -171,8 +183,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {

    private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
        List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
        for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
            t.traverse(walker, dataProvider, 0);
        for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
            traverse(walker, dataProvider, 0);
            activeIntervals.addAll(walker.isActiveCalls);
        }

@@ -272,6 +284,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
        // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
        // outside_intervals: none
        // shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
        // shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
        // shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
        // simple20: Primary in 20:10000-10100

        Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);

@@ -286,6 +301,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
        verifyReadMapping(region, "boundary_equal", "boundary_1_post");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
        verifyReadMapping(region, "shard_boundary_1_pre");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
        verifyReadMapping(region, "shard_boundary_1_post", "shard_boundary_equal");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
        verifyReadMapping(region, "simple20");
    }

@@ -309,6 +330,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
        // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
        // outside_intervals: none
        // shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
        // shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
        // shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
        // simple20: Primary in 20:10000-10100

        Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);

@@ -323,6 +347,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
        verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
        verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
        verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
        verifyReadMapping(region, "simple20");
    }

@@ -347,6 +377,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        // boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
        // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
        // outside_intervals: none
        // shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
        // shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
        // shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
        // simple20: Primary in 20:10000-10100

        Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);

@@ -361,6 +394,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
        verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
        verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
        verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");

        region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
        verifyReadMapping(region, "simple20");
    }

@@ -382,10 +421,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
    }

    private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
        for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
            t.traverse(walker, dataProvider, 0);
        for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
            traverse(walker, dataProvider, 0);

        t.endTraversal(walker, 0);
        endTraversal(walker, 0);

        return walker.mappedActiveRegions;
    }

@@ -429,6 +468,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
    protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
        SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
        header.setSequenceDictionary(dictionary);
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        GATKSAMRecord record = new GATKSAMRecord(header);

        record.setReadName(readName);

@@ -445,10 +485,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
        return record;
    }

    private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
    private List<ShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
        GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
        engine.setGenomeLocParser(genomeLocParser);
        t.initialize(engine);
        GATKArgumentCollection arguments = new GATKArgumentCollection();
        arguments.activeRegionShardType = shardType; // make explicit
        engine.setArguments(arguments);

        Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
        SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags());
@@ -456,13 +498,65 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {

        SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser);

        List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
        for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
            for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
                providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
            }
        List<ShardDataProvider> providers = new ArrayList<ShardDataProvider>();

        switch (shardType) {
            case LOCUSSHARD:
                traverse.initialize(engine);
                for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
                    for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
                        providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
                    }
                }
                break;
            case READSHARD:
                readShardTraverse.initialize(engine);
                for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ReadShardBalancer())) {
                    providers.add(new ReadShardDataProvider(shard, genomeLocParser, shard.iterator(), reference, new ArrayList<ReferenceOrderedDataSource>()));
                }
                break;
            case ACTIVEREGIONSHARD:
                activeRegionShardTraverse.initialize(engine);
                for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) {
                    for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
                        providers.add(new ActiveRegionShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, shard.iterator(), window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
                    }
                }
                break;
            default: throw new TestException("Invalid shard type");
        }

        return providers;
    }

    private void traverse(DummyActiveRegionWalker walker, ShardDataProvider dataProvider, int i) {
        switch (shardType) {
            case LOCUSSHARD:
                traverse.traverse(walker, (LocusShardDataProvider) dataProvider, i);
                break;
            case READSHARD:
                readShardTraverse.traverse(walker, (ReadShardDataProvider) dataProvider, i);
                break;
            case ACTIVEREGIONSHARD:
                activeRegionShardTraverse.traverse(walker, (ActiveRegionShardDataProvider) dataProvider, i);
                break;
            default: throw new TestException("Invalid shard type");
        }
    }

    private void endTraversal(DummyActiveRegionWalker walker, int i) {
        switch (shardType) {
            case LOCUSSHARD:
                traverse.endTraversal(walker, i);
                break;
            case READSHARD:
                readShardTraverse.endTraversal(walker, i);
                break;
            case ACTIVEREGIONSHARD:
                activeRegionShardTraverse.endTraversal(walker, i);
                break;
            default: throw new TestException("Invalid shard type");
        }
    }

}