Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Ryan Poplin 2013-01-04 11:38:18 -05:00
commit 27722a15e5
23 changed files with 1166 additions and 143 deletions

View File: GenomeAnalysisEngine.java

@ -570,11 +570,32 @@ public class GenomeAnalysisEngine {
else if(walker instanceof ActiveRegionWalker) {
if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Active region walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
}
switch(argCollection.activeRegionShardType) {
case LOCUSSHARD:
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new LocusShardBalancer());
case READSHARD:
// Use the legacy ReadShardBalancer if legacy downsampling is enabled
ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
new LegacyReadShardBalancer() :
new ReadShardBalancer();
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(), readShardBalancer);
else
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), readShardBalancer);
case ACTIVEREGIONSHARD:
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new ActiveRegionShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(((ActiveRegionWalker)walker).extendIntervals(intervals, this.genomeLocParser, this.getReferenceDataSource().getReference()), new ActiveRegionShardBalancer());
default:
throw new UserException.CommandLineException("Invalid active region shard type.");
}
}
else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) {
// Apply special validation to read pair walkers.
if(walker instanceof ReadPairWalker) {

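Note that all three cases of the new switch repeat the same intervals == null branch, varying only the ShardBalancer they construct. A minimal sketch of how the duplication could be factored out (a hypothetical refactor, not part of this commit; makeActiveRegionShardIterator is an invented helper, and the Iterable<Shard> return type is assumed from the enclosing method):

private Iterable<Shard> makeActiveRegionShardIterator(final Walker walker, final ShardBalancer balancer) {
    // Shared logic: traverse all mapped reads unless explicit intervals were provided.
    if (intervals == null)
        return readsDataSource.createShardIteratorOverMappedReads(
                referenceDataSource.getReference().getSequenceDictionary(), balancer);
    else
        return readsDataSource.createShardIteratorOverIntervals(
                ((ActiveRegionWalker) walker).extendIntervals(intervals, this.genomeLocParser,
                        this.getReferenceDataSource().getReference()), balancer);
}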
View File: GATKArgumentCollection.java

@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
@ -448,5 +449,14 @@ public class GATKArgumentCollection {
@Hidden
public boolean generateShadowBCF = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
// --------------------------------------------------------------------------------------------------------------
//
// Experimental Active Region Traversal modes
//
// --------------------------------------------------------------------------------------------------------------
@Argument(fullName = "active_region_traversal_shard_type", shortName = "active_region_traversal_shard_type", doc = "Choose an experimental shard type for active region traversal, instead of the default LocusShard", required = false)
public ExperimentalActiveRegionShardType activeRegionShardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
}
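Usage note: with this argument, an ActiveRegionWalker run can opt into one of the experimental traversals from the command line. An illustrative invocation (the walker and all other arguments here are placeholders, not prescribed by this commit):

java -jar GenomeAnalysisTK.jar -T HaplotypeCaller -R ref.fasta -I input.bam -o out.vcf \
    -active_region_traversal_shard_type READSHARD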

View File: ActiveRegionShardDataProvider.java

@ -0,0 +1,58 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Collection;
/**
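* Experimental shard data provider that bundles a read-shard view and a locus-shard view
* over the same shard, for use by ExperimentalActiveRegionShardTraverseActiveRegions.
*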
* @author Joel Thibault
*/
public class ActiveRegionShardDataProvider extends ShardDataProvider {
final private ReadShardDataProvider readProvider;
final private LocusShardDataProvider locusProvider;
public ActiveRegionShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, StingSAMIterator reads, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard, genomeLocParser, reference, rods); // TODO: necessary?
readProvider = new ReadShardDataProvider(shard, genomeLocParser, reads, reference, rods);
locusProvider = new LocusShardDataProvider(shard, sourceInfo, genomeLocParser, locus, locusIterator, reference, rods);
}
public ReadShardDataProvider getReadShardDataProvider() {
return readProvider;
}
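// NB: the iterator argument is currently unused; the locus provider built in the constructor is returned as-is.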
public LocusShardDataProvider getLocusShardDataProvider(LocusIterator iterator) {
return locusProvider;
}
}

View File: LocusShardDataProvider.java

@ -22,11 +22,6 @@ public class LocusShardDataProvider extends ShardDataProvider {
*/
private final ReadProperties sourceInfo;
/**
* The parser, used to create and build new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
/**
* The particular locus for which data is provided. Should be contained within shard.getGenomeLocs().
*/
@ -45,7 +40,22 @@ public class LocusShardDataProvider extends ShardDataProvider {
public LocusShardDataProvider(Shard shard, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods) {
super(shard,genomeLocParser,reference,rods);
this.sourceInfo = sourceInfo;
this.genomeLocParser = genomeLocParser;
this.locus = locus;
this.locusIterator = locusIterator;
}
/**
* Create a data provider based on an existing provider, reusing its shard, reference, and
* reference-ordered data sources. Used only by ExperimentalReadShardTraverseActiveRegions.
* @param dataProvider the provider whose shard, reference, and reference-ordered data are reused
* @param sourceInfo information about the source of the reads
* @param genomeLocParser the parser used to create and build new GenomeLocs
* @param locus the particular locus for which data is provided
* @param locusIterator the iterator over loci covered by this shard
*/
public LocusShardDataProvider(ShardDataProvider dataProvider, ReadProperties sourceInfo, GenomeLocParser genomeLocParser, GenomeLoc locus, LocusIterator locusIterator) {
super(dataProvider.getShard(),genomeLocParser,dataProvider.getReference(),dataProvider.getReferenceOrderedData());
this.sourceInfo = sourceInfo;
this.locus = locus;
this.locusIterator = locusIterator;
}

View File: ActiveRegionShard.java

@ -0,0 +1,41 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMFileSpan;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.List;
import java.util.Map;
/**
* @author Joel Thibault
*/
public class ActiveRegionShard extends ReadShard {
public ActiveRegionShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
super(parser, readsDataSource, fileSpans, loci, isUnmapped);
}
}

View File: ActiveRegionShardBalancer.java

@ -0,0 +1,32 @@
/*
* Copyright (c) 2012, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
/**
* @author Joel Thibault
*/
public class ActiveRegionShardBalancer extends ReadShardBalancer {
// TODO ?
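// NB: no behavior is overridden yet; this balancer currently shards exactly like its ReadShardBalancer parent.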
}

View File: Shard.java

@ -40,7 +40,9 @@ import java.util.Map;
*/
public abstract class Shard implements HasGenomeLocation {
public enum ShardType {
READ, LOCUS
READ,
LOCUS,
ACTIVEREGION // Used only by ExperimentalActiveRegionShardTraverseActiveRegions
}
protected final GenomeLocParser parser; // incredibly annoying!

View File: LinearMicroScheduler.java

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.executive;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
@ -11,6 +12,8 @@ import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.traversals.ExperimentalActiveRegionShardTraverseActiveRegions;
import org.broadinstitute.sting.gatk.traversals.ExperimentalReadShardTraverseActiveRegions;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
import org.broadinstitute.sting.gatk.walkers.Walker;
@ -78,6 +81,18 @@ public class LinearMicroScheduler extends MicroScheduler {
}
windowMaker.close();
}
else if(shard.getShardType() == Shard.ShardType.ACTIVEREGION) {
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new ActiveRegionShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),getReadIterator(shard),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();
if ( walker.isDone() ) break;
}
windowMaker.close();
}
else {
ShardDataProvider dataProvider = new ReadShardDataProvider(shard,engine.getGenomeLocParser(),getReadIterator(shard),reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
@ -93,6 +108,14 @@ public class LinearMicroScheduler extends MicroScheduler {
final Object result = ((TraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}
else if( traversalEngine instanceof ExperimentalReadShardTraverseActiveRegions ) {
final Object result = ((ExperimentalReadShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}
else if( traversalEngine instanceof ExperimentalActiveRegionShardTraverseActiveRegions) {
final Object result = ((ExperimentalActiveRegionShardTraverseActiveRegions) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}
Object result = accumulator.finishTraversal();
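The three instanceof branches above differ only in the concrete traversal engine type; each calls an identically shaped endTraversal. A hypothetical way to collapse them (sketch only; the ActiveRegionTraversal interface is invented and is not part of this commit):

public interface ActiveRegionTraversal<M, T> {
    // Drain whatever active regions remain queued once all shards have been traversed.
    T endTraversal(Walker<M, T> walker, T sum);
}

// If the three traversal classes implemented it, LinearMicroScheduler would need a single branch:
if (traversalEngine instanceof ActiveRegionTraversal) {
    final Object result = ((ActiveRegionTraversal) traversalEngine).endTraversal(walker, accumulator.getReduceInit());
    accumulator.accumulate(null, result); // Assumes only used with StandardAccumulator
}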

View File: MicroScheduler.java

@ -41,6 +41,7 @@ import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.AutoFormattingTime;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;
@ -245,7 +246,12 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
} else if (walker instanceof ReadPairWalker) {
return new TraverseReadPairs();
} else if (walker instanceof ActiveRegionWalker) {
return new TraverseActiveRegions();
switch (engine.getArguments().activeRegionShardType) {
case LOCUSSHARD: return new TraverseActiveRegions();
case READSHARD: return new ExperimentalReadShardTraverseActiveRegions();
case ACTIVEREGIONSHARD: return new ExperimentalActiveRegionShardTraverseActiveRegions();
default: throw new UnsupportedOperationException("Unable to determine traversal type: unknown active region shard type " + engine.getArguments().activeRegionShardType);
}
} else {
throw new UnsupportedOperationException("Unable to determine traversal type, the walker is an unknown type.");
}

View File: GATKReport.java

@ -117,7 +117,7 @@ public class GATKReport {
* @param numColumns the number of columns in this table
*/
public void addTable(final String tableName, final String tableDescription, final int numColumns) {
addTable(tableName, tableDescription, numColumns, false, false);
addTable(tableName, tableDescription, numColumns, GATKReportTable.TableSortingWay.DO_NOT_SORT);
}
/**
@ -126,11 +126,10 @@ public class GATKReport {
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort the rows by the row ID
* @param sortByAllColumns whether to sort the rows by all columns starting from leftmost column
* @param sortingWay how the table's rows should be sorted
*/
public void addTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortByRowID, sortByAllColumns);
public void addTable(final String tableName, final String tableDescription, final int numColumns, final GATKReportTable.TableSortingWay sortingWay) {
GATKReportTable table = new GATKReportTable(tableName, tableDescription, numColumns, sortingWay);
tables.put(tableName, table);
}
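For illustration, call sites change shape as follows under the new signature (table names and column counts here are invented):

GATKReport report = new GATKReport();
// Three-argument form: rows keep their insertion order (DO_NOT_SORT).
report.addTable("ExampleTable", "An example table", 3);
// Replacement for the old addTable(..., false, true):
report.addTable("SortedTable", "Sorted left to right across all columns", 3,
        GATKReportTable.TableSortingWay.SORT_BY_COLUMN);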

View File: GATKReportTable.java

@ -46,8 +46,7 @@ public class GATKReportTable {
private final String tableName;
private final String tableDescription;
private final boolean sortByRowID;
private final boolean sortByAllColumns;
private final TableSortingWay sortingWay;
private List<Object[]> underlyingData;
private final List<GATKReportColumn> columnInfo;
@ -73,6 +72,12 @@ public class GATKReportTable {
public int index() { return index; }
}
public enum TableSortingWay {
SORT_BY_ROW,
SORT_BY_COLUMN,
DO_NOT_SORT
}
protected enum TableNameHeaderFields {
NAME(2),
DESCRIPTION(3);
@ -107,10 +112,7 @@ public class GATKReportTable {
tableDescription = (tableNameData.length <= TableNameHeaderFields.DESCRIPTION.index()) ? "" : tableNameData[TableNameHeaderFields.DESCRIPTION.index()]; // table may have no description! (and that's okay)
// when reading from a file, we do not re-sort the rows
sortByRowID = false;
// when reading from a file, we do not re-sort the rows
sortByAllColumns = false;
sortingWay = TableSortingWay.DO_NOT_SORT;
// initialize the data
final int nColumns = Integer.parseInt(tableData[TableDataHeaderFields.COLS.index()]);
@ -181,7 +183,7 @@ public class GATKReportTable {
* @param numColumns the number of columns in this table
*/
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns) {
this(tableName, tableDescription, numColumns, true, false);
this(tableName, tableDescription, numColumns, TableSortingWay.SORT_BY_ROW);
}
/**
@ -190,10 +192,9 @@ public class GATKReportTable {
* @param tableName the name of the table
* @param tableDescription the description of the table
* @param numColumns the number of columns in this table
* @param sortByRowID whether to sort rows by the row ID (instead of the order in which they were added)
* @param sortByAllColumns whether to sort rows by all columns (instead of the order in which they were added)
* @param sortingWay how rows should be sorted (rather than kept in the order in which they were added)
*/
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final boolean sortByRowID, final boolean sortByAllColumns) {
public GATKReportTable(final String tableName, final String tableDescription, final int numColumns, final TableSortingWay sortingWay) {
if ( !isValidName(tableName) ) {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
@ -204,8 +205,7 @@ public class GATKReportTable {
this.tableName = tableName;
this.tableDescription = tableDescription;
this.sortByRowID = sortByRowID;
this.sortByAllColumns = sortByAllColumns;
this.sortingWay = sortingWay;
underlyingData = new ArrayList<Object[]>(INITITAL_ARRAY_SIZE);
columnInfo = new ArrayList<GATKReportColumn>(numColumns);
@ -218,7 +218,7 @@ public class GATKReportTable {
* @param tableToCopy
*/
public GATKReportTable(final GATKReportTable tableToCopy, final boolean copyData) {
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortByRowID, tableToCopy.sortByAllColumns);
this(tableToCopy.getTableName(), tableToCopy.getTableDescription(), tableToCopy.getNumColumns(), tableToCopy.sortingWay);
for ( final GATKReportColumn column : tableToCopy.getColumnInfo() )
addColumn(column.getColumnName(), column.getFormat());
if ( copyData )
@ -569,56 +569,53 @@ public class GATKReportTable {
out.println();
// write the table body
if ( sortByAllColumns ) {
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
switch (sortingWay) {
case SORT_BY_COLUMN:
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
}
if( result != EQUAL) {
return result;
}
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
if( result != EQUAL) {
return result;
}
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
if( result != EQUAL) {
return result;
}
}
return result;
}
return result;
}
});
for ( final Object[] row : underlyingData )
writeRow(out, row);
} else if ( sortByRowID ) {
// make sure that there are exactly the correct number of ID mappings
if ( rowIdToIndex.size() != underlyingData.size() )
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
});
for ( final Object[] row : underlyingData )
writeRow(out, row);
break;
case SORT_BY_ROW:
// make sure that there are exactly the correct number of ID mappings
if ( rowIdToIndex.size() != underlyingData.size() )
throw new ReviewedStingException("There isn't a 1-to-1 mapping from row ID to index; this can happen when rows are not created consistently");
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
}
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
writeRow(out, underlyingData.get(rowKey.getValue()));
} else {
for ( final Object[] row : underlyingData )
writeRow(out, row);
}
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
throw new ReviewedStingException("Unable to sort the rows based on the row IDs because the ID Objects are of different types");
}
for ( final Map.Entry<Object, Integer> rowKey : sortedMap.entrySet() )
writeRow(out, underlyingData.get(rowKey.getValue()));
break;
case DO_NOT_SORT:
for ( final Object[] row : underlyingData )
writeRow(out, row);
}
out.println();
}
@ -735,53 +732,47 @@ public class GATKReportTable {
}
private List<Object[]> getOrderedRows() {
if ( sortByAllColumns ) {
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
if( result != EQUAL) {
return result;
switch (sortingWay) {
case SORT_BY_COLUMN:
Collections.sort(underlyingData, new Comparator<Object[]>() {
//INVARIANT the two arrays are of the same length and corresponding elements are of the same type
@Override
public int compare(Object[] objectArr1, Object[] objectArr2) {
final int EQUAL = 0;
int result = EQUAL;
int l = objectArr1.length;
for (int x = 0; x < l; x++) {
if (objectArr1[x] instanceof Integer) {
result = ((Integer)objectArr1[x]).compareTo((Integer)objectArr2[x]);
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
}
if( result != EQUAL) {
return result;
}
}
} else if (objectArr1[x] instanceof Double) {
result = ((Double)objectArr1[x]).compareTo((Double)objectArr2[x]);
if( result != EQUAL) {
return result;
}
} else { // default uses String comparison
result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
if( result != EQUAL) {
return result;
}
}
return result;
}
return result;
});
return underlyingData;
case SORT_BY_ROW:
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
return underlyingData;
}
});
return underlyingData;
} else if ( !sortByRowID ) {
return underlyingData;
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
for ( final int rowKey : sortedMap.values() )
orderedData.add(underlyingData.get(rowKey));
return orderedData;
default:
return underlyingData;
}
final TreeMap<Object, Integer> sortedMap;
try {
sortedMap = new TreeMap<Object, Integer>(rowIdToIndex);
} catch (ClassCastException e) {
return underlyingData;
}
final List<Object[]> orderedData = new ArrayList<Object[]>(underlyingData.size());
for ( final int rowKey : sortedMap.values() )
orderedData.add(underlyingData.get(rowKey));
return orderedData;
}
}
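The anonymous row comparator is now duplicated verbatim between write() and getOrderedRows(). A hedged sketch of hoisting it into one shared constant inside GATKReportTable (a possible cleanup, not part of this commit):

private static final Comparator<Object[]> ROW_COMPARATOR = new Comparator<Object[]>() {
    // INVARIANT: the two arrays have the same length and corresponding elements have the same type
    @Override
    public int compare(final Object[] objectArr1, final Object[] objectArr2) {
        for (int x = 0; x < objectArr1.length; x++) {
            final int result;
            if (objectArr1[x] instanceof Integer)
                result = ((Integer) objectArr1[x]).compareTo((Integer) objectArr2[x]);
            else if (objectArr1[x] instanceof Double)
                result = ((Double) objectArr1[x]).compareTo((Double) objectArr2[x]);
            else // default uses String comparison
                result = objectArr1[x].toString().compareTo(objectArr2[x].toString());
            if (result != 0)
                return result;
        }
        return 0;
    }
};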

View File: ExperimentalActiveRegionShardTraverseActiveRegions.java

@ -0,0 +1,309 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
public class ExperimentalActiveRegionShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ActiveRegionShardDataProvider> {
/**
* our logger, which we use to capture anything from this class
*/
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
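// NB: in the code shown here nothing ever adds to myReads; reads would need to be queued
// during traverse() for processFirstActiveRegion() below to have anything to place.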
@Override
public String getTraversalUnits() {
return "active regions";
}
@Override
public T traverse( final ActiveRegionWalker<M,T> walker,
final ActiveRegionShardDataProvider dataProvider,
T sum) {
logger.debug(String.format("ExperimentalActiveRegionShardTraverseActiveRegions.traverse: Shard is %s", dataProvider));
ReadShardDataProvider readDataProvider = dataProvider.getReadShardDataProvider();
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
final ReadView readView = new ReadView(readDataProvider);
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());
Shard readShard = readDataProvider.getShard();
SAMFileHeader header = readShard.getReadProperties().getHeader();
WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
LocusShardDataProvider locusDataProvider = dataProvider.getLocusShardDataProvider(iterator);
final LocusView locusView = new AllLocusView(locusDataProvider);
final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);
// We keep processing while the next reference location is within the interval
GenomeLoc prevLoc = null;
while( locusView.hasNext() ) {
final AlignmentContext locus = locusView.next();
final GenomeLoc location = locus.getLocation();
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
// we've moved across an interval boundary, so restart the profile
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
}
readDataProvider.getShard().getReadMetrics().incrementNumIterations();
// create reference context. Note that if we have a pileup of "extended events", the context will
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
final ReferenceContext refContext = referenceView.getReferenceContext(location);
// Iterate forward to get all reference ordered data covering this location
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
// Call the walkers isActive function for this locus and add them to the list to be integrated later
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
prevLoc = location;
printProgress(locus.getLocation());
}
locusDataProvider.close();
}
windowMaker.close();
updateCumulativeMetrics(readDataProvider.getShard());
if ( ! profile.isEmpty() )
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
// add active regions to queue of regions to process
// first check if can merge active regions over shard boundaries
if( !activeRegions.isEmpty() ) {
if( !workQueue.isEmpty() ) {
final ActiveRegion last = workQueue.getLast();
final ActiveRegion first = activeRegions.get(0);
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
workQueue.removeLast();
activeRegions.remove(first);
workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
}
}
workQueue.addAll( activeRegions );
}
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
// now process the active regions, where possible
boolean emptyQueue = false;
sum = processActiveRegions(walker, sum, emptyQueue);
return sum;
}
/**
* Band-pass filter the list of isActive probabilities, integrate them into contiguous
* active regions, and add these blocks of work to the work queue.
*
* @param profile the activity profile of isActive calls accumulated so far
* @param activeRegions the list to which the newly created active regions are added
* @param activeRegionExtension the number of bases by which each active region is extended
* @param maxRegionSize the maximum allowed size of a single active region
* @return a fresh, empty ActivityProfile to continue accumulating isActive calls
*/
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
final List<ActiveRegion> activeRegions,
final int activeRegionExtension,
final int maxRegionSize) {
if ( profile.isEmpty() )
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
}
// --------------------------------------------------------------------------------
//
// simple utility functions
//
// --------------------------------------------------------------------------------
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext locus, final GenomeLoc location) {
if ( walker.hasPresetActiveRegions() ) {
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
} else {
return walker.isActive( tracker, refContext, locus );
}
}
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
final LocusShardDataProvider dataProvider,
final LocusView locusView) {
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
return new ManagingReferenceOrderedView( dataProvider );
else
return (RodLocusView)locusView;
}
// --------------------------------------------------------------------------------
//
// code to handle processing active regions
//
// --------------------------------------------------------------------------------
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
if( walker.activeRegionOutStream != null ) {
writeActiveRegionsToStream(walker);
return sum;
} else {
return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
}
}
/**
* Write out each active region to the walker activeRegionOutStream
*
* @param walker the walker whose activeRegionOutStream receives each active region's location
*/
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
// Just want to output the active regions to a file, not actually process them
for( final ActiveRegion activeRegion : workQueue ) {
if( activeRegion.isActive ) {
walker.activeRegionOutStream.println( activeRegion.getLocation() );
}
}
}
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
final int lastRegionStart = workQueue.getLast().getLocation().getStart();
final String lastRegionContig = workQueue.getLast().getLocation().getContig();
// If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
// TODO can implement parallel traversal here
while( workQueue.peekFirst() != null ) {
ActiveRegion firstRegion = workQueue.getFirst();
final String firstRegionContig = firstRegion.getLocation().getContig();
if (emptyQueue || !firstRegionContig.equals(lastRegionContig)) { // compare contig names by value, not by reference
sum = processFirstActiveRegion(sum, walker);
}
else {
final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getExtendedMaxReadStop() : firstRegion.getMaxReadStop(); // extended walkers must wait for reads reaching into the extended region
if (lastRegionStart > firstRegionMaxReadStop) {
sum = processFirstActiveRegion( sum, walker );
}
else {
break;
}
}
}
return sum;
}
/**
* Process the first active region and all remaining reads which overlap
*
* Remove the first active region from the queue
* (NB: some reads associated with this active region may have already been processed)
*
* Remove all of these reads from the queue
* (NB: some may be associated with other active regions)
*
* @param sum the reduce result accumulated so far
* @param walker the active region walker whose map/reduce are invoked
* @return the updated reduce result after mapping the first region
*/
private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
final ActiveRegion firstRegion = workQueue.removeFirst();
GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
(walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {
if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
// The region with the highest amount of overlap is chosen as the primary region for the read (ties are broken by taking the rightmost region)
long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
ActiveRegion bestRegion = firstRegion;
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
bestRegion = otherRegionToTest;
}
}
bestRegion.add( firstRead );
// The read is also added to all other regions in which it overlaps but marked as non-primary
if( walker.wantsNonPrimaryReads() ) {
if( !bestRegion.equals(firstRegion) ) {
firstRegion.add(firstRead);
}
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( !bestRegion.equals(otherRegionToTest) ) {
// check for non-primary vs. extended
if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
otherRegionToTest.add( firstRead );
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
otherRegionToTest.add( firstRead );
}
}
}
}
// check for non-primary vs. extended
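// NB: this else-if is unreachable as written; its condition duplicates the if branch above.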
} else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
if ( walker.wantsNonPrimaryReads() ) {
firstRegion.add( firstRead );
}
} else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
firstRegion.add( firstRead );
}
myReads.removeFirst();
firstRead = myReads.peekFirst();
firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
}
logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
final M x = walker.map( firstRegion, null );
return walker.reduce(x, sum);
}
/**
* Special function called in LinearMicroScheduler to empty out the work queue.
* Ugly for now but will be cleaned up when we push this functionality more into the engine
*/
public T endTraversal( final Walker<M,T> walker, T sum) {
boolean emptyQueue = true;
return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
}
}
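The next file, ExperimentalReadShardTraverseActiveRegions, is almost line-for-line identical to the class above; the two differ only in their shard data provider type and in how each one builds a LocusShardDataProvider per WindowMaker window. A hypothetical sketch of a shared base class (names invented, not part of this commit):

public abstract class ExperimentalTraverseActiveRegionsBase<M, T, P extends ShardDataProvider>
        extends TraversalEngine<M, T, ActiveRegionWalker<M, T>, P> {
    // The only step that differs between the two shard types:
    protected abstract LocusShardDataProvider makeLocusProvider(P dataProvider,
                                                                WindowMaker.WindowMakerIterator iterator);
    // traverse(), processActiveRegions(), processFirstActiveRegion() and endTraversal()
    // would then live here once, shared by both subclasses.
}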

View File: ExperimentalReadShardTraverseActiveRegions.java

@ -0,0 +1,309 @@
package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.WalkerManager;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension;
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
public class ExperimentalReadShardTraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegionWalker<M,T>,ReadShardDataProvider> {
/**
* our logger, which we use to capture anything from this class
*/
protected final static Logger logger = Logger.getLogger(TraversalEngine.class);
private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
private final LinkedList<GATKSAMRecord> myReads = new LinkedList<GATKSAMRecord>();
@Override
public String getTraversalUnits() {
return "active regions";
}
@Override
public T traverse( final ActiveRegionWalker<M,T> walker,
final ReadShardDataProvider readDataProvider,
T sum) {
logger.debug(String.format("ExperimentalReadShardTraverseActiveRegions.traverse: Read Shard is %s", readDataProvider));
final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
final ReadView readView = new ReadView(readDataProvider);
final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions());
Shard readShard = readDataProvider.getShard();
SAMFileHeader header = readShard.getReadProperties().getHeader();
WindowMaker windowMaker = new WindowMaker(readShard, engine.getGenomeLocParser(),
readView.iterator(), readShard.getGenomeLocs(), SampleUtils.getSAMFileSamples(header));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
LocusShardDataProvider locusDataProvider = new LocusShardDataProvider(readDataProvider,
iterator.getSourceInfo(), engine.getGenomeLocParser(), iterator.getLocus(), iterator);
final LocusView locusView = new AllLocusView(locusDataProvider);
final LocusReferenceView referenceView = new LocusReferenceView( walker, locusDataProvider );
ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, locusDataProvider, locusView);
// We keep processing while the next reference location is within the interval
GenomeLoc prevLoc = null;
while( locusView.hasNext() ) {
final AlignmentContext locus = locusView.next();
final GenomeLoc location = locus.getLocation();
if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
// we've moved across an interval boundary, so restart the profile
profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
}
readDataProvider.getShard().getReadMetrics().incrementNumIterations();
// create reference context. Note that if we have a pileup of "extended events", the context will
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
final ReferenceContext refContext = referenceView.getReferenceContext(location);
// Iterate forward to get all reference ordered data covering this location
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
// Call the walkers isActive function for this locus and add them to the list to be integrated later
profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
prevLoc = location;
printProgress(locus.getLocation());
}
locusDataProvider.close();
}
windowMaker.close();
updateCumulativeMetrics(readDataProvider.getShard());
if ( ! profile.isEmpty() )
incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
// add active regions to queue of regions to process
// first check if can merge active regions over shard boundaries
if( !activeRegions.isEmpty() ) {
if( !workQueue.isEmpty() ) {
final ActiveRegion last = workQueue.getLast();
final ActiveRegion first = activeRegions.get(0);
if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
workQueue.removeLast();
activeRegions.remove(first);
workQueue.addLast(new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension));
}
}
workQueue.addAll( activeRegions );
}
logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
// now process the active regions, where possible
boolean emptyQueue = false;
sum = processActiveRegions(walker, sum, emptyQueue);
return sum;
}
/**
* Band-pass filter the list of isActive probabilities, integrate them into contiguous
* active regions, and add these blocks of work to the work queue.
*
* @param profile the activity profile of isActive calls accumulated so far
* @param activeRegions the list to which the newly created active regions are added
* @param activeRegionExtension the number of bases by which each active region is extended
* @param maxRegionSize the maximum allowed size of a single active region
* @return a fresh, empty ActivityProfile to continue accumulating isActive calls
*/
private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
final List<ActiveRegion> activeRegions,
final int activeRegionExtension,
final int maxRegionSize) {
if ( profile.isEmpty() )
throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
final ActivityProfile bandPassFiltered = profile.bandPassFilter();
activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
}
// --------------------------------------------------------------------------------
//
// simple utility functions
//
// --------------------------------------------------------------------------------
private final ActivityProfileResult walkerActiveProb(final ActiveRegionWalker<M,T> walker,
final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext locus, final GenomeLoc location) {
if ( walker.hasPresetActiveRegions() ) {
return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
} else {
return walker.isActive( tracker, refContext, locus );
}
}
private ReferenceOrderedView getReferenceOrderedView( final ActiveRegionWalker<M,T> walker,
final LocusShardDataProvider dataProvider,
final LocusView locusView) {
if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
return new ManagingReferenceOrderedView( dataProvider );
else
return (RodLocusView)locusView;
}
// --------------------------------------------------------------------------------
//
// code to handle processing active regions
//
// --------------------------------------------------------------------------------
private T processActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
if( walker.activeRegionOutStream != null ) {
writeActiveRegionsToStream(walker);
return sum;
} else {
return callWalkerMapOnActiveRegions(walker, sum, emptyQueue);
}
}
/**
* Write out each active region to the walker activeRegionOutStream
*
* @param walker the walker whose activeRegionOutStream receives each active region's location
*/
private void writeActiveRegionsToStream( final ActiveRegionWalker<M,T> walker ) {
// Just want to output the active regions to a file, not actually process them
for( final ActiveRegion activeRegion : workQueue ) {
if( activeRegion.isActive ) {
walker.activeRegionOutStream.println( activeRegion.getLocation() );
}
}
}
private T callWalkerMapOnActiveRegions( final ActiveRegionWalker<M,T> walker, T sum, boolean emptyQueue ) {
final int lastRegionStart = workQueue.getLast().getLocation().getStart();
final String lastRegionContig = workQueue.getLast().getLocation().getContig();
// If we've traversed sufficiently past the beginning of the workQueue we can unload those regions and process them
// TODO can implement parallel traversal here
while( workQueue.peekFirst() != null ) {
ActiveRegion firstRegion = workQueue.getFirst();
final String firstRegionContig = firstRegion.getLocation().getContig();
if (emptyQueue || !firstRegionContig.equals(lastRegionContig)) { // compare contig names by value, not by reference
sum = processFirstActiveRegion(sum, walker);
}
else {
final int firstRegionMaxReadStop = walker.wantsExtendedReads() ? firstRegion.getExtendedMaxReadStop() : firstRegion.getMaxReadStop(); // extended walkers must wait for reads reaching into the extended region
if (lastRegionStart > firstRegionMaxReadStop) {
sum = processFirstActiveRegion( sum, walker );
}
else {
break;
}
}
}
return sum;
}
/**
* Process the first active region and all remaining reads which overlap
*
* Remove the first active region from the queue
* (NB: some reads associated with this active region may have already been processed)
*
* Remove all of these reads from the queue
* (NB: some may be associated with other active regions)
*
* @param sum the reduce result accumulated so far
* @param walker the active region walker whose map/reduce are invoked
* @return the updated reduce result after mapping the first region
*/
private T processFirstActiveRegion( final T sum, final ActiveRegionWalker<M,T> walker ) {
final ActiveRegion firstRegion = workQueue.removeFirst();
GATKSAMRecord firstRead = myReads.peekFirst(); // don't remove because it may not be placed here
GenomeLoc firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
while ( firstRegion.getLocation().overlapsP( firstReadLoc ) ||
(walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc ))) {
if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
// The region with the highest amount of overlap is chosen as the primary region for the read (ties are broken by taking the rightmost region)
long maxOverlap = firstRegion.getLocation().sizeOfOverlap( firstReadLoc );
ActiveRegion bestRegion = firstRegion;
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( otherRegionToTest.getLocation().sizeOfOverlap(firstReadLoc) >= maxOverlap ) {
maxOverlap = otherRegionToTest.getLocation().sizeOfOverlap( firstReadLoc );
bestRegion = otherRegionToTest;
}
}
bestRegion.add( firstRead );
// The read is also added to all other regions in which it overlaps but marked as non-primary
if( walker.wantsNonPrimaryReads() ) {
if( !bestRegion.equals(firstRegion) ) {
firstRegion.add(firstRead);
}
for( final ActiveRegion otherRegionToTest : workQueue ) {
if( !bestRegion.equals(otherRegionToTest) ) {
// check for non-primary vs. extended
if ( otherRegionToTest.getLocation().overlapsP( firstReadLoc ) ) {
otherRegionToTest.add( firstRead );
} else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( firstReadLoc ) ) {
otherRegionToTest.add( firstRead );
}
}
}
}
// check for non-primary vs. extended
} else if( firstRegion.getLocation().overlapsP( firstReadLoc ) ) {
if ( walker.wantsNonPrimaryReads() ) {
firstRegion.add( firstRead );
}
} else if( walker.wantsExtendedReads() && firstRegion.getExtendedLoc().overlapsP( firstReadLoc )) {
firstRegion.add( firstRead );
}
myReads.removeFirst();
firstRead = myReads.peekFirst();
firstReadLoc = this.engine.getGenomeLocParser().createGenomeLoc( firstRead );
}
logger.debug(">> Map call with " + firstRegion.getReads().size() + " " + (firstRegion.isActive ? "active" : "inactive") + " reads @ " + firstRegion.getLocation() + " with full extent: " + firstRegion.getReferenceLoc());
final M x = walker.map( firstRegion, null );
return walker.reduce(x, sum);
}
/**
* Special function called in LinearMicroScheduler to empty out the work queue.
* Ugly for now but will be cleaned up when we push this functionality more into the engine
*/
public T endTraversal( final Walker<M,T> walker, T sum) {
boolean emptyQueue = true;
return processActiveRegions((ActiveRegionWalker<M,T>)walker, sum, emptyQueue);
}
}

View File: TraverseActiveRegions.java

@ -43,7 +43,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
public T traverse( final ActiveRegionWalker<M,T> walker,
final LocusShardDataProvider dataProvider,
T sum) {
logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider));
final LocusView locusView = new AllLocusView(dataProvider);
@ -227,7 +227,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
final GenomeLoc extendedLoc = workQueue.peek().getExtendedLoc();
if ( extendedLoc.getStop() < minStart || (currentContig != null && !workQueue.peek().getExtendedLoc().getContig().equals(currentContig))) {
final ActiveRegion activeRegion = workQueue.remove();
sum = processActiveRegion( activeRegion, myReads, workQueue, sum, walker );
sum = processActiveRegion( activeRegion, sum, walker );
} else {
break;
}
@ -236,9 +236,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
return sum;
}
private T processActiveRegion( final ActiveRegion activeRegion, final LinkedHashSet<GATKSAMRecord> reads, final Queue<ActiveRegion> workQueue, final T sum, final ActiveRegionWalker<M,T> walker ) {
private T processActiveRegion( final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker<M,T> walker ) {
final ArrayList<GATKSAMRecord> placedReads = new ArrayList<GATKSAMRecord>();
for( final GATKSAMRecord read : reads ) {
for( final GATKSAMRecord read : myReads ) {
final GenomeLoc readLoc = this.engine.getGenomeLocParser().createGenomeLoc( read );
if( activeRegion.getLocation().overlapsP( readLoc ) ) {
// The region with the highest amount of overlap is chosen as the primary region for the read (ties are broken by taking the rightmost region)
@ -278,7 +278,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
activeRegion.add( read );
}
}
reads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
myReads.removeAll( placedReads ); // remove all the reads which have been placed into their active region
// WARNING: This hashset relies on reads being exactly equal when they are placed in the list as when they are removed. So the ActiveRegionWalker can't modify the reads in any way.
logger.debug(">> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc());

View File: RecalibrationArgumentCollection.java

@ -207,7 +207,7 @@ public class RecalibrationArgumentCollection {
public GATKReportTable generateReportTable(final String covariateNames) {
GATKReportTable argumentsTable;
if(SORT_BY_ALL_COLUMNS) {
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, false, true);
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
}

View File: ErrorRatePerCycle.java

@ -124,7 +124,7 @@ public class ErrorRatePerCycle extends LocusWalker<Integer, Integer> {
public void initialize() {
report = new GATKReport();
report.addTable(reportName, reportDescription, 6, true, false);
report.addTable(reportName, reportDescription, 6, GATKReportTable.TableSortingWay.SORT_BY_ROW);
table = report.getTable(reportName);
table.addColumn("readgroup");
table.addColumn("cycle");

View File: VariantEvalReportWriter.java

@ -162,7 +162,7 @@ public class VariantEvalReportWriter {
// create the table
final String tableName = ve.getSimpleName();
final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), true, false);
report.addTable(tableName, tableDesc, 1 + stratifiers.size() + (scanner.hasMoltenField() ? 2 : datamap.size()), GATKReportTable.TableSortingWay.SORT_BY_ROW);
// grab the table, and add the columns we need to it
final GATKReportTable table = report.getTable(tableName);

View File: ActiveRegion.java

@ -26,6 +26,11 @@ public class ActiveRegion implements HasGenomeLocation {
private final GenomeLocParser genomeLocParser;
public final boolean isActive;
// maximum stop position of all reads whose start position falls within this active region
// Used only by ExperimentalReadShardTraverseActiveRegions
// NB: these reads may not be associated with this active region!
private int maxReadStop;
public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) {
this.activeRegionLoc = activeRegionLoc;
this.isActive = isActive;
@ -33,6 +38,7 @@ public class ActiveRegion implements HasGenomeLocation {
this.extension = extension;
extendedLoc = genomeLocParser.createGenomeLocOnContig(activeRegionLoc.getContig(), activeRegionLoc.getStart() - extension, activeRegionLoc.getStop() + extension);
fullExtentReferenceLoc = extendedLoc;
maxReadStop = activeRegionLoc.getStart();
}
@Override
@@ -93,6 +99,18 @@ public class ActiveRegion implements HasGenomeLocation {
public void remove( final GATKSAMRecord read ) { reads.remove( read ); }
public void removeAll( final ArrayList<GATKSAMRecord> readsToRemove ) { reads.removeAll( readsToRemove ); }
public void setMaxReadStop(int maxReadStop) {
this.maxReadStop = maxReadStop;
}
public int getMaxReadStop() {
return maxReadStop;
}
public int getExtendedMaxReadStop() {
return maxReadStop + extension;
}
public boolean equalExceptReads(final ActiveRegion other) {
if ( activeRegionLoc.compareTo(other.activeRegionLoc) != 0 ) return false;
if ( isActive != other.isActive ) return false;
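The code that maintains maxReadStop is not part of this hunk; a minimal sketch of how a traversal could keep the field current, assuming a hypothetical updateMaxReadStop helper:

    public void updateMaxReadStop( final GATKSAMRecord read ) {
        // per the field's contract above, call this only for reads whose start position lies in this region
        maxReadStop = Math.max( maxReadStop, read.getAlignmentEnd() );
    }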

View File

@@ -0,0 +1,14 @@
package org.broadinstitute.sting.utils.activeregion;
/**
 * Shard types available to the experimental active region traversals.
 *
 * @author thibault
 * @since 1/2/13
 */
public enum ExperimentalActiveRegionShardType {
LOCUSSHARD, // default/legacy type
READSHARD,
ACTIVEREGIONSHARD
}
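As the updated unit tests later in this commit show, callers select a shard type through GATKArgumentCollection before configuring the engine:

    final GATKArgumentCollection arguments = new GATKArgumentCollection();
    arguments.activeRegionShardType = ExperimentalActiveRegionShardType.READSHARD; // or LOCUSSHARD / ACTIVEREGIONSHARD
    engine.setArguments( arguments );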

View File

@@ -70,7 +70,7 @@ public class QuantizationInfo {
public GATKReportTable generateReportTable(boolean sortBycols) {
GATKReportTable quantizedTable;
if(sortBycols) {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, false, true);
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
}

View File

@@ -287,7 +287,7 @@ public class RecalUtils {
final GATKReportTable reportTable;
if (tableIndex <= RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.index) {
if(sortByCols) {
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), false, true);
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size(), GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
} else {
reportTable = new GATKReportTable("RecalTable" + reportTableIndex++, "", columnNames.size());
}

View File

@@ -32,6 +32,13 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Random;
import java.io.FileInputStream;
import java.io.DataInputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.ArrayList;
public class GATKReportUnitTest extends BaseTest {
@Test
@@ -77,6 +84,85 @@ public class GATKReportUnitTest extends BaseTest {
Assert.assertEquals(GATKReportColumn.isRightAlign(value), expected, "right align of '" + value + "'");
}
private GATKReportTable getTableWithRandomValues() {
    final Random number = new Random(123L);
    final int VALUESRANGE = 10;
    final int NUMROWS = 100;
    final GATKReportTable table = new GATKReportTable("testSortingTable", "table with random values sorted by columns", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
    table.addColumn("col1");
    table.addColumn("col2");
    table.addColumn("col3");
    // Populate the returned table directly so that SORT_BY_COLUMN is actually exercised
    for (int x = 0; x < NUMROWS; x++) {
        table.addRowIDMapping(String.valueOf(x), x);
        table.set(String.valueOf(x), "col1", number.nextInt(VALUESRANGE));
        table.set(String.valueOf(x), "col2", number.nextInt(VALUESRANGE));
        table.set(String.valueOf(x), "col3", number.nextInt(VALUESRANGE));
    }
    return table;
}
@Test(enabled = true)
public void testSortingByColumn() {
Assert.assertEquals(isSorted(getTableWithRandomValues()), true);
}
private boolean isSorted(final GATKReportTable table) {
    final File testingSortingTableFile = new File("myFile.txt");
    testingSortingTableFile.deleteOnExit();
    try {
        // Write the table to a file so we can inspect the row order it actually emits
        final PrintStream ps = new PrintStream(testingSortingTableFile);
        table.write(ps);
        ps.close();
    } catch (Exception e) {
        System.err.println("Error: " + e.getMessage());
        return false;
    }
    final ArrayList<int[]> rows = new ArrayList<int[]>();
    try {
        final BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(testingSortingTableFile)));
        String strLine;
        // Read the file line by line; the writer pads columns with runs of spaces and
        // emits header lines, so split on whitespace and skip any non-numeric line
        while ((strLine = br.readLine()) != null) {
            final String[] parts = strLine.trim().split("\\s+");
            try {
                final int[] row = new int[parts.length];
                for (int n = 0; n < parts.length; n++)
                    row[n] = Integer.parseInt(parts[n]);
                rows.add(row);
            } catch (NumberFormatException e) {
                // header, separator or blank line -- not a data row
            }
        }
        br.close();
    } catch (Exception e) {
        System.err.println("Error: " + e.getMessage());
        return false;
    }
    for (int x = 1; x < rows.size(); x++) {
        if (!checkRowOrder(rows.get(x - 1), rows.get(x)))
            return false;
    }
    return true;
}
private boolean checkRowOrder(final int[] row1, final int[] row2) {
    // Lexicographic comparison: the first column that differs decides the ordering
    for (int x = 0; x < row1.length; x++) {
        final int result = ((Integer) row1[x]).compareTo(row2[x]);
        if (result != 0)
            return result < 0;
    }
    return true; // identical rows are considered in order
}
private GATKReportTable makeBasicTable() {
GATKReport report = GATKReport.newSimpleReport("TableName", "sample", "value");
GATKReportTable table = report.getTable("TableName");
@@ -168,7 +254,7 @@ public class GATKReportUnitTest extends BaseTest {
table.set("RZ", "SomeFloat", 535646345.657453464576);
table.set("RZ", "TrueFalse", true);
report1.addTable("Table3", "blah", 1, true, false);
report1.addTable("Table3", "blah", 1, GATKReportTable.TableSortingWay.SORT_BY_ROW);
report1.getTable("Table3").addColumn("a");
report1.getTable("Table3").addRowIDMapping("q", 2);
report1.getTable("Table3").addRowIDMapping("5", 3);

View File

@@ -3,10 +3,16 @@ package org.broadinstitute.sting.gatk.traversals;
import com.google.java.contract.PreconditionError;
import net.sf.samtools.*;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.datasources.providers.ActiveRegionShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.sting.utils.activeregion.ExperimentalActiveRegionShardType;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -15,7 +21,6 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -28,6 +33,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.TestException;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@@ -95,7 +101,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
}
}
private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
private final TraverseActiveRegions<Integer, Integer> traverse = new TraverseActiveRegions<Integer, Integer>();
private final ExperimentalReadShardTraverseActiveRegions<Integer, Integer> readShardTraverse = new ExperimentalReadShardTraverseActiveRegions<Integer, Integer>();
private final ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer> activeRegionShardTraverse = new ExperimentalActiveRegionShardTraverseActiveRegions<Integer, Integer>();
private IndexedFastaSequenceFile reference;
private SAMSequenceDictionary dictionary;
@@ -106,13 +114,14 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
private static final String testBAM = "TraverseActiveRegionsUnitTest.bam";
private static final String testBAI = "TraverseActiveRegionsUnitTest.bai";
private static final ExperimentalActiveRegionShardType shardType = ExperimentalActiveRegionShardType.LOCUSSHARD;
@BeforeClass
private void init() throws FileNotFoundException {
reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
dictionary = reference.getSequenceDictionary();
genomeLocParser = new GenomeLocParser(dictionary);
// TODO: test shard boundaries
// TODO: reads with indels
// TODO: reads which span many regions
// TODO: reads which are partially between intervals (in/outside extension)
@@ -142,6 +151,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
reads.add(buildSAMRecord("boundary_1_post", "1", 1999, 2050));
reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
reads.add(buildSAMRecord("shard_boundary_1_pre", "1", 16300, 16385));
reads.add(buildSAMRecord("shard_boundary_1_post", "1", 16384, 16400));
reads.add(buildSAMRecord("shard_boundary_equal", "1", 16355, 16414));
reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
createBAM(reads);
@@ -153,7 +165,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
File indexFile = new File(testBAI);
indexFile.deleteOnExit();
SAMFileWriter out = new SAMFileWriterFactory().makeBAMWriter(reads.get(0).getHeader(), true, outFile);
SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(reads.get(0).getHeader(), true, outFile);
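// setCreateIndex(true) writes the companion .bai while the BAM is written; index creation requires a coordinate-sorted header (set in buildSAMRecord below)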
for (GATKSAMRecord read : ReadUtils.sortReadsByCoordinate(reads)) {
out.addAlignment(read);
}
@@ -171,8 +183,8 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
t.traverse(walker, dataProvider, 0);
for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM)) {
traverse(walker, dataProvider, 0);
activeIntervals.addAll(walker.isActiveCalls);
}
@@ -272,6 +284,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
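// e.g. shard_boundary_equal (1:16355-16414) overlaps 1:14908-16384 by 30bp (16355..16384)
// and 1:16385-16927 by 30bp (16385..16414); the tie breaks to the right-most region,
// hence Primary in 1:16385-16927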
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -286,6 +301,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@@ -309,6 +330,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -323,6 +347,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@@ -347,6 +377,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
// boundary_1_post: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
// extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
// outside_intervals: none
// shard_boundary_1_pre: Primary in 1:14908-16384, Non-Primary in 1:16385-16927
// shard_boundary_1_post: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// shard_boundary_equal: Non-Primary in 1:14908-16384, Primary in 1:16385-16927
// simple20: Primary in 20:10000-10100
Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -361,6 +394,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927));
verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal");
region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
verifyReadMapping(region, "simple20");
}
@@ -382,10 +421,10 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
}
private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
for (LocusShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
t.traverse(walker, dataProvider, 0);
for (ShardDataProvider dataProvider : createDataProviders(intervals, testBAM))
traverse(walker, dataProvider, 0);
t.endTraversal(walker, 0);
endTraversal(walker, 0);
return walker.mappedActiveRegions;
}
@@ -429,6 +468,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
SAMFileHeader header = ArtificialSAMUtils.createDefaultReadGroup(new SAMFileHeader(), "test", "test");
header.setSequenceDictionary(dictionary);
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
GATKSAMRecord record = new GATKSAMRecord(header);
record.setReadName(readName);
@@ -445,10 +485,12 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
return record;
}
private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
private List<ShardDataProvider> createDataProviders(List<GenomeLoc> intervals, String bamFile) {
GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
engine.setGenomeLocParser(genomeLocParser);
t.initialize(engine);
GATKArgumentCollection arguments = new GATKArgumentCollection();
arguments.activeRegionShardType = shardType; // make explicit
engine.setArguments(arguments);
Collection<SAMReaderID> samFiles = new ArrayList<SAMReaderID>();
SAMReaderID readerID = new SAMReaderID(new File(bamFile), new Tags());
@@ -456,13 +498,65 @@ public class TraverseActiveRegionsUnitTest extends BaseTest {
SAMDataSource dataSource = new SAMDataSource(samFiles, new ThreadAllocation(), null, genomeLocParser);
List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
}
List<ShardDataProvider> providers = new ArrayList<ShardDataProvider>();
switch (shardType) {
case LOCUSSHARD:
traverse.initialize(engine);
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new LocusShardBalancer())) {
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
}
}
break;
case READSHARD:
readShardTraverse.initialize(engine);
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ReadShardBalancer())) {
providers.add(new ReadShardDataProvider(shard, genomeLocParser, shard.iterator(), reference, new ArrayList<ReferenceOrderedDataSource>()));
}
break;
case ACTIVEREGIONSHARD:
activeRegionShardTraverse.initialize(engine);
for (Shard shard : dataSource.createShardIteratorOverIntervals(new GenomeLocSortedSet(genomeLocParser, intervals), new ActiveRegionShardBalancer())) {
for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, dataSource.seek(shard), shard.getGenomeLocs())) {
providers.add(new ActiveRegionShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, shard.iterator(), window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
}
}
break;
default: throw new TestException("Invalid shard type");
}
return providers;
}
private void traverse(DummyActiveRegionWalker walker, ShardDataProvider dataProvider, int i) {
switch (shardType) {
case LOCUSSHARD:
traverse.traverse(walker, (LocusShardDataProvider) dataProvider, i);
break;
case READSHARD:
readShardTraverse.traverse(walker, (ReadShardDataProvider) dataProvider, i);
break;
case ACTIVEREGIONSHARD:
activeRegionShardTraverse.traverse(walker, (ActiveRegionShardDataProvider) dataProvider, i);
break;
default: throw new TestException("Invalid shard type");
}
}
private void endTraversal(DummyActiveRegionWalker walker, int i) {
switch (shardType) {
case LOCUSSHARD:
traverse.endTraversal(walker, i);
break;
case READSHARD:
readShardTraverse.endTraversal(walker, i);
break;
case ACTIVEREGIONSHARD:
activeRegionShardTraverse.endTraversal(walker, i);
break;
default: throw new TestException("Invalid shard type");
}
}
}