Resolving merge conflicts.

This commit is contained in:
Ryan Poplin 2012-08-31 11:25:55 -04:00
commit ff6ebbf3fd
20 changed files with 323 additions and 95 deletions

View File

@ -262,7 +262,7 @@ public class SAMDataSource {
else {
// Choose a sensible default for the read buffer size. For the moment, we're picking 10000 reads per BAM per shard (which effectively
// will mean per-thread once ReadWalkers are parallelized) with a max cap of 250K reads in memory at once.
ReadShard.setReadBufferSize(Math.min(1000*samFiles.size(),250000));
ReadShard.setReadBufferSize(Math.min(10000*samFiles.size(),250000));
}
resourcePool = new SAMResourcePool(Integer.MAX_VALUE);

View File

@ -100,22 +100,29 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* @return The best-fit microscheduler.
*/
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
if (walker instanceof TreeReducible && threadAllocation.getNumCPUThreads() > 1) {
if(walker.isReduceByInterval())
if (threadAllocation.getNumCPUThreads() > 1) {
if (walker.isReduceByInterval())
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",threadAllocation.getNumCPUThreads()));
if ( walker instanceof ReadWalker )
if ( walker instanceof ReadWalker ) {
if ( ! (walker instanceof ThreadSafeMapReduce) ) badNT(engine, walker);
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
else
} else {
// TODO -- update test for when nano scheduling only is an option
if ( ! (walker instanceof TreeReducible) ) badNT(engine, walker);
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
}
} else {
if(threadAllocation.getNumCPUThreads() > 1)
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
}
}
private static void badNT(final GenomeAnalysisEngine engine, final Walker walker) {
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
}
/**
* Create a microscheduler given the reads and reference.
*

View File

@ -25,9 +25,13 @@
package org.broadinstitute.sting.gatk.filters;
import com.google.common.base.Function;
import com.google.common.collect.Collections2;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import java.util.Collection;
import java.util.List;
/**
* Manage filters and filter options. Any requests for basic filtering classes
@ -54,4 +58,26 @@ public class FilterManager extends PluginManager<ReadFilter> {
public Collection<Class<? extends ReadFilter>> getValues() {
return this.getPlugins();
}
/**
* Rather than use the default error message, print out a list of read filters as well.
* @param pluginCategory - string, the category of the plugin (e.g. read filter)
* @param pluginName - string, what we were trying to match (but failed to)
* @return - A wall of text with the default message, followed by a listing of available read filters
*/
@Override
protected String formatErrorMessage(String pluginCategory, String pluginName) {
List<Class<? extends ReadFilter>> availableFilters = this.getPluginsImplementing(ReadFilter.class);
Collection<String> availableFilterNames = Collections2.transform(availableFilters, new Function<Class<? extends ReadFilter>,String>(){
@Override
public String apply(final Class<? extends ReadFilter> input) {
return getName(input);
}
});
return String.format("Read filter %s not found. Available read filters:%n%s.%n%n%s",pluginName,
Utils.join(String.format(", "),availableFilterNames),
"Please consult the GATK Documentation (http://www.broadinstitute.org/gatk/gatkdocs/) for more information.");
}
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -48,9 +47,9 @@ public class VerifyingSamIterator implements StingSAMIterator {
if(cur.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX || cur.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START)
throw new UserException.MalformedBAM(last,String.format("read %s has inconsistent mapping information.",cur.format()));
GenomeLoc lastLoc = genomeLocParser.createGenomeLoc( last );
GenomeLoc curLoc = genomeLocParser.createGenomeLoc( cur );
return curLoc.compareTo(lastLoc) == -1;
return (last.getReferenceIndex() > cur.getReferenceIndex()) ||
(last.getReferenceIndex().equals(cur.getReferenceIndex()) &&
last.getAlignmentStart() > cur.getAlignmentStart());
}
}

View File

@ -27,16 +27,21 @@ package org.broadinstitute.sting.gatk.traversals;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.providers.*;
import org.broadinstitute.sting.gatk.datasources.providers.ReadBasedReferenceOrderedView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadReferenceView;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.ArrayList;
import java.util.List;
/**
* @author aaron
* @version 1.0
@ -50,12 +55,13 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
/** our log, which we want to capture anything from this class */
protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
private static final boolean DEBUG = false;
final NanoScheduler<SAMRecord, M, T> nanoScheduler;
private static final int MIN_GROUP_SIZE = 100;
final NanoScheduler<MapData, M, T> nanoScheduler;
public TraverseReadsNano(int nThreads) {
final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
final int mapGroupSize = bufferSize / 10 + 1;
nanoScheduler = new NanoScheduler<SAMRecord, M, T>(bufferSize, mapGroupSize, nThreads);
final int mapGroupSize = (int)Math.max(Math.ceil(bufferSize / 50.0 + 1), MIN_GROUP_SIZE);
nanoScheduler = new NanoScheduler<MapData, M, T>(bufferSize, mapGroupSize, nThreads);
}
@Override
@ -79,24 +85,42 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
if( !dataProvider.hasReads() )
throw new IllegalArgumentException("Unable to traverse reads; no read data is available.");
if ( dataProvider.hasReferenceOrderedData() )
throw new ReviewedStingException("Parallel read walkers currently don't support access to reference ordered data");
final ReadView reads = new ReadView(dataProvider);
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
nanoScheduler.setDebug(DEBUG);
final TraverseReadsMap myMap = new TraverseReadsMap(reads, reference, rodView, walker);
final TraverseReadsMap myMap = new TraverseReadsMap(walker);
final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
T result = nanoScheduler.execute(reads.iterator().iterator(), myMap, sum, myReduce);
T result = nanoScheduler.execute(aggregateMapData(dataProvider).iterator(), myMap, sum, myReduce);
// TODO -- how do we print progress?
//printProgress(dataProvider.getShard(), ???);
return result;
}
private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
final ReadView reads = new ReadView(dataProvider);
final ReadReferenceView reference = new ReadReferenceView(dataProvider);
final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
final List<MapData> mapData = new ArrayList<MapData>(); // TODO -- need size of reads
for ( final SAMRecord read : reads ) {
final ReferenceContext refContext = ! read.getReadUnmappedFlag()
? reference.getReferenceContext(read)
: null;
// if the read is mapped, create a metadata tracker
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0
? rodView.getReferenceOrderedDataForRead(read)
: null;
// update the number of reads we've seen
dataProvider.getShard().getReadMetrics().incrementNumIterations();
mapData.add(new MapData((GATKSAMRecord)read, refContext, tracker));
}
return mapData;
}
@Override
public void printOnTraversalDone() {
nanoScheduler.shutdown();
@ -116,36 +140,31 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
}
}
private class TraverseReadsMap implements MapFunction<SAMRecord, M> {
final ReadView reads;
final ReadReferenceView reference;
final ReadBasedReferenceOrderedView rodView;
private class MapData {
final GATKSAMRecord read;
final ReferenceContext refContext;
final RefMetaDataTracker tracker;
private MapData(GATKSAMRecord read, ReferenceContext refContext, RefMetaDataTracker tracker) {
this.read = read;
this.refContext = refContext;
this.tracker = tracker;
}
}
private class TraverseReadsMap implements MapFunction<MapData, M> {
final ReadWalker<M,T> walker;
private TraverseReadsMap(ReadView reads, ReadReferenceView reference, ReadBasedReferenceOrderedView rodView, ReadWalker<M, T> walker) {
this.reads = reads;
this.reference = reference;
this.rodView = rodView;
private TraverseReadsMap(ReadWalker<M, T> walker) {
this.walker = walker;
}
@Override
public M apply(final SAMRecord read) {
public M apply(final MapData data) {
if ( ! walker.isDone() ) {
// ReferenceContext -- the reference bases covered by the read
final ReferenceContext refContext = ! read.getReadUnmappedFlag() && reference != null
? reference.getReferenceContext(read)
: null;
// update the number of reads we've seen
//dataProvider.getShard().getReadMetrics().incrementNumIterations();
// if the read is mapped, create a metadata tracker
final RefMetaDataTracker tracker = read.getReferenceIndex() >= 0 ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
final boolean keepMeP = walker.filter(data.refContext, data.read);
if (keepMeP) {
return walker.map(refContext, (GATKSAMRecord) read, tracker);
return walker.map(data.refContext, data.read, data.tracker);
}
}

View File

@ -45,7 +45,7 @@ import java.text.NumberFormat;
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements TreeReducible<FlagStat.FlagStatus> {
public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements ThreadSafeMapReduce {
@Output
PrintStream out;
@ -193,11 +193,6 @@ public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatu
return sum.add(value);
}
@Override
public FlagStatus treeReduce(final FlagStatus value, final FlagStatus sum) {
return sum.add(value);
}
@Override
public void onTraversalDone(final FlagStatus result) {
out.println(result.toString());

View File

@ -93,7 +93,7 @@ import java.util.TreeSet;
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements TreeReducible<SAMFileWriter> {
public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements ThreadSafeMapReduce {
@Output(doc="Write output to this BAM filename instead of STDOUT", required = true)
SAMFileWriter out;
@ -245,9 +245,4 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
output.addAlignment(read);
return output;
}
@Override
public SAMFileWriter treeReduce(SAMFileWriter lhs, SAMFileWriter rhs) {
return lhs; // nothing to do
}
}

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers;
/**
* Root parallelism interface. Walkers that implement this
* declare that their map function is thread-safe and so multiple
* map calls can be run in parallel in the same JVM instance.
*/
public interface ThreadSafeMapReduce {
}

View File

@ -13,7 +13,7 @@ package org.broadinstitute.sting.gatk.walkers;
* shards of the data can reduce with each other, and the composite result
* can be reduced with other composite results.
*/
public interface TreeReducible<ReduceType> {
public interface TreeReducible<ReduceType> extends ThreadSafeMapReduce {
/**
* A composite, 'reduce of reduces' function.
* @param lhs 'left-most' portion of data in the composite reduce.

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.ThreadSafeMapReduce;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -41,12 +41,11 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
*/
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountReads extends ReadWalker<Integer, Integer> implements TreeReducible<Integer> {
public class CountReads extends ReadWalker<Integer, Integer> implements ThreadSafeMapReduce {
public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
return 1;
}
@Override public Integer reduceInit() { return 0; }
@Override public Integer reduce(Integer value, Integer sum) { return value + sum; }
@Override public Integer treeReduce(Integer lhs, Integer rhs) { return lhs + rhs; }
}

View File

@ -39,11 +39,11 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
import java.io.File;
import java.util.*;
@ -218,7 +218,7 @@ public class ApplyRecalibration extends RodWalker<Integer, Integer> implements T
String filterString = null;
// Annotate the new record with its VQSLOD and the worst performing annotation
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lodString); // use the String representation so that we don't lose precision on output
builder.attribute(VariantRecalibrator.VQS_LOD_KEY, lod);
builder.attribute(VariantRecalibrator.CULPRIT_KEY, recalDatum.getAttribute(VariantRecalibrator.CULPRIT_KEY));
for( int i = tranches.size() - 1; i >= 0; i-- ) {

View File

@ -286,7 +286,6 @@ public class VariantDataManager {
case INDEL:
case MIXED:
case SYMBOLIC:
case STRUCTURAL_INDEL:
return checkVariationClass( evalVC, VariantRecalibratorArgumentCollection.Mode.INDEL );
default:
return false;

View File

@ -427,7 +427,10 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
result = cmpContig;
} else {
if ( this.getStart() < that.getStart() ) result = -1;
if ( this.getStart() > that.getStart() ) result = 1;
else if ( this.getStart() > that.getStart() ) result = 1;
// these have the same start, so check the ends
else if ( this.getStop() < that.getStop() ) result = -1;
else if ( this.getStop() > that.getStop() ) result = 1;
}
}

View File

@ -277,7 +277,7 @@ public class PluginManager<PluginType> {
public PluginType createByName(String pluginName) {
Class<? extends PluginType> plugin = pluginsByName.get(pluginName);
if( plugin == null )
throw new UserException(String.format("Could not find %s with name: %s", pluginCategory,pluginName));
throw new UserException(formatErrorMessage(pluginCategory,pluginName));
try {
return plugin.newInstance();
} catch (Exception e) {
@ -330,4 +330,14 @@ public class PluginManager<PluginType> {
return pluginName;
}
/**
* Generate the error message for the plugin manager. The message is allowed to depend on the class.
* @param pluginCategory - string, the category of the plugin (e.g. read filter)
* @param pluginName - string, what we were trying to match (but failed to)
* @return error message text describing the error
*/
protected String formatErrorMessage(String pluginCategory, String pluginName ) {
return String.format("Could not find %s with name: %s", pluginCategory,pluginName);
}
}

View File

@ -43,7 +43,8 @@ import java.util.concurrent.*;
* Time: 9:47 AM
*/
public class NanoScheduler<InputType, MapType, ReduceType> {
private static Logger logger = Logger.getLogger(NanoScheduler.class);
private final static Logger logger = Logger.getLogger(NanoScheduler.class);
private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
final int bufferSize;
final int mapGroupSize;
@ -79,7 +80,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
this.mapGroupSize = mapGroupSize;
}
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads - 1);
this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
}
/**
@ -172,7 +173,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
if ( getnThreads() == 1 ) {
if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
return executeSingleThreaded(inputReader, map, initialValue, reduce);
} else {
return executeMultiThreaded(inputReader, map, initialValue, reduce);

View File

@ -457,7 +457,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
SNP,
MNP, // a multi-nucleotide polymorphism
INDEL,
STRUCTURAL_INDEL,
SYMBOLIC,
MIXED,
}
@ -531,7 +530,17 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
public boolean isStructuralIndel() {
return getType() == Type.STRUCTURAL_INDEL;
if ( getType() == Type.INDEL ) {
List<Integer> sizes = getIndelLengths();
if ( sizes != null ) {
for ( Integer length : sizes ) {
if ( length > MAX_ALLELE_SIZE_FOR_NON_SV ) {
return true;
}
}
}
}
return false;
}
/**
@ -716,7 +725,7 @@ public class VariantContext implements Feature { // to enable tribble integratio
* @return a list of indel lengths ( null if not of type indel or mixed )
*/
public List<Integer> getIndelLengths() {
if ( getType() != Type.INDEL && getType() != Type.MIXED && getType() != Type.STRUCTURAL_INDEL ) {
if ( getType() != Type.INDEL && getType() != Type.MIXED ) {
return null;
}
@ -1263,13 +1272,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
// is reserved for cases of multiple alternate alleles of different types). Therefore, if we've reached this point
// in the code (so we're not a SNP, MNP, or symbolic allele), we absolutely must be an INDEL.
// Because a number of structural variation callers write the whole alternate allele into the VCF where possible,
// this can result in insertion/deletion alleles of structural variant size, e.g. 151+. As of July 2012, we now
// classify these as structural events, rather than indel events, as we think differently about the mechanism,
// representation, and handling of these events. Check for this case here:
if ( ref.length() > MAX_ALLELE_SIZE_FOR_NON_SV || allele.length() > MAX_ALLELE_SIZE_FOR_NON_SV )
return Type.STRUCTURAL_INDEL;
return Type.INDEL;
// old incorrect logic:

View File

@ -121,7 +121,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
// test in the presence of a large spanning element
{
List<Feature> oneLargeSpan = new ArrayList<Feature>(handPickedFeatures);
oneLargeSpan.add(new BasicFeature(contig, 1, 100));
oneLargeSpan.add(new BasicFeature(contig, 1, 30));
createTestsForFeatures(oneLargeSpan);
}
@ -135,7 +135,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
// test in the presence of a partially spanning element at the end
{
List<Feature> partialSpanEnd = new ArrayList<Feature>(handPickedFeatures);
partialSpanEnd.add(new BasicFeature(contig, 10, 100));
partialSpanEnd.add(new BasicFeature(contig, 10, 30));
createTestsForFeatures(partialSpanEnd);
}
@ -165,7 +165,7 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
int featuresStart = 1; for ( final Feature f : features ) featuresStart = Math.min(featuresStart, f.getStart());
int featuresStop = 1; for ( final Feature f : features ) featuresStop = Math.max(featuresStop, f.getEnd());
for ( final int size : Arrays.asList(1, 5, 10, 100, 1000) ) {
for ( final int size : Arrays.asList(1, 5, 10, 100) ) {
final List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>();
// regularly spaced
for ( int start = featuresStart; start < featuresStop; start++) {
@ -256,11 +256,12 @@ public class ReadBasedReferenceOrderedViewUnitTest extends BaseTest {
}
// all 3 way pairwise tests
for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
tests.add(new Object[]{singleTest, testStateless});
}
//for ( List<ReadMetaDataTrackerRODStreamTest> singleTest : Utils.makePermutations(multiSiteTests, 3, false)) {
// tests.add(new Object[]{singleTest, testStateless});
//}
}
logger.warn("Creating " + tests.size() + " tests for ReadMetaDataTrackerTests");
return tests.toArray(new Object[][]{});
}

View File

@ -19,6 +19,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
@ -255,6 +256,90 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
}
}
////////////////////////////////////////////
// comprehensive LIBS/PileupElement tests //
////////////////////////////////////////////
private static final int IS_BEFORE_DELETED_BASE_FLAG = 1;
private static final int IS_BEFORE_DELETION_START_FLAG = 2;
private static final int IS_AFTER_DELETED_BASE_FLAG = 4;
private static final int IS_AFTER_DELETION_END_FLAG = 8;
private static final int IS_BEFORE_INSERTION_FLAG = 16;
private static final int IS_AFTER_INSERTION_FLAG = 32;
private static final int IS_NEXT_TO_SOFTCLIP_FLAG = 64;
private static class LIBSTest {
final String cigar;
final int readLength;
final List<Integer> offsets;
final List<Integer> flags;
private LIBSTest(final String cigar, final int readLength, final List<Integer> offsets, final List<Integer> flags) {
this.cigar = cigar;
this.readLength = readLength;
this.offsets = offsets;
this.flags = flags;
}
}
@DataProvider(name = "LIBSTest")
public Object[][] createLIBSTestData() {
return new Object[][]{
{new LIBSTest("1I", 1, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
{new LIBSTest("10I", 10, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
{new LIBSTest("2M2I2M", 6, Arrays.asList(0,1,4,5), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG,IS_AFTER_INSERTION_FLAG,0))},
{new LIBSTest("2M2I", 4, Arrays.asList(0,1), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG))},
//TODO -- uncomment these when LIBS is fixed
//{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
//{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
//{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
{new LIBSTest("1M2D2M", 3, Arrays.asList(0,1,2), Arrays.asList(IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG,0))},
{new LIBSTest("1S1M", 2, Arrays.asList(1), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
{new LIBSTest("1M1S", 2, Arrays.asList(0), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
{new LIBSTest("1S1M1I", 3, Arrays.asList(1), Arrays.asList(IS_BEFORE_INSERTION_FLAG | IS_NEXT_TO_SOFTCLIP_FLAG))}
};
}
@Test(dataProvider = "LIBSTest")
public void testLIBS(LIBSTest params) {
final int locus = 44367788;
SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
read.setCigarString(params.cigar);
// create the iterator by state with the fake reads and fake records
li = makeLTBS(Arrays.asList(read), createTestReadProperties());
int offset = 0;
while ( li.hasNext() ) {
AlignmentContext alignmentContext = li.next();
ReadBackedPileup p = alignmentContext.getBasePileup();
Assert.assertTrue(p.getNumberOfElements() == 1);
PileupElement pe = p.iterator().next();
final int flag = params.flags.get(offset);
Assert.assertEquals(pe.isBeforeDeletedBase(), (flag & IS_BEFORE_DELETED_BASE_FLAG) != 0);
Assert.assertEquals(pe.isBeforeDeletionStart(), (flag & IS_BEFORE_DELETION_START_FLAG) != 0);
Assert.assertEquals(pe.isAfterDeletedBase(), (flag & IS_AFTER_DELETED_BASE_FLAG) != 0);
Assert.assertEquals(pe.isAfterDeletionEnd(), (flag & IS_AFTER_DELETION_END_FLAG) != 0);
Assert.assertEquals(pe.isBeforeInsertion(), (flag & IS_BEFORE_INSERTION_FLAG) != 0);
Assert.assertEquals(pe.isAfterInsertion(), (flag & IS_AFTER_INSERTION_FLAG) != 0);
Assert.assertEquals(pe.isNextToSoftClip(), (flag & IS_NEXT_TO_SOFTCLIP_FLAG) != 0);
Assert.assertEquals(pe.getOffset(), params.offsets.get(offset).intValue());
offset++;
}
}
////////////////////////////////////////////////
// End comprehensive LIBS/PileupElement tests //
////////////////////////////////////////////////
private static ReadProperties createTestReadProperties() {
return new ReadProperties(
Collections.<SAMReaderID>emptyList(),

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
import java.util.Arrays;
public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
private static class VRTest {
@ -28,7 +28,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf",
"f360ce3eb2b0b887301be917a9843e2b", // tranches
"287fea5ea066bf3fdd71f5ce9b58eab3", // recal file
"356b9570817b9389da71fbe991d8b2f5"); // cut VCF
"afa297c743437551cc2bd36ddd6d6d75"); // cut VCF
@DataProvider(name = "VRTest")
public Object[][] createData1() {
@ -77,7 +77,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
"a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches
"74c10fc15f9739a938b7138909fbde04", // recal file
"62fda105e14b619a1c263855cf56af1d"); // cut VCF
"c30d163871a37f2bbf8ee7f761e870b4"); // cut VCF
@DataProvider(name = "VRBCFTest")
public Object[][] createVRBCFTest() {
@ -129,13 +129,13 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as .
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"64f576881e21323dd4078262604717a2"); // cut VCF
"b2c6827be592c24a4692b1753edc7d23"); // cut VCF
VRTest indelFiltered = new VRTest(
validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS
"b7589cd098dc153ec64c02dcff2838e4", // tranches
"a04a9001f62eff43d363f4d63769f3ee", // recal file
"af22c55d91394c56a222fd40d6d54781"); // cut VCF
"5d483fe1ba2ef36ee9e6c14cbd654706"); // cut VCF
@DataProvider(name = "VRIndelTest")
public Object[][] createTestVariantRecalibratorIndel() {
@ -193,7 +193,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
" -o %s" +
" -tranchesFile " + privateTestDir + "VQSR.mixedTest.tranches" +
" -recalFile " + privateTestDir + "VQSR.mixedTest.recal",
Arrays.asList("ec519e1f01459813dab57aefffc019e2"));
Arrays.asList("018b3a5cc7cf0cb5468c6a0c80ccaa8b"));
executeTest("testApplyRecalibrationSnpAndIndelTogether", spec);
}
}

View File

@ -16,6 +16,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
@ -211,4 +212,59 @@ public class GenomeLocUnitTest extends BaseTest {
Assert.assertEquals(cfg.gl1.reciprocialOverlapFraction(cfg.gl2), cfg.overlapFraction);
}
}
// -------------------------------------------------------------------------------------
//
// testing comparison, hashcode, and equals
//
// -------------------------------------------------------------------------------------
@DataProvider(name = "GenomeLocComparisons")
public Object[][] createGenomeLocComparisons() {
List<Object[]> tests = new ArrayList<Object[]>();
final int start = 10;
for ( int stop = start; stop < start + 3; stop++ ) {
final GenomeLoc g1 = genomeLocParser.createGenomeLoc("chr2", start, stop);
for ( final String contig : Arrays.asList("chr1", "chr2", "chr3")) {
for ( int start2 = start - 1; start2 <= stop + 1; start2++ ) {
for ( int stop2 = start2; stop2 < stop + 2; stop2++ ) {
final GenomeLoc g2 = genomeLocParser.createGenomeLoc(contig, start2, stop2);
ComparisonResult cmp = ComparisonResult.EQUALS;
if ( contig.equals("chr3") ) cmp = ComparisonResult.LESS_THAN;
else if ( contig.equals("chr1") ) cmp = ComparisonResult.GREATER_THAN;
else if ( start < start2 ) cmp = ComparisonResult.LESS_THAN;
else if ( start > start2 ) cmp = ComparisonResult.GREATER_THAN;
else if ( stop < stop2 ) cmp = ComparisonResult.LESS_THAN;
else if ( stop > stop2 ) cmp = ComparisonResult.GREATER_THAN;
tests.add(new Object[]{g1, g2, cmp});
}
}
}
}
return tests.toArray(new Object[][]{});
}
private enum ComparisonResult {
LESS_THAN(-1),
EQUALS(0),
GREATER_THAN(1);
final int cmp;
private ComparisonResult(int cmp) {
this.cmp = cmp;
}
}
@Test(dataProvider = "GenomeLocComparisons")
public void testGenomeLocComparisons(GenomeLoc g1, GenomeLoc g2, ComparisonResult expected) {
Assert.assertEquals(g1.compareTo(g2), expected.cmp, "Comparing genome locs failed");
Assert.assertEquals(g1.equals(g2), expected == ComparisonResult.EQUALS);
if ( expected == ComparisonResult.EQUALS )
Assert.assertEquals(g1.hashCode(), g2.hashCode(), "Equal genome locs don't have the same hash code");
}
}