Reengineer engine constructors

This commit is contained in:
Valentin Ruano-Rubio 2014-04-19 17:58:14 -04:00
parent 7455ac9796
commit 4e5850966a
8 changed files with 94 additions and 35 deletions

View File

@ -87,7 +87,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected final Config configuration;
protected final VariantAnnotatorEngine annotationEngine;
protected VariantAnnotatorEngine annotationEngine;
protected final int numberOfGenomes;
@ -108,26 +108,49 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, logger);
}
};
/**
* Construct a new genotyper engine that works on every sample known to the toolkit.
*
* <p>Delegates to the three-argument constructor, passing the sample names obtained from
* {@code resolveSampleNamesFromToolkit(toolkit)}.</p>
*
* @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
*/
protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration) {
this(toolkit,configuration,resolveSampleNamesFromToolkit(toolkit));
}
/**
 * Collects the names of all the samples passed to the tool into a new set.
 *
 * @param toolkit reference to the toolkit.
 *
 * @throws IllegalArgumentException if the {@code toolkit} is {@code null}.
 *
 * @return never {@code null}, but empty if there are no samples.
 */
private static Set<String> resolveSampleNamesFromToolkit(final GenomeAnalysisEngine toolkit) {
    if (toolkit == null) {
        throw new IllegalArgumentException("the toolkit cannot be null");
    }
    // Copy the names into a fresh set rather than handing out the sample DB's own collection.
    final Set<String> resolvedSampleNames = new LinkedHashSet<>(toolkit.getSampleDB().getSampleNames());
    return resolvedSampleNames;
}
/**
* Construct a new genotyper engine, on a specific subset of samples.
*
* @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
* @param sampleNames subset of sample to work on identified by their names. If {@code null}, the full toolkit
* sample set will be used instead.
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
*/
public GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration,
final VariantAnnotatorEngine annotationEngine, final Set<String> sampleNames) {
protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration,final Set<String> sampleNames) {
if (toolkit == null)
throw new IllegalArgumentException("the toolkit cannot be null");
if (configuration == null)
throw new IllegalArgumentException("the configuration cannot be null");
this.configuration = configuration;
this.annotationEngine = annotationEngine;
logger = Logger.getLogger(getClass());
this.toolkit = toolkit;
this.sampleNames = sampleNames != null ? sampleNames : toolkit.getSampleDB().getSampleNames();
@ -152,6 +175,15 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
this.logger = logger;
}
/**
* Changes the annotation engine for this genotyping-engine.
*
* <p>The given reference replaces the current one as is; no validation is performed.</p>
*
* @param annotationEngine the new annotation engine (can be {@code null}).
*/
public void setAnnotationEngine(final VariantAnnotatorEngine annotationEngine) {
this.annotationEngine = annotationEngine;
}
/**
* Returns a reference to the engine configuration
*
@ -350,7 +382,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
*
* @param conf
* @param PofF
* @return {@true} iff the variant is confidently called.
* @return {@code true} iff the variant is confidently called.
*/
protected final boolean confidentlyCalled(final double conf, final double PofF) {
return conf >= configuration.STANDARD_CONFIDENCE_FOR_CALLING ||

View File

@ -283,7 +283,9 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), UAC, annotationEngine,samples, verboseWriter);
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
genotypingEngine.setVerboseWriter(verboseWriter);
genotypingEngine.setAnnotationEngine(annotationEngine);
// initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo(UAC, annotationEngine, dbsnp);

View File

@ -51,7 +51,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -76,7 +75,6 @@ import java.util.*;
public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCollection> {
public static final String LOW_QUAL_FILTER_NAME = "LowQual";
private static final String GPSTRING = "GENERALPLOIDY";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
public static final String PL_FOR_ALL_SNP_ALLELES_KEY = "APL";
@ -90,7 +88,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
// the various loggers and writers
private final PrintStream verboseWriter;
private PrintStream verboseWriter;
private final GenomeLocParser genomeLocParser;
private final boolean BAQEnabledOnCMDLine;
@ -112,7 +110,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
*/
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration) {
this(toolkit, configuration, null, null, null);
this(toolkit, configuration, null);
}
/**
@ -120,28 +118,32 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
*
* @param toolkit reference to the enclosing genome analysis engine.
* @param configuration configuration object.
* @param annotationEngine variant annotation engine. If {@code null}, no annotations will be processed.
* @param sampleNames subset of sample names to work on. If {@code null}, the {@code toolkit} full sample set will be used.
* @param verboseWriter where to output additional verbose debugging information.
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
*/
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration,
final VariantAnnotatorEngine annotationEngine,
final Set<String> sampleNames, final PrintStream verboseWriter) {
final Set<String> sampleNames) {
super(toolkit,configuration,annotationEngine,sampleNames);
super(toolkit,configuration,sampleNames);
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser();
this.verboseWriter = verboseWriter;
determineGLModelsToUse();
initializeGenotypeLikelihoodsCalculationModels();
}
/**
* Changes the verbose output writer for this engine.
*
* <p>The given reference replaces the current writer as is; the previous writer is neither flushed
* nor closed by this method.</p>
*
* @param writer the new writer; it can be {@code null}.
*/
public void setVerboseWriter(final PrintStream writer) {
verboseWriter = writer;
}
/**
* Initialize {@link #glcm}.
*/

View File

@ -569,7 +569,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.CONTAMINATION_FRACTION_FILE = null;
simpleUAC.exactCallsLog = null;
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(getToolkit(), simpleUAC, null, samplesSet, null);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(getToolkit(), simpleUAC, samplesSet);
activeRegionEvaluationGenotyperEngine.setLogger(logger);
if( SCAC.CONTAMINATION_FRACTION_FILE != null )
@ -644,7 +644,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final MergeVariantsAcrossHaplotypes variantMerger = mergeVariantsViaLD ? new LDMerger(SCAC.DEBUG, 10, 1) : new MergeVariantsAcrossHaplotypes();
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(),SCAC, annotationEngine, null, variantMerger );
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(),SCAC);
genotypingEngine.setCrossHaplotypeEventMerger(variantMerger);
genotypingEngine.setAnnotationEngine(annotationEngine);
if ( bamWriter != null ) {
// we currently do not support multi-threaded BAM writing, so exception out

View File

@ -48,10 +48,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode;
@ -75,16 +73,36 @@ import java.util.*;
*/
public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> {
private final static Logger logger = Logger.getLogger(HaplotypeCallerGenotypingEngine.class);
private final static List<Allele> NO_CALL = Collections.singletonList(Allele.NO_CALL);
private final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration,
final VariantAnnotatorEngine annotationEngine, final Set<String> sampleNames,
final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger) {
super(toolkit,configuration,annotationEngine,sampleNames);
/**
* Construct a new haplotype-caller genotyping engine.
*
* <p>Both arguments are forwarded unchanged to the parent class constructor. This constructor does not
* set a cross-haplotype event merger.</p>
*
* @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration) {
super(toolkit,configuration);
}
/**
* Construct a new haplotype-caller genotyping engine, on a specific subset of samples.
*
* <p>All arguments are forwarded unchanged to the parent class constructor. This constructor does not
* set a cross-haplotype event merger.</p>
*
* @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
* @param sampleNames names of the samples to work on; {@code null} handling is left to the parent
*                    constructor.
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final Set<String> sampleNames) {
super(toolkit,configuration,sampleNames);
}
/**
* Changes the cross-haplotype variant merger used by this engine.
*
* <p>The merger is invoked when the engine is not running in GGA mode; in that situation an
* {@link IllegalStateException} is thrown if it is still {@code null}.</p>
*
* @param crossHaplotypeEventMerger new merger, can be {@code null}.
*/
public void setCrossHaplotypeEventMerger(final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger) {
this.crossHaplotypeEventMerger = crossHaplotypeEventMerger;
}
@ -227,8 +245,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
if( configuration.DEBUG ) {
logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
if( configuration.DEBUG && logger != null ) {
if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
}
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap(haplotypeReadMap, alleleMapper, configuration.getSampleContamination());
@ -314,6 +332,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
//cleanUpSymbolicUnassembledEvents( haplotypes ); // We don't make symbolic alleles so this isn't needed currently
if ( !in_GGA_mode ) {
// run the event merger if we're not in GGA mode
if (crossHaplotypeEventMerger == null)
throw new IllegalStateException(" no variant merger was provided at set-up when needed in GGA mode");
final boolean mergedAnything = crossHaplotypeEventMerger.merge(haplotypes, haplotypeReadMap, startPosKeySet, ref, refLoc);
if ( mergedAnything )
cleanUpSymbolicUnassembledEvents( haplotypes ); // the newly created merged events could be overlapping the unassembled events

View File

@ -167,7 +167,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
vcfWriter.writeHeader(vcfHeader);
// create the genotyping engine
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), new UnifiedArgumentCollection(),null,samples,null);
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), new UnifiedArgumentCollection(),samples);
// collect the actual rod bindings into a list for use later
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections )

View File

@ -121,7 +121,7 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
UG_engine = new UnifiedGenotypingEngine(getToolkit(), UAC, null, samples, null);
UG_engine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));

View File

@ -78,7 +78,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
final UnifiedArgumentCollection args = new UnifiedArgumentCollection();
final Set<String> fakeSamples = Collections.singleton("fake");
ugEngine = new UnifiedGenotypingEngine(engine, args, null, fakeSamples, null);
ugEngine = new UnifiedGenotypingEngine(engine, args,fakeSamples);
}
private UnifiedGenotypingEngine getEngine() {