Reengineer engine constructors

This commit is contained in:
Valentin Ruano-Rubio 2014-04-19 17:58:14 -04:00
parent 7455ac9796
commit 4e5850966a
8 changed files with 94 additions and 35 deletions

View File

@ -87,7 +87,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected final Config configuration; protected final Config configuration;
protected final VariantAnnotatorEngine annotationEngine; protected VariantAnnotatorEngine annotationEngine;
protected final int numberOfGenomes; protected final int numberOfGenomes;
@ -108,26 +108,49 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, logger); return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, logger);
} }
}; };
/** /**
* Construct a new genotyper engine. * Construct a new genotyper engine.
* *
* @param toolkit reference to the genome-analysis toolkit. * @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object. * @param configuration engine configuration object.
* @param annotationEngine reference to the annotation engine. If {@code null}, no annotations will be processed. *
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
*/
protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration) {
// No explicit sample subset is given: delegate to the three-argument constructor with a copy of
// every sample name known to the toolkit. A null toolkit is rejected by
// resolveSampleNamesFromToolkit (IllegalArgumentException) before the delegation takes place.
this(toolkit,configuration,resolveSampleNamesFromToolkit(toolkit));
}
/**
 * Collects the names of all the samples passed to the tool.
 *
 * <p>The names are copied into a new {@link LinkedHashSet}, so the returned set is a separate
 * object from the collection handed out by the toolkit's sample database.</p>
 *
 * @param toolkit reference to the toolkit.
 *
 * @return never {@code null}, but empty if there are no samples.
 *
 * @throws IllegalArgumentException if the {@code toolkit} is {@code null}.
 */
private static Set<String> resolveSampleNamesFromToolkit(final GenomeAnalysisEngine toolkit) {
    if (toolkit != null) {
        final Set<String> allSampleNames = new LinkedHashSet<>(toolkit.getSampleDB().getSampleNames());
        return allSampleNames;
    }
    throw new IllegalArgumentException("the toolkit cannot be null");
}
/**
* Construct a new genotyper engine, on a specific subset of samples.
*
* @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
* @param sampleNames subset of samples to work on, identified by their names. If {@code null}, the full toolkit * @param sampleNames subset of samples to work on, identified by their names. If {@code null}, the full toolkit
* sample set will be used instead. * sample set will be used instead.
* *
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}. * @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
*/ */
public GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration, protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration,final Set<String> sampleNames) {
final VariantAnnotatorEngine annotationEngine, final Set<String> sampleNames) {
if (toolkit == null) if (toolkit == null)
throw new IllegalArgumentException("the toolkit cannot be null"); throw new IllegalArgumentException("the toolkit cannot be null");
if (configuration == null) if (configuration == null)
throw new IllegalArgumentException("the configuration cannot be null"); throw new IllegalArgumentException("the configuration cannot be null");
this.configuration = configuration; this.configuration = configuration;
this.annotationEngine = annotationEngine;
logger = Logger.getLogger(getClass()); logger = Logger.getLogger(getClass());
this.toolkit = toolkit; this.toolkit = toolkit;
this.sampleNames = sampleNames != null ? sampleNames : toolkit.getSampleDB().getSampleNames(); this.sampleNames = sampleNames != null ? sampleNames : toolkit.getSampleDB().getSampleNames();
@ -152,6 +175,15 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
this.logger = logger; this.logger = logger;
} }
/**
 * Replaces the annotation engine used by this genotyping engine.
 *
 * <p>The reference is stored as given; no validation is performed.</p>
 *
 * @param annotationEngine the new annotation engine (can be {@code null}).
 *                         NOTE(review): presumably a {@code null} engine means that no annotations
 *                         are processed; confirm against the code that reads this field.
 */
public void setAnnotationEngine(final VariantAnnotatorEngine annotationEngine) {
this.annotationEngine = annotationEngine;
}
/** /**
* Returns a reference to the engine configuration * Returns a reference to the engine configuration
* *
@ -350,7 +382,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
* *
* @param conf * @param conf
* @param PofF * @param PofF
* @return {@true} iff the variant is confidently called. * @return {@code true} iff the variant is confidently called.
*/ */
protected final boolean confidentlyCalled(final double conf, final double PofF) { protected final boolean confidentlyCalled(final double conf, final double PofF) {
return conf >= configuration.STANDARD_CONFIDENCE_FOR_CALLING || return conf >= configuration.STANDARD_CONFIDENCE_FOR_CALLING ||

View File

@ -283,7 +283,9 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP"); verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), UAC, annotationEngine,samples, verboseWriter); genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
genotypingEngine.setVerboseWriter(verboseWriter);
genotypingEngine.setAnnotationEngine(annotationEngine);
// initialize the header // initialize the header
Set<VCFHeaderLine> headerInfo = getHeaderInfo(UAC, annotationEngine, dbsnp); Set<VCFHeaderLine> headerInfo = getHeaderInfo(UAC, annotationEngine, dbsnp);

View File

@ -51,7 +51,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcResult;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -76,7 +75,6 @@ import java.util.*;
public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCollection> { public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCollection> {
public static final String LOW_QUAL_FILTER_NAME = "LowQual"; public static final String LOW_QUAL_FILTER_NAME = "LowQual";
private static final String GPSTRING = "GENERALPLOIDY";
public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
public static final String PL_FOR_ALL_SNP_ALLELES_KEY = "APL"; public static final String PL_FOR_ALL_SNP_ALLELES_KEY = "APL";
@ -90,7 +88,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
// the various loggers and writers // the various loggers and writers
private final PrintStream verboseWriter; private PrintStream verboseWriter;
private final GenomeLocParser genomeLocParser; private final GenomeLocParser genomeLocParser;
private final boolean BAQEnabledOnCMDLine; private final boolean BAQEnabledOnCMDLine;
@ -112,7 +110,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
* @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}. * @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}.
*/ */
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration) { public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration) {
this(toolkit, configuration, null, null, null); this(toolkit, configuration, null);
} }
/** /**
@ -120,28 +118,32 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
* *
* @param toolkit reference to the enclosing genome analysis engine. * @param toolkit reference to the enclosing genome analysis engine.
* @param configuration configuration object. * @param configuration configuration object.
* @param annotationEngine variant annotation engine. If {@code null}, no annotations will be processed.
* @param sampleNames subset of sample names to work on. If {@code null}, the full sample set of the {@code toolkit} is used. * @param sampleNames subset of sample names to work on. If {@code null}, the full sample set of the {@code toolkit} is used.
* @param verboseWriter where to output additional verbose debugging information.
* *
* @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}. * @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}.
*/ */
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration, public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration,
final VariantAnnotatorEngine annotationEngine, final Set<String> sampleNames) {
final Set<String> sampleNames, final PrintStream verboseWriter) {
super(toolkit,configuration,annotationEngine,sampleNames); super(toolkit,configuration,sampleNames);
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser(); genomeLocParser = toolkit.getGenomeLocParser();
this.verboseWriter = verboseWriter;
determineGLModelsToUse(); determineGLModelsToUse();
initializeGenotypeLikelihoodsCalculationModels(); initializeGenotypeLikelihoodsCalculationModels();
} }
/**
 * Replaces the stream that receives this engine's verbose output.
 *
 * @param writer the new verbose output stream; may be {@code null}.
 */
public void setVerboseWriter(final PrintStream writer) {
    this.verboseWriter = writer;
}
/** /**
* Initialize {@link #glcm}. * Initialize {@link #glcm}.
*/ */

View File

@ -569,7 +569,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
simpleUAC.CONTAMINATION_FRACTION = 0.0; simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.CONTAMINATION_FRACTION_FILE = null; simpleUAC.CONTAMINATION_FRACTION_FILE = null;
simpleUAC.exactCallsLog = null; simpleUAC.exactCallsLog = null;
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(getToolkit(), simpleUAC, null, samplesSet, null); activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(getToolkit(), simpleUAC, samplesSet);
activeRegionEvaluationGenotyperEngine.setLogger(logger); activeRegionEvaluationGenotyperEngine.setLogger(logger);
if( SCAC.CONTAMINATION_FRACTION_FILE != null ) if( SCAC.CONTAMINATION_FRACTION_FILE != null )
@ -644,7 +644,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final MergeVariantsAcrossHaplotypes variantMerger = mergeVariantsViaLD ? new LDMerger(SCAC.DEBUG, 10, 1) : new MergeVariantsAcrossHaplotypes(); final MergeVariantsAcrossHaplotypes variantMerger = mergeVariantsViaLD ? new LDMerger(SCAC.DEBUG, 10, 1) : new MergeVariantsAcrossHaplotypes();
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(),SCAC, annotationEngine, null, variantMerger ); genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(),SCAC);
genotypingEngine.setCrossHaplotypeEventMerger(variantMerger);
genotypingEngine.setAnnotationEngine(annotationEngine);
if ( bamWriter != null ) { if ( bamWriter != null ) {
// we currently do not support multi-threaded BAM writing, so exception out // we currently do not support multi-threaded BAM writing, so exception out

View File

@ -48,10 +48,8 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
import com.google.java.contract.Ensures; import com.google.java.contract.Ensures;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode;
@ -75,16 +73,36 @@ import java.util.*;
*/ */
public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> { public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeCallerArgumentCollection> {
private final static Logger logger = Logger.getLogger(HaplotypeCallerGenotypingEngine.class);
private final static List<Allele> NO_CALL = Collections.singletonList(Allele.NO_CALL); private final static List<Allele> NO_CALL = Collections.singletonList(Allele.NO_CALL);
private final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger; private MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger;
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, /**
final VariantAnnotatorEngine annotationEngine, final Set<String> sampleNames, * {@inheritDoc}
final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger) { * @param toolkit {@inheritDoc}
super(toolkit,configuration,annotationEngine,sampleNames); * @param configuration {@inheritDoc}
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration) {
// Delegates to the two-argument superclass constructor. The cross-haplotype event merger is
// not set here; callers that need one must call setCrossHaplotypeEventMerger afterwards.
super(toolkit,configuration);
}
/**
 * Constructs a haplotype-caller genotyping engine that works on a specific subset of samples.
 *
 * <p>All arguments are passed unchanged to the superclass constructor. The cross-haplotype
 * event merger is not set here; see {@link #setCrossHaplotypeEventMerger}.</p>
 *
 * <p>Constructors are not inherited, so {@code {@inheritDoc}} would produce no text here;
 * the contract of the superclass constructor is restated instead.</p>
 *
 * @param toolkit reference to the genome-analysis toolkit.
 * @param configuration engine configuration object.
 * @param sampleNames subset of samples to work on, identified by their names. If {@code null},
 *                    the full toolkit sample set is used instead.
 *
 * @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
 */
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final Set<String> sampleNames) {
super(toolkit,configuration,sampleNames);
}
/**
* Changes the cross-haplotype variant merger used by this engine.
*
* @param crossHaplotypeEventMerger new merger, can be {@code null}.
*/
public void setCrossHaplotypeEventMerger(final MergeVariantsAcrossHaplotypes crossHaplotypeEventMerger) {
this.crossHaplotypeEventMerger = crossHaplotypeEventMerger; this.crossHaplotypeEventMerger = crossHaplotypeEventMerger;
} }
@ -227,8 +245,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper); final Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper(mergeMap, eventMapper);
if( configuration.DEBUG ) { if( configuration.DEBUG && logger != null ) {
logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); if (logger != null) logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
} }
final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap(haplotypeReadMap, alleleMapper, configuration.getSampleContamination()); final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap(haplotypeReadMap, alleleMapper, configuration.getSampleContamination());
@ -314,6 +332,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
//cleanUpSymbolicUnassembledEvents( haplotypes ); // We don't make symbolic alleles so this isn't needed currently //cleanUpSymbolicUnassembledEvents( haplotypes ); // We don't make symbolic alleles so this isn't needed currently
if ( !in_GGA_mode ) { if ( !in_GGA_mode ) {
// run the event merger if we're not in GGA mode // run the event merger if we're not in GGA mode
if (crossHaplotypeEventMerger == null)
throw new IllegalStateException(" no variant merger was provided at set-up when needed in GGA mode");
final boolean mergedAnything = crossHaplotypeEventMerger.merge(haplotypes, haplotypeReadMap, startPosKeySet, ref, refLoc); final boolean mergedAnything = crossHaplotypeEventMerger.merge(haplotypes, haplotypeReadMap, startPosKeySet, ref, refLoc);
if ( mergedAnything ) if ( mergedAnything )
cleanUpSymbolicUnassembledEvents( haplotypes ); // the newly created merged events could be overlapping the unassembled events cleanUpSymbolicUnassembledEvents( haplotypes ); // the newly created merged events could be overlapping the unassembled events

View File

@ -167,7 +167,7 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
vcfWriter.writeHeader(vcfHeader); vcfWriter.writeHeader(vcfHeader);
// create the genotyping engine // create the genotyping engine
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), new UnifiedArgumentCollection(),null,samples,null); genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), new UnifiedArgumentCollection(),samples);
// collect the actual rod bindings into a list for use later // collect the actual rod bindings into a list for use later
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections ) for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections )

View File

@ -121,7 +121,7 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
String trackName = variantCollection.variants.getName(); String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
UG_engine = new UnifiedGenotypingEngine(getToolkit(), UAC, null, samples, null); UG_engine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName))); hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));

View File

@ -78,7 +78,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
final UnifiedArgumentCollection args = new UnifiedArgumentCollection(); final UnifiedArgumentCollection args = new UnifiedArgumentCollection();
final Set<String> fakeSamples = Collections.singleton("fake"); final Set<String> fakeSamples = Collections.singleton("fake");
ugEngine = new UnifiedGenotypingEngine(engine, args, null, fakeSamples, null); ugEngine = new UnifiedGenotypingEngine(engine, args,fakeSamples);
} }
private UnifiedGenotypingEngine getEngine() { private UnifiedGenotypingEngine getEngine() {