The UG calculations are now driven by an independent engine.

This completely separates the genotyper walker from other walkers.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2758 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-02-01 20:57:31 +00:00
parent d8e75cf631
commit 506d39f751
7 changed files with 251 additions and 264 deletions

View File

@@ -1,51 +0,0 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import java.util.Set;
import java.util.HashSet;
/**
 * Class for maintaining the arguments that need to be passed to UnifiedGenotyper.runGenotyper()
 * (so that they only need to be computed one time).
 *
 * All fields are protected and are read directly by the UnifiedGenotyper walker,
 * so their names are part of this class's effective interface.
 */
public class UGCalculationArguments {
// should we annotate dbsnp? (set when a dbSNP rod is bound to the engine)
protected boolean annotateDbsnp = false;
// should we annotate hapmap2? (set when a rod named "hapmap2" is bound)
protected boolean annotateHapmap2 = false;
// should we annotate hapmap3? (set when a rod named "hapmap3" is bound)
protected boolean annotateHapmap3 = false;
// the unified argument collection
protected UnifiedArgumentCollection UAC = null;
// the model used for calculating genotypes; ThreadLocal because the walker
// may run multi-threaded and each traversal thread needs its own model
protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
// samples in input (empty in POOLED mode, where sample names are ignored)
protected Set<String> samples = new HashSet<String>();
}

View File

@@ -26,19 +26,13 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.util.*;
@ -65,7 +59,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
public PrintStream beagleWriter = null;
// the calculation arguments
private UGCalculationArguments UG_args = null;
private UnifiedGenotyperEngine UG_engine = null;
// Enable deletions in the pileup
public boolean includeReadsWithDeletionAtLoci() { return true; }
@ -88,172 +82,13 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / nBasesCallable; }
}
/**
* Creates the argument calculation object for the UnifiedGenotyper.
*
* @param toolkit the GATK Engine
* @param UAC the UnifiedArgumentCollection
* @return UG calculation arguments object
**/
public static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
return getUnifiedCalculationArguments(toolkit, UAC, null);
}
/**
* Creates the argument calculation object for the UnifiedGenotyper.
*
* @param toolkit the GATK Engine
* @param UAC the UnifiedArgumentCollection
* @param writer the genotype writer
* @return UG calculation arguments object
*
**/
private static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, GenotypeWriter writer) {
UGCalculationArguments UG_args = new UGCalculationArguments();
UG_args.UAC = UAC;
// deal with input errors
if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
}
if ( UAC.POOLSIZE < 1 && UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ) {
throw new IllegalArgumentException("Attempting to use the POOLED model with a pool size less than 1. Please set the pool size to an appropriate value.");
}
if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
// the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
}
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
// if we're supposed to assume a single sample, do so
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
UG_args.samples.add(UAC.ASSUME_SINGLE_SAMPLE);
else
UG_args.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
}
// in pooled mode we need to check that the format is acceptable
if ( UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED && writer != null ) {
// only multi-sample calls use Variations
if ( !writer.supportsMultiSample() )
throw new IllegalArgumentException("The POOLED model is not compatible with the specified format; try using VCF instead");
// when using VCF with multiple threads, we need to turn down the validation stringency so that writing temporary files will work
if ( toolkit.getArguments().numberOfThreads > 1 && writer instanceof VCFGenotypeWriter )
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
}
// check to see whether a dbsnp rod was included
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
ReferenceOrderedData rod = source.getReferenceOrderedData();
if ( rod.getType().equals(rodDbSNP.class) ) {
UG_args.annotateDbsnp = true;
}
if ( rod.getName().equals("hapmap2") ) {
UG_args.annotateHapmap2 = true;
}
if ( rod.getName().equals("hapmap3") ) {
UG_args.annotateHapmap3 = true;
}
}
return UG_args;
}
/**
* Compute at a given locus.
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @param UG_args the calculation argument collection
* @return the VariantCallContext object
*/
public static VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, UGCalculationArguments UG_args) {
char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) )
return null;
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
if ( rawContext.hasExceededMaxPileup() )
return null;
ReadBackedPileup rawPileup = rawContext.getBasePileup();
// filter the context based on min base and mapping qualities
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UG_args.UAC.MIN_BASE_QUALTY_SCORE, UG_args.UAC.MIN_MAPPING_QUALTY_SCORE);
// filter the context based on mapping quality and mismatch rate
pileup = filterPileup(pileup, refContext, UG_args.UAC);
// don't call when there is no coverage
if ( pileup.size() == 0 )
return null;
// are there too many deletions in the pileup?
if ( isValidDeletionFraction(UG_args.UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UG_args.UAC.MAX_DELETION_FRACTION )
return null;
// stratify the AlignmentContext and cut by sample
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UG_args.UAC.ASSUME_SINGLE_SAMPLE, (UG_args.UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
if ( stratifiedContexts == null )
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UG_args.UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
VariantCallContext call = UG_args.gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible
if ( call != null && call.variation != null && call.variation instanceof ArbitraryFieldsBacked ) {
// first off, we want to use the *unfiltered* context for the annotations
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
Map<String, String> annotations;
if ( UG_args.UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
else
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
}
return call;
}
// filter based on maximum mismatches and bad mates
private static ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext, UnifiedArgumentCollection UAC) {
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for ( PileupElement p : pileup ) {
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
filteredPileup.add(p);
}
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
}
private static boolean isValidDeletionFraction(double d) {
return ( d >= 0.0 && d <= 1.0 );
}
// ------------------------------------------------------------------------------------------------
//
// The following methods are walker-specific; don't use them unless you are a traversal.
// If you are a walker, stick to the static methods defined above.
//
// ------------------------------------------------------------------------------------------------
/**
* Initialize the samples, output, and genotype calculation model
*
**/
public void initialize() {
UG_args = getUnifiedCalculationArguments(getToolkit(), UAC, writer);
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, writer, verboseWriter, beagleWriter);
// initialize the writers
if ( verboseWriter != null ) {
@ -267,16 +102,16 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
}
if ( beagleWriter != null ) {
beagleWriter.print("marker alleleA alleleB");
for ( String sample : UG_args.samples )
for ( String sample : UG_engine.samples )
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
beagleWriter.println();
}
// initialize the header
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_args.samples, getHeaderInfo(UG_args));
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_engine.samples, getHeaderInfo());
}
private Set<VCFHeaderLine> getHeaderInfo(UGCalculationArguments UG_args) {
private Set<VCFHeaderLine> getHeaderInfo() {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// this is only applicable to VCF
@ -295,11 +130,11 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// annotation (INFO) fields from UnifiedGenotyper
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
if ( UG_args.annotateDbsnp )
if ( UG_engine.annotateDbsnp )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP Membership"));
if ( UG_args.annotateHapmap2 )
if ( UG_engine.annotateHapmap2 )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap2 Membership"));
if ( UG_args.annotateHapmap3 )
if ( UG_engine.annotateHapmap3 )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap3 Membership"));
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
@ -331,24 +166,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
* @return the VariantCallContext object
*/
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
if ( UG_args.gcm.get() == null ) {
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if ( writer != null ) {
if ( writer instanceof VCFGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if ( writer instanceof GLFGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if ( writer instanceof GeliGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
UG_args.gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(UG_args.samples, logger, UAC, format, verboseWriter, beagleWriter));
}
return runGenotyper(tracker, refContext, rawContext, UG_args);
return UG_engine.runGenotyper(tracker, refContext, rawContext);
}
public UGStatistics reduceInit() { return new UGStatistics(); }
@ -381,7 +199,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
}
// if we have a single-sample call (single sample from PointEstimate model returns no VariationCall data)
if ( value.variation == null || (!writer.supportsMultiSample() && UG_args.samples.size() <= 1) ) {
if ( value.variation == null || (!writer.supportsMultiSample() && UG_engine.samples.size() <= 1) ) {
writer.addGenotypeCall(value.genotypes.get(0));
}

View File

@@ -0,0 +1,227 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotator;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*;
import org.apache.log4j.Logger;
import java.util.*;
import java.io.PrintStream;
/**
 * Engine that drives the UnifiedGenotyper calculations independently of any
 * particular walker.  It holds the argument collection, the per-thread
 * genotype calculation model, the sample set, and the output writers, and
 * exposes {@link #runGenotyper} for per-locus variant calling.
 *
 * The protected fields (samples, annotateDbsnp, ...) are read directly by the
 * UnifiedGenotyper walker, so their names are part of this class's interface.
 */
public class UnifiedGenotyperEngine {
// should we annotate dbsnp? (set in initialize() when a dbSNP rod is bound)
protected boolean annotateDbsnp = false;
// should we annotate hapmap2? (set when a rod named "hapmap2" is bound)
protected boolean annotateHapmap2 = false;
// should we annotate hapmap3? (set when a rod named "hapmap3" is bound)
protected boolean annotateHapmap3 = false;
// the unified argument collection
protected UnifiedArgumentCollection UAC = null;
// the model used for calculating genotypes; ThreadLocal because the engine
// may be shared across traversal threads (lazily created in runGenotyper())
protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
// the various loggers and writers; any of these may be null when the engine
// is used outside the UnifiedGenotyper walker itself
protected Logger logger = null;
protected GenotypeWriter genotypeWriter = null;
protected PrintStream verboseWriter = null;
protected PrintStream beagleWriter = null;
// samples in input (left empty in POOLED mode, where sample names are ignored)
protected Set<String> samples = new HashSet<String>();
/**
 * Creates an engine with no logger or output writers (used by ancillary
 * walkers that only need calls, not emitted genotype records).
 *
 * @param toolkit the GATK engine
 * @param UAC     the unified argument collection
 */
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
initialize(toolkit, UAC, null, null, null, null);
}
/**
 * Creates a fully-configured engine.
 *
 * @param toolkit        the GATK engine
 * @param UAC            the unified argument collection
 * @param logger         logger for the calculation model (may be null)
 * @param genotypeWriter destination for genotype calls (may be null)
 * @param verboseWriter  verbose output stream (may be null)
 * @param beagleWriter   Beagle-format output stream (may be null)
 */
public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter) {
initialize(toolkit, UAC, logger, genotypeWriter, verboseWriter, beagleWriter);
}
/**
 * Validates the arguments, determines the sample set, and inspects the bound
 * RODs to decide which membership annotations (dbSNP/HapMap2/HapMap3) to emit.
 *
 * @throws IllegalArgumentException on inconsistent POOLED/pool-size settings,
 *         on ASSUME_SINGLE_SAMPLE with multiple threads, or on a writer
 *         format incompatible with POOLED mode
 */
private void initialize(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, GenotypeWriter genotypeWriter, PrintStream verboseWriter, PrintStream beagleWriter) {
this.UAC = UAC;
this.logger = logger;
this.genotypeWriter = genotypeWriter;
this.verboseWriter = verboseWriter;
this.beagleWriter = beagleWriter;
// deal with input errors
if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
}
if ( UAC.POOLSIZE < 1 && UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ) {
throw new IllegalArgumentException("Attempting to use the POOLED model with a pool size less than 1. Please set the pool size to an appropriate value.");
}
if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
// the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
}
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
// if we're supposed to assume a single sample, do so
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
this.samples.add(UAC.ASSUME_SINGLE_SAMPLE);
else
this.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
}
// in pooled mode we need to check that the format is acceptable
if ( UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED && genotypeWriter != null ) {
// only multi-sample calls use Variations
if ( !genotypeWriter.supportsMultiSample() )
throw new IllegalArgumentException("The POOLED model is not compatible with the specified format; try using VCF instead");
// when using VCF with multiple threads, we need to turn down the validation stringency so that writing temporary files will work
if ( toolkit.getArguments().numberOfThreads > 1 && genotypeWriter instanceof VCFGenotypeWriter )
((VCFGenotypeWriter)genotypeWriter).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
}
// check to see whether a dbsnp rod was included
// (also checks for rods named "hapmap2"/"hapmap3" to enable those annotations)
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
ReferenceOrderedData rod = source.getReferenceOrderedData();
if ( rod.getType().equals(rodDbSNP.class) ) {
this.annotateDbsnp = true;
}
if ( rod.getName().equals("hapmap2") ) {
this.annotateHapmap2 = true;
}
if ( rod.getName().equals("hapmap3") ) {
this.annotateHapmap3 = true;
}
}
}
/**
 * Compute at a given locus.
 *
 * Lazily creates this thread's GenotypeCalculationModel on first use, then
 * filters the pileup and delegates the actual calling to the model.
 *
 * @param tracker the meta data tracker
 * @param refContext the reference base
 * @param rawContext contextual information around the locus
 * @return the VariantCallContext object, or null when no call can be made
 *         here (irregular reference base, over-full or empty pileup, too
 *         many deletions, or unusable stratified contexts)
 */
public VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
if ( gcm.get() == null ) {
// infer the output format from the concrete writer type; default is VCF
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if ( genotypeWriter != null ) {
if ( genotypeWriter instanceof VCFGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if ( genotypeWriter instanceof GLFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if ( genotypeWriter instanceof GeliGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + genotypeWriter.getClass().getName());
}
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
}
char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) )
return null;
// don't try to call if we couldn't read in all reads at this locus (since it wasn't properly downsampled)
if ( rawContext.hasExceededMaxPileup() )
return null;
ReadBackedPileup rawPileup = rawContext.getBasePileup();
// filter the context based on min base and mapping qualities
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
// filter the context based on mapping quality and mismatch rate
pileup = filterPileup(pileup, refContext);
// don't call when there is no coverage
if ( pileup.size() == 0 )
return null;
// are there too many deletions in the pileup?
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
return null;
// stratify the AlignmentContext and cut by sample
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
if ( stratifiedContexts == null )
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
VariantCallContext call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible
if ( call != null && call.variation != null && call.variation instanceof ArbitraryFieldsBacked ) {
// first off, we want to use the *unfiltered* context for the annotations
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
else
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
}
return call;
}
// filter based on maximum mismatches and bad mates
// a read passes when (a) badly-mated reads are allowed, the read is unpaired,
// its mate is unmapped, or its mate maps to the same contig; AND (b) its
// mismatch count within the reference window is <= UAC.MAX_MISMATCHES
private ReadBackedPileup filterPileup(ReadBackedPileup pileup, ReferenceContext refContext) {
ArrayList<PileupElement> filteredPileup = new ArrayList<PileupElement>();
for ( PileupElement p : pileup ) {
if ( (UAC.USE_BADLY_MATED_READS || !p.getRead().getReadPairedFlag() || p.getRead().getMateUnmappedFlag() || p.getRead().getMateReferenceIndex() == p.getRead().getReferenceIndex()) &&
AlignmentUtils.mismatchesInRefWindow(p, refContext, true) <= UAC.MAX_MISMATCHES )
filteredPileup.add(p);
}
return new ReadBackedPileup(pileup.getLocation(), filteredPileup);
}
// true iff d is a usable fraction in [0.0, 1.0]; the deletion-fraction
// check in runGenotyper() is skipped entirely when this is false
private static boolean isValidDeletionFraction(double d) {
return ( d >= 0.0 && d <= 1.0 );
}
}

View File

@ -49,7 +49,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
@Argument(fullName="forcePreviousReadBasesToMatchRef", doc="Forces previous read bases to match the reference", required = false)
boolean readBasesMustMatchRef = false;
private UGCalculationArguments ug;
private UnifiedGenotyperEngine ug;
// private ReferenceContextWindow refWindow;
// private Set<BaseTransitionTable> conditionalTables;
private List<Boolean> usePreviousBases;
@ -62,7 +62,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.baseModel = BaseMismatchModel.THREE_STATE;
uac.ALL_BASES = true;
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
// refWindow = new ReferenceContextWindow(nPreviousBases);
usePreviousBases = new ArrayList<Boolean>();
previousBaseLoci = new ArrayList<GenomeLoc>();
@ -359,7 +359,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
if ( !BaseUtils.isRegularBase(ref.getBase()) )
return false;
VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
VariantCallContext calls = ug.runGenotyper(tracker,ref,context);
if ( calls == null || calls.genotypes == null)
return false;
return ( calls.genotypes.size() > 0 && !calls.genotypes.get(0).isVariant(ref.getBase()) && calls.genotypes.get(0).getNegLog10PError() > confidentRefThreshold );

View File

@ -12,7 +12,6 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.genotype.*;
import java.util.*;
/**
* Walker to calculate the number of mismatches, their base counts, and their quality sums at confidence ref sites"
@ -39,13 +38,13 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
@Argument(fullName="skip", doc = "Only display every skip eligable sites. Defaults to all sites", required = false)
int skip = 1;
private UGCalculationArguments ug;
private UnifiedGenotyperEngine ug;
public void initialize() {
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.baseModel = BaseMismatchModel.THREE_STATE;
uac.ALL_BASES = true;
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
// print the header
out.printf("loc ref genotype genotypeQ depth nMM qSumMM A C G T%n");
@ -54,7 +53,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
public String map( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
String result = null;
ReadBackedPileup pileup = context.getPileup();
ReadBackedPileup pileup = context.getBasePileup();
if ( locusIsUsable(tracker, ref, pileup, context) ) {
Genotype g = getGenotype(tracker, ref, context);
if ( g != null && g.isPointGenotype() )
@ -163,7 +162,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
}
private Genotype getGenotype( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context, ug);
VariantCallContext calls = ug.runGenotyper(tracker,ref,context);
if ( calls == null || calls.variation == null || calls.genotypes == null )
return null;
else {

View File

@ -5,10 +5,6 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.VariationCall;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.cmdLine.Argument;
@ -16,8 +12,6 @@ import org.broadinstitute.sting.playground.utils.NamedTable;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
import java.util.List;
import cern.jet.stat.Probability;
/**
@ -34,13 +28,13 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
@Argument(fullName="limit", shortName="lim", doc="The pValue limit for which a read group will be deemed to be a contaminant", required=false)
private Double LIMIT = 1e-9;
private UGCalculationArguments ug;
private UnifiedGenotyperEngine ug;
private NamedTable altTable;
public void initialize() {
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.CONFIDENCE_THRESHOLD = 50;
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
altTable = new NamedTable();
}
@ -58,7 +52,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
int altCount = 0;
int totalCount = 0;
ReadBackedPileup pileup = context.getPileup();
ReadBackedPileup pileup = context.getBasePileup();
int refIndex = BaseUtils.simpleBaseToBaseIndex(ref.getBase());
for (byte base : pileup.getBases() ) {
@ -73,7 +67,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
double altBalance = ((double) altCount)/((double) totalCount);
if (altBalance > 0.70) {
VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker, ref, context, ug);
VariantCallContext ugResult = ug.runGenotyper(tracker, ref, context);
if (ugResult != null && ugResult.genotypes != null && ugResult.genotypes.size() > 0) {
return ugResult.genotypes.get(0).isHet();

View File

@ -26,14 +26,14 @@ import java.util.HashMap;
public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Integer> {
HashMap<String,Long> counts = new HashMap<String,Long>();
private UGCalculationArguments ug;
private UnifiedGenotyperEngine ug;
private NamedTable altTable;
public void initialize() {
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.CONFIDENCE_THRESHOLD = 50;
uac.ALL_BASES = true;
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
ug = new UnifiedGenotyperEngine(getToolkit(), uac);
altTable = new NamedTable();
}
@ -46,7 +46,7 @@ public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Int
char nextBase = Character.toUpperCase(contextBases[contextBases.length - 1]);
if (contextBases.length == 3 && refBase != 'N' && pileup.getBases() != null && pileup.getSecondaryBases() != null) {
VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
VariantCallContext ugResult = ug.runGenotyper(tracker,ref,context);
if (ugResult != null && ugResult.variation != null) {
Genotype res = ugResult.genotypes.get(0);
String call = res.getBases();