1. For Matt: JIRA GSA-270. Other walkers needing to call into the Unified Genotyper now use static methods (e.g. runGenotyper()) instead of calling initialize and map.

2. Set the default confidence cutoff to 50 (instead of 0).



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2752 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-31 21:14:57 +00:00
parent e964660df3
commit f6da57dc79
8 changed files with 223 additions and 174 deletions

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.genotyper;
import java.util.Set;
import java.util.HashSet;
/**
* Class for maintaining the arguments that need to be passed to UnifiedGenotyper.runGenotyper()
* (so that they only need to be computed one time)
*/
public class UGCalculationArguments {
// should we annotate dbsnp?
protected boolean annotateDbsnp = false;
// should we annotate hapmap2?
protected boolean annotateHapmap2 = false;
// should we annotate hapmap3?
protected boolean annotateHapmap3 = false;
// the unified argument collection
protected UnifiedArgumentCollection UAC = null;
// the model used for calculating genotypes
protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
// samples in input
protected Set<String> samples = new HashSet<String>();
}

View File

@ -67,7 +67,7 @@ public class UnifiedArgumentCollection {
// control the various parameters to be used
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
public double CONFIDENCE_THRESHOLD = 0.0;
public double CONFIDENCE_THRESHOLD = 50.0;
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
public int MIN_BASE_QUALTY_SCORE = 10;

View File

@ -64,18 +64,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
public PrintStream beagleWriter = null;
// the model used for calculating genotypes
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
// samples in input
private Set<String> samples = new HashSet<String>();
// should we annotate dbsnp?
private boolean annotateDbsnp = false;
// how about hapmap2?
private boolean annotateHapmap2 = false;
// how about hapmap3?
private boolean annotateHapmap3 = false;
// the calculation arguments
private UGCalculationArguments UG_args = null;
// Enable deletions in the pileup
public boolean includeReadsWithDeletionAtLoci() { return true; }
@ -98,24 +88,32 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / nBasesCallable; }
}
/**
* Sets the argument collection for the UnifiedGenotyper.
* To be used with walkers that call the UnifiedGenotyper's map function
* and consequently can't set these arguments on the command-line
*
* @param UAC the UnifiedArgumentCollection
* Creates the argument calculation object for the UnifiedGenotyper.
*
* @param toolkit the GATK Engine
* @param UAC the UnifiedArgumentCollection
* @return UG calculation arguments object
**/
public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) {
this.UAC = UAC;
initialize();
public static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
return getUnifiedCalculationArguments(toolkit, UAC, null, null);
}
/**
* Initialize the samples, output, and genotype calculation model
* Creates the argument calculation object for the UnifiedGenotyper.
*
* @param toolkit the GATK Engine
* @param UAC the UnifiedArgumentCollection
* @param writer the genotype writer
* @param beagleWriter the beagle writer
* @return UG calculation arguments object
*
**/
public void initialize() {
private static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, GenotypeWriter writer, PrintStream beagleWriter) {
UGCalculationArguments UG_args = new UGCalculationArguments();
UG_args.UAC = UAC;
// deal with input errors
if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
@ -126,7 +124,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( beagleWriter != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
}
if ( getToolkit().getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
// the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
}
@ -135,12 +133,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
// if we're supposed to assume a single sample, do so
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
UG_args.samples.add(UAC.ASSUME_SINGLE_SAMPLE);
else
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
// for ( String sample : samples )
// logger.debug("SAMPLE: " + sample);
UG_args.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
}
// in pooled mode we need to check that the format is acceptable
@ -150,121 +145,38 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
throw new IllegalArgumentException("The POOLED model is not compatible with the specified format; try using VCF instead");
// when using VCF with multiple threads, we need to turn down the validation stringency so that writing temporary files will work
if ( getToolkit().getArguments().numberOfThreads > 1 && writer instanceof VCFGenotypeWriter )
if ( toolkit.getArguments().numberOfThreads > 1 && writer instanceof VCFGenotypeWriter )
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
}
// initialize the writers
if ( verboseWriter != null ) {
if(UAC.genotypeModel != GenotypeCalculationModel.Model.EM_POINT_ESTIMATE) {
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
for ( char altAllele : BaseUtils.BASES ) {
char base = Character.toUpperCase(altAllele);
header.append("POfDGivenAFFor" + base + "\t");
header.append("PosteriorAFFor" + base + "\t");
}
verboseWriter.println(header);
}
}
if ( beagleWriter != null ) {
beagleWriter.print("marker alleleA alleleB");
for ( String sample : samples )
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
beagleWriter.println();
}
// check to see whether a dbsnp rod was included
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
ReferenceOrderedData rod = source.getReferenceOrderedData();
if ( rod.getType().equals(rodDbSNP.class) ) {
annotateDbsnp = true;
UG_args.annotateDbsnp = true;
}
if ( rod.getName().equals("hapmap2") ) {
annotateHapmap2 = true;
UG_args.annotateHapmap2 = true;
}
if ( rod.getName().equals("hapmap3") ) {
annotateHapmap3 = true;
UG_args.annotateHapmap3 = true;
}
}
// *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. ***
if ( writer == null )
return;
// *** If we got here, then we were instantiated by the GATK engine ***
// initialize the header
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, getHeaderInfo());
}
private Set<VCFHeaderLine> getHeaderInfo() {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// this is only applicable to VCF
if ( !(writer instanceof VCFGenotypeWriter) )
return headerInfo;
// first, the basic info
headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper"));
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
// annotation (INFO) fields from VariantAnnotator
if ( UAC.ALL_ANNOTATIONS )
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
else
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
if ( annotateDbsnp )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership"));
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
// FORMAT fields if not in POOLED mode
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings());
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_COUNT_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_NUMBER_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total number of alleles in called genotypes"));
}
// all of the arguments from the argument collection
Set<Object> args = new HashSet<Object>();
args.add(UAC);
args.addAll(getToolkit().getFilters());
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(args);
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue()));
return headerInfo;
return UG_args;
}
/**
* Compute at a given locus.
*
* @param tracker the meta data tracker
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @param UG_args the calculation argument collection
* @return the VariantCallContext object
*/
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
if ( gcm.get() == null ) {
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if ( writer != null ) {
if ( writer instanceof VCFGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if ( writer instanceof GLFGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if ( writer instanceof GeliGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
}
public static VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, UGCalculationArguments UG_args) {
char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) )
@ -277,28 +189,28 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
ReadBackedPileup rawPileup = rawContext.getBasePileup();
// filter the context based on min base and mapping qualities
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UG_args.UAC.MIN_BASE_QUALTY_SCORE, UG_args.UAC.MIN_MAPPING_QUALTY_SCORE);
// filter the context based on mapping quality and mismatch rate
pileup = filterPileup(pileup, refContext, UAC);
pileup = filterPileup(pileup, refContext, UG_args.UAC);
// don't call when there is no coverage
if ( pileup.size() == 0 )
return null;
// are there too many deletions in the pileup?
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
if ( isValidDeletionFraction(UG_args.UAC.MAX_DELETION_FRACTION) &&
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UG_args.UAC.MAX_DELETION_FRACTION )
return null;
// stratify the AlignmentContext and cut by sample
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UG_args.UAC.ASSUME_SINGLE_SAMPLE, (UG_args.UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
if ( stratifiedContexts == null )
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
VariantCallContext call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UG_args.UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
VariantCallContext call = UG_args.gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible
if ( call != null && call.variation != null && call.variation instanceof ArbitraryFieldsBacked ) {
@ -306,10 +218,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
Map<String, String> annotations;
if ( UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
if ( UG_args.UAC.ALL_ANNOTATIONS )
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
else
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
}
@ -334,9 +246,117 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// ------------------------------------------------------------------------------------------------
//
// Reduce
// The following methods are walker-specific; don't use them unless you are a traversal.
// If you are a walker, stick to the static methods defined above.
//
// ------------------------------------------------------------------------------------------------
/**
* Initialize the samples, output, and genotype calculation model
*
**/
public void initialize() {
UG_args = getUnifiedCalculationArguments(getToolkit(), UAC, writer, beagleWriter);
// initialize the writers
if ( verboseWriter != null ) {
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
for ( char altAllele : BaseUtils.BASES ) {
char base = Character.toUpperCase(altAllele);
header.append("POfDGivenAFFor" + base + "\t");
header.append("PosteriorAFFor" + base + "\t");
}
verboseWriter.println(header);
}
}
if ( beagleWriter != null ) {
beagleWriter.print("marker alleleA alleleB");
for ( String sample : UG_args.samples )
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
beagleWriter.println();
}
// initialize the header
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_args.samples, getHeaderInfo(UG_args));
}
private Set<VCFHeaderLine> getHeaderInfo(UGCalculationArguments UG_args) {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
// this is only applicable to VCF
if ( !(writer instanceof VCFGenotypeWriter) )
return headerInfo;
// first, the basic info
headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper"));
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
// annotation (INFO) fields from VariantAnnotator
if ( UAC.ALL_ANNOTATIONS )
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
else
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
// annotation (INFO) fields from UnifiedGenotyper
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
if ( UG_args.annotateDbsnp )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP Membership"));
if ( UG_args.annotateHapmap2 )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap2 Membership"));
if ( UG_args.annotateHapmap3 )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap3 Membership"));
if ( !UAC.NO_SLOD )
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
// FORMAT fields if not in POOLED mode
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings());
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_COUNT_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_NUMBER_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total number of alleles in called genotypes"));
}
// all of the arguments from the argument collection
Set<Object> args = new HashSet<Object>();
args.add(UAC);
args.addAll(getToolkit().getFilters());
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(args);
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue()));
return headerInfo;
}
/**
* Compute at a given locus.
*
* @param tracker the meta data tracker
* @param refContext the reference base
* @param rawContext contextual information around the locus
* @return the VariantCallContext object
*/
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
if ( UG_args.gcm.get() == null ) {
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if ( writer != null ) {
if ( writer instanceof VCFGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if ( writer instanceof GLFGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if ( writer instanceof GeliGenotypeWriter )
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
UG_args.gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(UG_args.samples, logger, UAC, format, verboseWriter, beagleWriter));
}
return runGenotyper(tracker, refContext, rawContext, UG_args);
}
public UGStatistics reduceInit() { return new UGStatistics(); }
public UGStatistics treeReduce(UGStatistics lhs, UGStatistics rhs) {
@ -367,7 +387,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
}
// if we have a single-sample call (single sample from PointEstimate model returns no VariationCall data)
if ( value.variation == null || (!writer.supportsMultiSample() && samples.size() <= 1) ) {
if ( value.variation == null || (!writer.supportsMultiSample() && UG_args.samples.size() <= 1) ) {
writer.addGenotypeCall(value.genotypes.get(0));
}

View File

@ -49,7 +49,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
@Argument(fullName="forcePreviousReadBasesToMatchRef", doc="Forces previous read bases to match the reference", required = false)
boolean readBasesMustMatchRef = false;
private UnifiedGenotyper ug;
private UGCalculationArguments ug;
// private ReferenceContextWindow refWindow;
// private Set<BaseTransitionTable> conditionalTables;
private List<Boolean> usePreviousBases;
@ -59,12 +59,10 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
if ( nPreviousBases > 3 || ( nPreviousReadBases > 3 && readBasesMustMatchRef ) ) {
throw new StingException("You have opted to use a number of previous bases in excess of 3. In order to do this you must change the reference window size in the walker itself.");
}
ug = new UnifiedGenotyper();
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
ug.initialize();
uac.baseModel = BaseMismatchModel.THREE_STATE;
uac.ALL_BASES = true;
ug.setUnifiedArgumentCollection(uac);
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
// refWindow = new ReferenceContextWindow(nPreviousBases);
usePreviousBases = new ArrayList<Boolean>();
previousBaseLoci = new ArrayList<GenomeLoc>();
@ -361,7 +359,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
if ( !BaseUtils.isRegularBase(ref.getBase()) )
return false;
VariantCallContext calls = ug.map(tracker,ref,context);
VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
if ( calls == null || calls.genotypes == null)
return false;
return ( calls.genotypes.size() > 0 && !calls.genotypes.get(0).isVariant(ref.getBase()) && calls.genotypes.get(0).getNegLog10PError() > confidentRefThreshold );

View File

@ -39,15 +39,13 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
@Argument(fullName="skip", doc = "Only display every skip eligable sites. Defaults to all sites", required = false)
int skip = 1;
private UnifiedGenotyper ug;
private UGCalculationArguments ug;
public void initialize() {
ug = new UnifiedGenotyper();
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
ug.initialize();
uac.baseModel = BaseMismatchModel.THREE_STATE;
uac.ALL_BASES = true;
ug.setUnifiedArgumentCollection(uac);
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
// print the header
out.printf("loc ref genotype genotypeQ depth nMM qSumMM A C G T%n");
@ -165,7 +163,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
}
private Genotype getGenotype( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
VariantCallContext calls = ug.map(tracker,ref,context);
VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context, ug);
if ( calls == null || calls.variation == null || calls.genotypes == null )
return null;
else {

View File

@ -1,10 +1,7 @@
package org.broadinstitute.sting.playground.gatk.walkers.contamination;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@ -37,18 +34,14 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
@Argument(fullName="limit", shortName="lim", doc="The pValue limit for which a read group will be deemed to be a contaminant", required=false)
private Double LIMIT = 1e-9;
private UnifiedArgumentCollection uac;
private UnifiedGenotyper ug;
private UGCalculationArguments ug;
private NamedTable altTable;
public void initialize() {
uac = new UnifiedArgumentCollection();
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.genotypeModel = GenotypeCalculationModel.Model.EM_POINT_ESTIMATE;
uac.CONFIDENCE_THRESHOLD = 50;
ug = new UnifiedGenotyper();
ug.initialize();
ug.setUnifiedArgumentCollection(uac);
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
altTable = new NamedTable();
}
@ -81,7 +74,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
double altBalance = ((double) altCount)/((double) totalCount);
if (altBalance > 0.70) {
VariantCallContext ugResult = ug.map(tracker, ref, context);
VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker, ref, context, ug);
if (ugResult != null && ugResult.genotypes != null && ugResult.genotypes.size() > 0) {
return ugResult.genotypes.get(0).isHet();

View File

@ -4,20 +4,13 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
import org.broadinstitute.sting.playground.utils.NamedTable;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.genotype.VariationCall;
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.HashMap;
import java.util.List;
/**
* Created By User: Michael Melgar
@ -33,19 +26,15 @@ import java.util.List;
public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Integer> {
HashMap<String,Long> counts = new HashMap<String,Long>();
private UnifiedArgumentCollection uac;
private UnifiedGenotyper ug;
private UGCalculationArguments ug;
private NamedTable altTable;
public void initialize() {
uac = new UnifiedArgumentCollection();
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
uac.genotypeModel = GenotypeCalculationModel.Model.EM_POINT_ESTIMATE;
uac.CONFIDENCE_THRESHOLD = 50;
uac.ALL_BASES = true;
ug = new UnifiedGenotyper();
ug.initialize();
ug.setUnifiedArgumentCollection(uac);
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
altTable = new NamedTable();
}
@ -58,7 +47,7 @@ public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Int
char nextBase = Character.toUpperCase(contextBases[contextBases.length - 1]);
if (contextBases.length == 3 && refBase != 'N' && pileup.getBases() != null && pileup.getSecondaryBases() != null) {
VariantCallContext ugResult = ug.map(tracker,ref,context);
VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
if (ugResult != null && ugResult.variation != null) {
Genotype res = ugResult.genotypes.get(0);
String call = res.getBases();

View File

@ -200,7 +200,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
" -gm JOINT_ESTIMATE" +
" -vf GELI",
1,
Arrays.asList("f09ac61858c2633e5d1326fcf098b36d"));
Arrays.asList("3c6d76d55d608482940cd725b87ef07d"));
executeTest(String.format("testMultiTechnologies"), spec);
}
@ -212,7 +212,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// --------------------------------------------------------------------------------------------------------------
@Test
public void testOtherOutput() {
String[] md5s = {"a5dce541f00d3fe364d110f1cae53538", "0147670a1b62bb3d218d5ed3f9fc4656"};
String[] md5s = {"78482125d51f9eb2ee850a6b25921e84", "cf39a4a90e9f01b6f1183e6b1fd7520e"};
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper" +
" -R " + oneKGLocation + "reference/human_b36_both.fasta" +