1. For Matt: JIRA GSA-270. Other walkers needing to call into the Unified Genotyper now use static methods (e.g. runGenotyper()) instead of calling initialize and map.
2. Set the default confidence cutoff to 50 (instead of 0). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2752 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e964660df3
commit
f6da57dc79
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
/**
|
||||
* Class for maintaining the arguments that need to be passed to UnifiedGenotyper.runGenotyper()
|
||||
* (so that they only need to be computed one time)
|
||||
*/
|
||||
public class UGCalculationArguments {
|
||||
// should we annotate dbsnp?
|
||||
protected boolean annotateDbsnp = false;
|
||||
// should we annotate hapmap2?
|
||||
protected boolean annotateHapmap2 = false;
|
||||
// should we annotate hapmap3?
|
||||
protected boolean annotateHapmap3 = false;
|
||||
|
||||
// the unified argument collection
|
||||
protected UnifiedArgumentCollection UAC = null;
|
||||
|
||||
// the model used for calculating genotypes
|
||||
protected ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
||||
|
||||
// samples in input
|
||||
protected Set<String> samples = new HashSet<String>();
|
||||
}
|
||||
|
|
@ -67,7 +67,7 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
// control the various parameters to be used
|
||||
@Argument(fullName = "min_confidence_threshold", shortName = "confidence", doc = "The phred-scaled confidence threshold by which variants should be filtered", required = false)
|
||||
public double CONFIDENCE_THRESHOLD = 0.0;
|
||||
public double CONFIDENCE_THRESHOLD = 50.0;
|
||||
|
||||
@Argument(fullName = "min_base_quality_score", shortName = "mbq", doc = "Minimum base quality required to consider a base for calling", required = false)
|
||||
public int MIN_BASE_QUALTY_SCORE = 10;
|
||||
|
|
|
|||
|
|
@ -64,18 +64,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
|
||||
public PrintStream beagleWriter = null;
|
||||
|
||||
// the model used for calculating genotypes
|
||||
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
||||
|
||||
// samples in input
|
||||
private Set<String> samples = new HashSet<String>();
|
||||
|
||||
// should we annotate dbsnp?
|
||||
private boolean annotateDbsnp = false;
|
||||
// how about hapmap2?
|
||||
private boolean annotateHapmap2 = false;
|
||||
// how about hapmap3?
|
||||
private boolean annotateHapmap3 = false;
|
||||
// the calculation arguments
|
||||
private UGCalculationArguments UG_args = null;
|
||||
|
||||
// Enable deletions in the pileup
|
||||
public boolean includeReadsWithDeletionAtLoci() { return true; }
|
||||
|
|
@ -98,24 +88,32 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
double percentCalledOfCallable() { return (100.0 * nBasesCalledConfidently) / nBasesCallable; }
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the argument collection for the UnifiedGenotyper.
|
||||
* To be used with walkers that call the UnifiedGenotyper's map function
|
||||
* and consequently can't set these arguments on the command-line
|
||||
*
|
||||
* @param UAC the UnifiedArgumentCollection
|
||||
* Creates the argument calculation object for the UnifiedGenotyper.
|
||||
*
|
||||
* @param toolkit the GATK Engine
|
||||
* @param UAC the UnifiedArgumentCollection
|
||||
* @return UG calculation arguments object
|
||||
**/
|
||||
public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) {
|
||||
this.UAC = UAC;
|
||||
initialize();
|
||||
public static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC) {
|
||||
return getUnifiedCalculationArguments(toolkit, UAC, null, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the samples, output, and genotype calculation model
|
||||
* Creates the argument calculation object for the UnifiedGenotyper.
|
||||
*
|
||||
* @param toolkit the GATK Engine
|
||||
* @param UAC the UnifiedArgumentCollection
|
||||
* @param writer the genotype writer
|
||||
* @param beagleWriter the beagle writer
|
||||
* @return UG calculation arguments object
|
||||
*
|
||||
**/
|
||||
public void initialize() {
|
||||
private static UGCalculationArguments getUnifiedCalculationArguments(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, GenotypeWriter writer, PrintStream beagleWriter) {
|
||||
UGCalculationArguments UG_args = new UGCalculationArguments();
|
||||
UG_args.UAC = UAC;
|
||||
|
||||
// deal with input errors
|
||||
if ( UAC.POOLSIZE > 0 && UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
||||
throw new IllegalArgumentException("Attempting to use a model other than POOLED with pooled data. Please set the model to POOLED.");
|
||||
|
|
@ -126,7 +124,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
if ( beagleWriter != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
|
||||
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
|
||||
}
|
||||
if ( getToolkit().getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
|
||||
if ( toolkit.getArguments().numberOfThreads > 1 && UAC.ASSUME_SINGLE_SAMPLE != null ) {
|
||||
// the ASSUME_SINGLE_SAMPLE argument can't be handled (at least for now) while we are multi-threaded because the IO system doesn't know how to get the sample name
|
||||
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
|
||||
}
|
||||
|
|
@ -135,12 +133,9 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
||||
// if we're supposed to assume a single sample, do so
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||
samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||
UG_args.samples.add(UAC.ASSUME_SINGLE_SAMPLE);
|
||||
else
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
|
||||
// for ( String sample : samples )
|
||||
// logger.debug("SAMPLE: " + sample);
|
||||
UG_args.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
|
||||
}
|
||||
|
||||
// in pooled mode we need to check that the format is acceptable
|
||||
|
|
@ -150,121 +145,38 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
throw new IllegalArgumentException("The POOLED model is not compatible with the specified format; try using VCF instead");
|
||||
|
||||
// when using VCF with multiple threads, we need to turn down the validation stringency so that writing temporary files will work
|
||||
if ( getToolkit().getArguments().numberOfThreads > 1 && writer instanceof VCFGenotypeWriter )
|
||||
if ( toolkit.getArguments().numberOfThreads > 1 && writer instanceof VCFGenotypeWriter )
|
||||
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
|
||||
}
|
||||
|
||||
// initialize the writers
|
||||
if ( verboseWriter != null ) {
|
||||
if(UAC.genotypeModel != GenotypeCalculationModel.Model.EM_POINT_ESTIMATE) {
|
||||
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
|
||||
for ( char altAllele : BaseUtils.BASES ) {
|
||||
char base = Character.toUpperCase(altAllele);
|
||||
header.append("POfDGivenAFFor" + base + "\t");
|
||||
header.append("PosteriorAFFor" + base + "\t");
|
||||
}
|
||||
verboseWriter.println(header);
|
||||
}
|
||||
}
|
||||
if ( beagleWriter != null ) {
|
||||
beagleWriter.print("marker alleleA alleleB");
|
||||
for ( String sample : samples )
|
||||
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
|
||||
beagleWriter.println();
|
||||
}
|
||||
|
||||
// check to see whether a dbsnp rod was included
|
||||
List<ReferenceOrderedDataSource> dataSources = getToolkit().getRodDataSources();
|
||||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
ReferenceOrderedData rod = source.getReferenceOrderedData();
|
||||
if ( rod.getType().equals(rodDbSNP.class) ) {
|
||||
annotateDbsnp = true;
|
||||
UG_args.annotateDbsnp = true;
|
||||
}
|
||||
if ( rod.getName().equals("hapmap2") ) {
|
||||
annotateHapmap2 = true;
|
||||
UG_args.annotateHapmap2 = true;
|
||||
}
|
||||
if ( rod.getName().equals("hapmap3") ) {
|
||||
annotateHapmap3 = true;
|
||||
UG_args.annotateHapmap3 = true;
|
||||
}
|
||||
}
|
||||
|
||||
// *** If we were called by another walker, then we don't ***
|
||||
// *** want to do any of the other initialization steps. ***
|
||||
if ( writer == null )
|
||||
return;
|
||||
|
||||
// *** If we got here, then we were instantiated by the GATK engine ***
|
||||
|
||||
// initialize the header
|
||||
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, getHeaderInfo());
|
||||
}
|
||||
|
||||
private Set<VCFHeaderLine> getHeaderInfo() {
|
||||
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// this is only applicable to VCF
|
||||
if ( !(writer instanceof VCFGenotypeWriter) )
|
||||
return headerInfo;
|
||||
|
||||
// first, the basic info
|
||||
headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper"));
|
||||
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
// annotation (INFO) fields from VariantAnnotator
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
|
||||
else
|
||||
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
|
||||
|
||||
// annotation (INFO) fields from UnifiedGenotyper
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
|
||||
if ( annotateDbsnp )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP membership"));
|
||||
if ( !UAC.NO_SLOD )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
|
||||
|
||||
// FORMAT fields if not in POOLED mode
|
||||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
||||
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings());
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_COUNT_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_NUMBER_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total number of alleles in called genotypes"));
|
||||
}
|
||||
|
||||
// all of the arguments from the argument collection
|
||||
Set<Object> args = new HashSet<Object>();
|
||||
args.add(UAC);
|
||||
args.addAll(getToolkit().getFilters());
|
||||
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(args);
|
||||
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
|
||||
headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue()));
|
||||
|
||||
return headerInfo;
|
||||
return UG_args;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute at a given locus.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
* @param rawContext contextual information around the locus
|
||||
* @param UG_args the calculation argument collection
|
||||
* @return the VariantCallContext object
|
||||
*/
|
||||
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||
|
||||
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
|
||||
if ( gcm.get() == null ) {
|
||||
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||
if ( writer != null ) {
|
||||
if ( writer instanceof VCFGenotypeWriter )
|
||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||
else if ( writer instanceof GLFGenotypeWriter )
|
||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
|
||||
else if ( writer instanceof GeliGenotypeWriter )
|
||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||
else
|
||||
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
|
||||
}
|
||||
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
|
||||
}
|
||||
public static VariantCallContext runGenotyper(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, UGCalculationArguments UG_args) {
|
||||
|
||||
char ref = Character.toUpperCase(refContext.getBase());
|
||||
if ( !BaseUtils.isRegularBase(ref) )
|
||||
|
|
@ -277,28 +189,28 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
ReadBackedPileup rawPileup = rawContext.getBasePileup();
|
||||
|
||||
// filter the context based on min base and mapping qualities
|
||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UAC.MIN_BASE_QUALTY_SCORE, UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
ReadBackedPileup pileup = rawPileup.getBaseAndMappingFilteredPileup(UG_args.UAC.MIN_BASE_QUALTY_SCORE, UG_args.UAC.MIN_MAPPING_QUALTY_SCORE);
|
||||
|
||||
// filter the context based on mapping quality and mismatch rate
|
||||
pileup = filterPileup(pileup, refContext, UAC);
|
||||
pileup = filterPileup(pileup, refContext, UG_args.UAC);
|
||||
|
||||
// don't call when there is no coverage
|
||||
if ( pileup.size() == 0 )
|
||||
return null;
|
||||
|
||||
// are there too many deletions in the pileup?
|
||||
if ( isValidDeletionFraction(UAC.MAX_DELETION_FRACTION) &&
|
||||
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UAC.MAX_DELETION_FRACTION )
|
||||
if ( isValidDeletionFraction(UG_args.UAC.MAX_DELETION_FRACTION) &&
|
||||
(double)pileup.getNumberOfDeletions() / (double)pileup.size() > UG_args.UAC.MAX_DELETION_FRACTION )
|
||||
return null;
|
||||
|
||||
// stratify the AlignmentContext and cut by sample
|
||||
// Note that for testing purposes, we may want to throw multi-samples at pooled mode
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UAC.ASSUME_SINGLE_SAMPLE, (UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||
Map<String, StratifiedAlignmentContext> stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(pileup, UG_args.UAC.ASSUME_SINGLE_SAMPLE, (UG_args.UAC.genotypeModel == GenotypeCalculationModel.Model.POOLED ? PooledCalculationModel.POOL_SAMPLE_NAME : null));
|
||||
if ( stratifiedContexts == null )
|
||||
return null;
|
||||
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
|
||||
VariantCallContext call = gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
|
||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UG_args.UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_REFERENCE_ERROR);
|
||||
VariantCallContext call = UG_args.gcm.get().callLocus(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
|
||||
|
||||
// annotate the call, if possible
|
||||
if ( call != null && call.variation != null && call.variation instanceof ArbitraryFieldsBacked ) {
|
||||
|
|
@ -306,10 +218,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
stratifiedContexts = StratifiedAlignmentContext.splitContextBySample(rawContext.getBasePileup());
|
||||
|
||||
Map<String, String> annotations;
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
if ( UG_args.UAC.ALL_ANNOTATIONS )
|
||||
annotations = VariantAnnotator.getAllAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
|
||||
else
|
||||
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, annotateDbsnp, annotateHapmap2, annotateHapmap3);
|
||||
annotations = VariantAnnotator.getAnnotations(tracker, refContext, stratifiedContexts, call.variation, UG_args.annotateDbsnp, UG_args.annotateHapmap2, UG_args.annotateHapmap3);
|
||||
((ArbitraryFieldsBacked)call.variation).setFields(annotations);
|
||||
}
|
||||
|
||||
|
|
@ -334,9 +246,117 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Reduce
|
||||
// The following methods are walker-specific; don't use them unless you are a traversal.
|
||||
// If you are a walker, stick to the static methods defined above.
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Initialize the samples, output, and genotype calculation model
|
||||
*
|
||||
**/
|
||||
public void initialize() {
|
||||
|
||||
UG_args = getUnifiedCalculationArguments(getToolkit(), UAC, writer, beagleWriter);
|
||||
|
||||
// initialize the writers
|
||||
if ( verboseWriter != null ) {
|
||||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
|
||||
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
|
||||
for ( char altAllele : BaseUtils.BASES ) {
|
||||
char base = Character.toUpperCase(altAllele);
|
||||
header.append("POfDGivenAFFor" + base + "\t");
|
||||
header.append("PosteriorAFFor" + base + "\t");
|
||||
}
|
||||
verboseWriter.println(header);
|
||||
}
|
||||
}
|
||||
if ( beagleWriter != null ) {
|
||||
beagleWriter.print("marker alleleA alleleB");
|
||||
for ( String sample : UG_args.samples )
|
||||
beagleWriter.print(String.format(" %s %s %s", sample, sample, sample));
|
||||
beagleWriter.println();
|
||||
}
|
||||
|
||||
// initialize the header
|
||||
GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), UG_args.samples, getHeaderInfo(UG_args));
|
||||
}
|
||||
|
||||
private Set<VCFHeaderLine> getHeaderInfo(UGCalculationArguments UG_args) {
|
||||
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
|
||||
|
||||
// this is only applicable to VCF
|
||||
if ( !(writer instanceof VCFGenotypeWriter) )
|
||||
return headerInfo;
|
||||
|
||||
// first, the basic info
|
||||
headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper"));
|
||||
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
|
||||
|
||||
// annotation (INFO) fields from VariantAnnotator
|
||||
if ( UAC.ALL_ANNOTATIONS )
|
||||
headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions());
|
||||
else
|
||||
headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions());
|
||||
|
||||
// annotation (INFO) fields from UnifiedGenotyper
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Allele Frequency"));
|
||||
if ( UG_args.annotateDbsnp )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.DBSNP_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "dbSNP Membership"));
|
||||
if ( UG_args.annotateHapmap2 )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP2_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap2 Membership"));
|
||||
if ( UG_args.annotateHapmap3 )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.HAPMAP3_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "HapMap3 Membership"));
|
||||
if ( !UAC.NO_SLOD )
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Float, "Strand Bias"));
|
||||
|
||||
// FORMAT fields if not in POOLED mode
|
||||
if ( UAC.genotypeModel != GenotypeCalculationModel.Model.POOLED ) {
|
||||
headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings());
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_COUNT_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"));
|
||||
headerInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_NUMBER_KEY, 1, VCFInfoHeaderLine.INFO_TYPE.Integer, "Total number of alleles in called genotypes"));
|
||||
}
|
||||
|
||||
// all of the arguments from the argument collection
|
||||
Set<Object> args = new HashSet<Object>();
|
||||
args.add(UAC);
|
||||
args.addAll(getToolkit().getFilters());
|
||||
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(args);
|
||||
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() )
|
||||
headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue()));
|
||||
|
||||
return headerInfo;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute at a given locus.
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param refContext the reference base
|
||||
* @param rawContext contextual information around the locus
|
||||
* @return the VariantCallContext object
|
||||
*/
|
||||
public VariantCallContext map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
|
||||
|
||||
// initialize the GenotypeCalculationModel for this thread if that hasn't been done yet
|
||||
if ( UG_args.gcm.get() == null ) {
|
||||
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||
if ( writer != null ) {
|
||||
if ( writer instanceof VCFGenotypeWriter )
|
||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||
else if ( writer instanceof GLFGenotypeWriter )
|
||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
|
||||
else if ( writer instanceof GeliGenotypeWriter )
|
||||
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
|
||||
else
|
||||
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
|
||||
}
|
||||
UG_args.gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(UG_args.samples, logger, UAC, format, verboseWriter, beagleWriter));
|
||||
}
|
||||
|
||||
return runGenotyper(tracker, refContext, rawContext, UG_args);
|
||||
}
|
||||
|
||||
public UGStatistics reduceInit() { return new UGStatistics(); }
|
||||
|
||||
public UGStatistics treeReduce(UGStatistics lhs, UGStatistics rhs) {
|
||||
|
|
@ -367,7 +387,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
}
|
||||
|
||||
// if we have a single-sample call (single sample from PointEstimate model returns no VariationCall data)
|
||||
if ( value.variation == null || (!writer.supportsMultiSample() && samples.size() <= 1) ) {
|
||||
if ( value.variation == null || (!writer.supportsMultiSample() && UG_args.samples.size() <= 1) ) {
|
||||
writer.addGenotypeCall(value.genotypes.get(0));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
@Argument(fullName="forcePreviousReadBasesToMatchRef", doc="Forces previous read bases to match the reference", required = false)
|
||||
boolean readBasesMustMatchRef = false;
|
||||
|
||||
private UnifiedGenotyper ug;
|
||||
private UGCalculationArguments ug;
|
||||
// private ReferenceContextWindow refWindow;
|
||||
// private Set<BaseTransitionTable> conditionalTables;
|
||||
private List<Boolean> usePreviousBases;
|
||||
|
|
@ -59,12 +59,10 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
if ( nPreviousBases > 3 || ( nPreviousReadBases > 3 && readBasesMustMatchRef ) ) {
|
||||
throw new StingException("You have opted to use a number of previous bases in excess of 3. In order to do this you must change the reference window size in the walker itself.");
|
||||
}
|
||||
ug = new UnifiedGenotyper();
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
ug.initialize();
|
||||
uac.baseModel = BaseMismatchModel.THREE_STATE;
|
||||
uac.ALL_BASES = true;
|
||||
ug.setUnifiedArgumentCollection(uac);
|
||||
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
|
||||
// refWindow = new ReferenceContextWindow(nPreviousBases);
|
||||
usePreviousBases = new ArrayList<Boolean>();
|
||||
previousBaseLoci = new ArrayList<GenomeLoc>();
|
||||
|
|
@ -361,7 +359,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
if ( !BaseUtils.isRegularBase(ref.getBase()) )
|
||||
return false;
|
||||
VariantCallContext calls = ug.map(tracker,ref,context);
|
||||
VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
|
||||
if ( calls == null || calls.genotypes == null)
|
||||
return false;
|
||||
return ( calls.genotypes.size() > 0 && !calls.genotypes.get(0).isVariant(ref.getBase()) && calls.genotypes.get(0).getNegLog10PError() > confidentRefThreshold );
|
||||
|
|
|
|||
|
|
@ -39,15 +39,13 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
|
|||
@Argument(fullName="skip", doc = "Only display every skip eligable sites. Defaults to all sites", required = false)
|
||||
int skip = 1;
|
||||
|
||||
private UnifiedGenotyper ug;
|
||||
private UGCalculationArguments ug;
|
||||
|
||||
public void initialize() {
|
||||
ug = new UnifiedGenotyper();
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
ug.initialize();
|
||||
uac.baseModel = BaseMismatchModel.THREE_STATE;
|
||||
uac.ALL_BASES = true;
|
||||
ug.setUnifiedArgumentCollection(uac);
|
||||
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
|
||||
|
||||
// print the header
|
||||
out.printf("loc ref genotype genotypeQ depth nMM qSumMM A C G T%n");
|
||||
|
|
@ -165,7 +163,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
|
|||
}
|
||||
|
||||
private Genotype getGenotype( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
VariantCallContext calls = ug.map(tracker,ref,context);
|
||||
VariantCallContext calls = UnifiedGenotyper.runGenotyper(tracker,ref,context, ug);
|
||||
if ( calls == null || calls.variation == null || calls.genotypes == null )
|
||||
return null;
|
||||
else {
|
||||
|
|
|
|||
|
|
@ -1,10 +1,7 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers.contamination;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
|
|
@ -37,18 +34,14 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
|||
@Argument(fullName="limit", shortName="lim", doc="The pValue limit for which a read group will be deemed to be a contaminant", required=false)
|
||||
private Double LIMIT = 1e-9;
|
||||
|
||||
private UnifiedArgumentCollection uac;
|
||||
private UnifiedGenotyper ug;
|
||||
private UGCalculationArguments ug;
|
||||
private NamedTable altTable;
|
||||
|
||||
public void initialize() {
|
||||
uac = new UnifiedArgumentCollection();
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
uac.genotypeModel = GenotypeCalculationModel.Model.EM_POINT_ESTIMATE;
|
||||
uac.CONFIDENCE_THRESHOLD = 50;
|
||||
|
||||
ug = new UnifiedGenotyper();
|
||||
ug.initialize();
|
||||
ug.setUnifiedArgumentCollection(uac);
|
||||
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
|
||||
|
||||
altTable = new NamedTable();
|
||||
}
|
||||
|
|
@ -81,7 +74,7 @@ public class FindContaminatingReadGroupsWalker extends LocusWalker<Integer, Inte
|
|||
double altBalance = ((double) altCount)/((double) totalCount);
|
||||
|
||||
if (altBalance > 0.70) {
|
||||
VariantCallContext ugResult = ug.map(tracker, ref, context);
|
||||
VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker, ref, context, ug);
|
||||
|
||||
if (ugResult != null && ugResult.genotypes != null && ugResult.genotypes.size() > 0) {
|
||||
return ugResult.genotypes.get(0).isHet();
|
||||
|
|
|
|||
|
|
@ -4,20 +4,13 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.*;
|
||||
import org.broadinstitute.sting.playground.utils.NamedTable;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.VariationCall;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created By User: Michael Melgar
|
||||
|
|
@ -33,19 +26,15 @@ import java.util.List;
|
|||
public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Integer> {
|
||||
|
||||
HashMap<String,Long> counts = new HashMap<String,Long>();
|
||||
private UnifiedArgumentCollection uac;
|
||||
private UnifiedGenotyper ug;
|
||||
private UGCalculationArguments ug;
|
||||
private NamedTable altTable;
|
||||
|
||||
public void initialize() {
|
||||
uac = new UnifiedArgumentCollection();
|
||||
UnifiedArgumentCollection uac = new UnifiedArgumentCollection();
|
||||
uac.genotypeModel = GenotypeCalculationModel.Model.EM_POINT_ESTIMATE;
|
||||
uac.CONFIDENCE_THRESHOLD = 50;
|
||||
uac.ALL_BASES = true;
|
||||
|
||||
ug = new UnifiedGenotyper();
|
||||
ug.initialize();
|
||||
ug.setUnifiedArgumentCollection(uac);
|
||||
ug = UnifiedGenotyper.getUnifiedCalculationArguments(getToolkit(), uac);
|
||||
|
||||
altTable = new NamedTable();
|
||||
}
|
||||
|
|
@ -58,7 +47,7 @@ public class SecondaryBaseTransitionTableWalker extends LocusWalker<Integer, Int
|
|||
char nextBase = Character.toUpperCase(contextBases[contextBases.length - 1]);
|
||||
|
||||
if (contextBases.length == 3 && refBase != 'N' && pileup.getBases() != null && pileup.getSecondaryBases() != null) {
|
||||
VariantCallContext ugResult = ug.map(tracker,ref,context);
|
||||
VariantCallContext ugResult = UnifiedGenotyper.runGenotyper(tracker,ref,context,ug);
|
||||
if (ugResult != null && ugResult.variation != null) {
|
||||
Genotype res = ugResult.genotypes.get(0);
|
||||
String call = res.getBases();
|
||||
|
|
|
|||
|
|
@ -200,7 +200,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -gm JOINT_ESTIMATE" +
|
||||
" -vf GELI",
|
||||
1,
|
||||
Arrays.asList("f09ac61858c2633e5d1326fcf098b36d"));
|
||||
Arrays.asList("3c6d76d55d608482940cd725b87ef07d"));
|
||||
|
||||
executeTest(String.format("testMultiTechnologies"), spec);
|
||||
}
|
||||
|
|
@ -212,7 +212,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testOtherOutput() {
|
||||
String[] md5s = {"a5dce541f00d3fe364d110f1cae53538", "0147670a1b62bb3d218d5ed3f9fc4656"};
|
||||
String[] md5s = {"78482125d51f9eb2ee850a6b25921e84", "cf39a4a90e9f01b6f1183e6b1fd7520e"};
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper" +
|
||||
" -R " + oneKGLocation + "reference/human_b36_both.fasta" +
|
||||
|
|
|
|||
Loading…
Reference in New Issue