Addressed revisions

This commit is contained in:
Valentin Ruano-Rubio 2014-04-18 15:43:54 -04:00
parent 08203b516e
commit 7455ac9796
19 changed files with 348 additions and 201 deletions

View File

@ -47,10 +47,11 @@
package org.broadinstitute.sting.gatk.arguments; package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode;
import org.broadinstitute.sting.gatk.walkers.genotyper.OutputMode; import org.broadinstitute.sting.gatk.walkers.genotyper.OutputMode;
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.sting.utils.collections.DefaultHashMap; import org.broadinstitute.sting.utils.collections.DefaultHashMap;
import org.broadinstitute.sting.utils.variant.HomoSapiens; import org.broadinstitute.sting.utils.variant.HomoSapiensConstants;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.File; import java.io.File;
@ -98,16 +99,16 @@ public class StandardCallerArgumentCollection implements Cloneable {
* which determines how many chromosomes each individual in the species carries. * which determines how many chromosomes each individual in the species carries.
*/ */
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept", required = false) @Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept", required = false)
public Double snpHeterozygosity = HomoSapiens.SNP_HETEROZYGOSITY; public Double snpHeterozygosity = HomoSapiensConstants.SNP_HETEROZYGOSITY;
/** /**
* This argument informs the prior probability of having an indel at a site. * This argument informs the prior probability of having an indel at a site.
*/ */
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept", required = false) @Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept", required = false)
public double indelHeterozygosity = HomoSapiens.INDEL_HETEROZYGOSITY; public double indelHeterozygosity = HomoSapiensConstants.INDEL_HETEROZYGOSITY;
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false) @Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
public org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode genotypingMode = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode.DISCOVERY; public GenotypingOutputMode genotypingOutputMode = GenotypingOutputMode.DISCOVERY;
/** /**
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with * The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
@ -215,7 +216,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
* Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy * Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
*/ */
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false) @Argument(shortName="ploidy", fullName="sample_ploidy", doc="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
public int samplePloidy = HomoSapiens.DEFAULT_PLOIDY; public int samplePloidy = HomoSapiensConstants.DEFAULT_PLOIDY;
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false) @Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
public OutputMode outputMode = OutputMode.EMIT_VARIANTS_ONLY; public OutputMode outputMode = OutputMode.EMIT_VARIANTS_ONLY;
@ -232,7 +233,6 @@ public class StandardCallerArgumentCollection implements Cloneable {
@Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false) @Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false)
public boolean annotateAllSitesWithPLs = false; public boolean annotateAllSitesWithPLs = false;
/** /**
* Creates a Standard caller argument collection with default values. * Creates a Standard caller argument collection with default values.
*/ */
@ -257,12 +257,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
if (!field.getDeclaringClass().isAssignableFrom(clazz)) if (!field.getDeclaringClass().isAssignableFrom(clazz))
continue; continue;
final int fieldModifiers = field.getModifiers(); final int fieldModifiers = field.getModifiers();
if (Modifier.isPrivate((fieldModifiers))) if ((fieldModifiers & UNCOPYABLE_MODIFIER_MASK) != 0) continue;
continue;
if (Modifier.isFinal(fieldModifiers))
continue;
if (Modifier.isStatic(fieldModifiers))
continue;
field.set(result,field.get(this)); field.set(result,field.get(this));
} }
return result; return result;
@ -283,4 +278,10 @@ public class StandardCallerArgumentCollection implements Cloneable {
throw new IllegalStateException("unreachable code"); throw new IllegalStateException("unreachable code");
} }
} }
/**
* Holds a modifiers mask that identifies those fields that cannot be copied between
* StandardCallerArgumentCollections.
*/
private final int UNCOPYABLE_MODIFIER_MASK = Modifier.PRIVATE | Modifier.STATIC | Modifier.FINAL;
} }

View File

@ -134,7 +134,7 @@ public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends General
final List<Allele> alleles = new ArrayList<Allele>(); final List<Allele> alleles = new ArrayList<Allele>();
if ( allAllelesToUse != null ) { if ( allAllelesToUse != null ) {
alleles.addAll(allAllelesToUse); alleles.addAll(allAllelesToUse);
} else if ( UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) { } else if ( UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, ref.getLocus(), true, logger, UAC.alleles); final VariantContext vc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, ref.getLocus(), true, logger, UAC.alleles);
// ignore places where we don't have a SNP // ignore places where we don't have a SNP

View File

@ -71,7 +71,7 @@ public abstract class GenotypeLikelihoodsCalculationModel {
public static final String DUMMY_LANE = "Lane1"; public static final String DUMMY_LANE = "Lane1";
public static final String DUMMY_SAMPLE_NAME = "DummySample1"; public static final String DUMMY_SAMPLE_NAME = "DummySample1";
public enum Name { public enum Model {
SNP, SNP,
INDEL, INDEL,
GENERALPLOIDYSNP, GENERALPLOIDYSNP,

View File

@ -141,6 +141,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
/** /**
* Changes the logger for this genotyper engine. * Changes the logger for this genotyper engine.
*
* @param logger new logger. * @param logger new logger.
* *
* @throws IllegalArgumentException if {@code logger} is {@code null}. * @throws IllegalArgumentException if {@code logger} is {@code null}.
@ -153,6 +154,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
/** /**
* Returns a reference to the engine configuration * Returns a reference to the engine configuration
*
* @return never {@code null}. * @return never {@code null}.
*/ */
public Config getConfiguration() { public Config getConfiguration() {
@ -160,19 +162,22 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
} }
/** /**
* Completes a variant context with genotype calls and associated annotations give the genotype likelihoods and * Completes a variant context with genotype calls and associated annotations given the genotype likelihoods and
* the model that need to be applied. * the model that need to be applied.
*
* @param vc variant-context to complete. * @param vc variant-context to complete.
* @param modelName model name. * @param model model name.
*
* @throws IllegalArgumentException if {@code model} or {@code vc} is {@code null}.
* *
* @return can be {@code null} indicating that genotyping it not possible with the information provided. * @return can be {@code null} indicating that genotyping it not possible with the information provided.
*/ */
public VariantCallContext calculateGenotypes(final VariantContext vc, final GeneralPloidySNPGenotypeLikelihoodsCalculationModel.Name modelName) { public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
if (vc == null) if (vc == null)
throw new IllegalArgumentException("vc cannot be null"); throw new IllegalArgumentException("vc cannot be null");
if (modelName == null) if (model == null)
throw new IllegalArgumentException("the model cannot be null"); throw new IllegalArgumentException("the model cannot be null");
return calculateGenotypes(null,null,null,null,vc,modelName,false,null); return calculateGenotypes(null,null,null,null,vc,model,false,null);
} }
/** /**
@ -189,7 +194,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
*/ */
protected VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext, protected VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model, final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
final boolean inheritAttributesFromInputVC, final boolean inheritAttributesFromInputVC,
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
@ -207,7 +212,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice // note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
double log10Confidence = double log10Confidence =
! outputAlternativeAlleles.siteIsMonomorphic || ! outputAlternativeAlleles.siteIsMonomorphic ||
configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs
? AFresult.getLog10PosteriorOfAFEq0() + 0.0 ? AFresult.getLog10PosteriorOfAFEq0() + 0.0
: AFresult.getLog10PosteriorOfAFGT0() + 0.0; : AFresult.getLog10PosteriorOfAFGT0() + 0.0;
@ -227,8 +232,9 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), loc.getContig(), loc.getStart(), loc.getStop(), outputAlleles); final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), loc.getContig(), loc.getStart(), loc.getStop(), outputAlleles);
// Seems that when log10PError is 0.0, you must pass -0.0 to get a nice output at the other end otherwise is a "-0". // Seems that when log10PError is 0.0, you must pass -0.0 to get a nice output at the other end otherwise is a "-0".
// Truth is that this should be fixed in the "variant" dependency code but perhaps it can be ammeneded also in the VariantContextWriter. // Truth is that this should be fixed in the "variant" dependency code but perhaps it can be amended also in the VariantContextWriter.
//TODO report this bug or make sure our VCFWriters do not output '-0' QUAL column values. //TODO Please remove this comment when this has been fixed (PT https://www.pivotaltracker.com/story/show/69492530)
//TODO and change the code below accordingly.
builder.log10PError(log10Confidence == 0.0 ? -0.0 : log10Confidence); builder.log10PError(log10Confidence == 0.0 ? -0.0 : log10Confidence);
if ( ! passesCallThreshold(phredScaledConfidence) ) if ( ! passesCallThreshold(phredScaledConfidence) )
builder.filter(LOW_QUAL_FILTER_NAME); builder.filter(LOW_QUAL_FILTER_NAME);
@ -348,7 +354,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
*/ */
protected final boolean confidentlyCalled(final double conf, final double PofF) { protected final boolean confidentlyCalled(final double conf, final double PofF) {
return conf >= configuration.STANDARD_CONFIDENCE_FOR_CALLING || return conf >= configuration.STANDARD_CONFIDENCE_FOR_CALLING ||
(configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES (configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
&& QualityUtils.phredScaleErrorRate(PofF) >= configuration.STANDARD_CONFIDENCE_FOR_CALLING); && QualityUtils.phredScaleErrorRate(PofF) >= configuration.STANDARD_CONFIDENCE_FOR_CALLING);
} }
@ -358,7 +364,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
* *
* @return a valid heterozygosity in (0,1). * @return a valid heterozygosity in (0,1).
*/ */
private double getModelTheta(final GenotypeLikelihoodsCalculationModel.Name model) { private double getModelTheta(final GenotypeLikelihoodsCalculationModel.Model model) {
switch (model) { switch (model) {
case SNP: case SNP:
case GENERALPLOIDYSNP: case GENERALPLOIDYSNP:
@ -402,7 +408,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
* @param ref the reference context. * @param ref the reference context.
* @param rawContext the read alignment at that location. * @param rawContext the read alignment at that location.
* @return it might be null if no enough information is provided to do even an empty call. For example when * @return it might be null if no enough information is provided to do even an empty call. For example when
* we have limmited-context (i.e. any of the tracker, reference or alignment is {@code null}. * we have limited-context (i.e. any of the tracker, reference or alignment is {@code null}.
*/ */
protected final VariantCallContext emptyCallContext(final RefMetaDataTracker tracker, final ReferenceContext ref, protected final VariantCallContext emptyCallContext(final RefMetaDataTracker tracker, final ReferenceContext ref,
final AlignmentContext rawContext) { final AlignmentContext rawContext) {
@ -414,7 +420,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
VariantContext vc; VariantContext vc;
if ( configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) { if ( configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker,
rawContext.getLocation(), false, logger, configuration.alleles); rawContext.getLocation(), false, logger, configuration.alleles);
if (ggaVc == null) if (ggaVc == null)
@ -468,7 +474,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return new VariantCallContext(vc, QualityUtils.phredScaleLog10CorrectRate(log10POfRef) >= configuration.STANDARD_CONFIDENCE_FOR_CALLING, false); return new VariantCallContext(vc, QualityUtils.phredScaleLog10CorrectRate(log10POfRef) >= configuration.STANDARD_CONFIDENCE_FOR_CALLING, false);
} }
protected final double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Name model ) { protected final double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
switch (model) { switch (model) {
case SNP: case SNP:
case GENERALPLOIDYSNP: case GENERALPLOIDYSNP:
@ -488,16 +494,30 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
* *
* @param depth the depth of the sample * @param depth the depth of the sample
* @param theta the heterozygosity of this species (between 0 and 1) * @param theta the heterozygosity of this species (between 0 and 1)
*
* @throws IllegalArgumentException if {@code depth} is less than 0 or {@code theta} is not in the (0,1) range.
*
* @return a valid log10 probability of the sample being hom-ref * @return a valid log10 probability of the sample being hom-ref
*/ */
@Requires({"depth >= 0", "theta >= 0.0 && theta <= 1.0"})
@Ensures("MathUtils.goodLog10Probability(result)") @Ensures("MathUtils.goodLog10Probability(result)")
protected final double estimateLog10ReferenceConfidenceForOneSample(final int depth, final double theta) { protected final double estimateLog10ReferenceConfidenceForOneSample(final int depth, final double theta) {
if (theta <= 0 || theta >= 1) throw new IllegalArgumentException("theta must be greater than 0 and less than 1");
final double log10PofNonRef = Math.log10(theta / 2.0) + getRefBinomialProbLog10(depth); final double log10PofNonRef = Math.log10(theta / 2.0) + getRefBinomialProbLog10(depth);
return MathUtils.log10OneMinusX(Math.pow(10.0, log10PofNonRef)); return MathUtils.log10OneMinusX(Math.pow(10.0, log10PofNonRef));
} }
/**
* Calculates the hom-reference binomial log10 probability given the depth.
*
* @param depth the query depth.
*
* @throws IllegalArgumentException if {@code depth} is less than 0.
*
* @return a valid log10 probability between 0 and {@link Double#NEGATIVE_INFINITY}.
*/
protected final double getRefBinomialProbLog10(final int depth) { protected final double getRefBinomialProbLog10(final int depth) {
if (depth < 0)
throw new IllegalArgumentException("depth cannot be less than 0");
return MathUtils.log10BinomialProbability(depth, 0); return MathUtils.log10BinomialProbability(depth, 0);
} }
@ -601,37 +621,39 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected Map<String,Object> composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc, protected Map<String,Object> composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc,
final AlignmentContext rawContext, final Map<String, AlignmentContext> stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List<Integer> alleleCountsofMLE, final boolean bestGuessIsRef, final AlignmentContext rawContext, final Map<String, AlignmentContext> stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List<Integer> alleleCountsofMLE, final boolean bestGuessIsRef,
final AFCalcResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes, final AFCalcResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes,
final GenotypeLikelihoodsCalculationModel.Name model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final GenotypeLikelihoodsCalculationModel.Model model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final HashMap<String, Object> attributes = new HashMap<>(); final HashMap<String, Object> attributes = new HashMap<>();
final boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null; final boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
// inherit attributes from input vc if requested
// inherit attributed from input vc if requested
if (inheritAttributesFromInputVC) if (inheritAttributesFromInputVC)
attributes.putAll(vc.getAttributes()); attributes.putAll(vc.getAttributes());
// if the site was downsampled, record that fact // if the site was down-sampled, record that fact
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() ) if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true); attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
// add the MLE AC and AF annotations // add the MLE AC and AF annotations
if ( alleleCountsofMLE.size() > 0 ) { if ( alleleCountsofMLE.size() > 0 ) {
attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE); attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
int AN = 0; final ArrayList<Double> MLEfrequencies = calculateMLEAlleleFrequencies(alleleCountsofMLE, genotypes);
for (final Genotype g : genotypes) {
for (final Allele a : g.getAlleles())
if (!a.isNoCall()) AN++;
}
final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
// the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
for ( int AC : alleleCountsofMLE )
MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN));
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies); attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
} }
return attributes; return attributes;
} }
private ArrayList<Double> calculateMLEAlleleFrequencies(List<Integer> alleleCountsofMLE, GenotypesContext genotypes) {
int AN = 0;
for (final Genotype g : genotypes)
for (final Allele a : g.getAlleles())
if (!a.isNoCall()) AN++;
final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
// the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
for (final int AC : alleleCountsofMLE )
MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN));
return MLEfrequencies;
}
} }

View File

@ -54,7 +54,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
* *
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt; * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/ */
public enum GenotypingMode { public enum GenotypingOutputMode {
/** /**
* The genotyper will choose the most likely alternate allele * The genotyper will choose the most likely alternate allele

View File

@ -215,7 +215,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
final boolean ignoreSNPAllelesWhenGenotypingIndels) { final boolean ignoreSNPAllelesWhenGenotypingIndels) {
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
if (UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES) { if (UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) {
VariantContext vc = null; VariantContext vc = null;
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) { for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
if (vc_input != null && if (vc_input != null &&

View File

@ -77,7 +77,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger); super(UAC, logger);
useAlleleFromVCF = UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES; useAlleleFromVCF = UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap(); perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
} }

View File

@ -54,7 +54,7 @@ import org.broadinstitute.variant.variantcontext.VariantContext;
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection { public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false) @Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
public GenotypeLikelihoodsCalculationModel.Name GLmodel = GenotypeLikelihoodsCalculationModel.Name.SNP; public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
/** /**
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily * The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
@ -187,14 +187,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
@Argument(shortName = "min_call_power", fullName = "min_power_threshold_for_calling", doc="The minimum confidence in the error model to make a call. Number should be between 0 (no power requirement) and 1 (maximum power required).", required = false) @Argument(shortName = "min_call_power", fullName = "min_power_threshold_for_calling", doc="The minimum confidence in the error model to make a call. Number should be between 0 (no power requirement) and 1 (maximum power required).", required = false)
double minPower = 0.95; double minPower = 0.95;
@Hidden
@Argument(shortName = "min_depth", fullName = "min_reference_depth", doc="The minimum depth required in the reference sample in order to make a call.", required = false)
int minReferenceDepth = 100;
@Hidden
@Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false)
boolean EXCLUDE_FILTERED_REFERENCE_SITES = false;
/** /**
* Create a new UAC with defaults for all UAC arguments * Create a new UAC with defaults for all UAC arguments
*/ */

View File

@ -274,8 +274,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// warn the user for misusing EMIT_ALL_SITES // warn the user for misusing EMIT_ALL_SITES
if ( UAC.outputMode == OutputMode.EMIT_ALL_SITES && if ( UAC.outputMode == OutputMode.EMIT_ALL_SITES &&
UAC.genotypingMode == GenotypingMode.DISCOVERY && UAC.genotypingOutputMode == GenotypingOutputMode.DISCOVERY &&
UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Name.SNP ) UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP )
logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode"); logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode");
// initialize the verbose writer // initialize the verbose writer

View File

@ -58,10 +58,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.gga.GenotypingGivenAllelesUtils; import org.broadinstitute.sting.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.GenotypesContext; import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
@ -86,7 +86,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
// the model used for calculating genotypes // the model used for calculating genotypes
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm; private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm;
private final List<GenotypeLikelihoodsCalculationModel.Name> modelsToUse = new ArrayList<>(2); private final List<GenotypeLikelihoodsCalculationModel.Model> modelsToUse = new ArrayList<>(2);
// the various loggers and writers // the various loggers and writers
@ -129,23 +129,25 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration, public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration,
final VariantAnnotatorEngine annotationEngine, final VariantAnnotatorEngine annotationEngine,
final Set<String> sampleNames, final PrintStream verboseWriter) { final Set<String> sampleNames, final PrintStream verboseWriter) {
super(toolkit,configuration,annotationEngine,sampleNames); super(toolkit,configuration,annotationEngine,sampleNames);
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser(); genomeLocParser = toolkit.getGenomeLocParser();
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
this.verboseWriter = verboseWriter; this.verboseWriter = verboseWriter;
determineGLModelsToUse(); determineGLModelsToUse();
// do argument checking initializeGenotypeLikelihoodsCalculationModels();
if (configuration.annotateAllSitesWithPLs) { }
if (!modelsToUse.contains(GenotypeLikelihoodsCalculationModel.Name.SNP))
throw new IllegalArgumentException("Invalid genotype likelihood model specification: " + /**
"only diploid SNP model can be used in conjunction with option allSitePLs"); * Initialize {@link #glcm}.
} */
private void initializeGenotypeLikelihoodsCalculationModels() {
glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>() {
glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>() {
@Override @Override
protected Map<String,GenotypeLikelihoodsCalculationModel> initialValue() { protected Map<String,GenotypeLikelihoodsCalculationModel> initialValue() {
return getGenotypeLikelihoodsCalculationObject(logger,UnifiedGenotypingEngine.this.configuration); return getGenotypeLikelihoodsCalculationObject(logger,UnifiedGenotypingEngine.this.configuration);
@ -166,20 +168,23 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final AlignmentContext rawContext) { final AlignmentContext rawContext) {
final List<VariantCallContext> results = new ArrayList<>(2); final List<VariantCallContext> results = new ArrayList<>(2);
final List<GenotypeLikelihoodsCalculationModel.Name> models = getGLModelsToUse(tracker, rawContext); final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, rawContext);
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<>(); final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<>();
if ( models.isEmpty() ) { final VariantCallContext defaultResult = configuration.outputMode == OutputMode.EMIT_ALL_SITES
results.add(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ? emptyCallContext(tracker, refContext, rawContext) : null); && configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
} ? emptyCallContext(tracker, refContext, rawContext)
: null;
if ( models.isEmpty() )
results.add(defaultResult);
else { else {
for ( final GenotypeLikelihoodsCalculationModel.Name model : models ) { for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
perReadAlleleLikelihoodMap.clear(); perReadAlleleLikelihoodMap.clear();
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model); final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model);
if ( stratifiedContexts == null ) { if ( stratifiedContexts == null )
results.add(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ? emptyCallContext(tracker, refContext, rawContext) : null); results.add(defaultResult);
}
else { else {
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap); final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
if ( vc != null ) if ( vc != null )
@ -195,6 +200,28 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
return results; return results;
} }
private static Map<String, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
final Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<>();
final List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
for (final Class<? extends GenotypeLikelihoodsCalculationModel> glmClass : glmClasses) {
final String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
try {
final Object args[] = new Object[]{UAC,logger};
final Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
}
catch (Exception e) {
throw new UserException("The likelihoods model provided for the -glm argument (" + UAC.GLmodel + ") is not a valid option: " + e.getMessage());
}
}
return glcm;
}
/** /**
* Compute GLs at a given locus. Entry point for engine calls from UGCalcLikelihoods. * Compute GLs at a given locus. Entry point for engine calls from UGCalcLikelihoods.
* *
@ -208,12 +235,12 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final ReferenceContext refContext, final ReferenceContext refContext,
final AlignmentContext rawContext, final AlignmentContext rawContext,
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final List<GenotypeLikelihoodsCalculationModel.Name> models = getGLModelsToUse(tracker, rawContext); final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, rawContext);
if ( models.isEmpty() ) { if ( models.isEmpty() ) {
return null; return null;
} }
for ( final GenotypeLikelihoodsCalculationModel.Name model : models ) { for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model); final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model);
// return the first valid one we encounter // return the first valid one we encounter
if ( stratifiedContexts != null ) if ( stratifiedContexts != null )
@ -237,13 +264,13 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final ReferenceContext refContext, final ReferenceContext refContext,
final AlignmentContext rawContext, final AlignmentContext rawContext,
final VariantContext vc) { final VariantContext vc) {
final List<GenotypeLikelihoodsCalculationModel.Name> models = getGLModelsToUse(tracker, rawContext); final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, rawContext);
if ( models.isEmpty() ) { if ( models.isEmpty() ) {
return null; return null;
} }
// return the first one // return the first one
final GenotypeLikelihoodsCalculationModel.Name model = models.get(0); final GenotypeLikelihoodsCalculationModel.Model model = models.get(0);
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model); final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model);
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, null); return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, null);
} }
@ -255,7 +282,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
* @return the VariantCallContext object * @return the VariantCallContext object
*/ */
public VariantCallContext calculateGenotypes(VariantContext vc) { public VariantCallContext calculateGenotypes(VariantContext vc) {
return calculateGenotypes(null, null, null, null, vc, GenotypeLikelihoodsCalculationModel.Name.valueOf("SNP"), null); return calculateGenotypes(null, null, null, null, vc, GenotypeLikelihoodsCalculationModel.Model.valueOf("SNP"), null);
} }
@ -272,14 +299,14 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final AlignmentContextUtils.ReadOrientation type, final AlignmentContextUtils.ReadOrientation type,
final List<Allele> alternateAllelesToUse, final List<Allele> alternateAllelesToUse,
final boolean useBAQedPileup, final boolean useBAQedPileup,
final GenotypeLikelihoodsCalculationModel.Name model, final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap); return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
} }
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model) { public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
return calculateGenotypes(null, null, null, null, vc, model, null); return calculateGenotypes(null, null, null, null, vc, model, null);
} }
@ -288,20 +315,20 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final AlignmentContext rawContext, final AlignmentContext rawContext,
final Map<String, AlignmentContext> stratifiedContexts, final Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final VariantContext vc,
final GenotypeLikelihoodsCalculationModel.Name model, final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false, perReadAlleleLikelihoodMap); return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false, perReadAlleleLikelihoodMap);
} }
@Override @Override
protected boolean forceKeepAllele(final Allele allele) { protected boolean forceKeepAllele(final Allele allele) {
return configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs; return configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs;
} }
@Override @Override
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext, public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext,
final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts, final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts,
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model, final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
final boolean inheritAttributesFromInputVC, final boolean inheritAttributesFromInputVC,
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null; boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
@ -326,7 +353,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
protected Map<String,Object> composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc, protected Map<String,Object> composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc,
final AlignmentContext rawContext, final Map<String, AlignmentContext> stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List<Integer> alleleCountsofMLE, final boolean bestGuessIsRef, final AlignmentContext rawContext, final Map<String, AlignmentContext> stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List<Integer> alleleCountsofMLE, final boolean bestGuessIsRef,
final AFCalcResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes, final AFCalcResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes,
final GenotypeLikelihoodsCalculationModel.Name model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) { final GenotypeLikelihoodsCalculationModel.Model model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final Map<String,Object> result = super.composeCallAttributes(inheritAttributesFromInputVC, vc,rawContext,stratifiedContexts,tracker,refContext,alleleCountsofMLE,bestGuessIsRef, final Map<String,Object> result = super.composeCallAttributes(inheritAttributesFromInputVC, vc,rawContext,stratifiedContexts,tracker,refContext,alleleCountsofMLE,bestGuessIsRef,
AFresult,allAllelesToUse,genotypes,model,perReadAlleleLikelihoodMap); AFresult,allAllelesToUse,genotypes,model,perReadAlleleLikelihoodMap);
@ -336,45 +363,62 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
result.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size()); result.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());
if ( configuration.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) { if ( configuration.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
final double strandScore = calculateSLOD(stratifiedContexts, tracker, refContext, AFresult, allAllelesToUse, model, perReadAlleleLikelihoodMap);
// the overall lod
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
final double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod
final VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
final AFCalcResult forwardAFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0();
final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
final VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
final AFCalcResult reverseAFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0();
final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
final double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
final double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
//if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
// strand score is max bias between forward and reverse strands
double strandScore = Math.max(forwardLod, reverseLod);
// rescale by a factor of 10
strandScore *= 10.0;
//logger.debug(String.format("SLOD=%f", strandScore));
if ( !Double.isNaN(strandScore) ) if ( !Double.isNaN(strandScore) )
result.put("SB", strandScore); result.put("SB", strandScore);
} }
return result; return result;
} }
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Name model) { private double calculateSLOD(final Map<String, AlignmentContext> stratifiedContexts,
final RefMetaDataTracker tracker,
final ReferenceContext refContext, final AFCalcResult AFresult,
final List<Allele> allAllelesToUse,
final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
// the overall lod
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
final double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
// the forward lod
final AFCalcResult forwardAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.FORWARD,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap);
final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0();
final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
// the reverse lod
final AFCalcResult reverseAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.REVERSE,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap);
final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0();
final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0();
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
final double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
final double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
//if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
// strand score is max bias between forward and reverse strands
double strandScore = Math.max(forwardLod, reverseLod);
// rescale by a factor of 10
strandScore *= 10.0;
//logger.debug(String.format("SLOD=%f", strandScore));
return strandScore;
}
private AFCalcResult getDirectionalAfCalcResult(final AlignmentContextUtils.ReadOrientation orientation,
final Map<String, AlignmentContext> stratifiedContexts,
final RefMetaDataTracker tracker,
final ReferenceContext refContext, List<Allele> allAllelesToUse,
final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts,orientation,
allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
return afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
}
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext,
final AlignmentContext rawContext,
final GenotypeLikelihoodsCalculationModel.Model model) {
if ( !BaseUtils.isRegularBase(refContext.getBase()) ) if ( !BaseUtils.isRegularBase(refContext.getBase()) )
return null; return null;
@ -394,7 +438,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
case SNP: case SNP:
case GENERALPLOIDYSNP: case GENERALPLOIDYSNP:
if ( !(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingMode != GenotypingMode.GENOTYPE_GIVEN_ALLELES) ) { if ( !(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingOutputMode != GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) ) {
int numDeletions = 0; int numDeletions = 0;
for ( final PileupElement p : rawContext.getBasePileup() ) { for ( final PileupElement p : rawContext.getBasePileup() ) {
if ( p.isDeletion() ) if ( p.isDeletion() )
@ -411,7 +455,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
} }
} }
protected void printVerboseData(final String pos, final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model) { protected void printVerboseData(final String pos, final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
Allele refAllele = null, altAllele = null; Allele refAllele = null, altAllele = null;
for ( Allele allele : vc.getAlleles() ) { for ( Allele allele : vc.getAlleles() ) {
if ( allele.isReference() ) if ( allele.isReference() )
@ -441,26 +485,69 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
verboseWriter.println(); verboseWriter.println();
} }
/**
* Determine the models to be use for genotype likelihood calculation.
*
* <p>
* Whether to use the general ones or the concrete diploid ones need to depend on what the user has said
* in its parameter glm. Iff he selected GENERALPLOIDYINDEL or GENERALPLOIDYSNP is the general set, otherwise
* </p>
*
* the standard set (SNP and INDEL).
* <p>
* Even if the user did not select to use both models, GGA force the inclusion of both: snp and indels.
* </p>
* <p>
* Also, we must fail
* </p>
*
* The models are added to the field {@link #modelsToUse}.
*/
private void determineGLModelsToUse() { private void determineGLModelsToUse() {
String modelPrefix = "";
if ( !configuration.GLmodel.name().contains(GPSTRING) && configuration.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY )
modelPrefix = GPSTRING;
// GGA mode => must initialize both the SNP and indel models modelsToUse.clear();
if ( configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ||
configuration.GLmodel.name().toUpperCase().contains("BOTH") ) { boolean useGeneral = false;
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + "SNP")); boolean useSNP = false;
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + "INDEL")); boolean useINDEL = false;
}
else { switch (configuration.GLmodel) {
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + configuration.GLmodel.name().toUpperCase())); case BOTH:
useSNP = useINDEL = true; break;
case SNP:
useSNP = true; break;
case INDEL:
useINDEL = true; break;
case GENERALPLOIDYINDEL:
useINDEL = useGeneral = true; break;
case GENERALPLOIDYSNP:
useSNP = useGeneral = true; break;
default: //Paranoia
throw new IllegalStateException("unexpected genotype likelihood model " + configuration.GLmodel);
} }
// Force the use of both models in GGA:
if (configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES)
useSNP = useINDEL = true;
// If annotateAllSitesWithPLs requested , SNP model must be used.
if (!useSNP && configuration.annotateAllSitesWithPLs)
throw new UserException.BadArgumentValue("glm","Invalid genotype likelihood model specification: " +
"only diploid SNP model can be used in conjunction with option allSitePLs");
// Finally add the relevant model
if (useSNP)
modelsToUse.add(useGeneral ? GenotypeLikelihoodsCalculationModel.Model.GENERALPLOIDYSNP :
GenotypeLikelihoodsCalculationModel.Model.SNP);
if (useINDEL)
modelsToUse.add(useGeneral ? GenotypeLikelihoodsCalculationModel.Model.GENERALPLOIDYINDEL :
GenotypeLikelihoodsCalculationModel.Model.INDEL);
} }
// decide whether we are currently processing SNPs, indels, neither, or both // decide whether we are currently processing SNPs, indels, neither, or both
private List<GenotypeLikelihoodsCalculationModel.Name> getGLModelsToUse(final RefMetaDataTracker tracker, private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final RefMetaDataTracker tracker,
final AlignmentContext rawContext) { final AlignmentContext rawContext) {
if ( configuration.genotypingMode != GenotypingMode.GENOTYPE_GIVEN_ALLELES ) if ( configuration.genotypingOutputMode != GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES )
return modelsToUse; return modelsToUse;
if ( modelsToUse.size() != 2 ) if ( modelsToUse.size() != 2 )
@ -480,24 +567,5 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
} }
} }
private static Map<String, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
final Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<>();
final List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
for (final Class<? extends GenotypeLikelihoodsCalculationModel> glmClass : glmClasses) {
final String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
try {
final Object args[] = new Object[]{UAC,logger};
final Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
}
catch (Exception e) {
throw new UserException("The likelihoods model provided for the -glm argument (" + UAC.GLmodel + ") is not a valid option: " + e.getMessage());
}
}
return glcm;
}
} }

View File

@ -130,6 +130,7 @@ import java.util.*;
* <h3>Caveats</h3> * <h3>Caveats</h3>
* <ul> * <ul>
* <li>The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.</li> * <li>The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.</li>
* <li>Currently the -ploidy parameter only support the default 2 (diploid). Eventually one will be able to change its value for haploid and polyploid analyses.</li>
* </ul> * </ul>
* *
* @author rpoplin * @author rpoplin
@ -532,7 +533,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if ( emitReferenceConfidence() ) { if ( emitReferenceConfidence() ) {
if (SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES) if (SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES)
throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and Genotyping Giving Alleles at the same time"); throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and Genotyping Giving Alleles at the same time");
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0; SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
@ -562,7 +563,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class); final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY; simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY;
simpleUAC.genotypingMode = GenotypingMode.DISCOVERY; simpleUAC.genotypingOutputMode = GenotypingOutputMode.DISCOVERY;
simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, SCAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, SCAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, SCAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, SCAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
simpleUAC.CONTAMINATION_FRACTION = 0.0; simpleUAC.CONTAMINATION_FRACTION = 0.0;
@ -574,7 +575,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
if( SCAC.CONTAMINATION_FRACTION_FILE != null ) if( SCAC.CONTAMINATION_FRACTION_FILE != null )
SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, samplesSet, logger)); SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, samplesSet, logger));
if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES && consensusMode ) if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other."); throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
// initialize the output VCF header // initialize the output VCF header
@ -653,7 +654,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
} }
trimmer.initialize(getToolkit().getGenomeLocParser(), SCAC.DEBUG, trimmer.initialize(getToolkit().getGenomeLocParser(), SCAC.DEBUG,
SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence()); SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence());
} }
private void initializeReferenceConfidenceModel(final Set<String> samples, final Set<VCFHeaderLine> headerInfo) { private void initializeReferenceConfidenceModel(final Set<String> samples, final Set<VCFHeaderLine> headerInfo) {
@ -723,7 +724,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"}) @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
public ActivityProfileState isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { public ActivityProfileState isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) { if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vcFromAllelesRod = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, ref.getLocus(), false, logger, SCAC.alleles); final VariantContext vcFromAllelesRod = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, ref.getLocus(), false, logger, SCAC.alleles);
if( vcFromAllelesRod != null ) { if( vcFromAllelesRod != null ) {
return new ActivityProfileState(ref.getLocus(), 1.0); return new ActivityProfileState(ref.getLocus(), 1.0);
@ -748,7 +749,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
} }
final List<Allele> alleles = Arrays.asList(FAKE_REF_ALLELE , FAKE_ALT_ALLELE); final List<Allele> alleles = Arrays.asList(FAKE_REF_ALLELE , FAKE_ALT_ALLELE);
final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Name.SNP); final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.SNP);
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
return new ActivityProfileState( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileState.Type.NONE, averageHQSoftClips.mean() ); return new ActivityProfileState( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileState.Type.NONE, averageHQSoftClips.mean() );
@ -772,7 +773,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
return referenceModelForNoVariation(originalActiveRegion, true); return referenceModelForNoVariation(originalActiveRegion, true);
final List<VariantContext> givenAlleles = new ArrayList<>(); final List<VariantContext> givenAlleles = new ArrayList<>();
if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) { if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
for ( final VariantContext vc : metaDataTracker.getValues(SCAC.alleles) ) { for ( final VariantContext vc : metaDataTracker.getValues(SCAC.alleles) ) {
if ( vc.isNotFiltered() ) { if ( vc.isNotFiltered() ) {
givenAlleles.add(vc); // do something with these VCs during GGA mode givenAlleles.add(vc); // do something with these VCs during GGA mode

View File

@ -54,7 +54,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode;
import org.broadinstitute.sting.gatk.walkers.genotyper.OutputMode; import org.broadinstitute.sting.gatk.walkers.genotyper.OutputMode;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
@ -96,13 +96,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
@Override @Override
protected boolean forceKeepAllele(final Allele allele) { protected boolean forceKeepAllele(final Allele allele) {
return allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE || return allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE ||
configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ||
configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE; configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE;
} }
@Override @Override
protected boolean forceSiteEmission() { protected boolean forceSiteEmission() {
return configuration.outputMode == OutputMode.EMIT_ALL_SITES || configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES; return configuration.outputMode == OutputMode.EMIT_ALL_SITES || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
} }
/** /**
@ -208,8 +208,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if( mergedVC == null ) { continue; } if( mergedVC == null ) { continue; }
final GenotypeLikelihoodsCalculationModel.Name calculationModel = mergedVC.isSNP() final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
? GenotypeLikelihoodsCalculationModel.Name.SNP : GenotypeLikelihoodsCalculationModel.Name.INDEL; ? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
if (emitReferenceConfidence) { if (emitReferenceConfidence) {
final List<Allele> alleleList = new ArrayList<>(); final List<Allele> alleleList = new ArrayList<>();

View File

@ -339,7 +339,7 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
// TODO -- if we change this tool to actually validate against the called allele, then this if statement is needed; // TODO -- if we change this tool to actually validate against the called allele, then this if statement is needed;
// TODO -- for now, though, we need to be able to validate the right allele (because we only test isVariant below) [EB] // TODO -- for now, though, we need to be able to validate the right allele (because we only test isVariant below) [EB]
//if (!bamIsTruth) //if (!bamIsTruth)
uac.genotypingMode = GenotypingMode.GENOTYPE_GIVEN_ALLELES; uac.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq; if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
if (deletions >= 0) if (deletions >= 0)
@ -349,12 +349,12 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
if (emitConf >= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf; if (emitConf >= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf; if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Name.SNP; uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
snpEngine = new UnifiedGenotypingEngine(getToolkit(), uac); snpEngine = new UnifiedGenotypingEngine(getToolkit(), uac);
// Adding the INDEL calling arguments for UG // Adding the INDEL calling arguments for UG
UnifiedArgumentCollection uac_indel = uac.clone(); UnifiedArgumentCollection uac_indel = uac.clone();
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Name.INDEL; uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotypingEngine(getToolkit(), uac_indel); indelEngine = new UnifiedGenotypingEngine(getToolkit(), uac_indel);
// make sure we have callConf set to the threshold set by the UAC so we can use it later. // make sure we have callConf set to the threshold set by the UAC so we can use it later.

View File

@ -59,7 +59,7 @@ import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.sting.utils.variant.HomoSapiens; import org.broadinstitute.sting.utils.variant.HomoSapiensConstants;
import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.vcf.*; import org.broadinstitute.variant.vcf.*;
@ -170,7 +170,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
* *
*/ */
@Argument(fullName="globalPrior",shortName="G",doc="The global Dirichlet prior parameters for the allele frequency",required=false) @Argument(fullName="globalPrior",shortName="G",doc="The global Dirichlet prior parameters for the allele frequency",required=false)
public double globalPrior = HomoSapiens.SNP_HETEROZYGOSITY; public double globalPrior = HomoSapiensConstants.SNP_HETEROZYGOSITY;
/** /**
* When a variant is not seen in a panel, whether to infer (and with what effective strength) that only reference * When a variant is not seen in a panel, whether to infer (and with what effective strength) that only reference

View File

@ -115,9 +115,9 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
public void initialize() { public void initialize() {
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Name.BOTH; UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
UAC.outputMode = OutputMode.EMIT_ALL_SITES; UAC.outputMode = OutputMode.EMIT_ALL_SITES;
UAC.genotypingMode = GenotypingMode.GENOTYPE_GIVEN_ALLELES; UAC.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
String trackName = variantCollection.variants.getName(); String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator; import java.util.Comparator;
import java.util.Random;
/** /**
* BaseUtils contains some basic utilities for manipulating nucleotides. * BaseUtils contains some basic utilities for manipulating nucleotides.
@ -553,6 +554,62 @@ public class BaseUtils {
return getRandomBaseIndex(-1); return getRandomBaseIndex(-1);
} }
/**
* Return random bases.
*
* @param length base count and length of returned array.
*
* @throws IllegalArgumentException if {@code length} is less than 0.
*
* @return never {@code null}
*/
@SuppressWarnings("unused")
public static byte[] getRandomBases(final int length) {
if (length < 0)
throw new IllegalArgumentException("length must zero or greater");
final byte[] result = new byte[length];
fillWithRandomBases(result);
return result;
}
/**
* Fills an array with random bases.
*
* @param dest the array to fill.
*
* @throws IllegalArgumentException if {@code result} is {@code null}.
*/
public static void fillWithRandomBases(final byte[] dest) {
fillWithRandomBases(dest,0,dest.length);
}
/**
* Fill an array section with random bases.
*
* @param dest array to fill.
* @param fromIndex first index to be filled (inclusive).
* @param toIndex index after last to be filled (exclusive).
*
* @throws IllegalArgumentException if {@code dest} is {@code null},
* {@code fromIndex} or {@code toIndex} is negative,
* {@code fromIndex} or {@code toIndex} are greater than {@code dest} length,
* or {@code fromIndex} greater than {@code toIndex}.
*/
public static void fillWithRandomBases(final byte[] dest, final int fromIndex, final int toIndex) {
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
if (dest == null)
throw new IllegalArgumentException("the dest array cannot be null");
if (fromIndex > toIndex)
throw new IllegalArgumentException("fromIndex cannot be larger than toIndex");
if (fromIndex < 0)
throw new IllegalArgumentException("both indexes must be positive");
if (toIndex > dest.length)
throw new IllegalArgumentException("both indexes must be less or equal to the destination array length");
for (int i = fromIndex; i < toIndex; i++)
dest[i] = baseIndexToSimpleBase(rnd.nextInt(4));
}
/** /**
* Return a random base index, excluding some base index. * Return a random base index, excluding some base index.
* *

View File

@ -44,7 +44,7 @@ public class GATKVariantContextUtils {
private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class); private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class);
public static final int DEFAULT_PLOIDY = HomoSapiens.DEFAULT_PLOIDY; public static final int DEFAULT_PLOIDY = HomoSapiensConstants.DEFAULT_PLOIDY;
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.

View File

@ -28,11 +28,11 @@ package org.broadinstitute.sting.utils.variant;
/** /**
* <i>Homo sapiens</i> genome constants. * <i>Homo sapiens</i> genome constants.
* *
* <p>NOTE: reference to this constants is a indication that your code is (human) species assumption dependant.</p> * <p>NOTE: reference to these constants is an indication that your code is (human) species assumption dependant.</p>
* *
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt; * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/ */
public class HomoSapiens { public class HomoSapiensConstants {
/** /**
* Standard heterozygous rate for SNP variation. * Standard heterozygous rate for SNP variation.

View File

@ -52,9 +52,6 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
private GenomeLocParser genomeLocParser; private GenomeLocParser genomeLocParser;
@BeforeSuite @BeforeSuite
public void setup() throws IOException { public void setup() throws IOException {
// alleles // alleles
@ -1765,28 +1762,29 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
Assert.assertEquals(GATKVariantContextUtils.overlapsRegion(vc, genomeLoc), expected); Assert.assertEquals(GATKVariantContextUtils.overlapsRegion(vc, genomeLoc), expected);
} }
private final String[] OVERLAP_WITH_CHROMOSOMES = { "chr1", "chr20" };
private final int[] OVERLAP_WITH_EVENT_SIZES = { -10, -1, 0, 1, 10 }; // 0 == SNP , -X xbp deletion, +X xbp insertion.
private final int[] OVERLAP_WITH_EVENT_STARTS = { 10000000, 10000001,
10000005, 10000010,
10000009, 10000011,
20000000 };
@DataProvider(name="overlapWithData") @DataProvider(name="overlapWithData")
public Object[][] overlapWithData() { public Object[][] overlapWithData() {
final String[] chrs = new String[] { "chr1", "chr20" };
final int[] eventSizes = new int[] { -10 , -1 , 0 , 1 , 10 }; // 0 == SNP.
final int[] eventStarts = new int[] { 10000000, 10000001, 10000005, 10000010, 10000009, 10000011, 20000000};
final int totalLocations = chrs.length * eventSizes.length * eventStarts.length + 1; final int totalLocations = OVERLAP_WITH_CHROMOSOMES.length * OVERLAP_WITH_EVENT_SIZES.length * OVERLAP_WITH_EVENT_STARTS.length + 1;
final int totalEvents = chrs.length * eventSizes.length * eventStarts.length; final int totalEvents = OVERLAP_WITH_CHROMOSOMES.length * OVERLAP_WITH_EVENT_SIZES.length * OVERLAP_WITH_EVENT_STARTS.length;
final GenomeLoc[] locs = new GenomeLoc[totalLocations]; final GenomeLoc[] locs = new GenomeLoc[totalLocations];
final VariantContext[] events = new VariantContext[totalEvents]; final VariantContext[] events = new VariantContext[totalEvents];
int nextIndex = 0; generateAllLocationsAndVariantContextCombinations(OVERLAP_WITH_CHROMOSOMES, OVERLAP_WITH_EVENT_SIZES,
for (final String chr : chrs ) OVERLAP_WITH_EVENT_STARTS, locs, events);
for (final int size : eventSizes )
for (final int starts : eventStarts ) {
locs[nextIndex] = genomeLocParser.createGenomeLoc(chr,starts,starts + Math.max(0,size));
events[nextIndex++] = new VariantContextBuilder().source("test").loc(chr,starts,starts + Math.max(0,size)).alleles(Arrays.asList(
Allele.create(randomBases(size <= 0 ? 1 : size + 1,true),true),Allele.create(randomBases(size < 0 ? - size + 1 : 1,false),false))).make();
}
locs[nextIndex++] = GenomeLoc.UNMAPPED; return generateAllParameterCombinationsForOverlapWithData(locs, events);
}
private Object[][] generateAllParameterCombinationsForOverlapWithData(GenomeLoc[] locs, VariantContext[] events) {
final List<Object[]> result = new LinkedList<>(); final List<Object[]> result = new LinkedList<>();
for (final GenomeLoc loc : locs) for (final GenomeLoc loc : locs)
for (final VariantContext event : events) for (final VariantContext event : events)
@ -1795,17 +1793,25 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
return result.toArray(new Object[result.size()][]); return result.toArray(new Object[result.size()][]);
} }
private void generateAllLocationsAndVariantContextCombinations(final String[] chrs, final int[] eventSizes,
final int[] eventStarts, final GenomeLoc[] locs,
final VariantContext[] events) {
int nextIndex = 0;
for (final String chr : chrs )
for (final int size : eventSizes )
for (final int starts : eventStarts ) {
locs[nextIndex] = genomeLocParser.createGenomeLoc(chr,starts,starts + Math.max(0,size));
events[nextIndex++] = new VariantContextBuilder().source("test").loc(chr,starts,starts + Math.max(0,size)).alleles(Arrays.asList(
Allele.create(randomBases(size <= 0 ? 1 : size + 1, true), true), Allele.create(randomBases(size < 0 ? -size + 1 : 1, false), false))).make();
}
locs[nextIndex++] = GenomeLoc.UNMAPPED;
}
private byte[] randomBases(final int length, final boolean reference) { private byte[] randomBases(final int length, final boolean reference) {
final byte[] bases = new byte[length]; final byte[] bases = new byte[length];
bases[0] = (byte) (reference ? 'A' : 'C'); bases[0] = (byte) (reference ? 'A' : 'C');
for (int i = 1; i < bases.length; i++) BaseUtils.fillWithRandomBases(bases, 1, bases.length);
switch (GenomeAnalysisEngine.getRandomGenerator().nextInt(4)) {
case 0: bases[i] = 'A'; break;
case 1: bases[i] = 'C'; break;
case 2: bases[i] = 'G'; break;
default:
bases[i] = 'T';
}
return bases; return bases;
} }
} }