Addressed revisions
This commit is contained in:
parent
08203b516e
commit
7455ac9796
|
|
@ -47,10 +47,11 @@
|
|||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.OutputMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory;
|
||||
import org.broadinstitute.sting.utils.collections.DefaultHashMap;
|
||||
import org.broadinstitute.sting.utils.variant.HomoSapiens;
|
||||
import org.broadinstitute.sting.utils.variant.HomoSapiensConstants;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -98,16 +99,16 @@ public class StandardCallerArgumentCollection implements Cloneable {
|
|||
* which determines how many chromosomes each individual in the species carries.
|
||||
*/
|
||||
@Argument(fullName = "heterozygosity", shortName = "hets", doc = "Heterozygosity value used to compute prior likelihoods for any locus. See the GATKDocs for full details on the meaning of this population genetics concept", required = false)
|
||||
public Double snpHeterozygosity = HomoSapiens.SNP_HETEROZYGOSITY;
|
||||
public Double snpHeterozygosity = HomoSapiensConstants.SNP_HETEROZYGOSITY;
|
||||
|
||||
/**
|
||||
* This argument informs the prior probability of having an indel at a site.
|
||||
*/
|
||||
@Argument(fullName = "indel_heterozygosity", shortName = "indelHeterozygosity", doc = "Heterozygosity for indel calling. See the GATKDocs for heterozygosity for full details on the meaning of this population genetics concept", required = false)
|
||||
public double indelHeterozygosity = HomoSapiens.INDEL_HETEROZYGOSITY;
|
||||
public double indelHeterozygosity = HomoSapiensConstants.INDEL_HETEROZYGOSITY;
|
||||
|
||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
||||
public org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode genotypingMode = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode.DISCOVERY;
|
||||
public GenotypingOutputMode genotypingOutputMode = GenotypingOutputMode.DISCOVERY;
|
||||
|
||||
/**
|
||||
* The minimum phred-scaled Qscore threshold to separate high confidence from low confidence calls. Only genotypes with
|
||||
|
|
@ -215,7 +216,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
|
|||
* Sample ploidy - equivalent to number of chromosomes per pool. In pooled experiments this should be = # of samples in pool * individual sample ploidy
|
||||
*/
|
||||
@Argument(shortName="ploidy", fullName="sample_ploidy", doc="Ploidy (number of chromosomes) per sample. For pooled data, set to (Number of samples in each pool * Sample Ploidy).", required=false)
|
||||
public int samplePloidy = HomoSapiens.DEFAULT_PLOIDY;
|
||||
public int samplePloidy = HomoSapiensConstants.DEFAULT_PLOIDY;
|
||||
|
||||
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
|
||||
public OutputMode outputMode = OutputMode.EMIT_VARIANTS_ONLY;
|
||||
|
|
@ -232,7 +233,6 @@ public class StandardCallerArgumentCollection implements Cloneable {
|
|||
@Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false)
|
||||
public boolean annotateAllSitesWithPLs = false;
|
||||
|
||||
|
||||
/**
|
||||
* Creates a Standard caller argument collection with default values.
|
||||
*/
|
||||
|
|
@ -257,12 +257,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
|
|||
if (!field.getDeclaringClass().isAssignableFrom(clazz))
|
||||
continue;
|
||||
final int fieldModifiers = field.getModifiers();
|
||||
if (Modifier.isPrivate((fieldModifiers)))
|
||||
continue;
|
||||
if (Modifier.isFinal(fieldModifiers))
|
||||
continue;
|
||||
if (Modifier.isStatic(fieldModifiers))
|
||||
continue;
|
||||
if ((fieldModifiers & UNCOPYABLE_MODIFIER_MASK) != 0) continue;
|
||||
field.set(result,field.get(this));
|
||||
}
|
||||
return result;
|
||||
|
|
@ -283,4 +278,10 @@ public class StandardCallerArgumentCollection implements Cloneable {
|
|||
throw new IllegalStateException("unreachable code");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds a modifiers mask that identifies those fields that cannot be copied between
|
||||
* StandardCallerArgumentCollections.
|
||||
*/
|
||||
private final int UNCOPYABLE_MODIFIER_MASK = Modifier.PRIVATE | Modifier.STATIC | Modifier.FINAL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ public class GeneralPloidySNPGenotypeLikelihoodsCalculationModel extends General
|
|||
final List<Allele> alleles = new ArrayList<Allele>();
|
||||
if ( allAllelesToUse != null ) {
|
||||
alleles.addAll(allAllelesToUse);
|
||||
} else if ( UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) {
|
||||
} else if ( UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
|
||||
final VariantContext vc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, ref.getLocus(), true, logger, UAC.alleles);
|
||||
|
||||
// ignore places where we don't have a SNP
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ public abstract class GenotypeLikelihoodsCalculationModel {
|
|||
public static final String DUMMY_LANE = "Lane1";
|
||||
public static final String DUMMY_SAMPLE_NAME = "DummySample1";
|
||||
|
||||
public enum Name {
|
||||
public enum Model {
|
||||
SNP,
|
||||
INDEL,
|
||||
GENERALPLOIDYSNP,
|
||||
|
|
|
|||
|
|
@ -141,6 +141,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
|
||||
/**
|
||||
* Changes the logger for this genotyper engine.
|
||||
*
|
||||
* @param logger new logger.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code logger} is {@code null}.
|
||||
|
|
@ -153,6 +154,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
|
||||
/**
|
||||
* Returns a reference to the engine configuration
|
||||
*
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public Config getConfiguration() {
|
||||
|
|
@ -160,19 +162,22 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
}
|
||||
|
||||
/**
|
||||
* Completes a variant context with genotype calls and associated annotations give the genotype likelihoods and
|
||||
* Completes a variant context with genotype calls and associated annotations given the genotype likelihoods and
|
||||
* the model that need to be applied.
|
||||
*
|
||||
* @param vc variant-context to complete.
|
||||
* @param modelName model name.
|
||||
* @param model model name.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code model} or {@code vc} is {@code null}.
|
||||
*
|
||||
* @return can be {@code null} indicating that genotyping it not possible with the information provided.
|
||||
*/
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GeneralPloidySNPGenotypeLikelihoodsCalculationModel.Name modelName) {
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
if (vc == null)
|
||||
throw new IllegalArgumentException("vc cannot be null");
|
||||
if (modelName == null)
|
||||
if (model == null)
|
||||
throw new IllegalArgumentException("the model cannot be null");
|
||||
return calculateGenotypes(null,null,null,null,vc,modelName,false,null);
|
||||
return calculateGenotypes(null,null,null,null,vc,model,false,null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -189,7 +194,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
*/
|
||||
protected VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model,
|
||||
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final boolean inheritAttributesFromInputVC,
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
|
|
@ -207,7 +212,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
|
||||
double log10Confidence =
|
||||
! outputAlternativeAlleles.siteIsMonomorphic ||
|
||||
configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs
|
||||
configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs
|
||||
? AFresult.getLog10PosteriorOfAFEq0() + 0.0
|
||||
: AFresult.getLog10PosteriorOfAFGT0() + 0.0;
|
||||
|
||||
|
|
@ -227,8 +232,9 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), loc.getContig(), loc.getStart(), loc.getStop(), outputAlleles);
|
||||
|
||||
// Seems that when log10PError is 0.0, you must pass -0.0 to get a nice output at the other end otherwise is a "-0".
|
||||
// Truth is that this should be fixed in the "variant" dependency code but perhaps it can be ammeneded also in the VariantContextWriter.
|
||||
//TODO report this bug or make sure our VCFWriters do not output '-0' QUAL column values.
|
||||
// Truth is that this should be fixed in the "variant" dependency code but perhaps it can be amended also in the VariantContextWriter.
|
||||
//TODO Please remove this comment when this has been fixed (PT https://www.pivotaltracker.com/story/show/69492530)
|
||||
//TODO and change the code below accordingly.
|
||||
builder.log10PError(log10Confidence == 0.0 ? -0.0 : log10Confidence);
|
||||
if ( ! passesCallThreshold(phredScaledConfidence) )
|
||||
builder.filter(LOW_QUAL_FILTER_NAME);
|
||||
|
|
@ -348,7 +354,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
*/
|
||||
protected final boolean confidentlyCalled(final double conf, final double PofF) {
|
||||
return conf >= configuration.STANDARD_CONFIDENCE_FOR_CALLING ||
|
||||
(configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES
|
||||
(configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
|
||||
&& QualityUtils.phredScaleErrorRate(PofF) >= configuration.STANDARD_CONFIDENCE_FOR_CALLING);
|
||||
}
|
||||
|
||||
|
|
@ -358,7 +364,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
*
|
||||
* @return a valid heterozygosity in (0,1).
|
||||
*/
|
||||
private double getModelTheta(final GenotypeLikelihoodsCalculationModel.Name model) {
|
||||
private double getModelTheta(final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
switch (model) {
|
||||
case SNP:
|
||||
case GENERALPLOIDYSNP:
|
||||
|
|
@ -402,7 +408,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
* @param ref the reference context.
|
||||
* @param rawContext the read alignment at that location.
|
||||
* @return it might be null if no enough information is provided to do even an empty call. For example when
|
||||
* we have limmited-context (i.e. any of the tracker, reference or alignment is {@code null}.
|
||||
* we have limited-context (i.e. any of the tracker, reference or alignment is {@code null}.
|
||||
*/
|
||||
protected final VariantCallContext emptyCallContext(final RefMetaDataTracker tracker, final ReferenceContext ref,
|
||||
final AlignmentContext rawContext) {
|
||||
|
|
@ -414,7 +420,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
|
||||
VariantContext vc;
|
||||
|
||||
if ( configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) {
|
||||
if ( configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
|
||||
final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker,
|
||||
rawContext.getLocation(), false, logger, configuration.alleles);
|
||||
if (ggaVc == null)
|
||||
|
|
@ -468,7 +474,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
return new VariantCallContext(vc, QualityUtils.phredScaleLog10CorrectRate(log10POfRef) >= configuration.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
||||
}
|
||||
|
||||
protected final double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Name model ) {
|
||||
protected final double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
switch (model) {
|
||||
case SNP:
|
||||
case GENERALPLOIDYSNP:
|
||||
|
|
@ -488,16 +494,30 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
*
|
||||
* @param depth the depth of the sample
|
||||
* @param theta the heterozygosity of this species (between 0 and 1)
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code depth} is less than 0 or {@code theta} is not in the (0,1) range.
|
||||
*
|
||||
* @return a valid log10 probability of the sample being hom-ref
|
||||
*/
|
||||
@Requires({"depth >= 0", "theta >= 0.0 && theta <= 1.0"})
|
||||
@Ensures("MathUtils.goodLog10Probability(result)")
|
||||
protected final double estimateLog10ReferenceConfidenceForOneSample(final int depth, final double theta) {
|
||||
if (theta <= 0 || theta >= 1) throw new IllegalArgumentException("theta must be greater than 0 and less than 1");
|
||||
final double log10PofNonRef = Math.log10(theta / 2.0) + getRefBinomialProbLog10(depth);
|
||||
return MathUtils.log10OneMinusX(Math.pow(10.0, log10PofNonRef));
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the hom-reference binomial log10 probability given the depth.
|
||||
*
|
||||
* @param depth the query depth.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code depth} is less than 0.
|
||||
*
|
||||
* @return a valid log10 probability between 0 and {@link Double#NEGATIVE_INFINITY}.
|
||||
*/
|
||||
protected final double getRefBinomialProbLog10(final int depth) {
|
||||
if (depth < 0)
|
||||
throw new IllegalArgumentException("depth cannot be less than 0");
|
||||
return MathUtils.log10BinomialProbability(depth, 0);
|
||||
}
|
||||
|
||||
|
|
@ -601,37 +621,39 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
protected Map<String,Object> composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc,
|
||||
final AlignmentContext rawContext, final Map<String, AlignmentContext> stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List<Integer> alleleCountsofMLE, final boolean bestGuessIsRef,
|
||||
final AFCalcResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes,
|
||||
final GenotypeLikelihoodsCalculationModel.Name model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final GenotypeLikelihoodsCalculationModel.Model model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final HashMap<String, Object> attributes = new HashMap<>();
|
||||
|
||||
|
||||
final boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||
|
||||
|
||||
// inherit attributed from input vc if requested
|
||||
// inherit attributes from input vc if requested
|
||||
if (inheritAttributesFromInputVC)
|
||||
attributes.putAll(vc.getAttributes());
|
||||
// if the site was downsampled, record that fact
|
||||
// if the site was down-sampled, record that fact
|
||||
if ( !limitedContext && rawContext.hasPileupBeenDownsampled() )
|
||||
attributes.put(VCFConstants.DOWNSAMPLED_KEY, true);
|
||||
|
||||
|
||||
// add the MLE AC and AF annotations
|
||||
if ( alleleCountsofMLE.size() > 0 ) {
|
||||
attributes.put(VCFConstants.MLE_ALLELE_COUNT_KEY, alleleCountsofMLE);
|
||||
int AN = 0;
|
||||
for (final Genotype g : genotypes) {
|
||||
for (final Allele a : g.getAlleles())
|
||||
if (!a.isNoCall()) AN++;
|
||||
}
|
||||
final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
|
||||
// the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
|
||||
for ( int AC : alleleCountsofMLE )
|
||||
MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN));
|
||||
final ArrayList<Double> MLEfrequencies = calculateMLEAlleleFrequencies(alleleCountsofMLE, genotypes);
|
||||
attributes.put(VCFConstants.MLE_ALLELE_FREQUENCY_KEY, MLEfrequencies);
|
||||
}
|
||||
|
||||
return attributes;
|
||||
}
|
||||
|
||||
private ArrayList<Double> calculateMLEAlleleFrequencies(List<Integer> alleleCountsofMLE, GenotypesContext genotypes) {
|
||||
int AN = 0;
|
||||
for (final Genotype g : genotypes)
|
||||
for (final Allele a : g.getAlleles())
|
||||
if (!a.isNoCall()) AN++;
|
||||
|
||||
final ArrayList<Double> MLEfrequencies = new ArrayList<Double>(alleleCountsofMLE.size());
|
||||
// the MLEAC is allowed to be larger than the AN (e.g. in the case of all PLs being 0, the GT is ./. but the exact model may arbitrarily choose an AC>1)
|
||||
for (final int AC : alleleCountsofMLE )
|
||||
MLEfrequencies.add(Math.min(1.0, (double)AC / (double)AN));
|
||||
return MLEfrequencies;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
*
|
||||
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
||||
*/
|
||||
public enum GenotypingMode {
|
||||
public enum GenotypingOutputMode {
|
||||
|
||||
/**
|
||||
* The genotyper will choose the most likely alternate allele
|
||||
|
|
@ -215,7 +215,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
|||
final boolean ignoreSNPAllelesWhenGenotypingIndels) {
|
||||
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
if (UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES) {
|
||||
if (UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) {
|
||||
VariantContext vc = null;
|
||||
for (final VariantContext vc_input : tracker.getValues(UAC.alleles, ref.getLocus())) {
|
||||
if (vc_input != null &&
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
|||
|
||||
protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||
super(UAC, logger);
|
||||
useAlleleFromVCF = UAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES;
|
||||
useAlleleFromVCF = UAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
|
||||
perReadAlleleLikelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ import org.broadinstitute.variant.variantcontext.VariantContext;
|
|||
public class UnifiedArgumentCollection extends StandardCallerArgumentCollection {
|
||||
|
||||
@Argument(fullName = "genotype_likelihoods_model", shortName = "glm", doc = "Genotype likelihoods calculation model to employ -- SNP is the default option, while INDEL is also available for calling indels and BOTH is available for calling both together", required = false)
|
||||
public GenotypeLikelihoodsCalculationModel.Name GLmodel = GenotypeLikelihoodsCalculationModel.Name.SNP;
|
||||
public GenotypeLikelihoodsCalculationModel.Model GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
|
||||
/**
|
||||
* The PCR error rate is independent of the sequencing error rate, which is necessary because we cannot necessarily
|
||||
|
|
@ -187,14 +187,6 @@ public class UnifiedArgumentCollection extends StandardCallerArgumentCollection
|
|||
@Argument(shortName = "min_call_power", fullName = "min_power_threshold_for_calling", doc="The minimum confidence in the error model to make a call. Number should be between 0 (no power requirement) and 1 (maximum power required).", required = false)
|
||||
double minPower = 0.95;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName = "min_depth", fullName = "min_reference_depth", doc="The minimum depth required in the reference sample in order to make a call.", required = false)
|
||||
int minReferenceDepth = 100;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName="ef", fullName="exclude_filtered_reference_sites", doc="Don't include in the analysis sites where the reference sample VCF is filtered. Default: false.", required=false)
|
||||
boolean EXCLUDE_FILTERED_REFERENCE_SITES = false;
|
||||
|
||||
/**
|
||||
* Create a new UAC with defaults for all UAC arguments
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -274,8 +274,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
|
||||
// warn the user for misusing EMIT_ALL_SITES
|
||||
if ( UAC.outputMode == OutputMode.EMIT_ALL_SITES &&
|
||||
UAC.genotypingMode == GenotypingMode.DISCOVERY &&
|
||||
UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Name.SNP )
|
||||
UAC.genotypingOutputMode == GenotypingOutputMode.DISCOVERY &&
|
||||
UAC.GLmodel != GenotypeLikelihoodsCalculationModel.Model.SNP )
|
||||
logger.warn("WARNING: note that the EMIT_ALL_SITES option is intended only for point mutations (SNPs) in DISCOVERY mode or generally when running in GENOTYPE_GIVEN_ALLELES mode; it will by no means produce a comprehensive set of indels in DISCOVERY mode");
|
||||
|
||||
// initialize the verbose writer
|
||||
|
|
|
|||
|
|
@ -58,10 +58,10 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.gga.GenotypingGivenAllelesUtils;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
|
@ -86,7 +86,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
|
||||
// the model used for calculating genotypes
|
||||
private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm;
|
||||
private final List<GenotypeLikelihoodsCalculationModel.Name> modelsToUse = new ArrayList<>(2);
|
||||
private final List<GenotypeLikelihoodsCalculationModel.Model> modelsToUse = new ArrayList<>(2);
|
||||
|
||||
|
||||
// the various loggers and writers
|
||||
|
|
@ -129,23 +129,25 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration,
|
||||
final VariantAnnotatorEngine annotationEngine,
|
||||
final Set<String> sampleNames, final PrintStream verboseWriter) {
|
||||
|
||||
super(toolkit,configuration,annotationEngine,sampleNames);
|
||||
|
||||
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
|
||||
genomeLocParser = toolkit.getGenomeLocParser();
|
||||
// note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ
|
||||
|
||||
this.verboseWriter = verboseWriter;
|
||||
|
||||
determineGLModelsToUse();
|
||||
|
||||
// do argument checking
|
||||
if (configuration.annotateAllSitesWithPLs) {
|
||||
if (!modelsToUse.contains(GenotypeLikelihoodsCalculationModel.Name.SNP))
|
||||
throw new IllegalArgumentException("Invalid genotype likelihood model specification: " +
|
||||
"only diploid SNP model can be used in conjunction with option allSitePLs");
|
||||
}
|
||||
initializeGenotypeLikelihoodsCalculationModels();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize {@link #glcm}.
|
||||
*/
|
||||
private void initializeGenotypeLikelihoodsCalculationModels() {
|
||||
glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>() {
|
||||
|
||||
glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>() {
|
||||
@Override
|
||||
protected Map<String,GenotypeLikelihoodsCalculationModel> initialValue() {
|
||||
return getGenotypeLikelihoodsCalculationObject(logger,UnifiedGenotypingEngine.this.configuration);
|
||||
|
|
@ -166,20 +168,23 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
final AlignmentContext rawContext) {
|
||||
final List<VariantCallContext> results = new ArrayList<>(2);
|
||||
|
||||
final List<GenotypeLikelihoodsCalculationModel.Name> models = getGLModelsToUse(tracker, rawContext);
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, rawContext);
|
||||
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap = new HashMap<>();
|
||||
|
||||
if ( models.isEmpty() ) {
|
||||
results.add(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ? emptyCallContext(tracker, refContext, rawContext) : null);
|
||||
}
|
||||
final VariantCallContext defaultResult = configuration.outputMode == OutputMode.EMIT_ALL_SITES
|
||||
&& configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
|
||||
? emptyCallContext(tracker, refContext, rawContext)
|
||||
: null;
|
||||
|
||||
if ( models.isEmpty() )
|
||||
results.add(defaultResult);
|
||||
else {
|
||||
for ( final GenotypeLikelihoodsCalculationModel.Name model : models ) {
|
||||
for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
|
||||
perReadAlleleLikelihoodMap.clear();
|
||||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model);
|
||||
if ( stratifiedContexts == null ) {
|
||||
results.add(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ? emptyCallContext(tracker, refContext, rawContext) : null);
|
||||
}
|
||||
if ( stratifiedContexts == null )
|
||||
results.add(defaultResult);
|
||||
else {
|
||||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
|
||||
if ( vc != null )
|
||||
|
|
@ -195,6 +200,28 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
return results;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private static Map<String, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||
|
||||
final Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<>();
|
||||
final List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
|
||||
|
||||
for (final Class<? extends GenotypeLikelihoodsCalculationModel> glmClass : glmClasses) {
|
||||
final String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
|
||||
try {
|
||||
final Object args[] = new Object[]{UAC,logger};
|
||||
final Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
|
||||
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new UserException("The likelihoods model provided for the -glm argument (" + UAC.GLmodel + ") is not a valid option: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
return glcm;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute GLs at a given locus. Entry point for engine calls from UGCalcLikelihoods.
|
||||
*
|
||||
|
|
@ -208,12 +235,12 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext,
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final List<GenotypeLikelihoodsCalculationModel.Name> models = getGLModelsToUse(tracker, rawContext);
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, rawContext);
|
||||
if ( models.isEmpty() ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
for ( final GenotypeLikelihoodsCalculationModel.Name model : models ) {
|
||||
for ( final GenotypeLikelihoodsCalculationModel.Model model : models ) {
|
||||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model);
|
||||
// return the first valid one we encounter
|
||||
if ( stratifiedContexts != null )
|
||||
|
|
@ -237,13 +264,13 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext,
|
||||
final VariantContext vc) {
|
||||
final List<GenotypeLikelihoodsCalculationModel.Name> models = getGLModelsToUse(tracker, rawContext);
|
||||
final List<GenotypeLikelihoodsCalculationModel.Model> models = getGLModelsToUse(tracker, rawContext);
|
||||
if ( models.isEmpty() ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// return the first one
|
||||
final GenotypeLikelihoodsCalculationModel.Name model = models.get(0);
|
||||
final GenotypeLikelihoodsCalculationModel.Model model = models.get(0);
|
||||
final Map<String, AlignmentContext> stratifiedContexts = getFilteredAndStratifiedContexts(refContext, rawContext, model);
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, null);
|
||||
}
|
||||
|
|
@ -255,7 +282,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
* @return the VariantCallContext object
|
||||
*/
|
||||
public VariantCallContext calculateGenotypes(VariantContext vc) {
|
||||
return calculateGenotypes(null, null, null, null, vc, GenotypeLikelihoodsCalculationModel.Name.valueOf("SNP"), null);
|
||||
return calculateGenotypes(null, null, null, null, vc, GenotypeLikelihoodsCalculationModel.Model.valueOf("SNP"), null);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -272,14 +299,14 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
final AlignmentContextUtils.ReadOrientation type,
|
||||
final List<Allele> alternateAllelesToUse,
|
||||
final boolean useBAQedPileup,
|
||||
final GenotypeLikelihoodsCalculationModel.Name model,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
|
||||
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model) {
|
||||
public VariantCallContext calculateGenotypes(final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
return calculateGenotypes(null, null, null, null, vc, model, null);
|
||||
}
|
||||
|
||||
|
|
@ -288,20 +315,20 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
final AlignmentContext rawContext,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final GenotypeLikelihoodsCalculationModel.Name model,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
return calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, false, perReadAlleleLikelihoodMap);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean forceKeepAllele(final Allele allele) {
|
||||
return configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs;
|
||||
return configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs;
|
||||
}
|
||||
|
||||
@Override
|
||||
public VariantCallContext calculateGenotypes(final RefMetaDataTracker tracker, final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext, Map<String, AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model,
|
||||
final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final boolean inheritAttributesFromInputVC,
|
||||
final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null;
|
||||
|
|
@ -326,7 +353,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
protected Map<String,Object> composeCallAttributes(final boolean inheritAttributesFromInputVC, final VariantContext vc,
|
||||
final AlignmentContext rawContext, final Map<String, AlignmentContext> stratifiedContexts, final RefMetaDataTracker tracker, final ReferenceContext refContext, final List<Integer> alleleCountsofMLE, final boolean bestGuessIsRef,
|
||||
final AFCalcResult AFresult, final List<Allele> allAllelesToUse, final GenotypesContext genotypes,
|
||||
final GenotypeLikelihoodsCalculationModel.Name model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final GenotypeLikelihoodsCalculationModel.Model model, final Map<String, org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final Map<String,Object> result = super.composeCallAttributes(inheritAttributesFromInputVC, vc,rawContext,stratifiedContexts,tracker,refContext,alleleCountsofMLE,bestGuessIsRef,
|
||||
AFresult,allAllelesToUse,genotypes,model,perReadAlleleLikelihoodMap);
|
||||
|
||||
|
|
@ -336,45 +363,62 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
result.put(NUMBER_OF_DISCOVERED_ALLELES_KEY, vc.getAlternateAlleles().size());
|
||||
|
||||
if ( configuration.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) {
|
||||
|
||||
// the overall lod
|
||||
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
||||
final double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
// the forward lod
|
||||
final VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
final AFCalcResult forwardAFresult = afcm.get().getLog10PNonRef(vcForward, getAlleleFrequencyPriors(model));
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0();
|
||||
final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
final VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
final AFCalcResult reverseAFresult = afcm.get().getLog10PNonRef(vcReverse, getAlleleFrequencyPriors(model));
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(AFresult.log10AlleleFrequencyPosteriors, true);
|
||||
final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0();
|
||||
final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
final double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
final double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
|
||||
//if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
|
||||
// strand score is max bias between forward and reverse strands
|
||||
double strandScore = Math.max(forwardLod, reverseLod);
|
||||
// rescale by a factor of 10
|
||||
strandScore *= 10.0;
|
||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||
|
||||
final double strandScore = calculateSLOD(stratifiedContexts, tracker, refContext, AFresult, allAllelesToUse, model, perReadAlleleLikelihoodMap);
|
||||
if ( !Double.isNaN(strandScore) )
|
||||
result.put("SB", strandScore);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Name model) {
|
||||
private double calculateSLOD(final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final RefMetaDataTracker tracker,
|
||||
final ReferenceContext refContext, final AFCalcResult AFresult,
|
||||
final List<Allele> allAllelesToUse,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
// the overall lod
|
||||
//double overallLog10PofNull = AFresult.log10AlleleFrequencyPosteriors[0];
|
||||
final double overallLog10PofF = AFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
// the forward lod
|
||||
final AFCalcResult forwardAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.FORWARD,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap);
|
||||
final double forwardLog10PofNull = forwardAFresult.getLog10LikelihoodOfAFEq0();
|
||||
final double forwardLog10PofF = forwardAFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF);
|
||||
|
||||
// the reverse lod
|
||||
final AFCalcResult reverseAFresult = getDirectionalAfCalcResult(AlignmentContextUtils.ReadOrientation.REVERSE,stratifiedContexts, tracker, refContext, allAllelesToUse, model, perReadAlleleLikelihoodMap);
|
||||
final double reverseLog10PofNull = reverseAFresult.getLog10LikelihoodOfAFEq0();
|
||||
final double reverseLog10PofF = reverseAFresult.getLog10LikelihoodOfAFGT0();
|
||||
//if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF);
|
||||
|
||||
final double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF;
|
||||
final double reverseLod = reverseLog10PofF + forwardLog10PofNull - overallLog10PofF;
|
||||
//if ( DEBUG_SLOD ) System.out.println("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
|
||||
|
||||
// strand score is max bias between forward and reverse strands
|
||||
double strandScore = Math.max(forwardLod, reverseLod);
|
||||
// rescale by a factor of 10
|
||||
strandScore *= 10.0;
|
||||
//logger.debug(String.format("SLOD=%f", strandScore));
|
||||
return strandScore;
|
||||
}
|
||||
|
||||
private AFCalcResult getDirectionalAfCalcResult(final AlignmentContextUtils.ReadOrientation orientation,
|
||||
final Map<String, AlignmentContext> stratifiedContexts,
|
||||
final RefMetaDataTracker tracker,
|
||||
final ReferenceContext refContext, List<Allele> allAllelesToUse,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
|
||||
final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts,orientation,
|
||||
allAllelesToUse, false, model, perReadAlleleLikelihoodMap);
|
||||
return afcm.get().getLog10PNonRef(vc, getAlleleFrequencyPriors(model));
|
||||
}
|
||||
|
||||
private Map<String, AlignmentContext> getFilteredAndStratifiedContexts(final ReferenceContext refContext,
|
||||
final AlignmentContext rawContext,
|
||||
final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
|
||||
if ( !BaseUtils.isRegularBase(refContext.getBase()) )
|
||||
return null;
|
||||
|
|
@ -394,7 +438,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
case SNP:
|
||||
case GENERALPLOIDYSNP:
|
||||
|
||||
if ( !(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingMode != GenotypingMode.GENOTYPE_GIVEN_ALLELES) ) {
|
||||
if ( !(configuration.outputMode == OutputMode.EMIT_ALL_SITES && configuration.genotypingOutputMode != GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES) ) {
|
||||
int numDeletions = 0;
|
||||
for ( final PileupElement p : rawContext.getBasePileup() ) {
|
||||
if ( p.isDeletion() )
|
||||
|
|
@ -411,7 +455,7 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
}
|
||||
}
|
||||
|
||||
protected void printVerboseData(final String pos, final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Name model) {
|
||||
protected void printVerboseData(final String pos, final VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) {
|
||||
Allele refAllele = null, altAllele = null;
|
||||
for ( Allele allele : vc.getAlleles() ) {
|
||||
if ( allele.isReference() )
|
||||
|
|
@ -441,26 +485,69 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
verboseWriter.println();
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the models to be use for genotype likelihood calculation.
|
||||
*
|
||||
* <p>
|
||||
* Whether to use the general ones or the concrete diploid ones need to depend on what the user has said
|
||||
* in its parameter glm. Iff he selected GENERALPLOIDYINDEL or GENERALPLOIDYSNP is the general set, otherwise
|
||||
* </p>
|
||||
*
|
||||
* the standard set (SNP and INDEL).
|
||||
* <p>
|
||||
* Even if the user did not select to use both models, GGA force the inclusion of both: snp and indels.
|
||||
* </p>
|
||||
* <p>
|
||||
* Also, we must fail
|
||||
* </p>
|
||||
*
|
||||
* The models are added to the field {@link #modelsToUse}.
|
||||
*/
|
||||
private void determineGLModelsToUse() {
|
||||
String modelPrefix = "";
|
||||
if ( !configuration.GLmodel.name().contains(GPSTRING) && configuration.samplePloidy != GATKVariantContextUtils.DEFAULT_PLOIDY )
|
||||
modelPrefix = GPSTRING;
|
||||
|
||||
// GGA mode => must initialize both the SNP and indel models
|
||||
if ( configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ||
|
||||
configuration.GLmodel.name().toUpperCase().contains("BOTH") ) {
|
||||
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + "SNP"));
|
||||
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + "INDEL"));
|
||||
}
|
||||
else {
|
||||
modelsToUse.add(GenotypeLikelihoodsCalculationModel.Name.valueOf(modelPrefix + configuration.GLmodel.name().toUpperCase()));
|
||||
modelsToUse.clear();
|
||||
|
||||
boolean useGeneral = false;
|
||||
boolean useSNP = false;
|
||||
boolean useINDEL = false;
|
||||
|
||||
switch (configuration.GLmodel) {
|
||||
case BOTH:
|
||||
useSNP = useINDEL = true; break;
|
||||
case SNP:
|
||||
useSNP = true; break;
|
||||
case INDEL:
|
||||
useINDEL = true; break;
|
||||
case GENERALPLOIDYINDEL:
|
||||
useINDEL = useGeneral = true; break;
|
||||
case GENERALPLOIDYSNP:
|
||||
useSNP = useGeneral = true; break;
|
||||
default: //Paranoia
|
||||
throw new IllegalStateException("unexpected genotype likelihood model " + configuration.GLmodel);
|
||||
}
|
||||
|
||||
// Force the use of both models in GGA:
|
||||
if (configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES)
|
||||
useSNP = useINDEL = true;
|
||||
|
||||
// If annotateAllSitesWithPLs requested , SNP model must be used.
|
||||
if (!useSNP && configuration.annotateAllSitesWithPLs)
|
||||
throw new UserException.BadArgumentValue("glm","Invalid genotype likelihood model specification: " +
|
||||
"only diploid SNP model can be used in conjunction with option allSitePLs");
|
||||
|
||||
// Finally add the relevant model
|
||||
if (useSNP)
|
||||
modelsToUse.add(useGeneral ? GenotypeLikelihoodsCalculationModel.Model.GENERALPLOIDYSNP :
|
||||
GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
if (useINDEL)
|
||||
modelsToUse.add(useGeneral ? GenotypeLikelihoodsCalculationModel.Model.GENERALPLOIDYINDEL :
|
||||
GenotypeLikelihoodsCalculationModel.Model.INDEL);
|
||||
}
|
||||
|
||||
// decide whether we are currently processing SNPs, indels, neither, or both
|
||||
private List<GenotypeLikelihoodsCalculationModel.Name> getGLModelsToUse(final RefMetaDataTracker tracker,
|
||||
private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final RefMetaDataTracker tracker,
|
||||
final AlignmentContext rawContext) {
|
||||
if ( configuration.genotypingMode != GenotypingMode.GENOTYPE_GIVEN_ALLELES )
|
||||
if ( configuration.genotypingOutputMode != GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES )
|
||||
return modelsToUse;
|
||||
|
||||
if ( modelsToUse.size() != 2 )
|
||||
|
|
@ -480,24 +567,5 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
|
|||
}
|
||||
}
|
||||
|
||||
private static Map<String, GenotypeLikelihoodsCalculationModel> getGenotypeLikelihoodsCalculationObject(Logger logger, UnifiedArgumentCollection UAC) {
|
||||
|
||||
final Map<String, GenotypeLikelihoodsCalculationModel> glcm = new HashMap<>();
|
||||
final List<Class<? extends GenotypeLikelihoodsCalculationModel>> glmClasses = new PluginManager<GenotypeLikelihoodsCalculationModel>(GenotypeLikelihoodsCalculationModel.class).getPlugins();
|
||||
|
||||
for (final Class<? extends GenotypeLikelihoodsCalculationModel> glmClass : glmClasses) {
|
||||
final String key = glmClass.getSimpleName().replaceAll("GenotypeLikelihoodsCalculationModel","").toUpperCase();
|
||||
try {
|
||||
final Object args[] = new Object[]{UAC,logger};
|
||||
final Constructor c = glmClass.getDeclaredConstructor(UnifiedArgumentCollection.class, Logger.class);
|
||||
glcm.put(key, (GenotypeLikelihoodsCalculationModel)c.newInstance(args));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new UserException("The likelihoods model provided for the -glm argument (" + UAC.GLmodel + ") is not a valid option: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
return glcm;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -130,6 +130,7 @@ import java.util.*;
|
|||
* <h3>Caveats</h3>
|
||||
* <ul>
|
||||
* <li>The system is under active and continuous development. All outputs, the underlying likelihood model, and command line arguments are likely to change often.</li>
|
||||
* <li>Currently the -ploidy parameter only support the default 2 (diploid). Eventually one will be able to change its value for haploid and polyploid analyses.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @author rpoplin
|
||||
|
|
@ -532,7 +533,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
|
||||
if ( emitReferenceConfidence() ) {
|
||||
|
||||
if (SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES)
|
||||
if (SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES)
|
||||
throw new UserException.BadArgumentValue("ERC/gt_mode","you cannot request reference confidence output and Genotyping Giving Alleles at the same time");
|
||||
|
||||
SCAC.STANDARD_CONFIDENCE_FOR_EMITTING = -0.0;
|
||||
|
|
@ -562,7 +563,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
|
||||
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
|
||||
simpleUAC.outputMode = OutputMode.EMIT_VARIANTS_ONLY;
|
||||
simpleUAC.genotypingMode = GenotypingMode.DISCOVERY;
|
||||
simpleUAC.genotypingOutputMode = GenotypingOutputMode.DISCOVERY;
|
||||
simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, SCAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, SCAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
|
||||
simpleUAC.CONTAMINATION_FRACTION = 0.0;
|
||||
|
|
@ -574,7 +575,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
if( SCAC.CONTAMINATION_FRACTION_FILE != null )
|
||||
SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, samplesSet, logger));
|
||||
|
||||
if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
|
||||
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
|
||||
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
|
||||
|
||||
// initialize the output VCF header
|
||||
|
|
@ -653,7 +654,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
}
|
||||
|
||||
trimmer.initialize(getToolkit().getGenomeLocParser(), SCAC.DEBUG,
|
||||
SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence());
|
||||
SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence());
|
||||
}
|
||||
|
||||
private void initializeReferenceConfidenceModel(final Set<String> samples, final Set<VCFHeaderLine> headerInfo) {
|
||||
|
|
@ -723,7 +724,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
@Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
|
||||
public ActivityProfileState isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) {
|
||||
|
||||
if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) {
|
||||
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
|
||||
final VariantContext vcFromAllelesRod = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(tracker, ref.getLocus(), false, logger, SCAC.alleles);
|
||||
if( vcFromAllelesRod != null ) {
|
||||
return new ActivityProfileState(ref.getLocus(), 1.0);
|
||||
|
|
@ -748,7 +749,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
}
|
||||
|
||||
final List<Allele> alleles = Arrays.asList(FAKE_REF_ALLELE , FAKE_ALT_ALLELE);
|
||||
final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Name.SNP);
|
||||
final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
|
||||
|
||||
return new ActivityProfileState( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileState.Type.NONE, averageHQSoftClips.mean() );
|
||||
|
|
@ -772,7 +773,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
return referenceModelForNoVariation(originalActiveRegion, true);
|
||||
|
||||
final List<VariantContext> givenAlleles = new ArrayList<>();
|
||||
if( SCAC.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ) {
|
||||
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
|
||||
for ( final VariantContext vc : metaDataTracker.getValues(SCAC.alleles) ) {
|
||||
if ( vc.isNotFiltered() ) {
|
||||
givenAlleles.add(vc); // do something with these VCs during GGA mode
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingEngine;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypingOutputMode;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.OutputMode;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
|
@ -96,13 +96,13 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
@Override
|
||||
protected boolean forceKeepAllele(final Allele allele) {
|
||||
return allele == GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE ||
|
||||
configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES ||
|
||||
configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ||
|
||||
configuration.emitReferenceConfidence != ReferenceConfidenceMode.NONE;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean forceSiteEmission() {
|
||||
return configuration.outputMode == OutputMode.EMIT_ALL_SITES || configuration.genotypingMode == GenotypingMode.GENOTYPE_GIVEN_ALLELES;
|
||||
return configuration.outputMode == OutputMode.EMIT_ALL_SITES || configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -208,8 +208,8 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
|
|||
|
||||
if( mergedVC == null ) { continue; }
|
||||
|
||||
final GenotypeLikelihoodsCalculationModel.Name calculationModel = mergedVC.isSNP()
|
||||
? GenotypeLikelihoodsCalculationModel.Name.SNP : GenotypeLikelihoodsCalculationModel.Name.INDEL;
|
||||
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
|
||||
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
|
||||
if (emitReferenceConfidence) {
|
||||
final List<Allele> alleleList = new ArrayList<>();
|
||||
|
|
|
|||
|
|
@ -339,7 +339,7 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
// TODO -- if we change this tool to actually validate against the called allele, then this if statement is needed;
|
||||
// TODO -- for now, though, we need to be able to validate the right allele (because we only test isVariant below) [EB]
|
||||
//if (!bamIsTruth)
|
||||
uac.genotypingMode = GenotypingMode.GENOTYPE_GIVEN_ALLELES;
|
||||
uac.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
|
||||
|
||||
if (mbq >= 0) uac.MIN_BASE_QUALTY_SCORE = mbq;
|
||||
if (deletions >= 0)
|
||||
|
|
@ -349,12 +349,12 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
|
|||
if (emitConf >= 0) uac.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
|
||||
if (callConf >= 0) uac.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
|
||||
|
||||
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Name.SNP;
|
||||
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
|
||||
snpEngine = new UnifiedGenotypingEngine(getToolkit(), uac);
|
||||
|
||||
// Adding the INDEL calling arguments for UG
|
||||
UnifiedArgumentCollection uac_indel = uac.clone();
|
||||
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Name.INDEL;
|
||||
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
|
||||
indelEngine = new UnifiedGenotypingEngine(getToolkit(), uac_indel);
|
||||
|
||||
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
|||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.sting.utils.variant.HomoSapiens;
|
||||
import org.broadinstitute.sting.utils.variant.HomoSapiensConstants;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
|
@ -170,7 +170,7 @@ public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
|||
*
|
||||
*/
|
||||
@Argument(fullName="globalPrior",shortName="G",doc="The global Dirichlet prior parameters for the allele frequency",required=false)
|
||||
public double globalPrior = HomoSapiens.SNP_HETEROZYGOSITY;
|
||||
public double globalPrior = HomoSapiensConstants.SNP_HETEROZYGOSITY;
|
||||
|
||||
/**
|
||||
* When a variant is not seen in a panel, whether to infer (and with what effective strength) that only reference
|
||||
|
|
|
|||
|
|
@ -115,9 +115,9 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
|
|||
|
||||
public void initialize() {
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Name.BOTH;
|
||||
UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH;
|
||||
UAC.outputMode = OutputMode.EMIT_ALL_SITES;
|
||||
UAC.genotypingMode = GenotypingMode.GENOTYPE_GIVEN_ALLELES;
|
||||
UAC.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
|
||||
|
||||
String trackName = variantCollection.variants.getName();
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.Random;
|
||||
|
||||
/**
|
||||
* BaseUtils contains some basic utilities for manipulating nucleotides.
|
||||
|
|
@ -553,6 +554,62 @@ public class BaseUtils {
|
|||
return getRandomBaseIndex(-1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return random bases.
|
||||
*
|
||||
* @param length base count and length of returned array.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code length} is less than 0.
|
||||
*
|
||||
* @return never {@code null}
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public static byte[] getRandomBases(final int length) {
|
||||
if (length < 0)
|
||||
throw new IllegalArgumentException("length must zero or greater");
|
||||
final byte[] result = new byte[length];
|
||||
fillWithRandomBases(result);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills an array with random bases.
|
||||
*
|
||||
* @param dest the array to fill.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code result} is {@code null}.
|
||||
*/
|
||||
public static void fillWithRandomBases(final byte[] dest) {
|
||||
fillWithRandomBases(dest,0,dest.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fill an array section with random bases.
|
||||
*
|
||||
* @param dest array to fill.
|
||||
* @param fromIndex first index to be filled (inclusive).
|
||||
* @param toIndex index after last to be filled (exclusive).
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code dest} is {@code null},
|
||||
* {@code fromIndex} or {@code toIndex} is negative,
|
||||
* {@code fromIndex} or {@code toIndex} are greater than {@code dest} length,
|
||||
* or {@code fromIndex} greater than {@code toIndex}.
|
||||
*/
|
||||
public static void fillWithRandomBases(final byte[] dest, final int fromIndex, final int toIndex) {
|
||||
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
|
||||
if (dest == null)
|
||||
throw new IllegalArgumentException("the dest array cannot be null");
|
||||
if (fromIndex > toIndex)
|
||||
throw new IllegalArgumentException("fromIndex cannot be larger than toIndex");
|
||||
if (fromIndex < 0)
|
||||
throw new IllegalArgumentException("both indexes must be positive");
|
||||
if (toIndex > dest.length)
|
||||
throw new IllegalArgumentException("both indexes must be less or equal to the destination array length");
|
||||
|
||||
for (int i = fromIndex; i < toIndex; i++)
|
||||
dest[i] = baseIndexToSimpleBase(rnd.nextInt(4));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a random base index, excluding some base index.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class GATKVariantContextUtils {
|
|||
|
||||
private static Logger logger = Logger.getLogger(GATKVariantContextUtils.class);
|
||||
|
||||
public static final int DEFAULT_PLOIDY = HomoSapiens.DEFAULT_PLOIDY;
|
||||
public static final int DEFAULT_PLOIDY = HomoSapiensConstants.DEFAULT_PLOIDY;
|
||||
|
||||
public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||
|
||||
|
|
|
|||
|
|
@ -28,11 +28,11 @@ package org.broadinstitute.sting.utils.variant;
|
|||
/**
|
||||
* <i>Homo sapiens</i> genome constants.
|
||||
*
|
||||
* <p>NOTE: reference to this constants is a indication that your code is (human) species assumption dependant.</p>
|
||||
* <p>NOTE: reference to these constants is an indication that your code is (human) species assumption dependant.</p>
|
||||
*
|
||||
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
||||
*/
|
||||
public class HomoSapiens {
|
||||
public class HomoSapiensConstants {
|
||||
|
||||
/**
|
||||
* Standard heterozygous rate for SNP variation.
|
||||
|
|
@ -52,9 +52,6 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
|
||||
|
||||
|
||||
@BeforeSuite
|
||||
public void setup() throws IOException {
|
||||
// alleles
|
||||
|
|
@ -1765,28 +1762,29 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(GATKVariantContextUtils.overlapsRegion(vc, genomeLoc), expected);
|
||||
}
|
||||
|
||||
|
||||
private final String[] OVERLAP_WITH_CHROMOSOMES = { "chr1", "chr20" };
|
||||
private final int[] OVERLAP_WITH_EVENT_SIZES = { -10, -1, 0, 1, 10 }; // 0 == SNP , -X xbp deletion, +X xbp insertion.
|
||||
private final int[] OVERLAP_WITH_EVENT_STARTS = { 10000000, 10000001,
|
||||
10000005, 10000010,
|
||||
10000009, 10000011,
|
||||
20000000 };
|
||||
|
||||
@DataProvider(name="overlapWithData")
|
||||
public Object[][] overlapWithData() {
|
||||
final String[] chrs = new String[] { "chr1", "chr20" };
|
||||
final int[] eventSizes = new int[] { -10 , -1 , 0 , 1 , 10 }; // 0 == SNP.
|
||||
final int[] eventStarts = new int[] { 10000000, 10000001, 10000005, 10000010, 10000009, 10000011, 20000000};
|
||||
|
||||
final int totalLocations = chrs.length * eventSizes.length * eventStarts.length + 1;
|
||||
final int totalEvents = chrs.length * eventSizes.length * eventStarts.length;
|
||||
final int totalLocations = OVERLAP_WITH_CHROMOSOMES.length * OVERLAP_WITH_EVENT_SIZES.length * OVERLAP_WITH_EVENT_STARTS.length + 1;
|
||||
final int totalEvents = OVERLAP_WITH_CHROMOSOMES.length * OVERLAP_WITH_EVENT_SIZES.length * OVERLAP_WITH_EVENT_STARTS.length;
|
||||
final GenomeLoc[] locs = new GenomeLoc[totalLocations];
|
||||
final VariantContext[] events = new VariantContext[totalEvents];
|
||||
|
||||
int nextIndex = 0;
|
||||
for (final String chr : chrs )
|
||||
for (final int size : eventSizes )
|
||||
for (final int starts : eventStarts ) {
|
||||
locs[nextIndex] = genomeLocParser.createGenomeLoc(chr,starts,starts + Math.max(0,size));
|
||||
events[nextIndex++] = new VariantContextBuilder().source("test").loc(chr,starts,starts + Math.max(0,size)).alleles(Arrays.asList(
|
||||
Allele.create(randomBases(size <= 0 ? 1 : size + 1,true),true),Allele.create(randomBases(size < 0 ? - size + 1 : 1,false),false))).make();
|
||||
}
|
||||
generateAllLocationsAndVariantContextCombinations(OVERLAP_WITH_CHROMOSOMES, OVERLAP_WITH_EVENT_SIZES,
|
||||
OVERLAP_WITH_EVENT_STARTS, locs, events);
|
||||
|
||||
locs[nextIndex++] = GenomeLoc.UNMAPPED;
|
||||
return generateAllParameterCombinationsForOverlapWithData(locs, events);
|
||||
}
|
||||
|
||||
private Object[][] generateAllParameterCombinationsForOverlapWithData(GenomeLoc[] locs, VariantContext[] events) {
|
||||
final List<Object[]> result = new LinkedList<>();
|
||||
for (final GenomeLoc loc : locs)
|
||||
for (final VariantContext event : events)
|
||||
|
|
@ -1795,17 +1793,25 @@ public class GATKVariantContextUtilsUnitTest extends BaseTest {
|
|||
return result.toArray(new Object[result.size()][]);
|
||||
}
|
||||
|
||||
private void generateAllLocationsAndVariantContextCombinations(final String[] chrs, final int[] eventSizes,
|
||||
final int[] eventStarts, final GenomeLoc[] locs,
|
||||
final VariantContext[] events) {
|
||||
int nextIndex = 0;
|
||||
for (final String chr : chrs )
|
||||
for (final int size : eventSizes )
|
||||
for (final int starts : eventStarts ) {
|
||||
locs[nextIndex] = genomeLocParser.createGenomeLoc(chr,starts,starts + Math.max(0,size));
|
||||
events[nextIndex++] = new VariantContextBuilder().source("test").loc(chr,starts,starts + Math.max(0,size)).alleles(Arrays.asList(
|
||||
Allele.create(randomBases(size <= 0 ? 1 : size + 1, true), true), Allele.create(randomBases(size < 0 ? -size + 1 : 1, false), false))).make();
|
||||
}
|
||||
|
||||
locs[nextIndex++] = GenomeLoc.UNMAPPED;
|
||||
}
|
||||
|
||||
private byte[] randomBases(final int length, final boolean reference) {
|
||||
final byte[] bases = new byte[length];
|
||||
bases[0] = (byte) (reference ? 'A' : 'C');
|
||||
for (int i = 1; i < bases.length; i++)
|
||||
switch (GenomeAnalysisEngine.getRandomGenerator().nextInt(4)) {
|
||||
case 0: bases[i] = 'A'; break;
|
||||
case 1: bases[i] = 'C'; break;
|
||||
case 2: bases[i] = 'G'; break;
|
||||
default:
|
||||
bases[i] = 'T';
|
||||
}
|
||||
BaseUtils.fillWithRandomBases(bases, 1, bases.length);
|
||||
return bases;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue