getGenotypes() {
- decode();
- return notToBeDirectlyAccessedGenotypes;
- }
-
- /**
- * Force us to decode the genotypes, if not already done
- */
- public void decode() {
- if ( ! loaded ) {
- //System.out.printf("Loading genotypes... %s:%d%n", contig, start);
- LazyData parsed = parser.parse(unparsedGenotypeData);
- notToBeDirectlyAccessedGenotypes = parsed.genotypes;
- sampleNamesInOrder = parsed.sampleNamesInOrder;
- sampleNameToOffset = parsed.sampleNameToOffset;
- loaded = true;
- unparsedGenotypeData = null; // don't hold the unparsed data any longer
-
- // warning -- this path allows us to create a VariantContext that doesn't run validateGenotypes()
- // That said, it's not such an important routine -- it's just checking that the genotypes
- // are well formed w.r.t. the alleles list, but this will be enforced within the VCFCodec
- }
- }
-
- /**
- * Overrides the ensure* functionality. If the data hasn't been loaded
- * yet and we want to build the cache, just decode it and we're done. If we've
- * already decoded the data, though, go through the super class
- */
- @Override
- protected synchronized void ensureSampleNameMap() {
- if ( ! loaded ) {
- decode(); // will load up all of the necessary data
- } else {
- super.ensureSampleNameMap();
- }
- }
-
- @Override
- protected synchronized void ensureSampleOrdering() {
- if ( ! loaded ) {
- decode(); // will load up all of the necessary data
- } else {
- super.ensureSampleOrdering();
- }
- }
-
- @Override
- protected void invalidateSampleNameMap() {
- // if the cache is invalidated, and we haven't loaded our data yet, do so
- if ( ! loaded ) decode();
- super.invalidateSampleNameMap();
- }
-
- @Override
- protected void invalidateSampleOrdering() {
- // if the cache is invalidated, and we haven't loaded our data yet, do so
- if ( ! loaded ) decode();
- super.invalidateSampleOrdering();
- }
-
- @Override
- public boolean isEmpty() {
- // optimization -- we know the number of samples in the unparsed data, so use it here to
- // avoid parsing just to know if the genotypes context is empty
- return loaded ? super.isEmpty() : nUnparsedGenotypes == 0;
- }
-
- @Override
- public int size() {
- // optimization -- we know the number of samples in the unparsed data, so use it here to
- // avoid parsing just to know the size of the context
- return loaded ? super.size() : nUnparsedGenotypes;
- }
-
- public Object getUnparsedGenotypeData() {
- return unparsedGenotypeData;
- }
-}
diff --git a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java
deleted file mode 100644
index 1fce89431..000000000
--- a/public/java/src/org/broadinstitute/variant/variantcontext/VariantContext.java
+++ /dev/null
@@ -1,1571 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-*
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package org.broadinstitute.variant.variantcontext;
-
-import org.broad.tribble.Feature;
-import org.broad.tribble.TribbleException;
-import org.broad.tribble.util.ParsingUtils;
-import org.broadinstitute.variant.utils.GeneralUtils;
-import org.broadinstitute.variant.vcf.*;
-
-import java.util.*;
-
-/**
- * Class VariantContext
- *
- * == High-level overview ==
- *
- * The VariantContext object is a single general class system for representing genetic variation data composed of:
- *
- * * Allele: representing single genetic haplotypes (A, T, ATC, -)
- * * Genotype: an assignment of alleles for each chromosome of a single named sample at a particular locus
- * * VariantContext: an abstract class holding all segregating alleles at a locus as well as genotypes
- * for multiple individuals containing alleles at that locus
- *
- * The class system works by defining segregating alleles, creating a variant context representing the segregating
- * information at a locus, and potentially creating and associating genotypes with individuals in the context.
- *
- * All of the classes are highly validating -- call validate() if you modify them -- so you can rely on the
- * self-consistency of the data once you have a VariantContext in hand. The system has a rich set of assessor
- * and manipulator routines, as well as more complex static support routines in VariantContextUtils.
- *
- * The VariantContext (and Genotype) objects are attributed (supporting addition of arbitrary key/value pairs) and
- * filtered (can represent a variation that is viewed as suspect).
- *
- * VariantContexts are dynamically typed, so whether a VariantContext is a SNP, Indel, or NoVariant depends
- * on the properties of the alleles in the context. See the detailed documentation on the Type parameter below.
- *
- * It's also easy to create subcontexts based on selected genotypes.
- *
- * == Working with Variant Contexts ==
- * By default, VariantContexts are immutable. In order to access (in the rare circumstances where you need them)
- * setter routines, you need to create MutableVariantContexts and MutableGenotypes.
- *
- * === Some example data ===
- *
- * Allele A, Aref, T, Tref;
- * Allele del, delRef, ATC, ATCref;
- *
- * A [ref] / T at 10
- * GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10);
- *
- * - / ATC [ref] from 20-23
- * GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);
- *
- * // - [ref] / ATC immediately after 20
- * GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20);
- *
- * === Alleles ===
- *
- * See the documentation in the Allele class itself
- *
- * What are they?
- *
- * Alleles can be either reference or non-reference
- *
- * Example alleles used here:
- *
- * del = new Allele("-");
- * A = new Allele("A");
- * Aref = new Allele("A", true);
- * T = new Allele("T");
- * ATC = new Allele("ATC");
- *
- * === Creating variant contexts ===
- *
- * ==== By hand ====
- *
- * Here's an example of a A/T polymorphism with the A being reference:
- *
- *
- * VariantContext vc = new VariantContext(name, snpLoc, Arrays.asList(Aref, T));
- *
- *
- * If you want to create a non-variant site, just put in a single reference allele
- *
- *
- * VariantContext vc = new VariantContext(name, snpLoc, Arrays.asList(Aref));
- *
- *
- * A deletion is just as easy:
- *
- *
- * VariantContext vc = new VariantContext(name, delLoc, Arrays.asList(ATCref, del));
- *
- *
- * The only 2 things that distinguishes between a insertion and deletion are the reference allele
- * and the location of the variation. An insertion has a Null reference allele and at least
- * one non-reference Non-Null allele. Additionally, the location of the insertion is immediately after
- * a 1-bp GenomeLoc (at say 20).
- *
- *
- * VariantContext vc = new VariantContext("name", insLoc, Arrays.asList(delRef, ATC));
- *
- *
- * ==== Converting rods and other data structures to VCs ====
- *
- * You can convert many common types into VariantContexts using the general function:
- *
- *
- * VariantContextAdaptors.convertToVariantContext(name, myObject)
- *
- *
- * dbSNP and VCFs, for example, can be passed in as myObject and a VariantContext corresponding to that
- * object will be returned. A null return type indicates that the type isn't yet supported. This is the best
- * and easiest way to create contexts using RODs.
- *
- *
- * === Working with genotypes ===
- *
- *
- * List alleles = Arrays.asList(Aref, T);
- * Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "g1", 10);
- * Genotype g2 = new Genotype(Arrays.asList(Aref, T), "g2", 10);
- * Genotype g3 = new Genotype(Arrays.asList(T, T), "g3", 10);
- * VariantContext vc = new VariantContext(snpLoc, alleles, Arrays.asList(g1, g2, g3));
- *
- *
- * At this point we have 3 genotypes in our context, g1-g3.
- *
- * You can assess a good deal of information about the genotypes through the VariantContext:
- *
- *
- * vc.hasGenotypes()
- * vc.isMonomorphicInSamples()
- * vc.isPolymorphicInSamples()
- * vc.getSamples().size()
- *
- * vc.getGenotypes()
- * vc.getGenotypes().get("g1")
- * vc.hasGenotype("g1")
- *
- * vc.getCalledChrCount()
- * vc.getCalledChrCount(Aref)
- * vc.getCalledChrCount(T)
- *
- *
- * === NO_CALL alleles ===
- *
- * The system allows one to create Genotypes carrying special NO_CALL alleles that aren't present in the
- * set of context alleles and that represent undetermined alleles in a genotype:
- *
- * Genotype g4 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "NO_DATA_FOR_SAMPLE", 10);
- *
- *
- * === subcontexts ===
- * It's also very easy get subcontext based only the data in a subset of the genotypes:
- *
- *
- * VariantContext vc12 = vc.subContextFromGenotypes(Arrays.asList(g1,g2));
- * VariantContext vc1 = vc.subContextFromGenotypes(Arrays.asList(g1));
- *
- *
- *
- * Fully decoding. Currently VariantContexts support some fields, particularly those
- * stored as generic attributes, to be of any type. For example, a field AB might
- * be naturally a floating point number, 0.51, but when it's read into a VC its
- * not decoded into the Java presentation but left as a string "0.51". A fully
- * decoded VariantContext is one where all values have been converted to their
- * corresponding Java object types, based on the types declared in a VCFHeader.
- *
- * The fullyDecode() takes a header object and creates a new fully decoded VariantContext
- * where all fields are converted to their true java representation. The VCBuilder
- * can be told that all fields are fully decoded, in which case no work is done when
- * asking for a fully decoded version of the VC.
- *
- *
- * @author depristo
- */
-public class VariantContext implements Feature { // to enable tribble integration
- private final static boolean WARN_ABOUT_BAD_END = true;
- private final static int MAX_ALLELE_SIZE_FOR_NON_SV = 150;
- private boolean fullyDecoded = false;
- protected CommonInfo commonInfo = null;
- public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
-
- public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet());
-
- /** The location of this VariantContext */
- final protected String contig;
- final protected long start;
- final protected long stop;
- private final String ID;
-
- /** The type (cached for performance reasons) of this context */
- protected Type type = null;
-
- /** A set of the alleles segregating in this context */
- final protected List alleles;
-
- /** A mapping from sampleName -> genotype objects for all genotypes associated with this context */
- protected GenotypesContext genotypes = null;
-
- /** Counts for each of the possible Genotype types in this context */
- protected int[] genotypeCounts = null;
-
- public final static GenotypesContext NO_GENOTYPES = GenotypesContext.NO_GENOTYPES;
-
- // a fast cached access point to the ref / alt alleles for biallelic case
- private Allele REF = null;
-
- // set to the alt allele when biallelic, otherwise == null
- private Allele ALT = null;
-
- /* cached monomorphic value: null -> not yet computed, False, True */
- private Boolean monomorphic = null;
-
- // ---------------------------------------------------------------------------------------------------------
- //
- // validation mode
- //
- // ---------------------------------------------------------------------------------------------------------
-
- public enum Validation {
- ALLELES,
- GENOTYPES
- }
-
- private final static EnumSet NO_VALIDATION = EnumSet.noneOf(Validation.class);
-
- // ---------------------------------------------------------------------------------------------------------
- //
- // constructors: see VariantContextBuilder
- //
- // ---------------------------------------------------------------------------------------------------------
-
- /**
- * Copy constructor
- *
- * @param other the VariantContext to copy
- */
- protected VariantContext(VariantContext other) {
- this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
- other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
- other.getFiltersMaybeNull(),
- other.getAttributes(),
- other.fullyDecoded, NO_VALIDATION);
- }
-
- /**
- * the actual constructor. Private access only
- *
- * @param source source
- * @param contig the contig
- * @param start the start base (one based)
- * @param stop the stop reference base (one based)
- * @param alleles alleles
- * @param genotypes genotypes map
- * @param log10PError qual
- * @param filters filters: use null for unfiltered and empty set for passes filters
- * @param attributes attributes
- * @param validationToPerform set of validation steps to take
- */
- protected VariantContext(final String source,
- final String ID,
- final String contig,
- final long start,
- final long stop,
- final Collection alleles,
- final GenotypesContext genotypes,
- final double log10PError,
- final Set filters,
- final Map attributes,
- final boolean fullyDecoded,
- final EnumSet validationToPerform ) {
- if ( contig == null ) { throw new IllegalArgumentException("Contig cannot be null"); }
- this.contig = contig;
- this.start = start;
- this.stop = stop;
-
- // intern for efficiency. equals calls will generate NPE if ID is inappropriately passed in as null
- if ( ID == null || ID.equals("") ) throw new IllegalArgumentException("ID field cannot be the null or the empty string");
- this.ID = ID.equals(VCFConstants.EMPTY_ID_FIELD) ? VCFConstants.EMPTY_ID_FIELD : ID;
-
- this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
-
- if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
-
- // we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles
- this.alleles = makeAlleles(alleles);
-
- if ( genotypes == null || genotypes == NO_GENOTYPES ) {
- this.genotypes = NO_GENOTYPES;
- } else {
- this.genotypes = genotypes.immutable();
- }
-
- // cache the REF and ALT alleles
- int nAlleles = alleles.size();
- for ( Allele a : alleles ) {
- if ( a.isReference() ) {
- REF = a;
- } else if ( nAlleles == 2 ) { // only cache ALT when biallelic
- ALT = a;
- }
- }
-
- this.fullyDecoded = fullyDecoded;
-
- if ( ! validationToPerform.isEmpty() ) {
- validate(validationToPerform);
- }
- }
-
- // ---------------------------------------------------------------------------------------------------------
- //
- // Selectors
- //
- // ---------------------------------------------------------------------------------------------------------
-
- /**
- * This method subsets down to a set of samples.
- *
- * At the same time returns the alleles to just those in use by the samples,
- * if rederiveAllelesFromGenotypes is true, otherwise the full set of alleles
- * in this VC is returned as the set of alleles in the subContext, even if
- * some of those alleles aren't in the samples
- *
- * WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING?
- *
- * @param sampleNames the sample names
- * @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default
- * @return new VariantContext subsetting to just the given samples
- */
- public VariantContext subContextFromSamples(Set sampleNames, final boolean rederiveAllelesFromGenotypes ) {
- if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
- return this; // fast path when you don't have any work to do
- } else {
- VariantContextBuilder builder = new VariantContextBuilder(this);
- GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
-
- if ( rederiveAllelesFromGenotypes )
- builder.alleles(allelesOfGenotypes(newGenotypes));
- else {
- builder.alleles(alleles);
- }
-
- return builder.genotypes(newGenotypes).make();
- }
- }
-
- /**
- * @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true
- *
- * @param sampleNames
- * @return
- */
- public VariantContext subContextFromSamples(final Set sampleNames) {
- return subContextFromSamples(sampleNames, true);
- }
-
- public VariantContext subContextFromSample(String sampleName) {
- return subContextFromSamples(Collections.singleton(sampleName));
- }
-
- /**
- * helper routine for subcontext
- * @param genotypes genotypes
- * @return allele set
- */
- private final Set allelesOfGenotypes(Collection genotypes) {
- final Set alleles = new HashSet();
-
- boolean addedref = false;
- for ( final Genotype g : genotypes ) {
- for ( final Allele a : g.getAlleles() ) {
- addedref = addedref || a.isReference();
- if ( a.isCalled() )
- alleles.add(a);
- }
- }
- if ( ! addedref ) alleles.add(getReference());
-
- return alleles;
- }
-
- // ---------------------------------------------------------------------------------------------------------
- //
- // type operations
- //
- // ---------------------------------------------------------------------------------------------------------
-
- /**
- * see: http://www.ncbi.nlm.nih.gov/bookshelf/br.fcgi?book=handbook&part=ch5&rendertype=table&id=ch5.ch5_t3
- *
- * Format:
- * dbSNP variation class
- * Rules for assigning allele classes
- * Sample allele definition
- *
- * Single Nucleotide Polymorphisms (SNPs)a
- * Strictly defined as single base substitutions involving A, T, C, or G.
- * A/T
- *
- * Deletion/Insertion Polymorphisms (DIPs)
- * Designated using the full sequence of the insertion as one allele, and either a fully
- * defined string for the variant allele or a '-' character to specify the deleted allele.
- * This class will be assigned to a variation if the variation alleles are of different lengths or
- * if one of the alleles is deleted ('-').
- * T/-/CCTA/G
- *
- * No-variation
- * Reports may be submitted for segments of sequence that are assayed and determined to be invariant
- * in the sample.
- * (NoVariation)
- *
- * Mixed
- * Mix of other classes
- *
- * Also supports NO_VARIATION type, used to indicate that the site isn't polymorphic in the population
- *
- *
- * Not currently supported:
- *
- * Heterozygous sequence
- * The term heterozygous is used to specify a region detected by certain methods that do not
- * resolve the polymorphism into a specific sequence motif. In these cases, a unique flanking
- * sequence must be provided to define a sequence context for the variation.
- * (heterozygous)
- *
- * Microsatellite or short tandem repeat (STR)
- * Alleles are designated by providing the repeat motif and the copy number for each allele.
- * Expansion of the allele repeat motif designated in dbSNP into full-length sequence will
- * be only an approximation of the true genomic sequence because many microsatellite markers are
- * not fully sequenced and are resolved as size variants only.
- * (CAC)8/9/10/11
- *
- * Named variant
- * Applies to insertion/deletion polymorphisms of longer sequence features, such as retroposon
- * dimorphism for Alu or line elements. These variations frequently include a deletion '-' indicator
- * for the absent allele.
- * (alu) / -
- *
- * Multi-Nucleotide Polymorphism (MNP)
- * Assigned to variations that are multi-base variations of a single, common length
- * GGA/AGT
- */
- public enum Type {
- NO_VARIATION,
- SNP,
- MNP, // a multi-nucleotide polymorphism
- INDEL,
- SYMBOLIC,
- MIXED,
- }
-
- /**
- * Determines (if necessary) and returns the type of this variation by examining the alleles it contains.
- *
- * @return the type of this VariantContext
- **/
- public Type getType() {
- if ( type == null )
- determineType();
-
- return type;
- }
-
- /**
- * convenience method for SNPs
- *
- * @return true if this is a SNP, false otherwise
- */
- public boolean isSNP() { return getType() == Type.SNP; }
-
-
- /**
- * convenience method for variants
- *
- * @return true if this is a variant allele, false if it's reference
- */
- public boolean isVariant() { return getType() != Type.NO_VARIATION; }
-
- /**
- * convenience method for point events
- *
- * @return true if this is a SNP or ref site, false if it's an indel or mixed event
- */
- public boolean isPointEvent() { return isSNP() || !isVariant(); }
-
- /**
- * convenience method for indels
- *
- * @return true if this is an indel, false otherwise
- */
- public boolean isIndel() { return getType() == Type.INDEL; }
-
- /**
- * @return true if the alleles indicate a simple insertion (i.e., the reference allele is Null)
- */
- public boolean isSimpleInsertion() {
- // can't just call !isSimpleDeletion() because of complex indels
- return getType() == Type.INDEL && isBiallelic() && getReference().length() == 1;
- }
-
- /**
- * @return true if the alleles indicate a simple deletion (i.e., a single alt allele that is Null)
- */
- public boolean isSimpleDeletion() {
- // can't just call !isSimpleInsertion() because of complex indels
- return getType() == Type.INDEL && isBiallelic() && getAlternateAllele(0).length() == 1;
- }
-
- /**
- * @return true if the alleles indicate neither a simple deletion nor a simple insertion
- */
- public boolean isComplexIndel() {
- return isIndel() && !isSimpleDeletion() && !isSimpleInsertion();
- }
-
- public boolean isSymbolic() {
- return getType() == Type.SYMBOLIC;
- }
-
- public boolean isStructuralIndel() {
- if ( getType() == Type.INDEL ) {
- List sizes = getIndelLengths();
- if ( sizes != null ) {
- for ( Integer length : sizes ) {
- if ( length > MAX_ALLELE_SIZE_FOR_NON_SV ) {
- return true;
- }
- }
- }
- }
- return false;
- }
-
- /**
- *
- * @return true if the variant is symbolic or a large indel
- */
- public boolean isSymbolicOrSV() {
- return isSymbolic() || isStructuralIndel();
- }
-
- public boolean isMNP() {
- return getType() == Type.MNP;
- }
-
- /**
- * convenience method for indels
- *
- * @return true if this is an mixed variation, false otherwise
- */
- public boolean isMixed() { return getType() == Type.MIXED; }
-
-
- // ---------------------------------------------------------------------------------------------------------
- //
- // Generic accessors
- //
- // ---------------------------------------------------------------------------------------------------------
-
- public boolean hasID() {
- return getID() != VCFConstants.EMPTY_ID_FIELD;
- }
-
- public boolean emptyID() {
- return ! hasID();
- }
-
- public String getID() {
- return ID;
- }
-
-
- // ---------------------------------------------------------------------------------------------------------
- //
- // get routines to access context info fields
- //
- // ---------------------------------------------------------------------------------------------------------
- public String getSource() { return commonInfo.getName(); }
- public Set getFiltersMaybeNull() { return commonInfo.getFiltersMaybeNull(); }
- public Set getFilters() { return commonInfo.getFilters(); }
- public boolean isFiltered() { return commonInfo.isFiltered(); }
- public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
- public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
- public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
- public double getLog10PError() { return commonInfo.getLog10PError(); }
- public double getPhredScaledQual() { return commonInfo.getPhredScaledQual(); }
-
- public Map getAttributes() { return commonInfo.getAttributes(); }
- public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); }
- public Object getAttribute(String key) { return commonInfo.getAttribute(key); }
-
- public Object getAttribute(String key, Object defaultValue) {
- return commonInfo.getAttribute(key, defaultValue);
- }
-
- public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
- public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
- public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
- public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
-
- public CommonInfo getCommonInfo() {
- return commonInfo;
- }
-
- // ---------------------------------------------------------------------------------------------------------
- //
- // Working with alleles
- //
- // ---------------------------------------------------------------------------------------------------------
-
- /**
- * @return the reference allele for this context
- */
- public Allele getReference() {
- Allele ref = REF;
- if ( ref == null )
- throw new IllegalStateException("BUG: no reference allele found at " + this);
- return ref;
- }
-
-
- /**
- * @return true if the context is strictly bi-allelic
- */
- public boolean isBiallelic() {
- return getNAlleles() == 2;
- }
-
- /**
- * @return The number of segregating alleles in this context
- */
- public int getNAlleles() {
- return alleles.size();
- }
-
- /**
- * Returns the maximum ploidy of all samples in this VC, or default if there are no genotypes
- *
- * This function is caching, so it's only expensive on the first call
- *
- * @param defaultPloidy the default ploidy, if all samples are no-called
- * @return default, or the max ploidy
- */
- public int getMaxPloidy(final int defaultPloidy) {
- return genotypes.getMaxPloidy(defaultPloidy);
- }
-
- /**
- * @return The allele sharing the same bases as this String. A convenience method; better to use byte[]
- */
- public Allele getAllele(String allele) {
- return getAllele(allele.getBytes());
- }
-
- /**
- * @return The allele sharing the same bases as this byte[], or null if no such allele is present.
- */
- public Allele getAllele(byte[] allele) {
- return Allele.getMatchingAllele(getAlleles(), allele);
- }
-
- /**
- * @return True if this context contains Allele allele, or false otherwise
- */
- public boolean hasAllele(final Allele allele) {
- return hasAllele(allele, false, true);
- }
-
- public boolean hasAllele(final Allele allele, final boolean ignoreRefState) {
- return hasAllele(allele, ignoreRefState, true);
- }
-
- public boolean hasAlternateAllele(final Allele allele) {
- return hasAllele(allele, false, false);
- }
-
- public boolean hasAlternateAllele(final Allele allele, final boolean ignoreRefState) {
- return hasAllele(allele, ignoreRefState, false);
- }
-
- private boolean hasAllele(final Allele allele, final boolean ignoreRefState, final boolean considerRefAllele) {
- if ( (considerRefAllele && allele == REF) || allele == ALT ) // optimization for cached cases
- return true;
-
- final List allelesToConsider = considerRefAllele ? getAlleles() : getAlternateAlleles();
- for ( Allele a : allelesToConsider ) {
- if ( a.equals(allele, ignoreRefState) )
- return true;
- }
-
- return false;
- }
-
-
- /**
- * Gets the alleles. This method should return all of the alleles present at the location,
- * including the reference allele. There are no constraints imposed on the ordering of alleles
- * in the set. If the reference is not an allele in this context it will not be included.
- *
- * @return the set of alleles
- */
- public List getAlleles() { return alleles; }
-
- /**
- * Gets the alternate alleles. This method should return all the alleles present at the location,
- * NOT including the reference allele. There are no constraints imposed on the ordering of alleles
- * in the set.
- *
- * @return the set of alternate alleles
- */
- public List