Documentation and contracts for GenotypesContext and VariantContextBuilder

This commit is contained in:
Mark DePristo 2011-11-18 13:59:30 -05:00
parent f54afc19b4
commit 660d6009a2
3 changed files with 332 additions and 37 deletions

View File

@ -296,6 +296,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
if (parts.length > NUM_STANDARD_FIELDS) {
builder.attribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, new String(parts[8]));
builder.attribute(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY, this);
builder.genotypesAreUnparsed();
}
VariantContext vc = null;

View File

@ -24,19 +24,41 @@
package org.broadinstitute.sting.utils.variantcontext;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import java.util.*;
/**
*
* Represents an ordered collection of Genotype objects
*/
public class GenotypesContext implements List<Genotype> {
/**
* static constant value for an empty GenotypesContext. Useful since so many VariantContexts have no genotypes
*/
public final static GenotypesContext NO_GENOTYPES =
new GenotypesContext(new ArrayList<Genotype>(0), new HashMap<String, Integer>(0), Collections.<String>emptyList(), true);
/**
* sampleNamesInOrder: a list of sample names, one for each genotype in genotypes, sorted in alphabetical order
*/
List<String> sampleNamesInOrder = null;
/**
* a map optimized for efficient lookup. Each genotype in genotypes must have its
* sample name in sampleNameToOffset, with a corresponding integer value that indicates the offset of that
* genotype in the vector of genotypes
*/
Map<String, Integer> sampleNameToOffset = null;
/** if true, then we need to reinitialize sampleNamesInOrder and sampleNameToOffset before we use them */
boolean cacheIsInvalid = true;
/** An ArrayList of genotypes contained in this context */
List<Genotype> genotypes;
/** Are we allowing users to modify the list? */
boolean immutable = false;
// ---------------------------------------------------------------------------
@ -45,14 +67,25 @@ public class GenotypesContext implements List<Genotype> {
//
// ---------------------------------------------------------------------------
/**
* Create an empty GenotypesContext.
*
* Delegates to the (capacity, immutable) constructor with an initial
* capacity of 10 and immutable set to false, so the result can be modified.
*/
private GenotypesContext() {
this(10, false);
}
/**
* Create an empty GenotypesContext whose backing ArrayList has initial capacity for n elements.
*
* @param n         initial capacity of the backing list; the contract requires n >= 0
* @param immutable forwarded unchanged to the (list, immutable) constructor
*/
@Requires("n >= 0")
private GenotypesContext(final int n, final boolean immutable) {
this(new ArrayList<Genotype>(n), immutable);
}
/**
* Create an GenotypeContext containing genotypes
*/
@Requires("genotypes != null")
private GenotypesContext(final ArrayList<Genotype> genotypes, final boolean immutable) {
this.genotypes = genotypes;
this.immutable = immutable;
@ -60,6 +93,23 @@ public class GenotypesContext implements List<Genotype> {
this.cacheIsInvalid = true;
}
/**
* Create a fully resolved GenotypeContext containing genotypes, sample lookup table,
* and sorted sample names
*
* @param genotypes our genotypes in arbitrary order
* @param sampleNameToOffset map optimized for efficient lookup. Each genotype in genotypes must have its
* sample name in sampleNameToOffset, with a corresponding integer value that indicates the offset of that
* genotype in the vector of genotypes
* @param sampleNamesInOrder a list of sample names, one for each genotype in genotypes, sorted in alphabetical
* order.
* @param immutable
*/
@Requires({"genotypes != null",
"sampleNameToOffset != null",
"sampleNamesInOrder != null",
"genotypes.size() == sampleNameToOffset.size()",
"genotypes.size() == sampleNamesInOrder.size()"})
private GenotypesContext(final ArrayList<Genotype> genotypes,
final Map<String, Integer> sampleNameToOffset,
final List<String> sampleNamesInOrder,
@ -77,54 +127,98 @@ public class GenotypesContext implements List<Genotype> {
//
// ---------------------------------------------------------------------------
/**
 * Factory for a context that holds no genotypes yet.
 *
 * @return a newly allocated, empty, mutable GenotypesContext; never null
 */
@Ensures({"result != null"})
public static final GenotypesContext create() {
    final GenotypesContext empty = new GenotypesContext();
    return empty;
}
/**
 * Factory for an empty context pre-sized for a known number of genotypes.
 *
 * @param nGenotypes initial capacity to reserve; the contract requires a non-negative value
 * @return a newly allocated, empty, mutable GenotypesContext; never null
 */
@Requires("nGenotypes >= 0")
@Ensures({"result != null"})
public static final GenotypesContext create(final int nGenotypes) {
    final boolean immutable = false;
    return new GenotypesContext(nGenotypes, immutable);
}
/**
 * Factory for a context whose sample lookup data is supplied by the caller
 * instead of being derived from the genotypes.
 *
 * @param genotypes          the genotypes, in arbitrary order
 * @param sampleNameToOffset lookup map: every genotype's sample name must be a key, mapped to
 *                           the offset of that genotype within genotypes
 * @param sampleNamesInOrder one sample name per genotype, sorted in alphabetical order
 * @return a mutable GenotypesContext holding genotypes together with the provided lookup data
 */
@Requires({"genotypes != null",
        "sampleNameToOffset != null",
        "sampleNamesInOrder != null",
        "sameSamples(genotypes, sampleNamesInOrder)",
        "sameSamples(genotypes, sampleNameToOffset.keySet())"})
@Ensures({"result != null"})
public static final GenotypesContext create(final ArrayList<Genotype> genotypes,
                                            final Map<String, Integer> sampleNameToOffset,
                                            final List<String> sampleNamesInOrder) {
    final boolean immutable = false;
    return new GenotypesContext(genotypes, sampleNameToOffset, sampleNamesInOrder, immutable);
}
/**
 * Factory for a mutable context that wraps the supplied list of genotypes.
 *
 * The list is handed to the constructor as-is; no copy is made here.
 *
 * NOTE(review): the contract demands a non-null argument, yet the body still maps
 * null to the shared NO_GENOTYPES constant. Confirm which of the two is intended.
 *
 * @param genotypes the genotypes, in arbitrary order
 * @return a GenotypesContext containing genotypes; never null
 */
@Requires({"genotypes != null"})
@Ensures({"result != null"})
public static final GenotypesContext create(final ArrayList<Genotype> genotypes) {
    if ( genotypes == null )
        return NO_GENOTYPES;
    return new GenotypesContext(genotypes, false);
}
/**
 * Factory for a mutable context holding the given genotypes, kept in argument order.
 *
 * The varargs array is copied into a fresh ArrayList, so the context does not
 * share storage with the caller's array.
 *
 * @param genotypes the genotypes to store
 * @return a mutable GenotypesContext containing genotypes; never null
 */
@Requires({"genotypes != null"})
@Ensures({"result != null"})
public static final GenotypesContext create(final Genotype... genotypes) {
    final ArrayList<Genotype> backing = new ArrayList<Genotype>(Arrays.asList(genotypes));
    return new GenotypesContext(backing, false);
}
/**
 * Duplicate an existing context into a freshly allocated one.
 *
 * Only the list of genotypes is copied; the new context is built through
 * create(ArrayList) and therefore starts without the source's cached lookup data.
 *
 * @param toCopy the GenotypesContext to copy
 * @return a mutable GenotypesContext containing the genotypes of toCopy
 */
@Requires({"toCopy != null"})
@Ensures({"result != null"})
public static final GenotypesContext copy(final GenotypesContext toCopy) {
    final ArrayList<Genotype> duplicate = new ArrayList<Genotype>(toCopy.genotypes);
    return create(duplicate);
}
/**
 * Create a GenotypesContext containing the genotypes of toCopy, in the iteration
 * order of that collection.
 *
 * A null argument is accepted and means "no genotypes": callers such as
 * VariantContextBuilder.genotypes(Collection) document null as a legal value and
 * forward it here. For that reason no non-null precondition is declared.
 *
 * @param toCopy the collection of genotypes, or null for none
 * @return the shared NO_GENOTYPES constant when toCopy is null; otherwise a mutable
 *         GenotypesContext backed by a fresh copy of toCopy
 */
@Ensures({"result != null"})
public static final GenotypesContext copy(final Collection<Genotype> toCopy) {
    if ( toCopy == null )
        return NO_GENOTYPES;
    return create(new ArrayList<Genotype>(toCopy));
}
// public static final GenotypeMap create(final Collection<Genotype> genotypes) {
// if ( genotypes == null )
// return null; // todo -- really should return an empty map
// else {
// GenotypeMap genotypeMap = new GenotypeMap(genotypes.size(), false);
// for ( final Genotype g : genotypes ) {
// if ( genotypeMap.containsKey(g.getSampleName() ) )
// throw new IllegalArgumentException("Duplicate genotype added to VariantContext: " + g);
// genotypeMap.put(g.getSampleName(), g);
// }
//
// //return genotypeMap.immutable(); // todo enable when we have time to dive into mutability issue
// return genotypeMap;
// }
// }
// ---------------------------------------------------------------------------
//
// Mutability methods
@ -152,23 +246,31 @@ public class GenotypesContext implements List<Genotype> {
//
// ---------------------------------------------------------------------------
/**
 * Marks the sample-name lookup data as stale and releases it.
 *
 * After this call sampleNamesInOrder and sampleNameToOffset are null and must be
 * rebuilt before they are read again.
 */
// The postcondition must be a comparison; "cacheIsInvalid = true" is an assignment expression.
@Ensures({"cacheIsInvalid == true"})
private void invalidateCaches() {
    cacheIsInvalid = true;
    sampleNamesInOrder = null;
    sampleNameToOffset = null;
}
@Ensures({"cacheIsInvalid = false",
"sampleNamesInOrder != null",
"sampleNameToOffset != null",
"sameSamples(genotypes, sampleNamesInOrder)",
"sameSamples(genotypes, sampleNameToOffset.keySet())"})
private void buildCache() {
cacheIsInvalid = false;
sampleNamesInOrder = new ArrayList<String>(genotypes.size());
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
if ( cacheIsInvalid ) {
cacheIsInvalid = false;
sampleNamesInOrder = new ArrayList<String>(genotypes.size());
sampleNameToOffset = new HashMap<String, Integer>(genotypes.size());
for ( int i = 0; i < genotypes.size(); i++ ) {
final Genotype g = genotypes.get(i);
sampleNamesInOrder.add(g.getSampleName());
sampleNameToOffset.put(g.getSampleName(), i);
for ( int i = 0; i < genotypes.size(); i++ ) {
final Genotype g = genotypes.get(i);
sampleNamesInOrder.add(g.getSampleName());
sampleNameToOffset.put(g.getSampleName(), i);
}
Collections.sort(sampleNamesInOrder);
}
Collections.sort(sampleNamesInOrder);
}
@ -195,12 +297,14 @@ public class GenotypesContext implements List<Genotype> {
}
/**
 * Appends a genotype to this context.
 *
 * checkImmutability() runs before anything is modified, and the sample-name
 * caches are invalidated because the new genotype is not reflected in them.
 *
 * @param genotype the genotype to append; the contract requires it to be non-null
 * @return the value reported by the backing list's add
 */
@Override
@Requires("genotype != null")
public boolean add(final Genotype genotype) {
    checkImmutability();
    invalidateCaches();
    final boolean changed = genotypes.add(genotype);
    return changed;
}
@Requires("genotype != null")
public boolean add(final Genotype ... genotype) {
checkImmutability();
invalidateCaches();
@ -263,13 +367,15 @@ public class GenotypesContext implements List<Genotype> {
/**
 * List iterators are not supported by this context.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public ListIterator<Genotype> listIterator() {
    // todo -- must be immutable; until then genotypes.listIterator() is not exposed
    throw new UnsupportedOperationException();
}
/**
 * List iterators are not supported by this context, regardless of start index.
 *
 * @param i the requested start index (ignored)
 * @throws UnsupportedOperationException always
 */
@Override
public ListIterator<Genotype> listIterator(final int i) {
    // todo -- must be immutable; until then genotypes.listIterator(i) is not exposed
    throw new UnsupportedOperationException();
}
@Override
@ -322,6 +428,14 @@ public class GenotypesContext implements List<Genotype> {
return genotypes.toArray(ts);
}
/**
* Iterate over the Genotypes in this context in the order specified by sampleNamesInOrder
*
* @param sampleNamesInOrder a Iterable of String, containing exactly one entry for each Genotype sample name in
* this context
* @return a Iterable over the genotypes in this context.
*/
@Requires("sampleNamesInOrder != null")
public Iterable<Genotype> iterateInSampleNameOrder(final Iterable<String> sampleNamesInOrder) {
return new Iterable<Genotype>() {
@Override
@ -331,6 +445,11 @@ public class GenotypesContext implements List<Genotype> {
};
}
/**
 * Iterate over the genotypes of this context ordered by sample name (A, B, C),
 * independent of how they are laid out in the underlying list.
 *
 * @return an Iterable over the genotypes in this context
 */
public Iterable<Genotype> iterateInSampleNameOrder() {
    final List<String> sortedNames = getSampleNamesOrderedByName();
    return iterateInSampleNameOrder(sortedNames);
}
@ -358,30 +477,57 @@ public class GenotypesContext implements List<Genotype> {
}
}
/**
 * Sample names of every genotype in this context, in no particular order.
 *
 * The returned set is the key set of the internal name-to-offset map, not a copy.
 *
 * @return the set of sample names; never null
 */
@Ensures("result != null")
public Set<String> getSampleNames() {
    buildCache();
    final Set<String> names = sampleNameToOffset.keySet();
    return names;
}
/**
 * Sample names of every genotype in this context, in their natural ordering (A, B, C).
 *
 * The returned list is the internal cached list, not a copy.
 *
 * @return the sorted list of sample names; never null
 */
@Ensures("result != null")
public List<String> getSampleNamesOrderedByName() {
    buildCache();
    final List<String> sorted = sampleNamesInOrder;
    return sorted;
}
/**
 * Tests whether this context holds a genotype for the given sample.
 *
 * @param sample the sample name to look up; the contract requires it to be non-null
 * @return true if sample is a key of the name-to-offset lookup map
 */
@Requires("sample != null")
public boolean containsSample(final String sample) {
    buildCache();
    final boolean present = sampleNameToOffset.containsKey(sample);
    return present;
}
/**
 * Tests whether this context holds a genotype for every one of the given samples.
 *
 * @param samples the sample names to look up; the contract requires it to be non-null
 * @return true if every element of samples is a key of the name-to-offset lookup map
 */
@Requires("samples != null")
public boolean containsSamples(final Collection<String> samples) {
    // One cache build is enough; going through getSampleNames() would trigger a second one.
    buildCache();
    return sampleNameToOffset.keySet().containsAll(samples);
}
/**
 * Restrict this context to the genotypes whose sample names appear in samples.
 * Names in samples that are not present in this context are simply ignored.
 *
 * The collection is first copied into a HashSet and the work is delegated to
 * the Set-based overload.
 *
 * @param samples the sample names to keep
 * @return the result of the Set-based overload; never null
 */
@Requires("samples != null")
@Ensures("result != null")
public GenotypesContext subsetToSamples( final Collection<String> samples ) {
    // Must stay typed as Set so that overload resolution picks the Set variant.
    final Set<String> uniqueSamples = new HashSet<String>(samples);
    return subsetToSamples(uniqueSamples);
}
/**
* {@link #subsetToSamples(java.util.Collection)}
* @param samples
* @return
*/
@Requires("samples != null")
@Ensures("result != null")
public GenotypesContext subsetToSamples( final Set<String> samples ) {
if ( samples.size() == genotypes.size() )
return this;
@ -426,4 +572,18 @@ public class GenotypesContext implements List<Genotype> {
}
}
}
/**
 * Contract helper: checks that a collection of sample names lines up with a list of genotypes.
 *
 * @param genotypes          the genotypes to check
 * @param sampleNamesInOrder the candidate sample names
 * @return true only if the names contain no duplicates, there are exactly as many names
 *         as genotypes, and every genotype's sample name is among them
 */
private final static boolean sameSamples(List<Genotype> genotypes, Collection<String> sampleNamesInOrder) {
    final Set<String> uniqueNames = new HashSet<String>(sampleNamesInOrder);

    // Short-circuit keeps the original evaluation order: duplicates first, then counts.
    if ( uniqueNames.size() != sampleNamesInOrder.size() || genotypes.size() != uniqueNames.size() )
        return false;

    for ( final Genotype genotype : genotypes ) {
        final String sampleName = genotype.getSampleName();
        if ( ! uniqueNames.contains(sampleName) )
            return false;
    }

    return true;
}
}

View File

@ -37,6 +37,25 @@ import java.util.*;
/**
* Builder class for VariantContext
*
* Some basic assumptions here:
*
* 1 -- data isn't protectively copied. If you provide an attribute map to
* the build, and modify it later, the builder will see this and so will any
* resulting variant contexts. It's best not to modify collections provided
* to a builder.
*
* 2 -- the system uses the standard builder model, allowing the simple construction idiom:
*
* builder.source("a").genotypes(gc).id("x").make() => VariantContext
*
* 3 -- The best way to copy a VariantContext is:
*
* new VariantContextBuilder(vc).make() => a copy of VC
*
* 4 -- validation of arguments is done during the final make() call, so a
* VariantContextBuilder can exist in an inconsistent state as long as those issues
* are resolved before the call to make() is issued.
*
* @author depristo
*/
public class VariantContextBuilder {
@ -60,10 +79,19 @@ public class VariantContextBuilder {
/** enum of what must be validated */
final private EnumSet<VariantContext.Validation> toValidate = EnumSet.noneOf(VariantContext.Validation.class);
public VariantContextBuilder() {
}
/**
* Create an empty VariantContextBuilder where all values adopt their default values. Note that
* source, chr, start, stop, and alleles must eventually be filled in, or the resulting VariantContext
* will throw an error.
*/
public VariantContextBuilder() {}
/**
* Create an empty VariantContextBuilder where all values adopt their default values, but the bare min.
* of info (source, chr, start, stop, and alleles) have been provided to start.
*/
@Requires({"source != null", "contig != null", "start >= 0", "stop >= 0",
"alleles != null && !alleles.isEmpty()"})
public VariantContextBuilder(String source, String contig, long start, long stop, Collection<Allele> alleles) {
this.source = source;
this.contig = contig;
@ -95,6 +123,12 @@ public class VariantContextBuilder {
this.stop = parent.getEnd();
}
/**
* Tells this builder to use this collection of alleles for the resulting VariantContext
*
* @param alleles
* @return this builder
*/
@Requires({"alleles != null", "!alleles.isEmpty()"})
public VariantContextBuilder alleles(final Collection<Allele> alleles) {
this.alleles = alleles;
@ -103,6 +137,8 @@ public class VariantContextBuilder {
}
/**
* Tells this builder to use this map of attributes for the resulting VariantContext
*
* Attributes can be null -> meaning there are no attributes. After
* calling this routine the builder assumes it can modify the attributes
* object here, if subsequent calls are made to set attribute values
@ -114,6 +150,14 @@ public class VariantContextBuilder {
return this;
}
/**
* Puts the key -> value mapping into this builder's attributes
*
* @param key
* @param value
* @return
*/
@Requires({"key != null"})
public VariantContextBuilder attribute(final String key, final Object value) {
if ( ! attributesCanBeModified ) {
this.attributesCanBeModified = true;
@ -124,6 +168,8 @@ public class VariantContextBuilder {
}
/**
* This builder's filters are set to this value
*
* filters can be null -> meaning there are no filters
* @param filters
*/
@ -132,22 +178,41 @@ public class VariantContextBuilder {
return this;
}
/**
 * Varargs convenience form of {@link #filters}: the given names are collected
 * into a HashSet (dropping duplicates) and installed as this builder's filters.
 *
 * @param filters the filter names to use
 * @return this builder, to allow call chaining
 */
public VariantContextBuilder filters(final String ... filters) {
    final HashSet<String> uniqueFilters = new HashSet<String>(Arrays.asList(filters));
    filters(uniqueFilters);
    return this;
}
/**
* Tells this builder that the resulting VariantContext should have PASS filters.
*
* Implemented by installing VariantContext.PASSES_FILTERS through filters(...).
*
* @return whatever filters(...) returns -- presumably this builder; confirm against that overload
*/
public VariantContextBuilder passFilters() {
return filters(VariantContext.PASSES_FILTERS);
}
/**
* Tells this builder that the resulting VariantContext should be unfiltered.
*
* This is recorded by setting the builder's filters to null.
*
* @return this builder, to allow call chaining
*/
public VariantContextBuilder unfiltered() {
this.filters = null;
return this;
}
/**
* genotypes can be null -> meaning there are no genotypes
* Tells this builder that the resulting VariantContext should use this genotypes GenotypeContext
*
* Note that genotypes can be null -> meaning there are no genotypes
*
* @param genotypes
*/
public VariantContextBuilder genotypes(final GenotypesContext genotypes) {
@ -157,41 +222,74 @@ public class VariantContextBuilder {
return this;
}
/**
 * Tells this builder that the resulting VariantContext should use a GenotypesContext
 * built from the given collection.
 *
 * The collection is converted with GenotypesContext.copy before being installed.
 * Note that genotypes can be null -> meaning there are no genotypes.
 *
 * @param genotypes the genotypes to use, or null for none
 * @return the result of genotypes(GenotypesContext)
 */
public VariantContextBuilder genotypes(final Collection<Genotype> genotypes) {
    final GenotypesContext copied = GenotypesContext.copy(genotypes);
    return genotypes(copied);
}
/**
 * Tells this builder that the resulting VariantContext should use a GenotypesContext
 * built from the given genotypes.
 *
 * The varargs array is viewed as a list and converted with GenotypesContext.copy.
 *
 * @param genotypes the genotypes to use
 * @return the result of genotypes(GenotypesContext)
 */
public VariantContextBuilder genotypes(final Genotype ... genotypes) {
    final List<Genotype> asList = Arrays.asList(genotypes);
    return genotypes(GenotypesContext.copy(asList));
}
/**
* Tells this builder that the resulting VariantContext should not contain any GenotypesContext.
*
* This is recorded by setting the builder's genotypes to null.
*
* @return this builder, to allow call chaining
*/
public VariantContextBuilder noGenotypes() {
this.genotypes = null;
return this;
}
public VariantContextBuilder genotypesAreUnparsed(final boolean genotypesAreUnparsed) {
this.genotypesAreUnparsed = genotypesAreUnparsed;
/**
* ADVANCED! tells us that the genotypes data is stored as an unparsed attribute
* @return
*/
public VariantContextBuilder genotypesAreUnparsed() {
this.genotypesAreUnparsed = true;
return this;
}
/**
* Tells this builder that the resulting VariantContext should have the given ID.
*
* @param ID the ID to use; the contract requires it to be non-null
* @return this builder, to allow call chaining
*/
@Requires("ID != null")
public VariantContextBuilder id(final String ID) {
this.ID = ID;
return this;
}
/**
* Tells this builder that the resulting VariantContext should not have an ID.
*
* Implemented by setting the ID to VCFConstants.EMPTY_ID_FIELD through id(...).
*
* @return this builder, to allow call chaining
*/
public VariantContextBuilder noID() {
return id(VCFConstants.EMPTY_ID_FIELD);
}
/**
 * Tells this builder that the resulting VariantContext should have the given negLog10PError.
 *
 * NOTE(review): the contract accepts values <= 0 or equal to
 * VariantContext.NO_NEG_LOG_10PERROR. That bound looks surprising for a negated
 * log10 value -- confirm the intended sign convention.
 *
 * @param negLog10PError the value to store
 * @return this builder, to allow call chaining
 */
@Requires("negLog10PError <= 0 || negLog10PError == VariantContext.NO_NEG_LOG_10PERROR")
public VariantContextBuilder negLog10PError(final double negLog10PError) {
    this.negLog10PError = negLog10PError;
    return this;
}
/**
* Tells us that the resulting VariantContext should use this byte for the reference base
* Null means no refBase is available
* @param referenceBaseForIndel
*/
@ -201,12 +299,24 @@ public class VariantContextBuilder {
return this;
}
/**
* Tells this builder that the resulting VariantContext should have its source field set to source.
*
* @param source the source to use; the contract requires it to be non-null
* @return this builder, to allow call chaining
*/
@Requires("source != null")
public VariantContextBuilder source(final String source) {
this.source = source;
return this;
}
/**
* Tells us that the resulting VariantContext should have the specified location
* @param contig
* @param start
* @param stop
* @return
*/
@Requires({"contig != null", "start >= 0", "stop >= 0"})
public VariantContextBuilder loc(final String contig, final long start, final long stop) {
this.contig = contig;
@ -217,12 +327,22 @@ public class VariantContextBuilder {
return this;
}
/**
 * Tells this builder that the resulting VariantContext should be located on the given contig.
 * Start and stop are left untouched.
 *
 * @param contig the contig name; the contract requires it to be non-null
 * @return this builder, to allow call chaining
 */
@Requires({"contig != null"})
public VariantContextBuilder chr(final String contig) {
    this.contig = contig;
    return this;
}
/**
* Tells us that the resulting VariantContext should have the specified contig start
* @param start
* @return
*/
@Requires({"start >= 0"})
public VariantContextBuilder start(final long start) {
this.start = start;
@ -231,12 +351,26 @@ public class VariantContextBuilder {
return this;
}
/**
* Tells this builder that the resulting VariantContext should have the specified stop position.
*
* @param stop the stop coordinate; the contract requires it to be non-negative
* @return this builder, to allow call chaining
*/
@Requires({"stop >= 0"})
public VariantContextBuilder stop(final long stop) {
this.stop = stop;
return this;
}
/**
* Takes all of the builder data provided up to this point, and instantiates
* a freshly allocated VariantContext with all of the builder data. This
* VariantContext is validated as appropriate and if not failing QC (and
* throwing an exception) is returned.
*
* Note that this function can be called multiple times to create multiple
* VariantContexts from the same builder.
*/
public VariantContext make() {
return new VariantContext(source, ID, contig, start, stop, alleles,
genotypes, negLog10PError, filters, attributes,