diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index e6e4aa8ce..0f21e1505 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -296,6 +296,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec, if (parts.length > NUM_STANDARD_FIELDS) { builder.attribute(VariantContext.UNPARSED_GENOTYPE_MAP_KEY, new String(parts[8])); builder.attribute(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY, this); + builder.genotypesAreUnparsed(); } VariantContext vc = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java index 671066d24..c1fcd6226 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java @@ -24,19 +24,41 @@ package org.broadinstitute.sting.utils.variantcontext; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; + import java.util.*; /** - * + * Represents an ordered collection of Genotype objects */ public class GenotypesContext implements List { + /** + * static constant value for an empty GenotypesContext. Useful since so many VariantContexts have no genotypes + */ public final static GenotypesContext NO_GENOTYPES = new GenotypesContext(new ArrayList(0), new HashMap(0), Collections.emptyList(), true); + /** + *sampleNamesInOrder a list of sample names, one for each genotype in genotypes, sorted in alphabetical order + */ List sampleNamesInOrder = null; + + /** + * a map optimized for efficient lookup. 
Each genotype in genotypes must have its + * sample name in sampleNameToOffset, with a corresponding integer value that indicates the offset of that + * genotype in the vector of genotypes + */ Map sampleNameToOffset = null; + + /** if true, then we need to reinitialize sampleNamesInOrder and sampleNameToOffset before we use them */ boolean cacheIsInvalid = true; + + /** An ArrayList of genotypes contained in this context */ List genotypes; + + /** Are we allowing users to modify the list? */ boolean immutable = false; // --------------------------------------------------------------------------- @@ -45,14 +67,25 @@ public class GenotypesContext implements List { // // --------------------------------------------------------------------------- + /** + * Create an empty GenotypeContext + */ private GenotypesContext() { this(10, false); } + /** + * Create an empty GenotypeContext, with initial capacity for n elements + */ + @Requires("n >= 0") private GenotypesContext(final int n, final boolean immutable) { this(new ArrayList(n), immutable); } + /** + * Create a GenotypeContext containing genotypes + */ + @Requires("genotypes != null") private GenotypesContext(final ArrayList genotypes, final boolean immutable) { this.genotypes = genotypes; this.immutable = immutable; @@ -60,6 +93,23 @@ public class GenotypesContext implements List { this.cacheIsInvalid = true; } + /** + * Create a fully resolved GenotypeContext containing genotypes, sample lookup table, + * and sorted sample names + * + * @param genotypes our genotypes in arbitrary order + * @param sampleNameToOffset map optimized for efficient lookup. Each genotype in genotypes must have its + * sample name in sampleNameToOffset, with a corresponding integer value that indicates the offset of that + * genotype in the vector of genotypes + * @param sampleNamesInOrder a list of sample names, one for each genotype in genotypes, sorted in alphabetical + * order. 
+ * @param immutable + */ + @Requires({"genotypes != null", + "sampleNameToOffset != null", + "sampleNamesInOrder != null", + "genotypes.size() == sampleNameToOffset.size()", + "genotypes.size() == sampleNamesInOrder.size()"}) private GenotypesContext(final ArrayList genotypes, final Map sampleNameToOffset, final List sampleNamesInOrder, @@ -77,54 +127,98 @@ public class GenotypesContext implements List { // // --------------------------------------------------------------------------- + /** + * Basic creation routine + * @return an empty, mutable GenotypeContext + */ + @Ensures({"result != null"}) public static final GenotypesContext create() { return new GenotypesContext(); } + /** + * Basic creation routine + * @return an empty, mutable GenotypeContext with initial capacity for nGenotypes + */ + @Requires("nGenotypes >= 0") + @Ensures({"result != null"}) public static final GenotypesContext create(final int nGenotypes) { return new GenotypesContext(nGenotypes, false); } + /** + * Create a fully resolved GenotypeContext containing genotypes, sample lookup table, + * and sorted sample names + * + * @param genotypes our genotypes in arbitrary + * @param sampleNameToOffset map optimized for efficient lookup. Each genotype in genotypes must have its + * sample name in sampleNameToOffset, with a corresponding integer value that indicates the offset of that + * genotype in the vector of genotypes + * @param sampleNamesInOrder a list of sample names, one for each genotype in genotypes, sorted in alphabetical + * order. 
+ * @return a mutable GenotypeContext containing genotypes with already present lookup data + */ + @Requires({"genotypes != null", + "sampleNameToOffset != null", + "sampleNamesInOrder != null", + "sameSamples(genotypes, sampleNamesInOrder)", + "sameSamples(genotypes, sampleNameToOffset.keySet())"}) + @Ensures({"result != null"}) public static final GenotypesContext create(final ArrayList genotypes, final Map sampleNameToOffset, final List sampleNamesInOrder) { return new GenotypesContext(genotypes, sampleNameToOffset, sampleNamesInOrder, false); } + /** + * Create a fully resolved GenotypeContext containing genotypes + * + * @param genotypes our genotypes in arbitrary order + * @return a mutable GenotypeContext containing genotypes + */ + @Requires({"genotypes != null"}) + @Ensures({"result != null"}) public static final GenotypesContext create(final ArrayList genotypes) { return genotypes == null ? NO_GENOTYPES : new GenotypesContext(genotypes, false); } + /** + * Create a fully resolved GenotypeContext containing genotypes + * + * @param genotypes our genotypes in arbitrary order + * @return a mutable GenotypeContext containing genotypes + */ + @Requires({"genotypes != null"}) + @Ensures({"result != null"}) public static final GenotypesContext create(final Genotype... 
genotypes) { return new GenotypesContext(new ArrayList(Arrays.asList(genotypes)), false); } + /** + * Create a freshly allocated GenotypeContext containing the genotypes in toCopy + * + * @param toCopy the GenotypesContext to copy + * @return a mutable GenotypeContext containing genotypes + */ + @Requires({"toCopy != null"}) + @Ensures({"result != null"}) public static final GenotypesContext copy(final GenotypesContext toCopy) { return create(new ArrayList(toCopy.genotypes)); } + /** + * Create a GenotypesContext containing the genotypes in iteration order contained + * in toCopy + * + * @param toCopy the collection of genotypes + * @return a mutable GenotypeContext containing genotypes + */ + @Requires({"toCopy != null"}) + @Ensures({"result != null"}) public static final GenotypesContext copy(final Collection toCopy) { return toCopy == null ? NO_GENOTYPES : create(new ArrayList(toCopy)); } - - -// public static final GenotypeMap create(final Collection genotypes) { -// if ( genotypes == null ) -// return null; // todo -- really should return an empty map -// else { -// GenotypeMap genotypeMap = new GenotypeMap(genotypes.size(), false); -// for ( final Genotype g : genotypes ) { -// if ( genotypeMap.containsKey(g.getSampleName() ) ) -// throw new IllegalArgumentException("Duplicate genotype added to VariantContext: " + g); -// genotypeMap.put(g.getSampleName(), g); -// } -// -// //return genotypeMap.immutable(); // todo enable when we have time to dive into mutability issue -// return genotypeMap; -// } -// } - // --------------------------------------------------------------------------- // // Mutability methods @@ -152,23 +246,31 @@ public class GenotypesContext implements List { // // --------------------------------------------------------------------------- + @Ensures({"cacheIsInvalid == true"}) private void invalidateCaches() { cacheIsInvalid = true; sampleNamesInOrder = null; sampleNameToOffset = null; } + @Ensures({"cacheIsInvalid == false", + 
"sampleNamesInOrder != null", + "sampleNameToOffset != null", + "sameSamples(genotypes, sampleNamesInOrder)", + "sameSamples(genotypes, sampleNameToOffset.keySet())"}) private void buildCache() { - cacheIsInvalid = false; - sampleNamesInOrder = new ArrayList(genotypes.size()); - sampleNameToOffset = new HashMap(genotypes.size()); + if ( cacheIsInvalid ) { + cacheIsInvalid = false; + sampleNamesInOrder = new ArrayList(genotypes.size()); + sampleNameToOffset = new HashMap(genotypes.size()); - for ( int i = 0; i < genotypes.size(); i++ ) { - final Genotype g = genotypes.get(i); - sampleNamesInOrder.add(g.getSampleName()); - sampleNameToOffset.put(g.getSampleName(), i); + for ( int i = 0; i < genotypes.size(); i++ ) { + final Genotype g = genotypes.get(i); + sampleNamesInOrder.add(g.getSampleName()); + sampleNameToOffset.put(g.getSampleName(), i); + } + Collections.sort(sampleNamesInOrder); } - Collections.sort(sampleNamesInOrder); } @@ -195,12 +297,14 @@ public class GenotypesContext implements List { } @Override + @Requires("genotype != null") public boolean add(final Genotype genotype) { checkImmutability(); invalidateCaches(); return genotypes.add(genotype); } + @Requires("genotype != null") public boolean add(final Genotype ... 
genotype) { checkImmutability(); invalidateCaches(); @@ -263,13 +367,15 @@ public class GenotypesContext implements List { @Override public ListIterator listIterator() { // todo -- must be immutable - return genotypes.listIterator(); + throw new UnsupportedOperationException(); +// return genotypes.listIterator(); } @Override public ListIterator listIterator(final int i) { // todo -- must be immutable - return genotypes.listIterator(i); + throw new UnsupportedOperationException(); +// return genotypes.listIterator(i); } @Override @@ -322,6 +428,14 @@ public class GenotypesContext implements List { return genotypes.toArray(ts); } + /** + * Iterate over the Genotypes in this context in the order specified by sampleNamesInOrder + * + * @param sampleNamesInOrder a Iterable of String, containing exactly one entry for each Genotype sample name in + * this context + * @return a Iterable over the genotypes in this context. + */ + @Requires("sampleNamesInOrder != null") public Iterable iterateInSampleNameOrder(final Iterable sampleNamesInOrder) { return new Iterable() { @Override @@ -331,6 +445,11 @@ public class GenotypesContext implements List { }; } + /** + * Iterate over the Genotypes in this context in their sample name order (A, B, C) + * regardless of the underlying order in the vector of genotypes + * @return a Iterable over the genotypes in this context. 
+ */ public Iterable iterateInSampleNameOrder() { return iterateInSampleNameOrder(getSampleNamesOrderedByName()); } @@ -358,30 +477,57 @@ public class GenotypesContext implements List { } } + /** + * @return The set of sample names for all genotypes in this context, in arbitrary order + */ + @Ensures("result != null") public Set getSampleNames() { buildCache(); return sampleNameToOffset.keySet(); } + /** + * @return The set of sample names for all genotypes in this context, in their natural ordering (A, B, C) + */ + @Ensures("result != null") public List getSampleNamesOrderedByName() { buildCache(); return sampleNamesInOrder; } + @Requires("sample != null") public boolean containsSample(final String sample) { buildCache(); return sampleNameToOffset.containsKey(sample); } + @Requires("samples != null") public boolean containsSamples(final Collection samples) { buildCache(); return getSampleNames().containsAll(samples); } + /** + * Return a freshly allocated subcontext of this context containing only the samples + * listed in samples. Note that samples can contain names not in this context, they + * will just be ignored. 
+ * + * @param samples + * @return + */ + @Requires("samples != null") + @Ensures("result != null") public GenotypesContext subsetToSamples( final Collection samples ) { return subsetToSamples(new HashSet(samples)); } + /** + * {@link #subsetToSamples(java.util.Collection)} + * @param samples + * @return + */ + @Requires("samples != null") + @Ensures("result != null") public GenotypesContext subsetToSamples( final Set samples ) { if ( samples.size() == genotypes.size() ) return this; @@ -426,4 +572,18 @@ public class GenotypesContext implements List { } } } + + private final static boolean sameSamples(List genotypes, Collection sampleNamesInOrder) { + Set names = new HashSet(sampleNamesInOrder); + if ( names.size() != sampleNamesInOrder.size() ) + return false; + if ( genotypes.size() != names.size() ) + return false; + + for ( final Genotype g : genotypes ) + if ( ! names.contains(g.getSampleName()) ) + return false; + + return true; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java index fb92f60a2..67077e8c3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextBuilder.java @@ -37,6 +37,25 @@ import java.util.*; /** * Builder class for VariantContext * + * Some basic assumptions here: + * + * 1 -- data isn't protectively copied. If you provide an attribute map to + * the build, and modify it later, the builder will see this and so will any + * resulting variant contexts. It's best not to modify collections provided + * to a builder. 
+ * + * 2 -- the system uses the standard builder model, allowing the simple construction idiom: + * + * builder.source("a").genotypes(gc).id("x").make() => VariantContext + * + * 3 -- The best way to copy a VariantContext is: + * + * new VariantContextBuilder(vc).make() => a copy of VC + * + * 4 -- validation of arguments is done during the final make() call, so a + * VariantContextBuilder can exist in an inconsistent state as long as those issues + * are resolved before the call to make() is issued. + * * @author depristo */ public class VariantContextBuilder { @@ -60,10 +79,19 @@ public class VariantContextBuilder { /** enum of what must be validated */ final private EnumSet toValidate = EnumSet.noneOf(VariantContext.Validation.class); - public VariantContextBuilder() { - - } + /** + * Create an empty VariantContextBuilder where all values adopt their default values. Note that + * source, chr, start, stop, and alleles must eventually be filled in, or the resulting VariantContext + * will throw an error. + */ + public VariantContextBuilder() {} + /** + * Create an empty VariantContextBuilder where all values adopt their default values, but the bare min. 
+ */ + @Requires({"source != null", "contig != null", "start >= 0", "stop >= 0", + "alleles != null && !alleles.isEmpty()"}) public VariantContextBuilder(String source, String contig, long start, long stop, Collection alleles) { this.source = source; this.contig = contig; @@ -95,6 +123,12 @@ public class VariantContextBuilder { this.stop = parent.getEnd(); } + /** + * Tells this builder to use this collection of alleles for the resulting VariantContext + * + * @param alleles + * @return this builder + */ @Requires({"alleles != null", "!alleles.isEmpty()"}) public VariantContextBuilder alleles(final Collection alleles) { this.alleles = alleles; @@ -103,6 +137,8 @@ public class VariantContextBuilder { } /** + * Tells this builder to use this map of attributes alleles for the resulting VariantContext + * * Attributes can be null -> meaning there are no attributes. After * calling this routine the builder assumes it can modify the attributes * object here, if subsequent calls are made to set attribute values @@ -114,6 +150,14 @@ public class VariantContextBuilder { return this; } + /** + * Puts the key -> value mapping into this builder's attributes + * + * @param key + * @param value + * @return + */ + @Requires({"key != null"}) public VariantContextBuilder attribute(final String key, final Object value) { if ( ! attributesCanBeModified ) { this.attributesCanBeModified = true; @@ -124,6 +168,8 @@ public class VariantContextBuilder { } /** + * This builder's filters are set to this value + * * filters can be null -> meaning there are no filters * @param filters */ @@ -132,22 +178,41 @@ public class VariantContextBuilder { return this; } + /** + * {@link #filters} + * + * @param filters + * @return + */ public VariantContextBuilder filters(final String ... 
filters) { filters(new HashSet(Arrays.asList(filters))); return this; } + /** + * Tells this builder that the resulting VariantContext should have PASS filters + * + * @return + */ public VariantContextBuilder passFilters() { return filters(VariantContext.PASSES_FILTERS); } + /** + * Tells this builder that the resulting VariantContext be unfiltered + * + * @return + */ public VariantContextBuilder unfiltered() { this.filters = null; return this; } /** - * genotypes can be null -> meaning there are no genotypes + * Tells this builder that the resulting VariantContext should use this genotypes GenotypeContext + * + * Note that genotypes can be null -> meaning there are no genotypes + * * @param genotypes */ public VariantContextBuilder genotypes(final GenotypesContext genotypes) { @@ -157,41 +222,74 @@ public class VariantContextBuilder { return this; } + /** + * Tells this builder that the resulting VariantContext should use a GenotypeContext containing genotypes + * + * Note that genotypes can be null -> meaning there are no genotypes + * + * @param genotypes + */ public VariantContextBuilder genotypes(final Collection genotypes) { return genotypes(GenotypesContext.copy(genotypes)); } + /** + * Tells this builder that the resulting VariantContext should use a GenotypeContext containing genotypes + * @param genotypes + */ public VariantContextBuilder genotypes(final Genotype ... genotypes) { return genotypes(GenotypesContext.copy(Arrays.asList(genotypes))); } + /** + * Tells this builder that the resulting VariantContext should not contain any GenotypeContext + */ public VariantContextBuilder noGenotypes() { this.genotypes = null; return this; } - public VariantContextBuilder genotypesAreUnparsed(final boolean genotypesAreUnparsed) { - this.genotypesAreUnparsed = genotypesAreUnparsed; + /** + * ADVANCED! 
tells us that the genotypes data is stored as an unparsed attribute + * @return + */ + public VariantContextBuilder genotypesAreUnparsed() { + this.genotypesAreUnparsed = true; return this; } + /** + * Tells us that the resulting VariantContext should have ID + * @param ID + * @return + */ @Requires("ID != null") public VariantContextBuilder id(final String ID) { this.ID = ID; return this; } + /** + * Tells us that the resulting VariantContext should not have an ID + * @return + */ public VariantContextBuilder noID() { return id(VCFConstants.EMPTY_ID_FIELD); } - @Requires("negLog10PError <= 0") + /** + * Tells us that the resulting VariantContext should have negLog10PError + * @param negLog10PError + * @return + */ + @Requires("negLog10PError <= 0 || negLog10PError == VariantContext.NO_NEG_LOG_10PERROR") public VariantContextBuilder negLog10PError(final double negLog10PError) { this.negLog10PError = negLog10PError; return this; } /** + * Tells us that the resulting VariantContext should use this byte for the reference base * Null means no refBase is available * @param referenceBaseForIndel */ @@ -201,12 +299,24 @@ public class VariantContextBuilder { return this; } + /** + * Tells us that the resulting VariantContext should have source field set to source + * @param source + * @return + */ @Requires("source != null") public VariantContextBuilder source(final String source) { this.source = source; return this; } + /** + * Tells us that the resulting VariantContext should have the specified location + * @param contig + * @param start + * @param stop + * @return + */ @Requires({"contig != null", "start >= 0", "stop >= 0"}) public VariantContextBuilder loc(final String contig, final long start, final long stop) { this.contig = contig; @@ -217,12 +327,22 @@ public class VariantContextBuilder { return this; } - @Requires({"contig != null", "start >= 0", "stop >= 0"}) + /** + * Tells us that the resulting VariantContext should have the specified contig chr + * @param 
contig + * @return + */ + @Requires({"contig != null"}) public VariantContextBuilder chr(final String contig) { this.contig = contig; return this; } + /** + * Tells us that the resulting VariantContext should have the specified contig start + * @param start + * @return + */ @Requires({"start >= 0"}) public VariantContextBuilder start(final long start) { this.start = start; @@ -231,12 +351,26 @@ public class VariantContextBuilder { return this; } + /** + * Tells us that the resulting VariantContext should have the specified contig stop + * @param stop + * @return + */ @Requires({"stop >= 0"}) public VariantContextBuilder stop(final long stop) { this.stop = stop; return this; } + /** + * Takes all of the builder data provided up to this point, and instantiates + * a freshly allocated VariantContext with all of the builder data. This + * VariantContext is validated as appropriate and if not failing QC (and + * throwing an exception) is returned. + * + * Note that this function can be called multiple times to create multiple + * VariantContexts from the same builder. + */ public VariantContext make() { return new VariantContext(source, ID, contig, start, stop, alleles, genotypes, negLog10PError, filters, attributes,