From d9671dffbaf555c1981a5d4669c1582c3b6ff6ef Mon Sep 17 00:00:00 2001 From: depristo Date: Mon, 1 Feb 2010 17:49:51 +0000 Subject: [PATCH] Documentation for VariantContext. Please read it and start using it. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2756 348d0f76-0448-11de-a6fe-93d51630548a --- .../oneoffprojects/variantcontext/Allele.java | 106 ++++- .../variantcontext/AttributedObject.java | 40 ++ .../variantcontext/VariantContext.java | 443 +++++++++++++----- .../VariantContextAdaptors.java | 6 +- .../variantcontext/VariantContextUtils.java | 4 + .../utils/genotype/vcf/VCFGenotypeRecord.java | 6 +- .../variantcontext/VariantContextTest.java | 2 - 7 files changed, 466 insertions(+), 141 deletions(-) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java index de72ca049..b49aa6453 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java @@ -20,7 +20,7 @@ import java.util.Arrays; * * SNP polymorphism of C/G -> { C , G } -> C is the reference allele * 1 base deletion of C -> { C , - } -> C is the reference allele - * 1 base insertion of A -> { - ; A } -> NULL is the reference allele + * 1 base insertion of A -> { - ; A } -> Null is the reference allele * * Suppose I see a the following in the population: * @@ -61,12 +61,16 @@ import java.util.Arrays; * A / C @ loc => SNP with * - / A => INDEL * - * If you know where allele is the reference, you can determine whether the variant is an insertion or deletion + * If you know where allele is the reference, you can determine whether the variant is an insertion or deletion. + * + * Alelle also supports is concept of a NO_CALL allele. This Allele represents a haplotype that couldn't be + * determined. This is usually represented by a '.' allele. 
+ * + * Note that Alleles store all bases as bytes, in **UPPER CASE**. So 'atc' == 'ATC' from the perspective of an + * Allele. */ public class Allele { private static final byte[] EMPTY_ALLELE_BASES = new byte[0]; -// private static final byte[] NULL_ALLELE_BASES = new byte[0]; -// private static final byte[] NO_CALL_ALLELE_BASES = ".".getBytes(); private boolean isRef = false; private boolean isNull = false; @@ -74,8 +78,17 @@ public class Allele { private byte[] bases = null; + /** A generic static NO_CALL allele for use */ public final static Allele NO_CALL = new Allele("."); + /** + * Create a new Allele that includes bases and if tagged as the reference allele if isRef == true. If bases + * == '-', a Null allele is created. If bases == '.', a no call Allele is created. + * + * @param bases the DNA sequence of this variation, '-', of '.' + * @param isRef should we make this a reference allele? + * @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformated + */ public Allele(byte[] bases, boolean isRef) { if ( bases == null ) throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele"); @@ -84,8 +97,7 @@ public class Allele { if ( wouldBeNullAllele(bases) ) { bases = EMPTY_ALLELE_BASES; isNull = true; - } - if ( wouldBeNoCallAllele(bases) ) { + } else if ( wouldBeNoCallAllele(bases) ) { bases = EMPTY_ALLELE_BASES; isNoCall = true; if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); @@ -100,21 +112,26 @@ public class Allele { throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases)); } - public final static boolean wouldBeNullAllele(byte[] bases) { + /** + * Do the bases represent the null allele? 
+ */ + public static boolean wouldBeNullAllele(byte[] bases) { return (bases.length == 1 && bases[0] == '-') || bases.length == 0; } - public final static boolean wouldBeNoCallAllele(byte[] bases) { + /** Do the bases represent the NO_CALL allele? */ + public static boolean wouldBeNoCallAllele(byte[] bases) { return bases.length == 1 && bases[0] == '.'; } - - public final static boolean acceptableAlleleBases(String bases) { + /** Can we create an allele from bases, including NO_CALL and Null alleles? */ + public static boolean acceptableAlleleBases(String bases) { return acceptableAlleleBases(bases.getBytes()); } - - public final static boolean acceptableAlleleBases(byte[] bases) { - if ( (bases.length == 1 && bases[0] == '-') || bases.length == 0) + + /** Can we create an allele from bases, including NO_CALL and Null alleles? */ + public static boolean acceptableAlleleBases(byte[] bases) { + if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) ) return true; for ( byte b : bases ) { @@ -126,33 +143,52 @@ public class Allele { return true; } - /** null allele creation method */ - public Allele(boolean isRef) { - this("", isRef); - } - + /** + * @see Allele(byte[], boolean) + * + * @param bases + * @param isRef + */ public Allele(String bases, boolean isRef) { this(bases.getBytes(), isRef); } - public Allele() { this(false); } + /** + * Creates a non-Ref allele.
@see Allele(byte[], boolean) for full information + * + * @param bases + */ public Allele(byte[] bases) { this(bases, false); } - // + // --------------------------------------------------------------------------------------------------------- // // accessor routines // - // + // --------------------------------------------------------------------------------------------------------- + + /** Returns true if this is the null allele */ public boolean isNull() { return isNull; } + /** Returns true if this is not the null allele */ public boolean isNonNull() { return ! isNull(); } + /** Returns true if this is the NO_CALL allele */ public boolean isNoCall() { return isNoCall; } + /** Returns true if this is the not the NO_CALL allele */ public boolean isCalled() { return ! isNoCall(); } + /** Returns true if this Allele is the reference allele */ public boolean isReference() { return isRef; } + /** Returns true if this Allele is not the reference allele */ public boolean isNonReference() { return ! isReference(); } + /** Returns a nice string representation of this object */ public String toString() { return (isNull() ? "-" : ( isNoCall() ? "." : new String(getBases()))) + (isReference() ? "*" : ""); } @@ -174,11 +210,37 @@ public class Allele { return isRef == other.isRef && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases()); } - // todo -- notice case insensitivity + /** + * Returns true if this Alelle contains the same bases as test, regardless of its reference status. Also handles + * Null and NO_CALL alleles + * + * @param test + * @return + */ public boolean basesMatch(byte[] test) { return bases == test || Arrays.equals(bases, test); } + + /** + * Returns true if this Alelle contains the same bases as test, regardless of its reference status. 
Also handles + * Null and NO_CALL alleles + * + * @param test + * @return + */ public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } + + /** + * Returns true if this Alelle contains the same bases as test, regardless of its reference status. Also handles + * Null and NO_CALL alleles + * + * @param test + * @return + */ public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } + /** + * Returns the length of this allele. Null and NO_CALL alleles have 0 length. + * @return + */ public int length() { return bases.length; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/AttributedObject.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/AttributedObject.java index a433fc667..3a761d65e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/AttributedObject.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/AttributedObject.java @@ -16,6 +16,7 @@ import java.util.*; public class AttributedObject { public static final double NO_NEG_LOG_10PERROR = 0.0; private double negLog10PError = NO_NEG_LOG_10PERROR; + private Set filters = new HashSet(); private Map attributes = new HashMap(); @@ -36,6 +37,45 @@ public class AttributedObject { } + // --------------------------------------------------------------------------------------------------------- + // + // Filter + // + // --------------------------------------------------------------------------------------------------------- + + public Set getFilters() { + return filters; + } + + public boolean isFiltered() { + return filters.size() > 0; + } + + public boolean isNotFiltered() { + return ! 
isFiltered(); + } + + public void addFilter(Object filter) { + if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this); + if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this); + filters.add(filter); + } + + public void addFilters(Collection filters) { + if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this); + for ( Object f : filters ) + addFilter(f); + } + + public void clearFilters() { + filters.clear(); + } + + public void setFilters(Collection filters) { + clearFilters(); + addFilters(filters); + } + // --------------------------------------------------------------------------------------------------------- // // Working with log error rates diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java index b7b4ddf3f..a1569a16f 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java @@ -6,28 +6,177 @@ import org.broadinstitute.sting.utils.BaseUtils; import java.util.*; - /** * @author depristo *

* Class VariantContext *

* - * + * == High-level overview == + * + * The VariantContext object is a single general class system for representing genetic variation data composed of: + * + * * Allele: representing single genetic haplotypes (A, T, ATC, -) + * * Genotype: an assignment of alleles for each chromosome of a single named sample at a particular locus + * * VariantContext: an abstract class holding all segregating alleles at a locus as well as genotypes + * for multiple individuals containing alleles at that locus + * + * The class system works by defining segregating alleles, creating a variant context representing the segregating + * information at a locus, and potentially creating and associating genotypes with individuals in the context. + * + * All of the classes are highly validating -- call validate() if you modify them -- so you can rely on the + * self-consistency of the data once you have a VariantContext in hand. The system has a rich set of assessor + * and manipulator routines, as well as more complex static support routines in VariantContextUtils. + * + * The VariantContext (and Genotype) objects are attributed (supporting addition of arbitrary key/value pairs) and + * filtered (can represent a variation that is viewed as suspect). + * + * VariantContexts are dynamically typed, so whether a VariantContext is a SNP, Indel, or NoVariant depends + * on the properties of the alleles in the context. See the detailed documentation on the Type parameter below. + * + * It's also easy to create subcontexts based on selected genotypes. 
+ * + * == Working with Variant Contexts == + * === Some example data === + * + * Allele A, Aref, T, Tref; + * Allele del, delRef, ATC, ATCref; + * + * A [ref] / T at 10 + * GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10); + * + * - / ATC [ref] from 20-22 + * GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22); + * + * // - [ref] / ATC immediately after 20 + * GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20); + * + * === Alleles === + * + * See the documentation in the Allele class itself + * + * What are they? + * + * Alleles can be either reference or non-reference + * + * Example alleles used here: + * + * del = new Allele("-"); + * A = new Allele("A"); + * Aref = new Allele("A", true); + * T = new Allele("T"); + * ATC = new Allele("ATC"); + * + * === Creating variant contexts === + * + * ==== By hand ==== + * + * Here's an example of an A/T polymorphism with the A being reference: + * + *

+ * VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref, T));
+ * 
+ * + * If you want to create a non-variant site, just put in a single reference allele + * + *
+ * VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref));
+ * 
+ * + * A deletion is just as easy: + * + *
+ * VariantContext vc = new VariantContext(delLoc, Arrays.asList(ATCref, del));
+ * 
+ * + * The only two things that distinguish an insertion from a deletion are the reference allele + * and the location of the variation. An insertion has a Null reference allele and at least + * one non-reference Non-Null allele. Additionally, the location of the insertion is immediately after + * a 1-bp GenomeLoc (at say 20). + * + *
+ * VariantContext vc = new VariantContext(insLoc, Arrays.asList(delRef, ATC));
+ * 
+ * + * ==== Converting rods and other data structures to VCs ==== + * + * You can convert many common types into VariantContexts using the general function: + * + *
+ * VariantContextAdaptors.convertToVariantContext(myObject)
+ * 
+ * + * dbSNP and VCFs, for example, can be passed in as myObject and a VariantContext corresponding to that + * object will be returned. A null return value indicates that the type isn't yet supported. This is the best + * and easiest way to create contexts using RODs. + * + * + * === Working with genotypes === + * + *
+ * List alleles = Arrays.asList(Aref, T);
+ * VariantContext vc = new VariantContext(snpLoc, alleles);
+ *
+ * Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "g1", 10);
+ * Genotype g2 = new Genotype(Arrays.asList(Aref, T), "g2", 10);
+ * Genotype g3 = new Genotype(Arrays.asList(T, T), "g3", 10);
+ * vc.addGenotypes(Arrays.asList(g1, g2, g3));
+ * 
+ * + * At this point we have 3 genotypes in our context, g1-g3. + * + * You can access a good deal of information about the genotypes through the VariantContext: + * + *
+ * vc.hasGenotypes()
+ * vc.isMonomorphic()
+ * vc.isPolymorphic()
+ * vc.getSampleNames().size()
+ *
+ * vc.getGenotypes()
+ * vc.getGenotypes().get("g1")
+ * vc.hasGenotype("g1")
+ *
+ * vc.getChromosomeCount()
+ * vc.getChromosomeCount(Aref)
+ * vc.getChromosomeCount(T)
+ * 
+ * + * === NO_CALL alleles === + * + * The system allows one to create Genotypes carrying special NO_CALL alleles that aren't present in the + * set of context alleles and that represent undetermined alleles in a genotype: + * + * Genotype g4 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "NO_DATA_FOR_SAMPLE", 10); + * + * + * === subcontexts === + * It's also very easy to get a subcontext based only on the data in a subset of the genotypes: + * + *
+ * VariantContext vc12 = vc.subContextFromGenotypes(Arrays.asList(g1,g2));
+ * VariantContext vc1 = vc.subContextFromGenotypes(Arrays.asList(g1));
+ * 
*/ public class VariantContext extends AttributedObject { + /** The location of this VariantContext */ private GenomeLoc loc; + + /** The type (cached for performance reasons) of this context */ private Type type = Type.UNDETERMINED; + + /** A set of the alleles segregating in this context */ private Set alleles = new HashSet(); + + /** A mapping from sampleName -> genotype objects for all genotypes associated with this context */ private Map genotypes = new HashMap(); - private Set filters = new HashSet(); + // --------------------------------------------------------------------------------------------------------- // // constructors // // --------------------------------------------------------------------------------------------------------- - public VariantContext(GenomeLoc loc) { super(); @@ -57,7 +206,7 @@ public class VariantContext extends AttributedObject { public VariantContext(GenomeLoc loc, Collection alleles, Collection genotypes) { this(loc); setAlleles(alleles); - setGenotypes(genotypes); + addGenotypes(genotypes); validate(); } @@ -66,7 +215,7 @@ public class VariantContext extends AttributedObject { double negLog10PError, Collection filters) { this(loc); setAlleles(alleles); - setGenotypes(genotypes); + addGenotypes(genotypes); setAttributes(attributes); setNegLog10PError(negLog10PError); setFilters(filters); @@ -78,7 +227,7 @@ public class VariantContext extends AttributedObject { double negLog10PError, Collection filters) { this(loc); setAlleles(alleles); - setGenotypes(genotypes); + addGenotypes(genotypes); setAttributes(attributes); setNegLog10PError(negLog10PError); setFilters(filters); @@ -93,20 +242,37 @@ public class VariantContext extends AttributedObject { // // --------------------------------------------------------------------------------------------------------- + /** + * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype + * genotype and alleles in genotype. 
This is the right way to test if a single genotype is actually + * variant or not. + * + * @param genotype + * @return + */ public VariantContext subContextFromGenotypes(Genotype genotype) { return subContextFromGenotypes(Arrays.asList(genotype)); } + + /** + * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes + * genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually + * variant or not. + * + * @param genotypes + * @return + */ public VariantContext subContextFromGenotypes(Collection genotypes) { // todo -- we should check for uniqueness of genotypes - return subContextFromGenotypes(new HashSet(genotypes), getAttributes()); + return new VariantContext(getLocation(), allelesOfGenotypes(genotypes), genotypes, getAttributes(), getNegLog10PError(), getFilters()); } - public VariantContext subContextFromGenotypes(Collection genotypes, Map attributes) { - return new VariantContext(getLocation(), allelesOfGenotypes(genotypes), genotypes, attributes, getNegLog10PError(), getFilters()); - } - - /** helper routnine for subcontext */ + /** + * helper routnine for subcontext + * @param genotypes + * @return + */ private Set allelesOfGenotypes(Collection genotypes) { Set alleles = new HashSet(); @@ -123,45 +289,6 @@ public class VariantContext extends AttributedObject { return alleles; } - // --------------------------------------------------------------------------------------------------------- - // - // Filter - // - // --------------------------------------------------------------------------------------------------------- - - public Set getFilters() { - return filters; - } - - public boolean isFiltered() { - return filters.size() > 0; - } - - public boolean isNotFiltered() { - return ! 
isFiltered(); - } - - public void addFilter(Object filter) { - if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this); - if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this); - filters.add(filter); - } - - public void addFilters(Collection filters) { - if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this); - for ( Object f : filters ) - addFilter(f); - } - - public void clearFilters() { - filters.clear(); - } - - public void setFilters(Collection filters) { - clearFilters(); - addFilters(filters); - } - // --------------------------------------------------------------------------------------------------------- // // type operations @@ -195,6 +322,8 @@ public class VariantContext extends AttributedObject { * Mixed * Mix of other classes * + * Also supports NO_VARIATION type, used to indicate that the site isn't polymorphic in the population + * * * Not currently supported: * @@ -232,9 +361,9 @@ public class VariantContext extends AttributedObject { } /** - * convenience method for switching over the allele type + * Determines (if necessary) and returns the type of this variation by examining the alleles it contains. * - * @return the AlleleType of this allele + * @return the type of this VariantContext **/ public Type getType() { if ( type == Type.UNDETERMINED ) @@ -255,7 +384,10 @@ public class VariantContext extends AttributedObject { return BaseUtils.SNPSubstitutionType(getReference().getBases()[0], getAlternateAllele(0).getBases()[0]); } + /** If this is a BiAlleic SNP, is it a transition? */ public boolean isTransition() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSITION; } + + /** If this is a BiAlleic SNP, is it a transversion? 
*/ public boolean isTransversion() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSVERSION; } /** @@ -322,6 +454,7 @@ public class VariantContext extends AttributedObject { return ref; } + /** Private helper routine that grabs the reference allele but doesn't through an error if there's no such allele */ private Allele getReferenceWithoutError() { for ( Allele allele : getAlleles() ) if ( allele.isReference() ) @@ -336,14 +469,23 @@ public class VariantContext extends AttributedObject { return getNAlleles() == 2; } + /** + * @return The number of segregating alleles in this context + */ public int getNAlleles() { return alleles.size(); } + /** + * @return The allele sharing the same bases as this String. A convenience method; better to use byte[] + */ public Allele getAllele(String allele) { return getAllele(allele.getBytes()); } + /** + * @return The allele sharing the same bases as this byte[], or null if no such allele is present. + */ public Allele getAllele(byte[] allele) { for ( Allele a : getAlleles() ) { if ( a.basesMatch(allele) ) { @@ -354,6 +496,9 @@ public class VariantContext extends AttributedObject { return null; // couldn't find anything } + /** + * @return True if this context contains Allele allele, or false otherwise + */ public boolean hasAllele(Allele allele) { for ( Allele a : getAlleles() ) { if ( a.equals(allele) ) @@ -389,6 +534,11 @@ public class VariantContext extends AttributedObject { return altAlleles; } + /** + * @param i -- the ith allele (from 0 to n - 2 for a context with n alleles including a reference allele) + * @return the ith non-reference allele in this context + * @throws IllegalArgumentException if i is invalid + */ public Allele getAlternateAllele(int i) { int n = 0; @@ -400,18 +550,28 @@ public class VariantContext extends AttributedObject { throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this); } - + /** + * Sets the 
alleles segregating in this context to the collect of alleles. Each of which must be unique according + * to equals() in Allele. Validate() should be called when you are done modifying the context. + * + * @param alleles + */ public void setAlleles(Collection alleles) { this.alleles.clear(); for ( Allele a : alleles ) addAllele(a); } + /** + * Adds allele to the segregating allele list in this context to the collection of alleles. The new + * allele must be be unique according to equals() in Allele. + * Validate() should be called when you are done modifying the context. + * + * @param allele + */ public void addAllele(Allele allele) { - addAllele(allele, false); - } + final boolean allowDuplicates = false; // used to be a parameter - public void addAllele(Allele allele, boolean allowDuplicates) { type = Type.UNDETERMINED; for ( Allele a : alleles ) { @@ -431,21 +591,35 @@ public class VariantContext extends AttributedObject { // --------------------------------------------------------------------------------------------------------- /** - * @return true if the context represents variants with associated genotypes + * @return true if the context has associated genotypes */ public boolean hasGenotypes() { return genotypes.size() > 0; } - public boolean hasSingleSample() { return genotypes.size() == 1; } - /** * @return set of all Genotypes associated with this context */ public Map getGenotypes() { return genotypes; } + /** + * Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map + * for consistency with the multi-get function. + * + * @param sampleName + * @return + * @throws IllegalArgumentException if sampleName isn't bound to a genotype + */ public Map getGenotypes(String sampleName) { return getGenotypes(Arrays.asList(sampleName)); } + /** + * Returns a map from sampleName -> Genotype for each sampleName in sampleNames. Returns a map + * for consistency with the multi-get function. 
+ * + * @param sampleNames a unique list of sample names + * @return + * @throws IllegalArgumentException if sampleName isn't bound to a genotype + */ public Map getGenotypes(Collection sampleNames) { HashMap map = new HashMap(); @@ -464,6 +638,20 @@ public class VariantContext extends AttributedObject { return getGenotypes().keySet(); } + /** + * @param sample the sample name + * + * @return the Genotype associated with the given sample in this context or null if the sample is not in this context + */ + public Genotype getGenotype(String sample) { + return getGenotypes().get(sample); + } + + public boolean hasGenotype(String sample) { + return getGenotypes().containsKey(sample); + } + + /** * Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS * @@ -496,7 +684,8 @@ public class VariantContext extends AttributedObject { } /** - * These are genotype-specific functions + * Genotype-specific functions -- are the genotypes monomorphic w.r.t. to the alleles segregating at this + * site? That is, is the number of alternate alleles among all fo the genotype == 0? * * @return */ @@ -504,65 +693,106 @@ public class VariantContext extends AttributedObject { return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount(); } + /** + * Genotype-specific functions -- are the genotypes polymorphic w.r.t. to the alleles segregating at this + * site? That is, is the number of alternate alleles among all fo the genotype > 0? + * + * @return + */ public boolean isPolymorphic() { return ! 
isMonomorphic(); } + public void clearGenotypes() { + this.genotypes.clear(); + } /** - * @param sample the sample name - * - * @return the Genotype associated with the given sample in this context or null if the sample is not in this context + * Adds this single genotype to the context, not allowing duplicate genotypes to be added + * @param genotype */ - public Genotype getGenotype(String sample) { - return getGenotypes().get(sample); + public void addGenotypes(Genotype genotype) { + putGenotype(genotype.getSampleName(), genotype, false); } - public boolean hasGenotype(String sample) { - return getGenotypes().containsKey(sample); - } - - public void setGenotypes(Genotype genotype) { - this.genotypes.clear(); - addGenotype(genotype); - } - - public void setGenotypes(Collection genotypes) { - this.genotypes.clear(); - + /** + * Adds these genotypes to the context, not allowing duplicate genotypes to be added + * @param genotypes + */ + public void addGenotypes(Collection genotypes) { for ( Genotype g : genotypes ) { - addGenotype(g.getSampleName(), g); + addGenotype(g); } } - public void setGenotypes(Map genotypes) { - this.genotypes.clear(); + /** + * Adds these genotype to the context, not allowing duplicate genotypes to be added. + * @param genotypes + */ + public void addGenotypes(Map genotypes) { for ( Map.Entry elt : genotypes.entrySet() ) { - addGenotype(elt.getKey(), elt.getValue()); + addGenotype(elt.getValue()); } } - public void addGenotypes(Map genotypes) { + /** + * Adds these genotypes to the context. + * + * @param genotypes + */ + public void putGenotypes(Map genotypes) { for ( Map.Entry g : genotypes.entrySet() ) - addGenotype(g.getKey(), g.getValue()); + putGenotype(g.getKey(), g.getValue()); } - - public void addGenotypes(Collection genotypes) { + /** + * Adds these genotypes to the context. 
+ * + * @param genotypes + */ + public void putGenotypes(Collection genotypes) { for ( Genotype g : genotypes ) - addGenotype(g); + putGenotype(g); } + /** + * Adds this genotype to the context, throwing an error if it's already bound. + * + * @param genotype + */ public void addGenotype(Genotype genotype) { - addGenotype(genotype.getSampleName(), genotype, false); + addGenotype(genotype.getSampleName(), genotype); } + /** + * Adds this genotype to the context, throwing an error if it's already bound. + * + * @param genotype + */ public void addGenotype(String sampleName, Genotype genotype) { - addGenotype(sampleName, genotype, false); + putGenotype(sampleName, genotype, false); } - public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) { + /** + * Adds this genotype to the context. + * + * @param genotype + */ + public void putGenotype(Genotype genotype) { + putGenotype(genotype.getSampleName(), genotype); + } + + /** + * Adds this genotype to the context. + * + * @param genotype + */ + public void putGenotype(String sampleName, Genotype genotype) { + putGenotype(sampleName, genotype, true); + } + + private void putGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) { if ( hasGenotype(sampleName) && ! allowOverwrites ) throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this); @@ -572,6 +802,10 @@ public class VariantContext extends AttributedObject { this.genotypes.put(sampleName, genotype); } + /** + * Removes the binding from sampleName to genotype. If this doesn't exist, throws an IllegalArgumentException + * @param sampleName + */ public void removeGenotype(String sampleName) { if ( ! 
this.genotypes.containsKey(sampleName) ) throw new IllegalArgumentException("Sample name isn't contained in genotypes " + sampleName + " genotypes =" + genotypes); @@ -579,6 +813,10 @@ public class VariantContext extends AttributedObject { this.genotypes.remove(sampleName); } + /** + * Removes genotype from the context. If this doesn't exist, throws an IllegalArgumentException + * @param genotype + */ public void removeGenotype(Genotype genotype) { removeGenotype(genotype.getSampleName()); } @@ -598,7 +836,7 @@ public class VariantContext extends AttributedObject { return validate(true); } - public boolean validate(boolean throwException) { + private boolean validate(boolean throwException) { try { validateAlleles(); validateGenotypes(); @@ -713,25 +951,4 @@ public class VariantContext extends AttributedObject { return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s", getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes().values()); } - - // todo -- move to utils - /** - * @param allele the allele to be queried - * - * @return the frequency of the given allele in this context - */ -// public double getAlleleFrequency(Allele allele) { -// int alleleCount = 0; -// int totalCount = 0; -// -// for ( Genotype g : getGenotypes().values() ) { -// for ( Allele a : g.getAlleles() ) { -// totalCount++; -// if ( allele.equals(a) ) -// alleleCount++; -// } -// } -// -// return totalCount == 0 ? 
0.0 : (double)alleleCount / (double)totalCount; -// } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java index 4a19432a0..3d8fddf96 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java @@ -89,11 +89,15 @@ public class VariantContextAdaptors { double pError = vcfG.getNegLog10PError() == VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY ? AttributedObject.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError(); Genotype g = new Genotype(vc, alleleStrings, vcfG.getSampleName(), pError); + for ( Map.Entry e : vcfG.getFields().entrySet() ) { - if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) ) + if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) && ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) g.putAttribute(e.getKey(), e.getValue()); } + if ( vcfG.isFiltered() ) // set up the genotype filters by splitting the FT field value on ';' (split the value, not the key) + g.setFilters(Arrays.asList(vcfG.getFields().get(VCFGenotypeRecord.GENOTYPE_FILTER_KEY).split(";"))); + vc.addGenotype(g); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java index b7ca0bfeb..629b4fe15 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java @@ -39,6 +39,10 @@ public class VariantContextUtils { return exps; } + // todo -- add generalize matching routine here + // todo -- should file in all fields (loc, filter, etc) for selection + // todo -- genotypes should be sampleNAME.field -> value bindings + private static final String
UNIQUIFIED_SUFFIX = ".unique"; /** diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index 814b02542..56096ba65 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -19,7 +19,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { public static final String GENOTYPE_QUALITY_KEY = "GQ"; public static final String DEPTH_KEY = "DP"; public static final String HAPLOTYPE_QUALITY_KEY = "HQ"; - public static final String FILTER_KEY = "FT"; + public static final String GENOTYPE_FILTER_KEY = "FT"; public static final String OLD_DEPTH_KEY = "RD"; // the values for empty fields @@ -194,7 +194,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { } public boolean isFiltered() { - return ( mFields.get(FILTER_KEY) != null && ! mFields.get(FILTER_KEY).equals("0")); + return ( mFields.get(GENOTYPE_FILTER_KEY) != null && ! 
mFields.get(GENOTYPE_FILTER_KEY).equals("0")); } public int getPloidy() { @@ -291,7 +291,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked { result = String.valueOf(MISSING_GENOTYPE_QUALITY); else if ( field.equals(DEPTH_KEY) ) result = String.valueOf(MISSING_DEPTH); - else if ( field.equals(FILTER_KEY) ) + else if ( field.equals(GENOTYPE_FILTER_KEY) ) result = UNFILTERED; // TODO -- support haplotype quality //else if ( field.equals(HAPLOTYPE_QUALITY_KEY) ) diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextTest.java index 27a92dd8b..f4e30e8eb 100755 --- a/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextTest.java +++ b/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextTest.java @@ -15,8 +15,6 @@ import org.junit.BeforeClass; import java.util.Arrays; import java.util.List; -import java.util.Set; -import java.util.Collection; import java.io.FileNotFoundException; import java.io.File;