Documentation for VariantContext. Please read it and start using it.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2756 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-02-01 17:49:51 +00:00
parent 990af3f76e
commit d9671dffba
7 changed files with 466 additions and 141 deletions

View File

@ -20,7 +20,7 @@ import java.util.Arrays;
* *
* SNP polymorphism of C/G -> { C , G } -> C is the reference allele * SNP polymorphism of C/G -> { C , G } -> C is the reference allele
* 1 base deletion of C -> { C , - } -> C is the reference allele * 1 base deletion of C -> { C , - } -> C is the reference allele
* 1 base insertion of A -> { - ; A } -> NULL is the reference allele * 1 base insertion of A -> { - ; A } -> Null is the reference allele
* *
* Suppose I see a the following in the population: * Suppose I see a the following in the population:
* *
@ -61,12 +61,16 @@ import java.util.Arrays;
* A / C @ loc => SNP with * A / C @ loc => SNP with
* - / A => INDEL * - / A => INDEL
* *
* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion * If you know where allele is the reference, you can determine whether the variant is an insertion or deletion.
*
* Allele also supports the concept of a NO_CALL allele. This Allele represents a haplotype that couldn't be
* determined. This is usually represented by a '.' allele.
*
* Note that Alleles store all bases as bytes, in **UPPER CASE**. So 'atc' == 'ATC' from the perspective of an
* Allele.
*/ */
public class Allele { public class Allele {
private static final byte[] EMPTY_ALLELE_BASES = new byte[0]; private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
// private static final byte[] NULL_ALLELE_BASES = new byte[0];
// private static final byte[] NO_CALL_ALLELE_BASES = ".".getBytes();
private boolean isRef = false; private boolean isRef = false;
private boolean isNull = false; private boolean isNull = false;
@ -74,8 +78,17 @@ public class Allele {
private byte[] bases = null; private byte[] bases = null;
/** A generic static NO_CALL allele for use */
public final static Allele NO_CALL = new Allele("."); public final static Allele NO_CALL = new Allele(".");
/**
* Create a new Allele that includes bases and is tagged as the reference allele if isRef == true. If bases
* == '-', a Null allele is created. If bases == '.', a no call Allele is created.
*
* @param bases the DNA sequence of this variation, '-', or '.'
* @param isRef should we make this a reference allele?
* @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformed
*/
public Allele(byte[] bases, boolean isRef) { public Allele(byte[] bases, boolean isRef) {
if ( bases == null ) if ( bases == null )
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele"); throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
@ -84,8 +97,7 @@ public class Allele {
if ( wouldBeNullAllele(bases) ) { if ( wouldBeNullAllele(bases) ) {
bases = EMPTY_ALLELE_BASES; bases = EMPTY_ALLELE_BASES;
isNull = true; isNull = true;
} } else if ( wouldBeNoCallAllele(bases) ) {
if ( wouldBeNoCallAllele(bases) ) {
bases = EMPTY_ALLELE_BASES; bases = EMPTY_ALLELE_BASES;
isNoCall = true; isNoCall = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele"); if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
@ -100,21 +112,26 @@ public class Allele {
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases)); throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
} }
public final static boolean wouldBeNullAllele(byte[] bases) { /**
* Do the bases represent the null allele?
*/
public static boolean wouldBeNullAllele(byte[] bases) {
return (bases.length == 1 && bases[0] == '-') || bases.length == 0; return (bases.length == 1 && bases[0] == '-') || bases.length == 0;
} }
public final static boolean wouldBeNoCallAllele(byte[] bases) { /** Do the bases represent the NO_CALL allele? */
public static boolean wouldBeNoCallAllele(byte[] bases) {
return bases.length == 1 && bases[0] == '.'; return bases.length == 1 && bases[0] == '.';
} }
/** Can we create an allele from this base string, including NO_CALL and Null alleles? */
public final static boolean acceptableAlleleBases(String bases) { public static boolean acceptableAlleleBases(String bases) {
return acceptableAlleleBases(bases.getBytes()); return acceptableAlleleBases(bases.getBytes());
} }
public final static boolean acceptableAlleleBases(byte[] bases) { /** Can we create an allele from bases, including NO_CALL and Null alleles? */
if ( (bases.length == 1 && bases[0] == '-') || bases.length == 0) public static boolean acceptableAlleleBases(byte[] bases) {
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) )
return true; return true;
for ( byte b : bases ) { for ( byte b : bases ) {
@ -126,33 +143,52 @@ public class Allele {
return true; return true;
} }
/** null allele creation method */ /**
public Allele(boolean isRef) { * @see Allele(byte[], boolean)
this("", isRef); *
} * @param bases
* @param isRef
*/
public Allele(String bases, boolean isRef) { public Allele(String bases, boolean isRef) {
this(bases.getBytes(), isRef); this(bases.getBytes(), isRef);
} }
public Allele() { this(false); } /**
* Creates a non-Ref allele. @see Allele(byte[], boolean) for full information
*
* @param bases
*/
public Allele(String bases) { this(bases, false); } public Allele(String bases) { this(bases, false); }
/**
* Creates a non-Ref allele. @see Allele(byte[], boolean) for full information
*
* @param bases
*/
public Allele(byte[] bases) { this(bases, false); } public Allele(byte[] bases) { this(bases, false); }
// // ---------------------------------------------------------------------------------------------------------
// //
// accessor routines // accessor routines
// //
// // ---------------------------------------------------------------------------------------------------------
/** Returns true if this is the null allele */
public boolean isNull() { return isNull; } public boolean isNull() { return isNull; }
/** Returns true if this is not the null allele */
public boolean isNonNull() { return ! isNull(); } public boolean isNonNull() { return ! isNull(); }
/** Returns true if this is the NO_CALL allele */
public boolean isNoCall() { return isNoCall; } public boolean isNoCall() { return isNoCall; }
/** Returns true if this is not the NO_CALL allele */
public boolean isCalled() { return ! isNoCall(); } public boolean isCalled() { return ! isNoCall(); }
/** Returns true if this Allele is the reference allele */
public boolean isReference() { return isRef; } public boolean isReference() { return isRef; }
/** Returns true if this Allele is not the reference allele */
public boolean isNonReference() { return ! isReference(); } public boolean isNonReference() { return ! isReference(); }
/** Returns a nice string representation of this object */
public String toString() { public String toString() {
return (isNull() ? "-" : ( isNoCall() ? "." : new String(getBases()))) + (isReference() ? "*" : ""); return (isNull() ? "-" : ( isNoCall() ? "." : new String(getBases()))) + (isReference() ? "*" : "");
} }
@ -174,11 +210,37 @@ public class Allele {
return isRef == other.isRef && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases()); return isRef == other.isRef && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases());
} }
// todo -- notice case insensitivity /**
* Returns true if this Allele contains the same bases as test, regardless of its reference status. Also handles
* Null and NO_CALL alleles
*
* @param test
* @return
*/
public boolean basesMatch(byte[] test) { return bases == test || Arrays.equals(bases, test); } public boolean basesMatch(byte[] test) { return bases == test || Arrays.equals(bases, test); }
/**
* Returns true if this Allele contains the same bases as test, regardless of its reference status. Also handles
* Null and NO_CALL alleles
*
* @param test
* @return
*/
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
/**
* Returns true if this Allele contains the same bases as test, regardless of its reference status. Also handles
* Null and NO_CALL alleles
*
* @param test
* @return
*/
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
/**
* Returns the length of this allele. Null and NO_CALL alleles have 0 length.
* @return
*/
public int length() { public int length() {
return bases.length; return bases.length;
} }

View File

@ -16,6 +16,7 @@ import java.util.*;
public class AttributedObject { public class AttributedObject {
public static final double NO_NEG_LOG_10PERROR = 0.0; public static final double NO_NEG_LOG_10PERROR = 0.0;
private double negLog10PError = NO_NEG_LOG_10PERROR; private double negLog10PError = NO_NEG_LOG_10PERROR;
private Set<Object> filters = new HashSet<Object>();
private Map<Object, Object> attributes = new HashMap<Object, Object>(); private Map<Object, Object> attributes = new HashMap<Object, Object>();
@ -36,6 +37,45 @@ public class AttributedObject {
} }
// ---------------------------------------------------------------------------------------------------------
//
// Filter
//
// ---------------------------------------------------------------------------------------------------------
public Set<Object> getFilters() {
return filters;
}
public boolean isFiltered() {
return filters.size() > 0;
}
public boolean isNotFiltered() {
return ! isFiltered();
}
public void addFilter(Object filter) {
if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this);
if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
filters.add(filter);
}
public void addFilters(Collection<? extends Object> filters) {
if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this);
for ( Object f : filters )
addFilter(f);
}
public void clearFilters() {
filters.clear();
}
public void setFilters(Collection<? extends Object> filters) {
clearFilters();
addFilters(filters);
}
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// //
// Working with log error rates // Working with log error rates

View File

@ -6,28 +6,177 @@ import org.broadinstitute.sting.utils.BaseUtils;
import java.util.*; import java.util.*;
/** /**
* @author depristo * @author depristo
* <p/> * <p/>
* Class VariantContext * Class VariantContext
* <p/> * <p/>
* *
* * == High-level overview ==
*
* The VariantContext object is a single general class system for representing genetic variation data composed of:
*
* * Allele: representing single genetic haplotypes (A, T, ATC, -)
* * Genotype: an assignment of alleles for each chromosome of a single named sample at a particular locus
* * VariantContext: an abstract class holding all segregating alleles at a locus as well as genotypes
* for multiple individuals containing alleles at that locus
*
* The class system works by defining segregating alleles, creating a variant context representing the segregating
* information at a locus, and potentially creating and associating genotypes with individuals in the context.
*
* All of the classes are highly validating -- call validate() if you modify them -- so you can rely on the
* self-consistency of the data once you have a VariantContext in hand. The system has a rich set of accessor
* and manipulator routines, as well as more complex static support routines in VariantContextUtils.
*
* The VariantContext (and Genotype) objects are attributed (supporting addition of arbitrary key/value pairs) and
* filtered (can represent a variation that is viewed as suspect).
*
* VariantContexts are dynamically typed, so whether a VariantContext is a SNP, Indel, or NoVariant depends
* on the properties of the alleles in the context. See the detailed documentation on the Type parameter below.
*
* It's also easy to create subcontexts based on selected genotypes.
*
* == Working with Variant Contexts ==
* === Some example data ===
*
* Allele A, Aref, T, Tref;
* Allele del, delRef, ATC, ATCref;
*
* A [ref] / T at 10
* GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10);
*
* - / ATC [ref] from 20-22
* GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);
*
* // - [ref] / ATC immediately after 20
* GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20);
*
* === Alleles ===
*
* See the documentation in the Allele class itself
*
* What are they?
*
* Alleles can be either reference or non-reference
*
* Example alleles used here:
*
* del = new Allele("-");
* A = new Allele("A");
* Aref = new Allele("A", true);
* T = new Allele("T");
* ATC = new Allele("ATC");
*
* === Creating variant contexts ===
*
* ==== By hand ====
*
* Here's an example of a A/T polymorphism with the A being reference:
*
* <pre>
* VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref, T));
* </pre>
*
* If you want to create a non-variant site, just put in a single reference allele
*
* <pre>
* VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref));
* </pre>
*
* A deletion is just as easy:
*
* <pre>
* VariantContext vc = new VariantContext(delLoc, Arrays.asList(ATCref, del));
* </pre>
*
* The only two things that distinguish an insertion from a deletion are the reference allele
* and the location of the variation. An insertion has a Null reference allele and at least
* one non-reference Non-Null allele. Additionally, the location of the insertion is immediately after
* a 1-bp GenomeLoc (at say 20).
*
* <pre>
* VariantContext vc = new VariantContext(insLoc, Arrays.asList(delRef, ATC));
* </pre>
*
* ==== Converting rods and other data structures to VCs ====
*
* You can convert many common types into VariantContexts using the general function:
*
* <pre>
* VariantContextAdaptors.convertToVariantContext(myObject)
* </pre>
*
* dbSNP and VCFs, for example, can be passed in as myObject and a VariantContext corresponding to that
* object will be returned. A null return value indicates that the type isn't yet supported. This is the best
* and easiest way to create contexts using RODs.
*
*
* === Working with genotypes ===
*
* <pre>
* List<Allele> alleles = Arrays.asList(Aref, T);
* VariantContext vc = new VariantContext(snpLoc, alleles);
*
* Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "g1", 10);
* Genotype g2 = new Genotype(Arrays.asList(Aref, T), "g2", 10);
* Genotype g3 = new Genotype(Arrays.asList(T, T), "g3", 10);
* vc.addGenotypes(Arrays.asList(g1, g2, g3));
* </pre>
*
* At this point we have 3 genotypes in our context, g1-g3.
*
* You can access a good deal of information about the genotypes through the VariantContext:
*
* <pre>
* vc.hasGenotypes()
* vc.isMonomorphic()
* vc.isPolymorphic()
* vc.getSampleNames().size()
*
* vc.getGenotypes()
* vc.getGenotypes().get("g1")
* vc.hasGenotype("g1")
*
* vc.getChromosomeCount()
* vc.getChromosomeCount(Aref)
* vc.getChromosomeCount(T)
* </pre>
*
* === NO_CALL alleles ===
*
* The system allows one to create Genotypes carrying special NO_CALL alleles that aren't present in the
* set of context alleles and that represent undetermined alleles in a genotype:
*
* Genotype g4 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "NO_DATA_FOR_SAMPLE", 10);
*
*
* === subcontexts ===
* It's also very easy to get a subcontext based only on the data in a subset of the genotypes:
*
* <pre>
* VariantContext vc12 = vc.subContextFromGenotypes(Arrays.asList(g1,g2));
* VariantContext vc1 = vc.subContextFromGenotypes(Arrays.asList(g1));
* </pre>
*/ */
public class VariantContext extends AttributedObject { public class VariantContext extends AttributedObject {
/** The location of this VariantContext */
private GenomeLoc loc; private GenomeLoc loc;
/** The type (cached for performance reasons) of this context */
private Type type = Type.UNDETERMINED; private Type type = Type.UNDETERMINED;
/** A set of the alleles segregating in this context */
private Set<Allele> alleles = new HashSet<Allele>(); private Set<Allele> alleles = new HashSet<Allele>();
/** A mapping from sampleName -> genotype objects for all genotypes associated with this context */
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>(); private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
private Set<Object> filters = new HashSet<Object>();
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// //
// constructors // constructors
// //
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
public VariantContext(GenomeLoc loc) { public VariantContext(GenomeLoc loc) {
super(); super();
@ -57,7 +206,7 @@ public class VariantContext extends AttributedObject {
public VariantContext(GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) { public VariantContext(GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) {
this(loc); this(loc);
setAlleles(alleles); setAlleles(alleles);
setGenotypes(genotypes); addGenotypes(genotypes);
validate(); validate();
} }
@ -66,7 +215,7 @@ public class VariantContext extends AttributedObject {
double negLog10PError, Collection<Object> filters) { double negLog10PError, Collection<Object> filters) {
this(loc); this(loc);
setAlleles(alleles); setAlleles(alleles);
setGenotypes(genotypes); addGenotypes(genotypes);
setAttributes(attributes); setAttributes(attributes);
setNegLog10PError(negLog10PError); setNegLog10PError(negLog10PError);
setFilters(filters); setFilters(filters);
@ -78,7 +227,7 @@ public class VariantContext extends AttributedObject {
double negLog10PError, Collection<Object> filters) { double negLog10PError, Collection<Object> filters) {
this(loc); this(loc);
setAlleles(alleles); setAlleles(alleles);
setGenotypes(genotypes); addGenotypes(genotypes);
setAttributes(attributes); setAttributes(attributes);
setNegLog10PError(negLog10PError); setNegLog10PError(negLog10PError);
setFilters(filters); setFilters(filters);
@ -93,20 +242,37 @@ public class VariantContext extends AttributedObject {
// //
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
/**
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype
* genotype and alleles in genotype. This is the right way to test if a single genotype is actually
* variant or not.
*
* @param genotype
* @return
*/
public VariantContext subContextFromGenotypes(Genotype genotype) { public VariantContext subContextFromGenotypes(Genotype genotype) {
return subContextFromGenotypes(Arrays.asList(genotype)); return subContextFromGenotypes(Arrays.asList(genotype));
} }
/**
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
* genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
* variant or not.
*
* @param genotypes
* @return
*/
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) { public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
// todo -- we should check for uniqueness of genotypes // todo -- we should check for uniqueness of genotypes
return subContextFromGenotypes(new HashSet<Genotype>(genotypes), getAttributes()); return new VariantContext(getLocation(), allelesOfGenotypes(genotypes), genotypes, getAttributes(), getNegLog10PError(), getFilters());
} }
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Map<Object, Object> attributes) { /**
return new VariantContext(getLocation(), allelesOfGenotypes(genotypes), genotypes, attributes, getNegLog10PError(), getFilters()); * helper routnine for subcontext
} * @param genotypes
* @return
/** helper routnine for subcontext */ */
private Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) { private Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
Set<Allele> alleles = new HashSet<Allele>(); Set<Allele> alleles = new HashSet<Allele>();
@ -123,45 +289,6 @@ public class VariantContext extends AttributedObject {
return alleles; return alleles;
} }
// ---------------------------------------------------------------------------------------------------------
//
// Filter
//
// ---------------------------------------------------------------------------------------------------------
public Set<Object> getFilters() {
return filters;
}
public boolean isFiltered() {
return filters.size() > 0;
}
public boolean isNotFiltered() {
return ! isFiltered();
}
public void addFilter(Object filter) {
if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this);
if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
filters.add(filter);
}
public void addFilters(Collection<? extends Object> filters) {
if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this);
for ( Object f : filters )
addFilter(f);
}
public void clearFilters() {
filters.clear();
}
public void setFilters(Collection<? extends Object> filters) {
clearFilters();
addFilters(filters);
}
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// //
// type operations // type operations
@ -195,6 +322,8 @@ public class VariantContext extends AttributedObject {
* Mixed * Mixed
* Mix of other classes * Mix of other classes
* *
* Also supports NO_VARIATION type, used to indicate that the site isn't polymorphic in the population
*
* *
* Not currently supported: * Not currently supported:
* *
@ -232,9 +361,9 @@ public class VariantContext extends AttributedObject {
} }
/** /**
* convenience method for switching over the allele type * Determines (if necessary) and returns the type of this variation by examining the alleles it contains.
* *
* @return the AlleleType of this allele * @return the type of this VariantContext
**/ **/
public Type getType() { public Type getType() {
if ( type == Type.UNDETERMINED ) if ( type == Type.UNDETERMINED )
@ -255,7 +384,10 @@ public class VariantContext extends AttributedObject {
return BaseUtils.SNPSubstitutionType(getReference().getBases()[0], getAlternateAllele(0).getBases()[0]); return BaseUtils.SNPSubstitutionType(getReference().getBases()[0], getAlternateAllele(0).getBases()[0]);
} }
/** If this is a biallelic SNP, is it a transition? */
public boolean isTransition() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSITION; } public boolean isTransition() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSITION; }
/** If this is a biallelic SNP, is it a transversion? */
public boolean isTransversion() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSVERSION; } public boolean isTransversion() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSVERSION; }
/** /**
@ -322,6 +454,7 @@ public class VariantContext extends AttributedObject {
return ref; return ref;
} }
/** Private helper routine that grabs the reference allele but doesn't throw an error if there's no such allele */
private Allele getReferenceWithoutError() { private Allele getReferenceWithoutError() {
for ( Allele allele : getAlleles() ) for ( Allele allele : getAlleles() )
if ( allele.isReference() ) if ( allele.isReference() )
@ -336,14 +469,23 @@ public class VariantContext extends AttributedObject {
return getNAlleles() == 2; return getNAlleles() == 2;
} }
/**
* @return The number of segregating alleles in this context
*/
public int getNAlleles() { public int getNAlleles() {
return alleles.size(); return alleles.size();
} }
/**
* @return The allele sharing the same bases as this String. A convenience method; better to use byte[]
*/
public Allele getAllele(String allele) { public Allele getAllele(String allele) {
return getAllele(allele.getBytes()); return getAllele(allele.getBytes());
} }
/**
* @return The allele sharing the same bases as this byte[], or null if no such allele is present.
*/
public Allele getAllele(byte[] allele) { public Allele getAllele(byte[] allele) {
for ( Allele a : getAlleles() ) { for ( Allele a : getAlleles() ) {
if ( a.basesMatch(allele) ) { if ( a.basesMatch(allele) ) {
@ -354,6 +496,9 @@ public class VariantContext extends AttributedObject {
return null; // couldn't find anything return null; // couldn't find anything
} }
/**
* @return True if this context contains Allele allele, or false otherwise
*/
public boolean hasAllele(Allele allele) { public boolean hasAllele(Allele allele) {
for ( Allele a : getAlleles() ) { for ( Allele a : getAlleles() ) {
if ( a.equals(allele) ) if ( a.equals(allele) )
@ -389,6 +534,11 @@ public class VariantContext extends AttributedObject {
return altAlleles; return altAlleles;
} }
/**
* @param i -- the ith allele (from 0 to n - 2 for a context with n alleles including a reference allele)
* @return the ith non-reference allele in this context
* @throws IllegalArgumentException if i is invalid
*/
public Allele getAlternateAllele(int i) { public Allele getAlternateAllele(int i) {
int n = 0; int n = 0;
@ -400,18 +550,28 @@ public class VariantContext extends AttributedObject {
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this); throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
} }
/**
* Sets the alleles segregating in this context to the collection of alleles, each of which must be unique according
* to equals() in Allele. Validate() should be called when you are done modifying the context.
*
* @param alleles
*/
public void setAlleles(Collection<Allele> alleles) { public void setAlleles(Collection<Allele> alleles) {
this.alleles.clear(); this.alleles.clear();
for ( Allele a : alleles ) for ( Allele a : alleles )
addAllele(a); addAllele(a);
} }
/**
* Adds allele to the collection of alleles segregating in this context. The new
* allele must be unique according to equals() in Allele.
* Validate() should be called when you are done modifying the context.
*
* @param allele
*/
public void addAllele(Allele allele) { public void addAllele(Allele allele) {
addAllele(allele, false); final boolean allowDuplicates = false; // used to be a parameter
}
public void addAllele(Allele allele, boolean allowDuplicates) {
type = Type.UNDETERMINED; type = Type.UNDETERMINED;
for ( Allele a : alleles ) { for ( Allele a : alleles ) {
@ -431,21 +591,35 @@ public class VariantContext extends AttributedObject {
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
/** /**
* @return true if the context represents variants with associated genotypes * @return true if the context has associated genotypes
*/ */
public boolean hasGenotypes() { return genotypes.size() > 0; } public boolean hasGenotypes() { return genotypes.size() > 0; }
public boolean hasSingleSample() { return genotypes.size() == 1; }
/** /**
* @return set of all Genotypes associated with this context * @return set of all Genotypes associated with this context
*/ */
public Map<String, Genotype> getGenotypes() { return genotypes; } public Map<String, Genotype> getGenotypes() { return genotypes; }
/**
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
* for consistency with the multi-get function.
*
* @param sampleName
* @return
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/
public Map<String, Genotype> getGenotypes(String sampleName) { public Map<String, Genotype> getGenotypes(String sampleName) {
return getGenotypes(Arrays.asList(sampleName)); return getGenotypes(Arrays.asList(sampleName));
} }
/**
* Returns a map from sampleName -> Genotype for each sampleName in sampleNames. Returns a map
* for consistency with the multi-get function.
*
* @param sampleNames a unique list of sample names
* @return
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/
public Map<String, Genotype> getGenotypes(Collection<String> sampleNames) { public Map<String, Genotype> getGenotypes(Collection<String> sampleNames) {
HashMap<String, Genotype> map = new HashMap<String, Genotype>(); HashMap<String, Genotype> map = new HashMap<String, Genotype>();
@ -464,6 +638,20 @@ public class VariantContext extends AttributedObject {
return getGenotypes().keySet(); return getGenotypes().keySet();
} }
/**
* @param sample the sample name
*
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
*/
public Genotype getGenotype(String sample) {
return getGenotypes().get(sample);
}
public boolean hasGenotype(String sample) {
return getGenotypes().containsKey(sample);
}
/** /**
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS * Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS
* *
@ -496,7 +684,8 @@ public class VariantContext extends AttributedObject {
} }
/** /**
* These are genotype-specific functions * Genotype-specific functions -- are the genotypes monomorphic w.r.t. to the alleles segregating at this
* site? That is, is the number of alternate alleles among all of the genotypes == 0?
* *
* @return * @return
*/ */
@ -504,65 +693,106 @@ public class VariantContext extends AttributedObject {
return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount(); return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount();
} }
/**
* Genotype-specific functions -- are the genotypes polymorphic w.r.t. to the alleles segregating at this
* site? That is, is the number of alternate alleles among all of the genotypes > 0?
*
* @return
*/
public boolean isPolymorphic() { public boolean isPolymorphic() {
return ! isMonomorphic(); return ! isMonomorphic();
} }
public void clearGenotypes() {
this.genotypes.clear();
}
/** /**
* @param sample the sample name * Adds this single genotype to the context, not allowing duplicate genotypes to be added
* * @param genotype
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
*/ */
public Genotype getGenotype(String sample) { public void addGenotypes(Genotype genotype) {
return getGenotypes().get(sample); putGenotype(genotype.getSampleName(), genotype, false);
} }
public boolean hasGenotype(String sample) { /**
return getGenotypes().containsKey(sample); * Adds these genotypes to the context, not allowing duplicate genotypes to be added
} * @param genotypes
*/
public void setGenotypes(Genotype genotype) { public void addGenotypes(Collection<Genotype> genotypes) {
this.genotypes.clear();
addGenotype(genotype);
}
public void setGenotypes(Collection<Genotype> genotypes) {
this.genotypes.clear();
for ( Genotype g : genotypes ) { for ( Genotype g : genotypes ) {
addGenotype(g.getSampleName(), g); addGenotype(g);
} }
} }
public void setGenotypes(Map<String, Genotype> genotypes) { /**
this.genotypes.clear(); * Adds these genotype to the context, not allowing duplicate genotypes to be added.
* @param genotypes
*/
public void addGenotypes(Map<String, Genotype> genotypes) {
for ( Map.Entry<String, Genotype> elt : genotypes.entrySet() ) { for ( Map.Entry<String, Genotype> elt : genotypes.entrySet() ) {
addGenotype(elt.getKey(), elt.getValue()); addGenotype(elt.getValue());
} }
} }
public void addGenotypes(Map<String, Genotype> genotypes) { /**
* Adds these genotypes to the context.
*
* @param genotypes
*/
public void putGenotypes(Map<String, Genotype> genotypes) {
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() ) for ( Map.Entry<String, Genotype> g : genotypes.entrySet() )
addGenotype(g.getKey(), g.getValue()); putGenotype(g.getKey(), g.getValue());
} }
/**
public void addGenotypes(Collection<Genotype> genotypes) { * Adds these genotypes to the context.
*
* @param genotypes
*/
public void putGenotypes(Collection<Genotype> genotypes) {
for ( Genotype g : genotypes ) for ( Genotype g : genotypes )
addGenotype(g); putGenotype(g);
} }
/**
* Adds this genotype to the context, throwing an error if it's already bound.
*
* @param genotype
*/
public void addGenotype(Genotype genotype) { public void addGenotype(Genotype genotype) {
addGenotype(genotype.getSampleName(), genotype, false); addGenotype(genotype.getSampleName(), genotype);
} }
/**
* Adds this genotype to the context, throwing an error if it's already bound.
*
* @param sampleName the sample name to bind the genotype to
* @param genotype the genotype to add
*/
public void addGenotype(String sampleName, Genotype genotype) { public void addGenotype(String sampleName, Genotype genotype) {
addGenotype(sampleName, genotype, false); putGenotype(sampleName, genotype, false);
} }
public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) { /**
* Adds this genotype to the context.
*
* @param genotype
*/
public void putGenotype(Genotype genotype) {
    // bind the genotype under its own sample name via the (sampleName, genotype) overload
    final String sampleName = genotype.getSampleName();
    putGenotype(sampleName, genotype);
}
/**
 * Adds this genotype to the context under the given sample name. Overwrites are allowed, so an
 * existing binding for the sample does not trigger the overwrite error.
 *
 * @param sampleName the sample name to bind the genotype to
 * @param genotype the genotype to store
 */
public void putGenotype(String sampleName, Genotype genotype) {
    final boolean allowOverwrites = true;
    putGenotype(sampleName, genotype, allowOverwrites);
}
private void putGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
if ( hasGenotype(sampleName) && ! allowOverwrites ) if ( hasGenotype(sampleName) && ! allowOverwrites )
throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this); throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this);
@ -572,6 +802,10 @@ public class VariantContext extends AttributedObject {
this.genotypes.put(sampleName, genotype); this.genotypes.put(sampleName, genotype);
} }
/**
* Removes the binding from sampleName to genotype. Throws an IllegalArgumentException if no such binding exists.
* @param sampleName
*/
public void removeGenotype(String sampleName) { public void removeGenotype(String sampleName) {
if ( ! this.genotypes.containsKey(sampleName) ) if ( ! this.genotypes.containsKey(sampleName) )
throw new IllegalArgumentException("Sample name isn't contained in genotypes " + sampleName + " genotypes =" + genotypes); throw new IllegalArgumentException("Sample name isn't contained in genotypes " + sampleName + " genotypes =" + genotypes);
@ -579,6 +813,10 @@ public class VariantContext extends AttributedObject {
this.genotypes.remove(sampleName); this.genotypes.remove(sampleName);
} }
/**
* Removes genotype from the context. Throws an IllegalArgumentException if its sample name is not bound in this context.
* @param genotype
*/
public void removeGenotype(Genotype genotype) { public void removeGenotype(Genotype genotype) {
removeGenotype(genotype.getSampleName()); removeGenotype(genotype.getSampleName());
} }
@ -598,7 +836,7 @@ public class VariantContext extends AttributedObject {
return validate(true); return validate(true);
} }
public boolean validate(boolean throwException) { private boolean validate(boolean throwException) {
try { try {
validateAlleles(); validateAlleles();
validateGenotypes(); validateGenotypes();
@ -713,25 +951,4 @@ public class VariantContext extends AttributedObject {
return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s", return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s",
getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes().values()); getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes().values());
} }
// todo -- move to utils
/**
* @param allele the allele to be queried
*
* @return the frequency of the given allele in this context
*/
// public double getAlleleFrequency(Allele allele) {
// int alleleCount = 0;
// int totalCount = 0;
//
// for ( Genotype g : getGenotypes().values() ) {
// for ( Allele a : g.getAlleles() ) {
// totalCount++;
// if ( allele.equals(a) )
// alleleCount++;
// }
// }
//
// return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
// }
} }

View File

@ -89,11 +89,15 @@ public class VariantContextAdaptors {
double pError = vcfG.getNegLog10PError() == VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY ? AttributedObject.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError(); double pError = vcfG.getNegLog10PError() == VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY ? AttributedObject.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError();
Genotype g = new Genotype(vc, alleleStrings, vcfG.getSampleName(), pError); Genotype g = new Genotype(vc, alleleStrings, vcfG.getSampleName(), pError);
for ( Map.Entry<String, String> e : vcfG.getFields().entrySet() ) { for ( Map.Entry<String, String> e : vcfG.getFields().entrySet() ) {
if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) ) if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) && ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) )
g.putAttribute(e.getKey(), e.getValue()); g.putAttribute(e.getKey(), e.getValue());
} }
if ( vcfG.isFiltered() ) { // set up the FT genotype filter fields
    // Look up the FT value first, then split that value (not the key constant) on ';'.
    // Splitting the key would query the map with a String[] and yield a list holding a single null.
    g.setFilters(Arrays.asList(vcfG.getFields().get(VCFGenotypeRecord.GENOTYPE_FILTER_KEY).split(";")));
}
vc.addGenotype(g); vc.addGenotype(g);
} }

View File

@ -39,6 +39,10 @@ public class VariantContextUtils {
return exps; return exps;
} }
// todo -- add generalized matching routine here
// todo -- should fill in all fields (loc, filter, etc) for selection
// todo -- genotypes should be sampleNAME.field -> value bindings
private static final String UNIQUIFIED_SUFFIX = ".unique"; private static final String UNIQUIFIED_SUFFIX = ".unique";
/** /**

View File

@ -19,7 +19,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
public static final String GENOTYPE_QUALITY_KEY = "GQ"; public static final String GENOTYPE_QUALITY_KEY = "GQ";
public static final String DEPTH_KEY = "DP"; public static final String DEPTH_KEY = "DP";
public static final String HAPLOTYPE_QUALITY_KEY = "HQ"; public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
public static final String FILTER_KEY = "FT"; public static final String GENOTYPE_FILTER_KEY = "FT";
public static final String OLD_DEPTH_KEY = "RD"; public static final String OLD_DEPTH_KEY = "RD";
// the values for empty fields // the values for empty fields
@ -194,7 +194,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
} }
public boolean isFiltered() { public boolean isFiltered() {
return ( mFields.get(FILTER_KEY) != null && ! mFields.get(FILTER_KEY).equals("0")); return ( mFields.get(GENOTYPE_FILTER_KEY) != null && ! mFields.get(GENOTYPE_FILTER_KEY).equals("0"));
} }
public int getPloidy() { public int getPloidy() {
@ -291,7 +291,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
result = String.valueOf(MISSING_GENOTYPE_QUALITY); result = String.valueOf(MISSING_GENOTYPE_QUALITY);
else if ( field.equals(DEPTH_KEY) ) else if ( field.equals(DEPTH_KEY) )
result = String.valueOf(MISSING_DEPTH); result = String.valueOf(MISSING_DEPTH);
else if ( field.equals(FILTER_KEY) ) else if ( field.equals(GENOTYPE_FILTER_KEY) )
result = UNFILTERED; result = UNFILTERED;
// TODO -- support haplotype quality // TODO -- support haplotype quality
//else if ( field.equals(HAPLOTYPE_QUALITY_KEY) ) //else if ( field.equals(HAPLOTYPE_QUALITY_KEY) )

View File

@ -15,8 +15,6 @@ import org.junit.BeforeClass;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Set;
import java.util.Collection;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.File; import java.io.File;