Documentation for VariantContext. Please read it and start using it.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2756 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
990af3f76e
commit
d9671dffba
|
|
@ -20,7 +20,7 @@ import java.util.Arrays;
|
|||
*
|
||||
* SNP polymorphism of C/G -> { C , G } -> C is the reference allele
|
||||
* 1 base deletion of C -> { C , - } -> C is the reference allele
|
||||
* 1 base insertion of A -> { - ; A } -> NULL is the reference allele
|
||||
* 1 base insertion of A -> { - ; A } -> Null is the reference allele
|
||||
*
|
||||
* Suppose I see the following in the population:
|
||||
*
|
||||
|
|
@ -61,12 +61,16 @@ import java.util.Arrays;
|
|||
* A / C @ loc => SNP with
|
||||
* - / A => INDEL
|
||||
*
|
||||
* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion
|
||||
* If you know which allele is the reference, you can determine whether the variant is an insertion or deletion.
|
||||
*
|
||||
* Allele also supports the concept of a NO_CALL allele. This Allele represents a haplotype that couldn't be
|
||||
* determined. This is usually represented by a '.' allele.
|
||||
*
|
||||
* Note that Alleles store all bases as bytes, in **UPPER CASE**. So 'atc' == 'ATC' from the perspective of an
|
||||
* Allele.
|
||||
*/
|
||||
public class Allele {
|
||||
private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
|
||||
// private static final byte[] NULL_ALLELE_BASES = new byte[0];
|
||||
// private static final byte[] NO_CALL_ALLELE_BASES = ".".getBytes();
|
||||
|
||||
private boolean isRef = false;
|
||||
private boolean isNull = false;
|
||||
|
|
@ -74,8 +78,17 @@ public class Allele {
|
|||
|
||||
private byte[] bases = null;
|
||||
|
||||
/** A generic static NO_CALL allele for use */
|
||||
public final static Allele NO_CALL = new Allele(".");
|
||||
|
||||
/**
|
||||
* Create a new Allele that includes bases and is tagged as the reference allele if isRef == true. If bases
|
||||
* == '-', a Null allele is created. If bases == '.', a no call Allele is created.
|
||||
*
|
||||
* @param bases the DNA sequence of this variation, '-', or '.'
|
||||
* @param isRef should we make this a reference allele?
|
||||
* @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformed
|
||||
*/
|
||||
public Allele(byte[] bases, boolean isRef) {
|
||||
if ( bases == null )
|
||||
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
|
||||
|
|
@ -84,8 +97,7 @@ public class Allele {
|
|||
if ( wouldBeNullAllele(bases) ) {
|
||||
bases = EMPTY_ALLELE_BASES;
|
||||
isNull = true;
|
||||
}
|
||||
if ( wouldBeNoCallAllele(bases) ) {
|
||||
} else if ( wouldBeNoCallAllele(bases) ) {
|
||||
bases = EMPTY_ALLELE_BASES;
|
||||
isNoCall = true;
|
||||
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
|
||||
|
|
@ -100,21 +112,26 @@ public class Allele {
|
|||
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
|
||||
}
|
||||
|
||||
public final static boolean wouldBeNullAllele(byte[] bases) {
|
||||
/**
|
||||
* Do the bases represent the null allele?
|
||||
*/
|
||||
public static boolean wouldBeNullAllele(byte[] bases) {
|
||||
return (bases.length == 1 && bases[0] == '-') || bases.length == 0;
|
||||
}
|
||||
|
||||
public final static boolean wouldBeNoCallAllele(byte[] bases) {
|
||||
/** Do the bases represent the NO_CALL allele? */
|
||||
public static boolean wouldBeNoCallAllele(byte[] bases) {
|
||||
return bases.length == 1 && bases[0] == '.';
|
||||
}
|
||||
|
||||
|
||||
public final static boolean acceptableAlleleBases(String bases) {
|
||||
/** Can we create an allele from bases, including NO_CALL and Null alleles? */
|
||||
public static boolean acceptableAlleleBases(String bases) {
|
||||
return acceptableAlleleBases(bases.getBytes());
|
||||
}
|
||||
|
||||
public final static boolean acceptableAlleleBases(byte[] bases) {
|
||||
if ( (bases.length == 1 && bases[0] == '-') || bases.length == 0)
|
||||
|
||||
/** Can we create an allele from bases, including NO_CALL and Null alleles? */
|
||||
public static boolean acceptableAlleleBases(byte[] bases) {
|
||||
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) )
|
||||
return true;
|
||||
|
||||
for ( byte b : bases ) {
|
||||
|
|
@ -126,33 +143,52 @@ public class Allele {
|
|||
return true;
|
||||
}
|
||||
|
||||
/** null allele creation method */
|
||||
public Allele(boolean isRef) {
|
||||
this("", isRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* @see Allele(byte[], boolean)
|
||||
*
|
||||
* @param bases
|
||||
* @param isRef
|
||||
*/
|
||||
public Allele(String bases, boolean isRef) {
|
||||
this(bases.getBytes(), isRef);
|
||||
}
|
||||
|
||||
public Allele() { this(false); }
|
||||
/**
|
||||
* Creates a non-Ref allele. @see Allele(byte[], boolean) for full information
|
||||
*
|
||||
* @param bases
|
||||
*/
|
||||
public Allele(String bases) { this(bases, false); }
|
||||
|
||||
/**
|
||||
* Creates a non-Ref allele. @see Allele(byte[], boolean) for full information
|
||||
*
|
||||
* @param bases
|
||||
*/
|
||||
public Allele(byte[] bases) { this(bases, false); }
|
||||
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// accessor routines
|
||||
//
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/** Returns true if this is the null allele */
|
||||
public boolean isNull() { return isNull; }
|
||||
/** Returns true if this is not the null allele */
|
||||
public boolean isNonNull() { return ! isNull(); }
|
||||
|
||||
/** Returns true if this is the NO_CALL allele */
|
||||
public boolean isNoCall() { return isNoCall; }
|
||||
/** Returns true if this is not the NO_CALL allele */
|
||||
public boolean isCalled() { return ! isNoCall(); }
|
||||
|
||||
/** Returns true if this Allele is the reference allele */
|
||||
public boolean isReference() { return isRef; }
|
||||
/** Returns true if this Allele is not the reference allele */
|
||||
public boolean isNonReference() { return ! isReference(); }
|
||||
|
||||
/** Returns a nice string representation of this object */
|
||||
public String toString() {
|
||||
return (isNull() ? "-" : ( isNoCall() ? "." : new String(getBases()))) + (isReference() ? "*" : "");
|
||||
}
|
||||
|
|
@ -174,11 +210,37 @@ public class Allele {
|
|||
return isRef == other.isRef && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases());
|
||||
}
|
||||
|
||||
// todo -- notice case insensitivity
|
||||
/**
|
||||
* Returns true if this Allele contains the same bases as test, regardless of its reference status. Also handles
|
||||
* Null and NO_CALL alleles
|
||||
*
|
||||
* @param test
|
||||
* @return
|
||||
*/
|
||||
public boolean basesMatch(byte[] test) { return bases == test || Arrays.equals(bases, test); }
|
||||
|
||||
/**
|
||||
* Returns true if this Allele contains the same bases as test, regardless of its reference status. Also handles
|
||||
* Null and NO_CALL alleles
|
||||
*
|
||||
* @param test
|
||||
* @return
|
||||
*/
|
||||
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
|
||||
|
||||
/**
|
||||
* Returns true if this Allele contains the same bases as test, regardless of its reference status. Also handles
|
||||
* Null and NO_CALL alleles
|
||||
*
|
||||
* @param test
|
||||
* @return
|
||||
*/
|
||||
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
|
||||
|
||||
/**
|
||||
* Returns the length of this allele. Null and NO_CALL alleles have 0 length.
|
||||
* @return
|
||||
*/
|
||||
public int length() {
|
||||
return bases.length;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import java.util.*;
|
|||
public class AttributedObject {
|
||||
public static final double NO_NEG_LOG_10PERROR = 0.0;
|
||||
private double negLog10PError = NO_NEG_LOG_10PERROR;
|
||||
private Set<Object> filters = new HashSet<Object>();
|
||||
|
||||
private Map<Object, Object> attributes = new HashMap<Object, Object>();
|
||||
|
||||
|
|
@ -36,6 +37,45 @@ public class AttributedObject {
|
|||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Filter
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Set<Object> getFilters() {
|
||||
return filters;
|
||||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
return filters.size() > 0;
|
||||
}
|
||||
|
||||
public boolean isNotFiltered() {
|
||||
return ! isFiltered();
|
||||
}
|
||||
|
||||
public void addFilter(Object filter) {
|
||||
if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this);
|
||||
if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
|
||||
filters.add(filter);
|
||||
}
|
||||
|
||||
public void addFilters(Collection<? extends Object> filters) {
|
||||
if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this);
|
||||
for ( Object f : filters )
|
||||
addFilter(f);
|
||||
}
|
||||
|
||||
public void clearFilters() {
|
||||
filters.clear();
|
||||
}
|
||||
|
||||
public void setFilters(Collection<? extends Object> filters) {
|
||||
clearFilters();
|
||||
addFilters(filters);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with log error rates
|
||||
|
|
|
|||
|
|
@ -6,28 +6,177 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author depristo
|
||||
* <p/>
|
||||
* Class VariantContext
|
||||
* <p/>
|
||||
*
|
||||
*
|
||||
* == High-level overview ==
|
||||
*
|
||||
* The VariantContext object is a single general class system for representing genetic variation data composed of:
|
||||
*
|
||||
* * Allele: representing single genetic haplotypes (A, T, ATC, -)
|
||||
* * Genotype: an assignment of alleles for each chromosome of a single named sample at a particular locus
|
||||
* * VariantContext: an abstract class holding all segregating alleles at a locus as well as genotypes
|
||||
* for multiple individuals containing alleles at that locus
|
||||
*
|
||||
* The class system works by defining segregating alleles, creating a variant context representing the segregating
|
||||
* information at a locus, and potentially creating and associating genotypes with individuals in the context.
|
||||
*
|
||||
* All of the classes are highly validating -- call validate() if you modify them -- so you can rely on the
|
||||
* self-consistency of the data once you have a VariantContext in hand. The system has a rich set of accessor
|
||||
* and manipulator routines, as well as more complex static support routines in VariantContextUtils.
|
||||
*
|
||||
* The VariantContext (and Genotype) objects are attributed (supporting addition of arbitrary key/value pairs) and
|
||||
* filtered (can represent a variation that is viewed as suspect).
|
||||
*
|
||||
* VariantContexts are dynamically typed, so whether a VariantContext is a SNP, Indel, or NoVariant depends
|
||||
* on the properties of the alleles in the context. See the detailed documentation on the Type parameter below.
|
||||
*
|
||||
* It's also easy to create subcontexts based on selected genotypes.
|
||||
*
|
||||
* == Working with Variant Contexts ==
|
||||
* === Some example data ===
|
||||
*
|
||||
* Allele A, Aref, T, Tref;
|
||||
* Allele del, delRef, ATC, ATCref;
|
||||
*
|
||||
* A [ref] / T at 10
|
||||
* GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10);
|
||||
*
|
||||
* - / ATC [ref] from 20-22
|
||||
* GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);
|
||||
*
|
||||
* // - [ref] / ATC immediately after 20
|
||||
* GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20);
|
||||
*
|
||||
* === Alleles ===
|
||||
*
|
||||
* See the documentation in the Allele class itself
|
||||
*
|
||||
* What are they?
|
||||
*
|
||||
* Alleles can be either reference or non-reference
|
||||
*
|
||||
* Example alleles used here:
|
||||
*
|
||||
* del = new Allele("-");
|
||||
* A = new Allele("A");
|
||||
* Aref = new Allele("A", true);
|
||||
* T = new Allele("T");
|
||||
* ATC = new Allele("ATC");
|
||||
*
|
||||
* === Creating variant contexts ===
|
||||
*
|
||||
* ==== By hand ====
|
||||
*
|
||||
* Here's an example of a A/T polymorphism with the A being reference:
|
||||
*
|
||||
* <pre>
|
||||
* VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref, T));
|
||||
* </pre>
|
||||
*
|
||||
* If you want to create a non-variant site, just put in a single reference allele
|
||||
*
|
||||
* <pre>
|
||||
* VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref));
|
||||
* </pre>
|
||||
*
|
||||
* A deletion is just as easy:
|
||||
*
|
||||
* <pre>
|
||||
* VariantContext vc = new VariantContext(delLoc, Arrays.asList(ATCref, del));
|
||||
* </pre>
|
||||
*
|
||||
* The only 2 things that distinguish between an insertion and a deletion are the reference allele
|
||||
* and the location of the variation. An insertion has a Null reference allele and at least
|
||||
* one non-reference Non-Null allele. Additionally, the location of the insertion is immediately after
|
||||
* a 1-bp GenomeLoc (at say 20).
|
||||
*
|
||||
* <pre>
|
||||
* VariantContext vc = new VariantContext(insLoc, Arrays.asList(delRef, ATC));
|
||||
* </pre>
|
||||
*
|
||||
* ==== Converting rods and other data structures to VCs ====
|
||||
*
|
||||
* You can convert many common types into VariantContexts using the general function:
|
||||
*
|
||||
* <pre>
|
||||
* VariantContextAdaptors.convertToVariantContext(myObject)
|
||||
* </pre>
|
||||
*
|
||||
* dbSNP and VCFs, for example, can be passed in as myObject and a VariantContext corresponding to that
|
||||
* object will be returned. A null return type indicates that the type isn't yet supported. This is the best
|
||||
* and easiest way to create contexts using RODs.
|
||||
*
|
||||
*
|
||||
* === Working with genotypes ===
|
||||
*
|
||||
* <pre>
|
||||
* List<Allele> alleles = Arrays.asList(Aref, T);
|
||||
* VariantContext vc = new VariantContext(snpLoc, alleles);
|
||||
*
|
||||
* Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "g1", 10);
|
||||
* Genotype g2 = new Genotype(Arrays.asList(Aref, T), "g2", 10);
|
||||
* Genotype g3 = new Genotype(Arrays.asList(T, T), "g3", 10);
|
||||
* vc.addGenotypes(Arrays.asList(g1, g2, g3));
|
||||
* </pre>
|
||||
*
|
||||
* At this point we have 3 genotypes in our context, g1-g3.
|
||||
*
|
||||
* You can access a good deal of information about the genotypes through the VariantContext:
|
||||
*
|
||||
* <pre>
|
||||
* vc.hasGenotypes()
|
||||
* vc.isMonomorphic()
|
||||
* vc.isPolymorphic()
|
||||
* vc.getSampleNames().size()
|
||||
*
|
||||
* vc.getGenotypes()
|
||||
* vc.getGenotypes().get("g1")
|
||||
* vc.hasGenotype("g1")
|
||||
*
|
||||
* vc.getChromosomeCount()
|
||||
* vc.getChromosomeCount(Aref)
|
||||
* vc.getChromosomeCount(T)
|
||||
* </pre>
|
||||
*
|
||||
* === NO_CALL alleles ===
|
||||
*
|
||||
* The system allows one to create Genotypes carrying special NO_CALL alleles that aren't present in the
|
||||
* set of context alleles and that represent undetermined alleles in a genotype:
|
||||
*
|
||||
* Genotype g4 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "NO_DATA_FOR_SAMPLE", 10);
|
||||
*
|
||||
*
|
||||
* === subcontexts ===
|
||||
* It's also very easy to get a subcontext based only on the data in a subset of the genotypes:
|
||||
*
|
||||
* <pre>
|
||||
* VariantContext vc12 = vc.subContextFromGenotypes(Arrays.asList(g1,g2));
|
||||
* VariantContext vc1 = vc.subContextFromGenotypes(Arrays.asList(g1));
|
||||
* </pre>
|
||||
*/
|
||||
public class VariantContext extends AttributedObject {
|
||||
/** The location of this VariantContext */
|
||||
private GenomeLoc loc;
|
||||
|
||||
/** The type (cached for performance reasons) of this context */
|
||||
private Type type = Type.UNDETERMINED;
|
||||
|
||||
/** A set of the alleles segregating in this context */
|
||||
private Set<Allele> alleles = new HashSet<Allele>();
|
||||
|
||||
/** A mapping from sampleName -> genotype objects for all genotypes associated with this context */
|
||||
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
|
||||
private Set<Object> filters = new HashSet<Object>();
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public VariantContext(GenomeLoc loc) {
|
||||
super();
|
||||
|
||||
|
|
@ -57,7 +206,7 @@ public class VariantContext extends AttributedObject {
|
|||
public VariantContext(GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) {
|
||||
this(loc);
|
||||
setAlleles(alleles);
|
||||
setGenotypes(genotypes);
|
||||
addGenotypes(genotypes);
|
||||
validate();
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +215,7 @@ public class VariantContext extends AttributedObject {
|
|||
double negLog10PError, Collection<Object> filters) {
|
||||
this(loc);
|
||||
setAlleles(alleles);
|
||||
setGenotypes(genotypes);
|
||||
addGenotypes(genotypes);
|
||||
setAttributes(attributes);
|
||||
setNegLog10PError(negLog10PError);
|
||||
setFilters(filters);
|
||||
|
|
@ -78,7 +227,7 @@ public class VariantContext extends AttributedObject {
|
|||
double negLog10PError, Collection<Object> filters) {
|
||||
this(loc);
|
||||
setAlleles(alleles);
|
||||
setGenotypes(genotypes);
|
||||
addGenotypes(genotypes);
|
||||
setAttributes(attributes);
|
||||
setNegLog10PError(negLog10PError);
|
||||
setFilters(filters);
|
||||
|
|
@ -93,20 +242,37 @@ public class VariantContext extends AttributedObject {
|
|||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype
|
||||
* genotype and alleles in genotype. This is the right way to test if a single genotype is actually
|
||||
* variant or not.
|
||||
*
|
||||
* @param genotype
|
||||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromGenotypes(Genotype genotype) {
|
||||
return subContextFromGenotypes(Arrays.asList(genotype));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
|
||||
* genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
|
||||
* variant or not.
|
||||
*
|
||||
* @param genotypes
|
||||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
|
||||
// todo -- we should check for uniqueness of genotypes
|
||||
return subContextFromGenotypes(new HashSet<Genotype>(genotypes), getAttributes());
|
||||
return new VariantContext(getLocation(), allelesOfGenotypes(genotypes), genotypes, getAttributes(), getNegLog10PError(), getFilters());
|
||||
}
|
||||
|
||||
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Map<Object, Object> attributes) {
|
||||
return new VariantContext(getLocation(), allelesOfGenotypes(genotypes), genotypes, attributes, getNegLog10PError(), getFilters());
|
||||
}
|
||||
|
||||
/** helper routnine for subcontext */
|
||||
/**
|
||||
* helper routine for subcontext
|
||||
* @param genotypes
|
||||
* @return
|
||||
*/
|
||||
private Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
|
||||
Set<Allele> alleles = new HashSet<Allele>();
|
||||
|
||||
|
|
@ -123,45 +289,6 @@ public class VariantContext extends AttributedObject {
|
|||
return alleles;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Filter
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public Set<Object> getFilters() {
|
||||
return filters;
|
||||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
return filters.size() > 0;
|
||||
}
|
||||
|
||||
public boolean isNotFiltered() {
|
||||
return ! isFiltered();
|
||||
}
|
||||
|
||||
public void addFilter(Object filter) {
|
||||
if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this);
|
||||
if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
|
||||
filters.add(filter);
|
||||
}
|
||||
|
||||
public void addFilters(Collection<? extends Object> filters) {
|
||||
if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this);
|
||||
for ( Object f : filters )
|
||||
addFilter(f);
|
||||
}
|
||||
|
||||
public void clearFilters() {
|
||||
filters.clear();
|
||||
}
|
||||
|
||||
public void setFilters(Collection<? extends Object> filters) {
|
||||
clearFilters();
|
||||
addFilters(filters);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// type operations
|
||||
|
|
@ -195,6 +322,8 @@ public class VariantContext extends AttributedObject {
|
|||
* Mixed
|
||||
* Mix of other classes
|
||||
*
|
||||
* Also supports NO_VARIATION type, used to indicate that the site isn't polymorphic in the population
|
||||
*
|
||||
*
|
||||
* Not currently supported:
|
||||
*
|
||||
|
|
@ -232,9 +361,9 @@ public class VariantContext extends AttributedObject {
|
|||
}
|
||||
|
||||
/**
|
||||
* convenience method for switching over the allele type
|
||||
* Determines (if necessary) and returns the type of this variation by examining the alleles it contains.
|
||||
*
|
||||
* @return the AlleleType of this allele
|
||||
* @return the type of this VariantContext
|
||||
**/
|
||||
public Type getType() {
|
||||
if ( type == Type.UNDETERMINED )
|
||||
|
|
@ -255,7 +384,10 @@ public class VariantContext extends AttributedObject {
|
|||
return BaseUtils.SNPSubstitutionType(getReference().getBases()[0], getAlternateAllele(0).getBases()[0]);
|
||||
}
|
||||
|
||||
/** If this is a biallelic SNP, is it a transition? */
|
||||
public boolean isTransition() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSITION; }
|
||||
|
||||
/** If this is a biallelic SNP, is it a transversion? */
|
||||
public boolean isTransversion() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSVERSION; }
|
||||
|
||||
/**
|
||||
|
|
@ -322,6 +454,7 @@ public class VariantContext extends AttributedObject {
|
|||
return ref;
|
||||
}
|
||||
|
||||
/** Private helper routine that grabs the reference allele but doesn't throw an error if there's no such allele */
|
||||
private Allele getReferenceWithoutError() {
|
||||
for ( Allele allele : getAlleles() )
|
||||
if ( allele.isReference() )
|
||||
|
|
@ -336,14 +469,23 @@ public class VariantContext extends AttributedObject {
|
|||
return getNAlleles() == 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The number of segregating alleles in this context
|
||||
*/
|
||||
public int getNAlleles() {
|
||||
return alleles.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The allele sharing the same bases as this String. A convenience method; better to use byte[]
|
||||
*/
|
||||
public Allele getAllele(String allele) {
|
||||
return getAllele(allele.getBytes());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The allele sharing the same bases as this byte[], or null if no such allele is present.
|
||||
*/
|
||||
public Allele getAllele(byte[] allele) {
|
||||
for ( Allele a : getAlleles() ) {
|
||||
if ( a.basesMatch(allele) ) {
|
||||
|
|
@ -354,6 +496,9 @@ public class VariantContext extends AttributedObject {
|
|||
return null; // couldn't find anything
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if this context contains Allele allele, or false otherwise
|
||||
*/
|
||||
public boolean hasAllele(Allele allele) {
|
||||
for ( Allele a : getAlleles() ) {
|
||||
if ( a.equals(allele) )
|
||||
|
|
@ -389,6 +534,11 @@ public class VariantContext extends AttributedObject {
|
|||
return altAlleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param i -- the ith allele (from 0 to n - 2 for a context with n alleles including a reference allele)
|
||||
* @return the ith non-reference allele in this context
|
||||
* @throws IllegalArgumentException if i is invalid
|
||||
*/
|
||||
public Allele getAlternateAllele(int i) {
|
||||
int n = 0;
|
||||
|
||||
|
|
@ -400,18 +550,28 @@ public class VariantContext extends AttributedObject {
|
|||
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the alleles segregating in this context to the collection of alleles, each of which must be unique according
|
||||
* to equals() in Allele. Validate() should be called when you are done modifying the context.
|
||||
*
|
||||
* @param alleles
|
||||
*/
|
||||
public void setAlleles(Collection<Allele> alleles) {
|
||||
this.alleles.clear();
|
||||
for ( Allele a : alleles )
|
||||
addAllele(a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds allele to the collection of alleles segregating in this context. The new
|
||||
* allele must be unique according to equals() in Allele.
|
||||
* Validate() should be called when you are done modifying the context.
|
||||
*
|
||||
* @param allele
|
||||
*/
|
||||
public void addAllele(Allele allele) {
|
||||
addAllele(allele, false);
|
||||
}
|
||||
final boolean allowDuplicates = false; // used to be a parameter
|
||||
|
||||
public void addAllele(Allele allele, boolean allowDuplicates) {
|
||||
type = Type.UNDETERMINED;
|
||||
|
||||
for ( Allele a : alleles ) {
|
||||
|
|
@ -431,21 +591,35 @@ public class VariantContext extends AttributedObject {
|
|||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @return true if the context represents variants with associated genotypes
|
||||
* @return true if the context has associated genotypes
|
||||
*/
|
||||
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
||||
|
||||
public boolean hasSingleSample() { return genotypes.size() == 1; }
|
||||
|
||||
/**
|
||||
* @return a map from sample name to Genotype for all Genotypes associated with this context
|
||||
*/
|
||||
public Map<String, Genotype> getGenotypes() { return genotypes; }
|
||||
|
||||
/**
|
||||
* Returns a map from sampleName -> Genotype for the genotype associated with sampleName. Returns a map
|
||||
* for consistency with the multi-get function.
|
||||
*
|
||||
* @param sampleName
|
||||
* @return
|
||||
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
||||
*/
|
||||
public Map<String, Genotype> getGenotypes(String sampleName) {
|
||||
return getGenotypes(Arrays.asList(sampleName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a map from sampleName -> Genotype for each sampleName in sampleNames. Returns a map
|
||||
* for consistency with the multi-get function.
|
||||
*
|
||||
* @param sampleNames a unique list of sample names
|
||||
* @return
|
||||
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
|
||||
*/
|
||||
public Map<String, Genotype> getGenotypes(Collection<String> sampleNames) {
|
||||
HashMap<String, Genotype> map = new HashMap<String, Genotype>();
|
||||
|
||||
|
|
@ -464,6 +638,20 @@ public class VariantContext extends AttributedObject {
|
|||
return getGenotypes().keySet();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
|
||||
*/
|
||||
public Genotype getGenotype(String sample) {
|
||||
return getGenotypes().get(sample);
|
||||
}
|
||||
|
||||
public boolean hasGenotype(String sample) {
|
||||
return getGenotypes().containsKey(sample);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS
|
||||
*
|
||||
|
|
@ -496,7 +684,8 @@ public class VariantContext extends AttributedObject {
|
|||
}
|
||||
|
||||
/**
|
||||
* These are genotype-specific functions
|
||||
* Genotype-specific functions -- are the genotypes monomorphic w.r.t. the alleles segregating at this
|
||||
* site? That is, is the number of alternate alleles among all of the genotypes == 0?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
|
|
@ -504,65 +693,106 @@ public class VariantContext extends AttributedObject {
|
|||
return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Genotype-specific functions -- are the genotypes polymorphic w.r.t. the alleles segregating at this
|
||||
* site? That is, is the number of alternate alleles among all of the genotypes > 0?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean isPolymorphic() {
|
||||
return ! isMonomorphic();
|
||||
}
|
||||
|
||||
public void clearGenotypes() {
|
||||
this.genotypes.clear();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
|
||||
* Adds this single genotype to the context, not allowing duplicate genotypes to be added
|
||||
* @param genotype
|
||||
*/
|
||||
public Genotype getGenotype(String sample) {
|
||||
return getGenotypes().get(sample);
|
||||
public void addGenotypes(Genotype genotype) {
|
||||
putGenotype(genotype.getSampleName(), genotype, false);
|
||||
}
|
||||
|
||||
public boolean hasGenotype(String sample) {
|
||||
return getGenotypes().containsKey(sample);
|
||||
}
|
||||
|
||||
public void setGenotypes(Genotype genotype) {
|
||||
this.genotypes.clear();
|
||||
addGenotype(genotype);
|
||||
}
|
||||
|
||||
public void setGenotypes(Collection<Genotype> genotypes) {
|
||||
this.genotypes.clear();
|
||||
|
||||
/**
|
||||
* Adds these genotypes to the context, not allowing duplicate genotypes to be added
|
||||
* @param genotypes
|
||||
*/
|
||||
public void addGenotypes(Collection<Genotype> genotypes) {
|
||||
for ( Genotype g : genotypes ) {
|
||||
addGenotype(g.getSampleName(), g);
|
||||
addGenotype(g);
|
||||
}
|
||||
}
|
||||
|
||||
public void setGenotypes(Map<String, Genotype> genotypes) {
|
||||
this.genotypes.clear();
|
||||
/**
|
||||
* Adds these genotypes to the context, not allowing duplicate genotypes to be added.
|
||||
* @param genotypes
|
||||
*/
|
||||
public void addGenotypes(Map<String, Genotype> genotypes) {
|
||||
|
||||
for ( Map.Entry<String, Genotype> elt : genotypes.entrySet() ) {
|
||||
addGenotype(elt.getKey(), elt.getValue());
|
||||
addGenotype(elt.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
public void addGenotypes(Map<String, Genotype> genotypes) {
|
||||
/**
|
||||
* Adds these genotypes to the context.
|
||||
*
|
||||
* @param genotypes
|
||||
*/
|
||||
public void putGenotypes(Map<String, Genotype> genotypes) {
|
||||
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() )
|
||||
addGenotype(g.getKey(), g.getValue());
|
||||
putGenotype(g.getKey(), g.getValue());
|
||||
}
|
||||
|
||||
|
||||
public void addGenotypes(Collection<Genotype> genotypes) {
|
||||
/**
|
||||
* Adds these genotypes to the context.
|
||||
*
|
||||
* @param genotypes
|
||||
*/
|
||||
public void putGenotypes(Collection<Genotype> genotypes) {
|
||||
for ( Genotype g : genotypes )
|
||||
addGenotype(g);
|
||||
putGenotype(g);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this genotype to the context, throwing an error if it's already bound.
|
||||
*
|
||||
* @param genotype
|
||||
*/
|
||||
public void addGenotype(Genotype genotype) {
|
||||
addGenotype(genotype.getSampleName(), genotype, false);
|
||||
addGenotype(genotype.getSampleName(), genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this genotype to the context, throwing an error if it's already bound.
|
||||
*
|
||||
* @param genotype
|
||||
*/
|
||||
public void addGenotype(String sampleName, Genotype genotype) {
|
||||
addGenotype(sampleName, genotype, false);
|
||||
putGenotype(sampleName, genotype, false);
|
||||
}
|
||||
|
||||
public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
|
||||
/**
|
||||
* Adds this genotype to the context.
|
||||
*
|
||||
* @param genotype
|
||||
*/
|
||||
public void putGenotype(Genotype genotype) {
|
||||
putGenotype(genotype.getSampleName(), genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds this genotype to the context.
|
||||
*
|
||||
* @param genotype
|
||||
*/
|
||||
public void putGenotype(String sampleName, Genotype genotype) {
|
||||
putGenotype(sampleName, genotype, true);
|
||||
}
|
||||
|
||||
private void putGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
|
||||
if ( hasGenotype(sampleName) && ! allowOverwrites )
|
||||
throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this);
|
||||
|
||||
|
|
@ -572,6 +802,10 @@ public class VariantContext extends AttributedObject {
|
|||
this.genotypes.put(sampleName, genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes the binding from sampleName to genotype. If no such binding exists, throws an IllegalArgumentException
|
||||
* @param sampleName
|
||||
*/
|
||||
public void removeGenotype(String sampleName) {
|
||||
if ( ! this.genotypes.containsKey(sampleName) )
|
||||
throw new IllegalArgumentException("Sample name isn't contained in genotypes " + sampleName + " genotypes =" + genotypes);
|
||||
|
|
@ -579,6 +813,10 @@ public class VariantContext extends AttributedObject {
|
|||
this.genotypes.remove(sampleName);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes genotype from the context. If the genotype's sample isn't bound in this context, throws an IllegalArgumentException
|
||||
* @param genotype
|
||||
*/
|
||||
public void removeGenotype(Genotype genotype) {
|
||||
removeGenotype(genotype.getSampleName());
|
||||
}
|
||||
|
|
@ -598,7 +836,7 @@ public class VariantContext extends AttributedObject {
|
|||
return validate(true);
|
||||
}
|
||||
|
||||
public boolean validate(boolean throwException) {
|
||||
private boolean validate(boolean throwException) {
|
||||
try {
|
||||
validateAlleles();
|
||||
validateGenotypes();
|
||||
|
|
@ -713,25 +951,4 @@ public class VariantContext extends AttributedObject {
|
|||
return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s",
|
||||
getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes().values());
|
||||
}
|
||||
|
||||
// todo -- move to utils
|
||||
/**
|
||||
* @param allele the allele to be queried
|
||||
*
|
||||
* @return the frequency of the given allele in this context
|
||||
*/
|
||||
// public double getAlleleFrequency(Allele allele) {
|
||||
// int alleleCount = 0;
|
||||
// int totalCount = 0;
|
||||
//
|
||||
// for ( Genotype g : getGenotypes().values() ) {
|
||||
// for ( Allele a : g.getAlleles() ) {
|
||||
// totalCount++;
|
||||
// if ( allele.equals(a) )
|
||||
// alleleCount++;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
|
||||
// }
|
||||
}
|
||||
|
|
@ -89,11 +89,15 @@ public class VariantContextAdaptors {
|
|||
|
||||
double pError = vcfG.getNegLog10PError() == VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY ? AttributedObject.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError();
|
||||
Genotype g = new Genotype(vc, alleleStrings, vcfG.getSampleName(), pError);
|
||||
|
||||
for ( Map.Entry<String, String> e : vcfG.getFields().entrySet() ) {
|
||||
if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) )
|
||||
if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) && ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) )
|
||||
g.putAttribute(e.getKey(), e.getValue());
|
||||
}
|
||||
|
||||
if ( vcfG.isFiltered() ) // setup the FL genotype filter fields
|
||||
// Look up the FT *value* first and split that on ';' into individual filter names; splitting the key itself
// would pass a String[] to Map.get(), which never matches and yields a null filter entry.
g.setFilters(Arrays.asList(vcfG.getFields().get(VCFGenotypeRecord.GENOTYPE_FILTER_KEY).split(";")));
|
||||
|
||||
vc.addGenotype(g);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,6 +39,10 @@ public class VariantContextUtils {
|
|||
return exps;
|
||||
}
|
||||
|
||||
// todo -- add generalized matching routine here
|
||||
// todo -- should fill in all fields (loc, filter, etc) for selection
|
||||
// todo -- genotypes should be sampleNAME.field -> value bindings
|
||||
|
||||
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
|||
public static final String GENOTYPE_QUALITY_KEY = "GQ";
|
||||
public static final String DEPTH_KEY = "DP";
|
||||
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
|
||||
public static final String FILTER_KEY = "FT";
|
||||
public static final String GENOTYPE_FILTER_KEY = "FT";
|
||||
public static final String OLD_DEPTH_KEY = "RD";
|
||||
|
||||
// the values for empty fields
|
||||
|
|
@ -194,7 +194,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
|||
}
|
||||
|
||||
public boolean isFiltered() {
|
||||
return ( mFields.get(FILTER_KEY) != null && ! mFields.get(FILTER_KEY).equals("0"));
|
||||
return ( mFields.get(GENOTYPE_FILTER_KEY) != null && ! mFields.get(GENOTYPE_FILTER_KEY).equals("0"));
|
||||
}
|
||||
|
||||
public int getPloidy() {
|
||||
|
|
@ -291,7 +291,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
|
|||
result = String.valueOf(MISSING_GENOTYPE_QUALITY);
|
||||
else if ( field.equals(DEPTH_KEY) )
|
||||
result = String.valueOf(MISSING_DEPTH);
|
||||
else if ( field.equals(FILTER_KEY) )
|
||||
else if ( field.equals(GENOTYPE_FILTER_KEY) )
|
||||
result = UNFILTERED;
|
||||
// TODO -- support haplotype quality
|
||||
//else if ( field.equals(HAPLOTYPE_QUALITY_KEY) )
|
||||
|
|
|
|||
|
|
@ -15,8 +15,6 @@ import org.junit.BeforeClass;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.Collection;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue