Documentation for VariantContext. Please read it and start using it.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2756 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-02-01 17:49:51 +00:00
parent 990af3f76e
commit d9671dffba
7 changed files with 466 additions and 141 deletions

View File

@ -20,7 +20,7 @@ import java.util.Arrays;
*
* SNP polymorphism of C/G -> { C , G } -> C is the reference allele
* 1 base deletion of C -> { C , - } -> C is the reference allele
* 1 base insertion of A -> { - ; A } -> NULL is the reference allele
* 1 base insertion of A -> { - ; A } -> Null is the reference allele
*
* Suppose I see a the following in the population:
*
@ -61,12 +61,16 @@ import java.util.Arrays;
* A / C @ loc => SNP with
* - / A => INDEL
*
* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion
* If you know which allele is the reference, you can determine whether the variant is an insertion or deletion.
*
* Allele also supports the concept of a NO_CALL allele. This Allele represents a haplotype that couldn't be
* determined. This is usually represented by a '.' allele.
*
* Note that Alleles store all bases as bytes, in **UPPER CASE**. So 'atc' == 'ATC' from the perspective of an
* Allele.
*/
public class Allele {
private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
// private static final byte[] NULL_ALLELE_BASES = new byte[0];
// private static final byte[] NO_CALL_ALLELE_BASES = ".".getBytes();
private boolean isRef = false;
private boolean isNull = false;
@ -74,8 +78,17 @@ public class Allele {
private byte[] bases = null;
/** A generic static NO_CALL allele for use */
public final static Allele NO_CALL = new Allele(".");
/**
* Create a new Allele that includes bases and is tagged as the reference allele if isRef == true. If bases
* == '-', a Null allele is created. If bases == '.', a no call Allele is created.
*
* @param bases the DNA sequence of this variation, '-', or '.'
* @param isRef should we make this a reference allele?
* @throws IllegalArgumentException if bases contains illegal characters or is otherwise malformed
*/
public Allele(byte[] bases, boolean isRef) {
if ( bases == null )
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
@ -84,8 +97,7 @@ public class Allele {
if ( wouldBeNullAllele(bases) ) {
bases = EMPTY_ALLELE_BASES;
isNull = true;
}
if ( wouldBeNoCallAllele(bases) ) {
} else if ( wouldBeNoCallAllele(bases) ) {
bases = EMPTY_ALLELE_BASES;
isNoCall = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
@ -100,21 +112,26 @@ public class Allele {
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
}
public final static boolean wouldBeNullAllele(byte[] bases) {
/**
* Do the bases represent the null allele?
*/
public static boolean wouldBeNullAllele(byte[] bases) {
return (bases.length == 1 && bases[0] == '-') || bases.length == 0;
}
public final static boolean wouldBeNoCallAllele(byte[] bases) {
/** Do the bases represent the NO_CALL allele? */
public static boolean wouldBeNoCallAllele(byte[] bases) {
return bases.length == 1 && bases[0] == '.';
}
public final static boolean acceptableAlleleBases(String bases) {
/** Can we create an allele from this String of bases, including NO_CALL and Null alleles? */
public static boolean acceptableAlleleBases(String bases) {
return acceptableAlleleBases(bases.getBytes());
}
public final static boolean acceptableAlleleBases(byte[] bases) {
if ( (bases.length == 1 && bases[0] == '-') || bases.length == 0)
/** Can we create an allele from bases, including NO_CALL and Null alleles? */
public static boolean acceptableAlleleBases(byte[] bases) {
if ( wouldBeNullAllele(bases) || wouldBeNoCallAllele(bases) )
return true;
for ( byte b : bases ) {
@ -126,33 +143,52 @@ public class Allele {
return true;
}
/** null allele creation method */
public Allele(boolean isRef) {
this("", isRef);
}
/**
* @see Allele(byte[], boolean)
*
* @param bases
* @param isRef
*/
public Allele(String bases, boolean isRef) {
this(bases.getBytes(), isRef);
}
public Allele() { this(false); }
/**
* Creates a non-Ref allele. @see Allele(byte[], boolean) for full information
*
* @param bases
*/
public Allele(String bases) { this(bases, false); }
/**
* Creates a non-Ref allele. @see Allele(byte[], boolean) for full information
*
* @param bases
*/
public Allele(byte[] bases) { this(bases, false); }
//
// ---------------------------------------------------------------------------------------------------------
//
// accessor routines
//
//
// ---------------------------------------------------------------------------------------------------------
/** Returns true if this is the null allele */
public boolean isNull() { return isNull; }
/** Returns true if this is not the null allele */
public boolean isNonNull() { return ! isNull(); }
/** Returns true if this is the NO_CALL allele */
public boolean isNoCall() { return isNoCall; }
/** Returns true if this is not the NO_CALL allele */
public boolean isCalled() { return ! isNoCall(); }
/** Returns true if this Allele is the reference allele */
public boolean isReference() { return isRef; }
/** Returns true if this Allele is not the reference allele */
public boolean isNonReference() { return ! isReference(); }
/**
 * Renders this allele as text: "-" for the Null allele, "." for the NO_CALL allele, and the bases
 * themselves otherwise. A trailing "*" is appended when the allele is the reference.
 */
public String toString() {
    final String label;
    if ( isNull() ) {
        label = "-";
    } else if ( isNoCall() ) {
        label = ".";
    } else {
        label = new String(getBases());
    }

    final String refMarker = isReference() ? "*" : "";
    return label + refMarker;
}
@ -174,11 +210,37 @@ public class Allele {
return isRef == other.isRef && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases());
}
// todo -- notice case insensitivity
/**
 * Tests whether this Allele holds exactly the bases in test, ignoring reference status. The comparison
 * is a plain byte-for-byte array comparison, so no case folding happens here.
 *
 * @param test the bases to compare against this allele's bases
 * @return true if test is the very same array as this allele's bases or is element-wise equal to it
 */
public boolean basesMatch(byte[] test) {
    if ( bases == test ) {
        return true;
    }
    return Arrays.equals(bases, test);
}
/**
 * Tests whether this Allele holds the bases spelled out by test, ignoring reference status. The string
 * is upper-cased before being converted to bytes, so 'atc' and 'ATC' are treated alike.
 *
 * @param test the bases to compare against, in any case
 * @return true if the upper-cased bytes of test match this allele's bases
 */
public boolean basesMatch(String test) {
    final byte[] upperCased = test.toUpperCase().getBytes();
    return basesMatch(upperCased);
}
/**
 * Tests whether this Allele and test hold the same bases, ignoring the reference status of both.
 *
 * @param test the allele whose bases are compared against this allele's bases
 * @return true if the bases of the two alleles match
 */
public boolean basesMatch(Allele test) {
    final byte[] otherBases = test.getBases();
    return basesMatch(otherBases);
}
/**
 * Reports how many bases this allele stores. Null and NO_CALL alleles have 0 length.
 *
 * @return the number of bytes in this allele's base array
 */
public int length() {
    final int nBases = bases.length;
    return nBases;
}

View File

@ -16,6 +16,7 @@ import java.util.*;
public class AttributedObject {
public static final double NO_NEG_LOG_10PERROR = 0.0;
private double negLog10PError = NO_NEG_LOG_10PERROR;
private Set<Object> filters = new HashSet<Object>();
private Map<Object, Object> attributes = new HashMap<Object, Object>();
@ -36,6 +37,45 @@ public class AttributedObject {
}
// ---------------------------------------------------------------------------------------------------------
//
// Filter
//
// ---------------------------------------------------------------------------------------------------------
/**
 * @return the set of filters attached to this object; this is the internal set itself, not a copy
 */
public Set<Object> getFilters() {
    return this.filters;
}
/**
 * @return true if at least one filter has been attached to this object
 */
public boolean isFiltered() {
    return ! filters.isEmpty();
}
/**
 * @return true if no filters are attached to this object (the negation of isFiltered())
 */
public boolean isNotFiltered() {
    final boolean filtered = isFiltered();
    return ! filtered;
}
/**
 * Attaches a single filter to this object.
 *
 * @param filter the filter to attach; must be non-null and not already attached
 * @throws IllegalArgumentException if filter is null or is already present
 */
public void addFilter(Object filter) {
    if ( filter == null ) {
        throw new IllegalArgumentException("BUG: Attempting to add null filter " + this);
    }

    final boolean alreadyPresent = getFilters().contains(filter);
    if ( alreadyPresent ) {
        throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
    }

    filters.add(filter);
}
/**
 * Attaches every filter in the collection, one at a time, via addFilter(). Filters added before a
 * failing element stay attached.
 *
 * @param filters the filters to attach; must be non-null
 * @throws IllegalArgumentException if filters is null, or if any element is rejected by addFilter()
 */
public void addFilters(Collection<? extends Object> filters) {
    if ( filters == null ) {
        throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this);
    }

    final Iterator<? extends Object> it = filters.iterator();
    while ( it.hasNext() ) {
        addFilter(it.next());
    }
}
/**
 * Detaches all filters from this object, leaving it unfiltered.
 */
public void clearFilters() {
    this.filters.clear();
}
/**
 * Replaces the filters attached to this object with the given ones.
 *
 * The argument is validated and copied before the current filters are cleared. This way a null
 * argument no longer wipes the existing filters before the exception is raised, and passing this
 * object's own filter set (e.g. setFilters(getFilters())) no longer empties the input before it is read.
 *
 * @param filters the new filters; must be non-null, and each element must be acceptable to addFilter()
 * @throws IllegalArgumentException if filters is null, or if any element is rejected by addFilter()
 */
public void setFilters(Collection<? extends Object> filters) {
    if ( filters == null ) {
        throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this);
    }

    // snapshot first: the argument may be the very set we are about to clear
    final List<Object> snapshot = new ArrayList<Object>(filters);
    clearFilters();
    addFilters(snapshot);
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with log error rates

View File

@ -6,28 +6,177 @@ import org.broadinstitute.sting.utils.BaseUtils;
import java.util.*;
/**
* @author depristo
* <p/>
* Class VariantContext
* <p/>
*
*
* == High-level overview ==
*
* The VariantContext object is a single general class system for representing genetic variation data composed of:
*
* * Allele: representing single genetic haplotypes (A, T, ATC, -)
* * Genotype: an assignment of alleles for each chromosome of a single named sample at a particular locus
* * VariantContext: an abstract class holding all segregating alleles at a locus as well as genotypes
* for multiple individuals containing alleles at that locus
*
* The class system works by defining segregating alleles, creating a variant context representing the segregating
* information at a locus, and potentially creating and associating genotypes with individuals in the context.
*
* All of the classes are highly validating -- call validate() if you modify them -- so you can rely on the
* self-consistency of the data once you have a VariantContext in hand. The system has a rich set of accessor
* and manipulator routines, as well as more complex static support routines in VariantContextUtils.
*
* The VariantContext (and Genotype) objects are attributed (supporting addition of arbitrary key/value pairs) and
* filtered (can represent a variation that is viewed as suspect).
*
* VariantContexts are dynamically typed, so whether a VariantContext is a SNP, Indel, or NoVariant depends
* on the properties of the alleles in the context. See the detailed documentation on the Type parameter below.
*
* It's also easy to create subcontexts based on selected genotypes.
*
* == Working with Variant Contexts ==
* === Some example data ===
*
* Allele A, Aref, T, Tref;
* Allele del, delRef, ATC, ATCref;
*
* A [ref] / T at 10
* GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10);
*
* - / ATC [ref] from 20-22
* GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);
*
* // - [ref] / ATC immediately after 20
* GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20);
*
* === Alleles ===
*
* See the documentation in the Allele class itself
*
* What are they?
*
* Alleles can be either reference or non-reference
*
* Example alleles used here:
*
* del = new Allele("-");
* A = new Allele("A");
* Aref = new Allele("A", true);
* T = new Allele("T");
* ATC = new Allele("ATC");
*
* === Creating variant contexts ===
*
* ==== By hand ====
*
* Here's an example of a A/T polymorphism with the A being reference:
*
* <pre>
* VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref, T));
* </pre>
*
* If you want to create a non-variant site, just put in a single reference allele
*
* <pre>
* VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref));
* </pre>
*
* A deletion is just as easy:
*
* <pre>
* VariantContext vc = new VariantContext(delLoc, Arrays.asList(ATCref, del));
* </pre>
*
* The only 2 things that distinguish an insertion from a deletion are the reference allele
* and the location of the variation. An insertion has a Null reference allele and at least
* one non-reference Non-Null allele. Additionally, the location of the insertion is immediately after
* a 1-bp GenomeLoc (at say 20).
*
* <pre>
* VariantContext vc = new VariantContext(insLoc, Arrays.asList(delRef, ATC));
* </pre>
*
* ==== Converting rods and other data structures to VCs ====
*
* You can convert many common types into VariantContexts using the general function:
*
* <pre>
* VariantContextAdaptors.convertToVariantContext(myObject)
* </pre>
*
* dbSNP and VCFs, for example, can be passed in as myObject and a VariantContext corresponding to that
* object will be returned. A null return type indicates that the type isn't yet supported. This is the best
* and easiest way to create contexts using RODs.
*
*
* === Working with genotypes ===
*
* <pre>
* List<Allele> alleles = Arrays.asList(Aref, T);
* VariantContext vc = new VariantContext(snpLoc, alleles);
*
* Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "g1", 10);
* Genotype g2 = new Genotype(Arrays.asList(Aref, T), "g2", 10);
* Genotype g3 = new Genotype(Arrays.asList(T, T), "g3", 10);
* vc.addGenotypes(Arrays.asList(g1, g2, g3));
* </pre>
*
* At this point we have 3 genotypes in our context, g1-g3.
*
* You can access a good deal of information about the genotypes through the VariantContext:
*
* <pre>
* vc.hasGenotypes()
* vc.isMonomorphic()
* vc.isPolymorphic()
* vc.getSampleNames().size()
*
* vc.getGenotypes()
* vc.getGenotypes().get("g1")
* vc.hasGenotype("g1")
*
* vc.getChromosomeCount()
* vc.getChromosomeCount(Aref)
* vc.getChromosomeCount(T)
* </pre>
*
* === NO_CALL alleles ===
*
* The system allows one to create Genotypes carrying special NO_CALL alleles that aren't present in the
* set of context alleles and that represent undetermined alleles in a genotype:
*
* Genotype g4 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "NO_DATA_FOR_SAMPLE", 10);
*
*
* === subcontexts ===
* It's also very easy to get a subcontext based only on the data in a subset of the genotypes:
*
* <pre>
* VariantContext vc12 = vc.subContextFromGenotypes(Arrays.asList(g1,g2));
* VariantContext vc1 = vc.subContextFromGenotypes(Arrays.asList(g1));
* </pre>
*/
public class VariantContext extends AttributedObject {
/** The location of this VariantContext */
private GenomeLoc loc;
/** The type (cached for performance reasons) of this context */
private Type type = Type.UNDETERMINED;
/** A set of the alleles segregating in this context */
private Set<Allele> alleles = new HashSet<Allele>();
/** A mapping from sampleName -> genotype objects for all genotypes associated with this context */
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
private Set<Object> filters = new HashSet<Object>();
// ---------------------------------------------------------------------------------------------------------
//
// constructors
//
// ---------------------------------------------------------------------------------------------------------
public VariantContext(GenomeLoc loc) {
super();
@ -57,7 +206,7 @@ public class VariantContext extends AttributedObject {
public VariantContext(GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) {
this(loc);
setAlleles(alleles);
setGenotypes(genotypes);
addGenotypes(genotypes);
validate();
}
@ -66,7 +215,7 @@ public class VariantContext extends AttributedObject {
double negLog10PError, Collection<Object> filters) {
this(loc);
setAlleles(alleles);
setGenotypes(genotypes);
addGenotypes(genotypes);
setAttributes(attributes);
setNegLog10PError(negLog10PError);
setFilters(filters);
@ -78,7 +227,7 @@ public class VariantContext extends AttributedObject {
double negLog10PError, Collection<Object> filters) {
this(loc);
setAlleles(alleles);
setGenotypes(genotypes);
addGenotypes(genotypes);
setAttributes(attributes);
setNegLog10PError(negLog10PError);
setFilters(filters);
@ -93,20 +242,37 @@ public class VariantContext extends AttributedObject {
//
// ---------------------------------------------------------------------------------------------------------
/**
 * Builds a sub-context for a single genotype by wrapping it in a one-element list and delegating to the
 * collection-based subContextFromGenotypes(). This is the right way to test whether a single genotype
 * is actually variant or not.
 *
 * @param genotype the one genotype the sub-context should be restricted to
 * @return the sub-context produced by the collection-based overload for that single genotype
 */
public VariantContext subContextFromGenotypes(Genotype genotype) {
    final List<Genotype> single = Arrays.asList(genotype);
    return subContextFromGenotypes(single);
}
/**
* Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
* genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
* variant or not.
*
* @param genotypes
* @return
*/
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
// todo -- we should check for uniqueness of genotypes
return subContextFromGenotypes(new HashSet<Genotype>(genotypes), getAttributes());
return new VariantContext(getLocation(), allelesOfGenotypes(genotypes), genotypes, getAttributes(), getNegLog10PError(), getFilters());
}
public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Map<Object, Object> attributes) {
return new VariantContext(getLocation(), allelesOfGenotypes(genotypes), genotypes, attributes, getNegLog10PError(), getFilters());
}
/** helper routnine for subcontext */
/**
* helper routine for subcontext
* @param genotypes
* @return
*/
private Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
Set<Allele> alleles = new HashSet<Allele>();
@ -123,45 +289,6 @@ public class VariantContext extends AttributedObject {
return alleles;
}
// ---------------------------------------------------------------------------------------------------------
//
// Filter
//
// ---------------------------------------------------------------------------------------------------------
public Set<Object> getFilters() {
return filters;
}
public boolean isFiltered() {
return filters.size() > 0;
}
public boolean isNotFiltered() {
return ! isFiltered();
}
public void addFilter(Object filter) {
if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this);
if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this);
filters.add(filter);
}
public void addFilters(Collection<? extends Object> filters) {
if ( filters == null ) throw new IllegalArgumentException("BUG: Attempting to add null filters at" + this);
for ( Object f : filters )
addFilter(f);
}
public void clearFilters() {
filters.clear();
}
public void setFilters(Collection<? extends Object> filters) {
clearFilters();
addFilters(filters);
}
// ---------------------------------------------------------------------------------------------------------
//
// type operations
@ -195,6 +322,8 @@ public class VariantContext extends AttributedObject {
* Mixed
* Mix of other classes
*
* Also supports NO_VARIATION type, used to indicate that the site isn't polymorphic in the population
*
*
* Not currently supported:
*
@ -232,9 +361,9 @@ public class VariantContext extends AttributedObject {
}
/**
* convenience method for switching over the allele type
* Determines (if necessary) and returns the type of this variation by examining the alleles it contains.
*
* @return the AlleleType of this allele
* @return the type of this VariantContext
**/
public Type getType() {
if ( type == Type.UNDETERMINED )
@ -255,7 +384,10 @@ public class VariantContext extends AttributedObject {
return BaseUtils.SNPSubstitutionType(getReference().getBases()[0], getAlternateAllele(0).getBases()[0]);
}
/** If this is a biallelic SNP, is it a transition? */
public boolean isTransition() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSITION; }
/** If this is a biallelic SNP, is it a transversion? */
public boolean isTransversion() { return getSNPSubstitutionType() == BaseUtils.BaseSubstitutionType.TRANSVERSION; }
/**
@ -322,6 +454,7 @@ public class VariantContext extends AttributedObject {
return ref;
}
/** Private helper routine that grabs the reference allele but doesn't throw an error if there's no such allele */
private Allele getReferenceWithoutError() {
for ( Allele allele : getAlleles() )
if ( allele.isReference() )
@ -336,14 +469,23 @@ public class VariantContext extends AttributedObject {
return getNAlleles() == 2;
}
/**
 * @return the number of alleles currently stored in this context's allele set
 */
public int getNAlleles() {
    final int count = alleles.size();
    return count;
}
/**
 * Convenience form of getAllele(byte[]) that converts the String to bytes first; prefer the byte[]
 * form where possible.
 *
 * @param allele the bases of the wanted allele, as a String
 * @return the allele sharing the same bases as this String, as found by getAllele(byte[])
 */
public Allele getAllele(String allele) {
    final byte[] asBytes = allele.getBytes();
    return getAllele(asBytes);
}
/**
* @return The allele sharing the same bases as this byte[], or null if no such allele is present.
*/
public Allele getAllele(byte[] allele) {
for ( Allele a : getAlleles() ) {
if ( a.basesMatch(allele) ) {
@ -354,6 +496,9 @@ public class VariantContext extends AttributedObject {
return null; // couldn't find anything
}
/**
* @return True if this context contains Allele allele, or false otherwise
*/
public boolean hasAllele(Allele allele) {
for ( Allele a : getAlleles() ) {
if ( a.equals(allele) )
@ -389,6 +534,11 @@ public class VariantContext extends AttributedObject {
return altAlleles;
}
/**
* @param i -- the ith allele (from 0 to n - 2 for a context with n alleles including a reference allele)
* @return the ith non-reference allele in this context
* @throws IllegalArgumentException if i is invalid
*/
public Allele getAlternateAllele(int i) {
int n = 0;
@ -400,18 +550,28 @@ public class VariantContext extends AttributedObject {
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
}
/**
 * Replaces the alleles segregating in this context with the given collection of alleles, each of which
 * is added through addAllele(). Validate() should be called when you are done modifying the context.
 *
 * The incoming alleles are copied before the current ones are cleared, so passing a collection that is
 * backed by this context's own allele set does not lose its contents. The cached type is reset here as
 * well, because an empty collection never reaches addAllele() and would otherwise leave a stale type.
 *
 * @param alleles the new alleles for this context; must be non-null
 */
public void setAlleles(Collection<Allele> alleles) {
    // snapshot first: the argument may alias the set we are about to clear
    final List<Allele> incoming = new ArrayList<Allele>(alleles);

    this.alleles.clear();
    type = Type.UNDETERMINED;   // the allele set changed, so any cached type is no longer trustworthy

    for ( Allele a : incoming ) {
        addAllele(a);
    }
}
/**
* Adds allele to the segregating allele list in this context to the collection of alleles. The new
* allele must be be unique according to equals() in Allele.
* Validate() should be called when you are done modifying the context.
*
* @param allele
*/
public void addAllele(Allele allele) {
addAllele(allele, false);
}
final boolean allowDuplicates = false; // used to be a parameter
public void addAllele(Allele allele, boolean allowDuplicates) {
type = Type.UNDETERMINED;
for ( Allele a : alleles ) {
@ -431,21 +591,35 @@ public class VariantContext extends AttributedObject {
// ---------------------------------------------------------------------------------------------------------
/**
* @return true if the context represents variants with associated genotypes
* @return true if the context has associated genotypes
*/
public boolean hasGenotypes() { return genotypes.size() > 0; }
public boolean hasSingleSample() { return genotypes.size() == 1; }
/**
 * @return the sampleName -> Genotype map for all genotypes associated with this context; this is the
 *         internal map itself, not a copy
 */
public Map<String, Genotype> getGenotypes() {
    return this.genotypes;
}
/**
 * Looks up the genotype bound to a single sample by delegating to the collection-based lookup with a
 * one-element list, so the result is a map just like the multi-sample form.
 *
 * @param sampleName the sample whose genotype is wanted
 * @return a map from sampleName -> Genotype, as produced by getGenotypes(Collection)
 * @throws IllegalArgumentException if sampleName isn't bound to a genotype (as raised by the
 *         collection-based lookup)
 */
public Map<String, Genotype> getGenotypes(String sampleName) {
    final List<String> singleton = Arrays.asList(sampleName);
    return getGenotypes(singleton);
}
/**
* Returns a map from sampleName -> Genotype for each sampleName in sampleNames. Returns a map
* for consistency with the multi-get function.
*
* @param sampleNames a unique list of sample names
* @return
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/
public Map<String, Genotype> getGenotypes(Collection<String> sampleNames) {
HashMap<String, Genotype> map = new HashMap<String, Genotype>();
@ -464,6 +638,20 @@ public class VariantContext extends AttributedObject {
return getGenotypes().keySet();
}
/**
 * Fetches the genotype bound to a sample name.
 *
 * @param sample the sample name
 * @return the Genotype associated with the given sample in this context, or null if the sample is not
 *         in this context
 */
public Genotype getGenotype(String sample) {
    final Map<String, Genotype> bySample = getGenotypes();
    return bySample.get(sample);
}
/**
 * @param sample the sample name
 * @return true if a genotype is bound to the given sample name in this context
 */
public boolean hasGenotype(String sample) {
    final Map<String, Genotype> bySample = getGenotypes();
    return bySample.containsKey(sample);
}
/**
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
*
@ -496,7 +684,8 @@ public class VariantContext extends AttributedObject {
}
/**
* These are genotype-specific functions
* Genotype-specific functions -- are the genotypes monomorphic w.r.t. the alleles segregating at this
* site? That is, is the number of alternate alleles among all of the genotypes == 0?
*
* @return
*/
@ -504,65 +693,106 @@ public class VariantContext extends AttributedObject {
return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount();
}
/**
 * Genotype-specific function -- are the genotypes polymorphic w.r.t. the alleles segregating at this
 * site? Defined as the exact negation of isMonomorphic().
 *
 * @return true if isMonomorphic() is false
 */
public boolean isPolymorphic() {
    final boolean monomorphic = isMonomorphic();
    return ! monomorphic;
}
/**
 * Removes every sampleName -> Genotype binding from this context.
 */
public void clearGenotypes() {
    genotypes.clear();
}
/**
* @param sample the sample name
*
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
* Adds this single genotype to the context, not allowing duplicate genotypes to be added
* @param genotype
*/
public Genotype getGenotype(String sample) {
return getGenotypes().get(sample);
public void addGenotypes(Genotype genotype) {
putGenotype(genotype.getSampleName(), genotype, false);
}
public boolean hasGenotype(String sample) {
return getGenotypes().containsKey(sample);
}
public void setGenotypes(Genotype genotype) {
this.genotypes.clear();
addGenotype(genotype);
}
public void setGenotypes(Collection<Genotype> genotypes) {
this.genotypes.clear();
/**
* Adds these genotypes to the context, not allowing duplicate genotypes to be added
* @param genotypes
*/
public void addGenotypes(Collection<Genotype> genotypes) {
for ( Genotype g : genotypes ) {
addGenotype(g.getSampleName(), g);
addGenotype(g);
}
}
public void setGenotypes(Map<String, Genotype> genotypes) {
this.genotypes.clear();
/**
* Adds these genotypes to the context, not allowing duplicate genotypes to be added.
* @param genotypes
*/
public void addGenotypes(Map<String, Genotype> genotypes) {
for ( Map.Entry<String, Genotype> elt : genotypes.entrySet() ) {
addGenotype(elt.getKey(), elt.getValue());
addGenotype(elt.getValue());
}
}
public void addGenotypes(Map<String, Genotype> genotypes) {
/**
* Adds these genotypes to the context.
*
* @param genotypes
*/
public void putGenotypes(Map<String, Genotype> genotypes) {
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() )
addGenotype(g.getKey(), g.getValue());
putGenotype(g.getKey(), g.getValue());
}
public void addGenotypes(Collection<Genotype> genotypes) {
/**
* Adds these genotypes to the context.
*
* @param genotypes
*/
public void putGenotypes(Collection<Genotype> genotypes) {
for ( Genotype g : genotypes )
addGenotype(g);
putGenotype(g);
}
/**
* Adds this genotype to the context, throwing an error if it's already bound.
*
* @param genotype
*/
public void addGenotype(Genotype genotype) {
addGenotype(genotype.getSampleName(), genotype, false);
addGenotype(genotype.getSampleName(), genotype);
}
/**
* Adds this genotype to the context, throwing an error if it's already bound.
*
* @param genotype
*/
public void addGenotype(String sampleName, Genotype genotype) {
addGenotype(sampleName, genotype, false);
putGenotype(sampleName, genotype, false);
}
public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
/**
* Adds this genotype to the context.
*
* @param genotype
*/
public void putGenotype(Genotype genotype) {
putGenotype(genotype.getSampleName(), genotype);
}
/**
* Adds this genotype to the context.
*
* @param genotype
*/
public void putGenotype(String sampleName, Genotype genotype) {
putGenotype(sampleName, genotype, true);
}
private void putGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
if ( hasGenotype(sampleName) && ! allowOverwrites )
throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this);
@ -572,6 +802,10 @@ public class VariantContext extends AttributedObject {
this.genotypes.put(sampleName, genotype);
}
/**
* Removes the binding from sampleName to genotype. If this doesn't exist, throws an IllegalArgumentException
* @param sampleName
*/
public void removeGenotype(String sampleName) {
if ( ! this.genotypes.containsKey(sampleName) )
throw new IllegalArgumentException("Sample name isn't contained in genotypes " + sampleName + " genotypes =" + genotypes);
@ -579,6 +813,10 @@ public class VariantContext extends AttributedObject {
this.genotypes.remove(sampleName);
}
/**
 * Removes genotype from the context by delegating to removeGenotype(String) with the genotype's sample
 * name. If no genotype is bound to that sample name, an IllegalArgumentException is thrown.
 *
 * @param genotype the genotype whose sample binding should be removed
 */
public void removeGenotype(Genotype genotype) {
    final String boundSample = genotype.getSampleName();
    removeGenotype(boundSample);
}
@ -598,7 +836,7 @@ public class VariantContext extends AttributedObject {
return validate(true);
}
public boolean validate(boolean throwException) {
private boolean validate(boolean throwException) {
try {
validateAlleles();
validateGenotypes();
@ -713,25 +951,4 @@ public class VariantContext extends AttributedObject {
return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s",
getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes().values());
}
// todo -- move to utils
/**
* @param allele the allele to be queried
*
* @return the frequency of the given allele in this context
*/
// public double getAlleleFrequency(Allele allele) {
// int alleleCount = 0;
// int totalCount = 0;
//
// for ( Genotype g : getGenotypes().values() ) {
// for ( Allele a : g.getAlleles() ) {
// totalCount++;
// if ( allele.equals(a) )
// alleleCount++;
// }
// }
//
// return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
// }
}

View File

@ -89,11 +89,15 @@ public class VariantContextAdaptors {
double pError = vcfG.getNegLog10PError() == VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY ? AttributedObject.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError();
Genotype g = new Genotype(vc, alleleStrings, vcfG.getSampleName(), pError);
for ( Map.Entry<String, String> e : vcfG.getFields().entrySet() ) {
if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) )
if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) && ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) )
g.putAttribute(e.getKey(), e.getValue());
}
if ( vcfG.isFiltered() ) // setup the FL genotype filter fields
g.setFilters(Arrays.asList(vcfG.getFields().get(VCFGenotypeRecord.GENOTYPE_FILTER_KEY.split(";"))));
vc.addGenotype(g);
}

View File

@ -39,6 +39,10 @@ public class VariantContextUtils {
return exps;
}
// todo -- add generalize matching routine here
// todo -- should fill in all fields (loc, filter, etc) for selection
// todo -- genotypes should be sampleNAME.field -> value bindings
private static final String UNIQUIFIED_SUFFIX = ".unique";
/**

View File

@ -19,7 +19,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
public static final String GENOTYPE_QUALITY_KEY = "GQ";
public static final String DEPTH_KEY = "DP";
public static final String HAPLOTYPE_QUALITY_KEY = "HQ";
public static final String FILTER_KEY = "FT";
public static final String GENOTYPE_FILTER_KEY = "FT";
public static final String OLD_DEPTH_KEY = "RD";
// the values for empty fields
@ -194,7 +194,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
}
public boolean isFiltered() {
return ( mFields.get(FILTER_KEY) != null && ! mFields.get(FILTER_KEY).equals("0"));
return ( mFields.get(GENOTYPE_FILTER_KEY) != null && ! mFields.get(GENOTYPE_FILTER_KEY).equals("0"));
}
public int getPloidy() {
@ -291,7 +291,7 @@ public class VCFGenotypeRecord implements Genotype, SampleBacked {
result = String.valueOf(MISSING_GENOTYPE_QUALITY);
else if ( field.equals(DEPTH_KEY) )
result = String.valueOf(MISSING_DEPTH);
else if ( field.equals(FILTER_KEY) )
else if ( field.equals(GENOTYPE_FILTER_KEY) )
result = UNFILTERED;
// TODO -- support haplotype quality
//else if ( field.equals(HAPLOTYPE_QUALITY_KEY) )

View File

@ -15,8 +15,6 @@ import org.junit.BeforeClass;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.Collection;
import java.io.FileNotFoundException;
import java.io.File;