1. Removing an old version of the Genotype interface which is no longer being used. Needed to do this now so that the naming conflicts would cease.
2. Adding a preliminary version of the new Genotype/Allele interface (putting it into refdata/ as the VariantContext really only applies to rods) with updates to VariantContext. This is by no means complete - further updates coming tomorrow. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2533 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a9245a58e2
commit
6c739e30e0
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.gatk.contexts;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
||||
import java.util.*;
|
||||
import org.apache.commons.jexl.*;
|
||||
|
|
@ -11,159 +11,305 @@ import org.apache.commons.jexl.*;
|
|||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Interface VariantContext
|
||||
* Class VariantContext
|
||||
* <p/>
|
||||
* This class represents a context that unifies one or more variants
|
||||
*/
|
||||
public interface VariantContext {
|
||||
public class VariantContext {
|
||||
|
||||
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||
|
||||
private Set<Allele> alleles;
|
||||
|
||||
private Set<Genotype> genotypes;
|
||||
|
||||
private Allele reference;
|
||||
|
||||
private GenomeLoc loc;
|
||||
|
||||
private HashMap<Object, Object> attributes;
|
||||
|
||||
|
||||
public VariantContext(VariationRod rod) {
|
||||
|
||||
// TODO -- VariationRod should eventually require classes to implement toVariationContext()
|
||||
// TODO -- (instead of using a temporary adapter class)
|
||||
|
||||
loc = rod.getLocation();
|
||||
reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
|
||||
|
||||
// TODO -- populate the alleles and genotypes through an adapter
|
||||
alleles = new HashSet<Allele>();
|
||||
genotypes = new HashSet<Genotype>();
|
||||
|
||||
attributes = new HashMap<Object, Object>();
|
||||
}
|
||||
|
||||
protected VariantContext(GenomeLoc loc, Allele reference, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
|
||||
this.loc = loc;
|
||||
this.reference = reference;
|
||||
this.alleles = new HashSet<Allele>(alleles);
|
||||
this.genotypes = new HashSet<Genotype>(genotypes);
|
||||
this.attributes = new HashMap<Object, Object>(attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param other another variant context
|
||||
*
|
||||
* throws an exception if there is a collision such that the same sample exists in both contexts
|
||||
* @return a context representing the merge of this context and the other provided context
|
||||
*/
|
||||
public VariantContext merge(VariantContext other) {
|
||||
return merge(other, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param other another variant context
|
||||
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
|
||||
* the samples will be uniquified(based on their sources);
|
||||
* otherwise, an exception will be thrown
|
||||
*
|
||||
* @return a context representing the merge of this context and the other provided context
|
||||
*/
|
||||
public VariantContext merge(VariantContext other, boolean uniquifySamples) {
|
||||
if ( !loc.equals(other.getLocation()) )
|
||||
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
|
||||
|
||||
Set<String> samples = getSampleNames();
|
||||
Set<Genotype> Gs = new HashSet<Genotype>(genotypes);
|
||||
|
||||
for ( Genotype g : other.getGenotypes() ) {
|
||||
if ( samples.contains(g.getSample()) ) {
|
||||
if ( uniquifySamples )
|
||||
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
|
||||
else
|
||||
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
|
||||
}
|
||||
Gs.add(g);
|
||||
}
|
||||
|
||||
HashMap<Object, Object> attrs = new HashMap<Object, Object>(attributes);
|
||||
attrs.putAll(other.getAttributes());
|
||||
|
||||
return createNewContext(Gs, attrs);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the location of this context
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
/**
|
||||
* @return the reference base or bases for this context, as a string
|
||||
* @return the reference allele for this context
|
||||
*/
|
||||
public String getReference();
|
||||
public Allele getReference() { return reference; }
|
||||
|
||||
/**
|
||||
* @return true if the context is variant (i.e. contains a non-reference allele)
|
||||
*/
|
||||
public boolean isVariant();
|
||||
public boolean isVariant() {
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isVariant() )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the context is strictly bi-allelic
|
||||
*/
|
||||
public boolean isBiallelic();
|
||||
public boolean isBiallelic() {
|
||||
return getAlternateAlleles().size() == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the context represents point mutations only (i.e. no indels or structural variants)
|
||||
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
|
||||
*/
|
||||
public boolean isPointMutation();
|
||||
public boolean isPointAllele() {
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isVariant() && !allele.isSNP() )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return true if the context contains data for the given sample
|
||||
* @return the set of all sample names in this context
|
||||
*/
|
||||
public boolean hasSample(String sample);
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return true if the context contains data for only a single instance of the given sample
|
||||
*/
|
||||
public boolean hasSingleInstanceOfSample(String sample);
|
||||
|
||||
/**
|
||||
* @return the number of samples in this context.
|
||||
* If a given sample has more than one instance in this context, each one is counted separately
|
||||
*/
|
||||
public int size();
|
||||
|
||||
/**
|
||||
* @return the number of unique samples in this context
|
||||
*/
|
||||
public int numSamples();
|
||||
|
||||
/**
|
||||
* @return the set of all unique sample names in this context
|
||||
*/
|
||||
public Set<String> getSampleNames();
|
||||
public Set<String> getSampleNames() {
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
for ( Genotype g : genotypes )
|
||||
samples.add(g.getSample());
|
||||
return samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the context represents variants with associated genotypes
|
||||
*/
|
||||
public boolean hasGenotypes();
|
||||
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
||||
|
||||
/**
|
||||
* @return set of all Genotypes associated with this context
|
||||
*/
|
||||
public Set<Genotype> getGenotypes();
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return set of all Genotypes associated with the given sample in this context
|
||||
*/
|
||||
public Set<Genotype> getGenotypes(String sample);
|
||||
public Set<Genotype> getGenotypes() { return genotypes; }
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
|
||||
* throws an exception if multiple instances of the sample exist in the context
|
||||
*/
|
||||
public Genotype getGenotype(String sample);
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return the Variation associated with the given sample in this context or null if the sample is not in this context
|
||||
*/
|
||||
public Variation toVariation(String sample);
|
||||
public Genotype getGenotype(String sample) {
|
||||
for ( Genotype g : genotypes ) {
|
||||
if ( g.getSample().equals(sample) )
|
||||
return g;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return set of all subclasses within this context
|
||||
*/
|
||||
public Set<String> getSubclasses();
|
||||
public Set<Object> getSubclasses() {
|
||||
Set<Object> subclasses = new HashSet<Object>();
|
||||
for ( Genotype g : genotypes )
|
||||
subclasses.addAll(g.getAttributes().keySet());
|
||||
return subclasses;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param subclass the name of a subclass of variants to select
|
||||
*
|
||||
* @return a subset of this context which selects based on the given subclass
|
||||
*/
|
||||
public VariantContext select(String subclass);
|
||||
public VariantContext select(String subclass) {
|
||||
HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||
for ( Genotype g : genotypes ) {
|
||||
if ( g.getAttribute(subclass) != null )
|
||||
Gs.add(g);
|
||||
}
|
||||
return createNewContext(Gs, attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param expr a jexl expression describing how to filter this context
|
||||
*
|
||||
* @return a subset of this context which is filtered based on the given expression
|
||||
*/
|
||||
public VariantContext filter(String expr);
|
||||
public VariantContext filter(String expr) {
|
||||
HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||
try {
|
||||
Expression filterExpression = ExpressionFactory.createExpression(expr);
|
||||
|
||||
for ( Genotype g : genotypes ) {
|
||||
JexlContext jContext = JexlHelper.createContext();
|
||||
jContext.setVars(g.getAttributes());
|
||||
if ( (Boolean)filterExpression.evaluate(jContext) )
|
||||
Gs.add(g);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new StingException("JEXL error in VariantContext: " + e.getMessage());
|
||||
}
|
||||
|
||||
return createNewContext(Gs, attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a set of variant contexts where each one represents (all instances of) a single sample from this context
|
||||
* @return a set of new variant contexts, one for each sample from this context
|
||||
*/
|
||||
public Set<VariantContext> splitBySample();
|
||||
public Set<VariantContext> splitBySample() {
|
||||
Set<VariantContext> contexts = new HashSet<VariantContext>();
|
||||
for ( Genotype g : genotypes ) {
|
||||
HashSet<Genotype> gAsSet = new HashSet<Genotype>();
|
||||
gAsSet.add(g);
|
||||
contexts.add(createNewContext(gAsSet, attributes));
|
||||
}
|
||||
return contexts;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the alternate allele frequency of this variant context, if the context is variable.
|
||||
* If this context is not variable or a frequency cannot be provided, this method should return 0.
|
||||
* @param Gs the set of genotypes from which to create a new context
|
||||
* @param attrs the attributes for the new context
|
||||
*
|
||||
* WARNING: This method is valid only for bi-allelic contexts; the contract is to check isBiallelic()
|
||||
* before calling this method
|
||||
*
|
||||
* @return double the minor allele frequency
|
||||
* @return a new context based on the given genotypes
|
||||
*/
|
||||
public double getNonRefAlleleFrequency();
|
||||
private VariantContext createNewContext(Set<Genotype> Gs, HashMap<Object, Object> attrs) {
|
||||
HashSet<Allele> As = new HashSet<Allele>();
|
||||
for ( Genotype g : Gs )
|
||||
As.addAll(g.getAlleles());
|
||||
return new VariantContext(loc, reference, As, Gs, attrs);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
* @param allele the allele to be queried
|
||||
*
|
||||
* @return the postive number space log based error estimate
|
||||
* @return the frequency of the given allele in this context
|
||||
*/
|
||||
public double getNegLog10PError();
|
||||
public double getAlleleFrequency(Allele allele) {
|
||||
|
||||
int alleleCount = 0;
|
||||
int totalCount = 0;
|
||||
|
||||
for ( Genotype g : genotypes ) {
|
||||
for ( Allele a : g.getAlleles() ) {
|
||||
totalCount++;
|
||||
if ( allele.equals(a) )
|
||||
alleleCount++;
|
||||
}
|
||||
}
|
||||
|
||||
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all of the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles. If the reference base is not an allele in this varation
|
||||
* it will not be in the list (i.e. there is no guarantee that the reference base is in the list).
|
||||
* Gets the alleles. This method should return all of the alleles present at the location,
|
||||
* including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||
* in the set. If the reference is not an allele in this context it will not be included.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
* @return the set of alleles
|
||||
*/
|
||||
public List<String> getAlleles();
|
||||
public Set<Allele> getAlleles() { return alleles; }
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
* Gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||
* in the set.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
* @return the set of alternate alleles
|
||||
*/
|
||||
public List<String> getAlternateAlleleList();
|
||||
public Set<Allele> getAlternateAlleles() {
|
||||
HashSet<Allele> altAlleles = new HashSet<Allele>();
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( !allele.equals(reference) )
|
||||
altAlleles.add(allele);
|
||||
}
|
||||
return altAlleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the given attribute
|
||||
*
|
||||
* @param key the attribute key
|
||||
* @param value the attribute value
|
||||
*/
|
||||
public void setAttribute(Object key, Object value) {
|
||||
attributes.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the attribute key
|
||||
*
|
||||
* @return the attribute value for the given key (or null if not set)
|
||||
*/
|
||||
public Object getAttribute(Object key) {
|
||||
return attributes.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the attribute map
|
||||
*/
|
||||
public Map<Object, Object> getAttributes() {
|
||||
return attributes;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,79 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class Allele
|
||||
* <p/>
|
||||
* This class emcompasses all the basic information about an allele
|
||||
*/
|
||||
public class Allele {
|
||||
|
||||
private AlleleType type;
|
||||
|
||||
private String bases;
|
||||
|
||||
|
||||
// the types of variants we currently allow
|
||||
public enum AlleleType {
|
||||
REFERENCE, SNP, INSERTION, DELETION, INVERSION
|
||||
}
|
||||
|
||||
public Allele(AlleleType type, String bases) {
|
||||
this.type = type;
|
||||
if ( bases == null )
|
||||
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
|
||||
if ( type == AlleleType.DELETION && bases.length() > 0 )
|
||||
throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
|
||||
this.bases = bases.toUpperCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* convenience method for switching over the allele type
|
||||
*
|
||||
* @return the AlleleType of this allele
|
||||
**/
|
||||
public AlleleType getType() { return type; }
|
||||
|
||||
/**
|
||||
* convenience method for SNPs
|
||||
*
|
||||
* @return true if this is a SNP, false otherwise
|
||||
*/
|
||||
public boolean isSNP() { return type == AlleleType.SNP; }
|
||||
|
||||
/**
|
||||
* convenience method for variants
|
||||
*
|
||||
* @return true if this is a variant allele, false if it's reference
|
||||
*/
|
||||
public boolean isVariant() { return type != AlleleType.REFERENCE; }
|
||||
|
||||
|
||||
/**
|
||||
* convenience method for indels
|
||||
*
|
||||
* @return true if this is an indel, false otherwise
|
||||
*/
|
||||
public boolean isIndel() { return type == AlleleType.INSERTION || type == AlleleType.DELETION; }
|
||||
|
||||
|
||||
/**
|
||||
* For deletions, this method returns an empty String.
|
||||
* For everything else, observed bases for the allele are returned.
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
public String getBases() { return bases; }
|
||||
|
||||
/**
|
||||
* @param other the other allele
|
||||
*
|
||||
* @return true if these alleles are equal
|
||||
*/
|
||||
public boolean equals(Allele other) {
|
||||
return type == other.getType() && bases.equals(other.getBases());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,172 +1,147 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Apr 1, 2009
|
||||
* Time: 11:02:03 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Deprecated
|
||||
public interface Genotype extends Comparable<ReferenceOrderedDatum> {
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// manipulating the SNP information
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
/** Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
GenomeLoc getLocation();
|
||||
|
||||
/** Returns actual observed alleles on the fwd strand. Allowed to return more than two alleles (@see #getPloidy()).
|
||||
* For point genotypes (those that report themselves as isPointGenotype()), the alleles are observed bases. For indels
|
||||
* (i.e. if isIndelGenotype() is true), a no-event allele is represented by an empty string,
|
||||
* while the event allele is represented by '+' (insertion) or '-' (deletion) followed by actual inserted or deleted bases.
|
||||
* @return list of alleles
|
||||
*/
|
||||
List<String> getFWDAlleles() ;
|
||||
|
||||
|
||||
/** Returns bases on the fwd strand in the true reference allele (regardless of whether the reference allele
|
||||
* was observed at this site in this particular genotype!)
|
||||
* as a String. String can be empty (if alt allele is insertion into
|
||||
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
|
||||
* (if alt is a longer deletion). For the actually observed alleles, see #getFWDAlleles()
|
||||
*
|
||||
* @return reference allele, forward strand
|
||||
*/
|
||||
String getFWDRefBases();
|
||||
|
||||
/** Returns reference base (regardless of whether the reference allele was actually observed. This method
|
||||
* is similar to getFWDRefBases() except it returns a single char. For non-point variants (i.e. indels), behavior
|
||||
* of this method is not specified and can be defined by implementing classes.
|
||||
* @return
|
||||
*/
|
||||
char getRef() ;
|
||||
|
||||
/** Returns true if the object represents a point genotype (e.g. reference base vs. a SNP),
|
||||
* regardless of what the actual observation is (see isReference() and isSNP()).
|
||||
* @return
|
||||
*/
|
||||
boolean isPointGenotype() ;
|
||||
|
||||
/** Returns true if the object represents an extended (indel) genotype (e.g. insertion/deletion event vs. no event),
|
||||
* regardless of what the actual observation is (see isReference(), isIndel(), isInsertion(), isDeletion()).
|
||||
* @return
|
||||
*/
|
||||
boolean isIndelGenotype() ;
|
||||
|
||||
|
||||
/** Is this variant a SNP? Returns true only if this is point genotype AND a non-reference base was observed, false otherwise.
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
boolean isSNP();
|
||||
|
||||
/** Returns true if all observed alleles are reference ones (only reference base was observed if this is a point genotype,
|
||||
* or no indel events were observed if this is an indel genotype). All is"Variant" methods (where Variant=SNP,Insertion, etc) should
|
||||
* return false at a site where isReference() is true to ensure consistency.
|
||||
* @return
|
||||
*/
|
||||
boolean isReference();
|
||||
|
||||
|
||||
/** Is this variant an insertion? Returns true if this is indel genotype AND only insertion event(s) was observed,
|
||||
* false otherwise. The contract requires isIndel() to return true
|
||||
* whenever isInsertion() method returns true.
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
boolean isInsertion();
|
||||
|
||||
|
||||
/** Is this variant a deletion? Returns true if this is indel genotype AND only deletion event(s) was observed,
|
||||
* false otherwise. The contract requires isIndel() to return true
|
||||
* whenever isDeletion() method returns true.
|
||||
*
|
||||
* @return true or false
|
||||
*/
|
||||
boolean isDeletion();
|
||||
|
||||
|
||||
/** Is this variant an insertion or a deletion? The contract requires
|
||||
* this to be true if either isInsertion() or isDeletion() returns true. However,
|
||||
* this method is also allowed to return true even if neither isInsertion()
|
||||
* nor isDeletion() do (e.g. a het genotype with one insertion and one deletion). Always returns
|
||||
* false if the object does not represent indel genotype.
|
||||
* @return
|
||||
*/
|
||||
boolean isIndel();
|
||||
|
||||
|
||||
/** Returns phred-scaled confidence for the variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
double getVariantConfidence();
|
||||
|
||||
/** Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's
|
||||
* best vs next-best.
|
||||
* @return
|
||||
*/
|
||||
double getConsensusConfidence();
|
||||
|
||||
|
||||
/** Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
|
||||
* the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
|
||||
* individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i> bi-allelic (since there are
|
||||
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
|
||||
* @return
|
||||
*/
|
||||
boolean isBiallelic() ;
|
||||
|
||||
/** Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
boolean isHom() ;
|
||||
|
||||
/** Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
boolean isHet() ;
|
||||
|
||||
/** Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
|
||||
* if variant is not a SNP.
|
||||
*
|
||||
* @return reference base on the forward strand
|
||||
*/
|
||||
//char getRefSnpFWD() throws IllegalStateException;
|
||||
|
||||
/** Returns bases in the alternative allele as a String. String can be empty (as in deletion from
|
||||
* the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
|
||||
* (for longer indels).
|
||||
*
|
||||
* @return alternative allele, forward strand
|
||||
*/
|
||||
//String getAltBasesFWD();
|
||||
|
||||
/** Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
|
||||
* if variant is not a SNP.
|
||||
*
|
||||
* @return alternative allele base on the forward starnd
|
||||
*/
|
||||
// char getAltSnpFWD() throws IllegalStateException;
|
||||
|
||||
int length();
|
||||
|
||||
|
||||
/** Return actual number of observed alleles (chromosomes) in the genotype.
|
||||
* @return
|
||||
*/
|
||||
// int getPloidy() throws IllegalStateException;
|
||||
|
||||
}
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class Genotype
|
||||
* <p/>
|
||||
* This class emcompasses all the basic information about a genotype
|
||||
*/
|
||||
public class Genotype {
|
||||
|
||||
private List<Allele> alleles;
|
||||
|
||||
private double negLog10PError;
|
||||
|
||||
private String sample;
|
||||
|
||||
private HashMap<Object, Object> attributes;
|
||||
|
||||
|
||||
public Genotype(List<Allele> alleles, String sample, double negLog10PError) {
|
||||
this.alleles = new ArrayList<Allele>(alleles);
|
||||
this.sample = sample;
|
||||
this.negLog10PError = negLog10PError;
|
||||
attributes = new HashMap<Object, Object>();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the alleles for this genotype
|
||||
*/
|
||||
public List<Allele> getAlleles() { return alleles; }
|
||||
|
||||
/**
|
||||
* @return the ploidy of this genotype
|
||||
*/
|
||||
public int getPloidy() { return alleles.size(); }
|
||||
|
||||
/**
|
||||
* @return true if all observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*/
|
||||
public boolean isHom() {
|
||||
|
||||
// do not assume ploidy == 2
|
||||
|
||||
if ( alleles.size() == 0 )
|
||||
return true;
|
||||
|
||||
Allele first = alleles.get(0);
|
||||
for ( int i = 1; i < alleles.size(); i++ ) {
|
||||
if ( !first.equals(alleles.get(i)) )
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if we're het (observed alleles differ)
|
||||
*/
|
||||
public boolean isHet() { return !isHom(); }
|
||||
|
||||
/**
|
||||
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
|
||||
*/
|
||||
public boolean isNoCall() {
|
||||
// TODO -- implement me
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if all alleles for this genotype are SNPs or reference
|
||||
*/
|
||||
public boolean isPointGenotype() {
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isVariant() && !allele.isSNP() )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if this is a variant genotype, false if it's reference
|
||||
*/
|
||||
public boolean isVariant() {
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isVariant() )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the -1 * log10-based error estimate
|
||||
*/
|
||||
public double getNegLog10PError() { return negLog10PError; }
|
||||
|
||||
/**
|
||||
* @return the sample name
|
||||
*/
|
||||
public String getSample() { return sample; }
|
||||
|
||||
/**
|
||||
* Sets the sample name
|
||||
*
|
||||
* @param sample the sample name
|
||||
*/
|
||||
public void setSample(String sample) {
|
||||
this.sample = sample;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param ref the reference base
|
||||
*
|
||||
* @return this genotype as a variation
|
||||
*/
|
||||
// TODO -- implement me
|
||||
// public Variation toVariation(char ref);
|
||||
|
||||
/**
|
||||
* Sets the given attribute
|
||||
*
|
||||
* @param key the attribute key
|
||||
* @param value the attribute value
|
||||
*/
|
||||
public void setAttribute(Object key, Object value) {
|
||||
attributes.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the attribute key
|
||||
*
|
||||
* @return the attribute value for the given key (or null if not set)
|
||||
*/
|
||||
public Object getAttribute(Object key) {
|
||||
return attributes.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the attribute map
|
||||
*/
|
||||
public Map<Object, Object> getAttributes() {
|
||||
return attributes;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
/** A simple wrapper interface that allows for keeping any of the "main" (point mutation) or "associated" (indel)
|
||||
* genotypes, or both, at any given reference position. This is the abstraction of "complete" genotyping information:
|
||||
* one might have or might have not genotyped independently a base at the given position (point) or an indel right after
|
||||
* that position. The query and getter methods allow clients to figure out what pieces of the genotyping information
|
||||
* are actually available and to retrieve them. Behavior of the getters when corresponding queries return false is
|
||||
* not bound by a contract but it is suggested that they return null or a "trivial" (reference) genotype.
|
||||
*
|
||||
* @author asivache
|
||||
*
|
||||
*/
|
||||
|
||||
public interface GenotypeList {
|
||||
/** Returns true if point mutation genotyping information is available at the
|
||||
* current position.
|
||||
* @return
|
||||
*/
|
||||
boolean hasPointGenotype();
|
||||
|
||||
/** Returns true if indel genotyping information is available at the
|
||||
* current position.
|
||||
* @return
|
||||
*/
|
||||
boolean hasIndelGenotype();
|
||||
|
||||
/** Returns point mutation genotyping information if available;
|
||||
* behavior is not specified in the case when there is no such information.
|
||||
* @return
|
||||
*/
|
||||
Genotype getPointGenotype();
|
||||
|
||||
/** Returns indel genotyping information if available;
|
||||
* behavior is not specified in the case when there is no such information.
|
||||
* @return
|
||||
*/
|
||||
Genotype getIndelGenotype();
|
||||
|
||||
/** Returns position on the reference the stored genotyping data are associated with.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
GenomeLoc getLocation();
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ import net.sf.samtools.util.StringUtil;
|
|||
*/
|
||||
|
||||
|
||||
public class SAMPileupRecord implements Genotype, GenotypeList {
|
||||
public class SAMPileupRecord {
|
||||
private static final int NO_VARIANT = -1;
|
||||
private static final int SNP_VARIANT = 0;
|
||||
private static final int INSERTION_VARIANT = 1;
|
||||
|
|
@ -428,13 +428,11 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
*
|
||||
* @return reference allele, forward strand
|
||||
*/
|
||||
@Override
|
||||
public String getFWDRefBases() {
|
||||
return refBases;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getFWDAlleles() {
|
||||
return observedAlleles;
|
||||
}
|
||||
|
|
@ -500,34 +498,28 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
}
|
||||
|
||||
|
||||
@Override
|
||||
public double getVariantConfidence() {
|
||||
return variantScore;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return nNonref < 2;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getConsensusConfidence() {
|
||||
return consensusScore;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return eventLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isIndelGenotype() {
|
||||
return refBaseChar == '*';
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean isPointGenotype() {
|
||||
return ! isIndelGenotype();
|
||||
}
|
||||
|
|
@ -537,8 +529,7 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
* point genotype, this method returns null.
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Genotype getIndelGenotype() {
|
||||
public SAMPileupRecord getIndelGenotype() {
|
||||
if ( isIndelGenotype() ) return this;
|
||||
else return null;
|
||||
}
|
||||
|
|
@ -548,8 +539,7 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
* indel genotype, this method returns null.
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public Genotype getPointGenotype() {
|
||||
public SAMPileupRecord getPointGenotype() {
|
||||
if ( isPointGenotype() ) return this;
|
||||
else return null;
|
||||
}
|
||||
|
|
@ -558,7 +548,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
* indel genotype is what it only has).
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public boolean hasIndelGenotype() {
|
||||
return isIndelGenotype();
|
||||
}
|
||||
|
|
@ -567,13 +556,10 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
* point genotype is what it only has.
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public boolean hasPointGenotype() {
|
||||
return isPointGenotype();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(ReferenceOrderedDatum o) {
|
||||
return getLocation().compareTo(o.getLocation());
|
||||
}
|
||||
|
|
@ -596,7 +582,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
rodName = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return parser.hasNext();
|
||||
}
|
||||
|
|
@ -605,7 +590,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
* @return the next element in the iteration.
|
||||
* @throws NoSuchElementException - iterator has no more elements.
|
||||
*/
|
||||
@Override
|
||||
public SAMPileupRecord next() {
|
||||
if (!this.hasNext()) throw new NoSuchElementException("SAMPileupRecord next called on iterator with no more elements");
|
||||
// if ( z == 0 ) t = System.currentTimeMillis();
|
||||
|
|
@ -621,7 +605,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
|
|||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod {
|
||||
public class SimpleIndelROD extends TabularROD implements VariationRod {
|
||||
|
||||
private boolean KGENOMES_FORMAT = false, checkedFormat = false;
|
||||
|
||||
|
|
@ -24,7 +24,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
*
|
||||
* @return the reference base or bases, as a string
|
||||
*/
|
||||
@Override
|
||||
public String getReference() {
|
||||
return String.valueOf(getRef());
|
||||
}
|
||||
|
|
@ -53,13 +52,11 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
*
|
||||
* @return VariantFrequency with the stored frequency
|
||||
*/
|
||||
@Override
|
||||
public double getNonRefAlleleFrequency() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
/** @return the VARIANT_TYPE of the current variant */
|
||||
@Override
|
||||
public VARIANT_TYPE getType() {
|
||||
return isInsertion() ? VARIANT_TYPE.INSERTION : VARIANT_TYPE.DELETION;
|
||||
}
|
||||
|
|
@ -85,7 +82,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getAlternativeBaseForSNP() {
|
||||
return getAltSnpFWD();
|
||||
}
|
||||
|
|
@ -95,7 +91,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
*
|
||||
* @return a char, representing the alternate base
|
||||
*/
|
||||
@Override
|
||||
public char getReferenceForSNP() {
|
||||
return getRefSnpFWD();
|
||||
}
|
||||
|
|
@ -110,7 +105,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
@Override
|
||||
public double getNegLog10PError() {
|
||||
return getVariationConfidence();
|
||||
}
|
||||
|
|
@ -123,7 +117,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlternateAlleleList() {
|
||||
List<String> ret = getAlleleList();
|
||||
for (String val : ret) {
|
||||
|
|
@ -139,7 +132,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
|
|||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
@Override
|
||||
public List<String> getAlleleList() {
|
||||
return this.getFWDAlleles();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ import net.sf.picard.reference.ReferenceSequenceFileWalker;
|
|||
|
||||
|
||||
|
||||
public class rodSAMPileup extends BasicReferenceOrderedDatum implements GenotypeList {
|
||||
public class rodSAMPileup extends BasicReferenceOrderedDatum {
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
|
|
@ -71,22 +71,18 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
|
|||
return b.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Genotype getIndelGenotype() {
|
||||
public SAMPileupRecord getIndelGenotype() {
|
||||
return indelGenotype;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Genotype getPointGenotype() {
|
||||
public SAMPileupRecord getPointGenotype() {
|
||||
return pointGenotype;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasIndelGenotype() {
|
||||
return indelGenotype != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasPointGenotype() {
|
||||
return pointGenotype != null;
|
||||
}
|
||||
|
|
@ -108,7 +104,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
|
|||
rodName = name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return parser.hasNext();
|
||||
}
|
||||
|
|
@ -117,7 +112,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
|
|||
* @return the next element in the iteration.
|
||||
* @throws NoSuchElementException - iterator has no more elements.
|
||||
*/
|
||||
@Override
|
||||
public rodSAMPileup next() {
|
||||
if (!this.hasNext()) throw new NoSuchElementException("rodSAMPileup next called on iterator with no more elements");
|
||||
rodSAMPileup result = new rodSAMPileup(rodName);
|
||||
|
|
@ -170,7 +164,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
|
|||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
public boolean CONTINUE_AFTER_AN_ERROR = false;
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
SAMPileupRecord truePileup = getTruePileup( tracker );
|
||||
|
||||
if ( truePileup == null ) {
|
||||
|
|
@ -105,9 +105,9 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
|
|||
return null;
|
||||
|
||||
if( pileup.hasPointGenotype() )
|
||||
return (SAMPileupRecord)pileup.getPointGenotype();
|
||||
return pileup.getPointGenotype();
|
||||
else if( pileup.hasIndelGenotype() )
|
||||
return (SAMPileupRecord)pileup.getIndelGenotype();
|
||||
return pileup.getIndelGenotype();
|
||||
else
|
||||
throw new StingException("Unsupported pileup type: " + pileup);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,216 +0,0 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.Genotype;
|
||||
import org.broadinstitute.sting.gatk.refdata.GenotypeList;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/** Holds useful utility methods and auxiliary default classes for working with Genotype objects
|
||||
*
|
||||
* @author asivache
|
||||
*
|
||||
*/
|
||||
public class GenotypeUtils {
|
||||
|
||||
public static enum VariantType {
|
||||
POINT, INDEL;
|
||||
// VariantType() {}
|
||||
}
|
||||
|
||||
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
|
||||
* (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself
|
||||
* will be typecasted and returned back if it implements Genotype and represents the specified variant type. If the last argument is false, then
|
||||
* null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call
|
||||
* of the other type was made, or b) rod is a Genotype and its type is difference from variant_type, then it will be assumed that the implicit ref/ref call
|
||||
* of the specified type also exists at this genomic position, and new object representing such default call will be returned. If rod argument
|
||||
* is null, then this method safely (and silently) returns null.
|
||||
*
|
||||
* @param rod GenotypeList or Genotype to extract requested call from/upgrade to requested call
|
||||
* @param variant_type type of the variant to extract (POINT mutations or INDEL)
|
||||
* @param assume_implicit_ref_calls true if presence of only a call of different type means that ref/ref call of the requested type is implicitly present
|
||||
* @return Genotyping call of the requested type or null if no explicit or implicit call was made.
|
||||
*/
|
||||
public static Genotype extractGenotype(ReferenceOrderedDatum rod, VariantType variant_type, boolean assume_implicit_ref_calls) {
|
||||
|
||||
if ( rod == null ) return null;
|
||||
|
||||
if ( rod instanceof GenotypeList ) {
|
||||
|
||||
GenotypeList rod_gl = (GenotypeList)rod;
|
||||
switch ( variant_type ) {
|
||||
case POINT :
|
||||
if ( rod_gl.hasPointGenotype() ) return rod_gl.getPointGenotype();
|
||||
else {
|
||||
if ( assume_implicit_ref_calls && rod_gl.hasIndelGenotype() ) throw new StingException("Default (reference) implicit POINT genotype is not implemented yet");
|
||||
else return null;
|
||||
}
|
||||
case INDEL:
|
||||
if ( rod_gl.hasIndelGenotype() ) return rod_gl.getIndelGenotype();
|
||||
else {
|
||||
if ( assume_implicit_ref_calls && rod_gl.hasPointGenotype() ) return new DefaultIndelGenotype(rod_gl.getLocation());
|
||||
else return null;
|
||||
}
|
||||
default: throw new StingException("Unrecognized variant type: "+variant_type);
|
||||
}
|
||||
|
||||
} else if ( rod instanceof Genotype ) {
|
||||
|
||||
Genotype rod_g = (Genotype)rod;
|
||||
switch ( variant_type ) {
|
||||
case POINT:
|
||||
if ( rod_g.isIndelGenotype() ) {
|
||||
if ( assume_implicit_ref_calls ) throw new StingException("Default (reference) implicit POINT genotype is not implemented yet");
|
||||
else return null;
|
||||
} else return rod_g;
|
||||
case INDEL:
|
||||
if ( rod_g.isPointGenotype() ) {
|
||||
if ( assume_implicit_ref_calls ) return new DefaultIndelGenotype(rod_g.getLocation());
|
||||
else return null;
|
||||
} else return rod_g;
|
||||
default: throw new StingException("Unrecognized variant type: "+variant_type);
|
||||
}
|
||||
|
||||
}
|
||||
else throw new StingException("track "+rod.getName()+" is not a Genotype or GenotypeList");
|
||||
}
|
||||
|
||||
public static boolean isHet(Variation var) {
|
||||
if ( var instanceof Genotype )
|
||||
return ((Genotype)var).isHet();
|
||||
|
||||
String genotype = Utils.join("",var.getAlleleList());
|
||||
if ( genotype.length() < 1 )
|
||||
return false;
|
||||
|
||||
char first = genotype.charAt(0);
|
||||
for (char base : genotype.toCharArray()) {
|
||||
if (base != first) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** This class represents a "default" indel-type genotype with homozygous reference (i.e. confidently no indel)
|
||||
* call. All the interface methods are implemented and return consistent values. Use this class when working with
|
||||
* genotyping data where absence of explicit indel call actually means that no evidence for an indel was observed
|
||||
* (SAM pileup is one such example).
|
||||
* @author asivache
|
||||
*
|
||||
*/
|
||||
public static class DefaultIndelGenotype implements Genotype {
|
||||
private GenomeLoc location;
|
||||
private static final List<String> alleles = Arrays.asList("","") ;
|
||||
private int confidence ;
|
||||
|
||||
/** Creates default indel genotype (ref/ref = no indel) at the specified position
|
||||
* with default consensus confidence of 1000 (variant confidence is 0).
|
||||
* @param l reference position to associate the genotyping call with
|
||||
*/
|
||||
public DefaultIndelGenotype(GenomeLoc l) {
|
||||
this(l,1000);
|
||||
}
|
||||
|
||||
/** Creates ref/ref (i.e. absense of indel) indel genotype at the specified position
|
||||
* with the specified consensus confidence (variant confidence is 0).
|
||||
* @param l reference position to associate the genotyping call with
|
||||
*/
|
||||
public DefaultIndelGenotype(GenomeLoc l, int confidence) {
|
||||
location = l;
|
||||
this.confidence = confidence;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getConsensusConfidence() {
|
||||
return confidence;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getFWDAlleles() {
|
||||
return alleles;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getFWDRefBases() {
|
||||
return alleles.get(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return location;
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getRef() {
|
||||
return '*';
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getVariantConfidence() {
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBiallelic() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isDeletion() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHet() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isHom() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isIndel() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isIndelGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isInsertion() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isPointGenotype() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isSNP() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return (int)(location.getStop() - location.getStart() + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(ReferenceOrderedDatum o) {
|
||||
return location.compareTo(o.getLocation());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.genotype.BasicVariation;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class GenotypeUtilsTest
|
||||
*
|
||||
* a test class for the various function in the genotype utils class
|
||||
*/
|
||||
public class GenotypeUtilsTest extends BaseTest {
|
||||
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
private static File vcfFile = new File(validationDataLocation + "vcfexample.vcf");
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* make sure that the variation is a het
|
||||
*/
|
||||
@Test
|
||||
public void isHetTest() {
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,1,2);
|
||||
BasicVariation var = new BasicVariation("AA", "A", 0, loc,0.0);
|
||||
Assert.assertTrue(!GenotypeUtils.isHet(var));
|
||||
BasicVariation var2 = new BasicVariation("AG", "A", 0, loc,0.0);
|
||||
Assert.assertTrue(GenotypeUtils.isHet(var2));
|
||||
BasicVariation var3 = new BasicVariation("GG", "A", 0, loc,0.0);
|
||||
Assert.assertTrue(!GenotypeUtils.isHet(var3));
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue