1. Removing an old version of the Genotype interface which is no longer being used. Needed to do this now so that the naming conflicts would cease.

2. Adding a preliminary version of the new Genotype/Allele interface (putting it into refdata/ as the VariantContext really only applies to rods) with updates to VariantContext.  This is by no means complete - further updates coming tomorrow.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2533 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-07 05:51:10 +00:00
parent a9245a58e2
commit 6c739e30e0
10 changed files with 465 additions and 611 deletions

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.contexts; package org.broadinstitute.sting.gatk.contexts;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Variation; import org.broadinstitute.sting.gatk.refdata.*;
import java.util.*; import java.util.*;
import org.apache.commons.jexl.*; import org.apache.commons.jexl.*;
@ -11,159 +11,305 @@ import org.apache.commons.jexl.*;
/** /**
* @author ebanks * @author ebanks
* <p/> * <p/>
* Interface VariantContext * Class VariantContext
* <p/> * <p/>
* This class represents a context that unifies one or more variants * This class represents a context that unifies one or more variants
*/ */
public interface VariantContext { public class VariantContext {
private static final String UNIQUIFIED_SUFFIX = ".unique";
private Set<Allele> alleles;
private Set<Genotype> genotypes;
private Allele reference;
private GenomeLoc loc;
private HashMap<Object, Object> attributes;
public VariantContext(VariationRod rod) {
// TODO -- VariationRod should eventually require classes to implement toVariationContext()
// TODO -- (instead of using a temporary adapter class)
loc = rod.getLocation();
reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
// TODO -- populate the alleles and genotypes through an adapter
alleles = new HashSet<Allele>();
genotypes = new HashSet<Genotype>();
attributes = new HashMap<Object, Object>();
}
protected VariantContext(GenomeLoc loc, Allele reference, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
this.loc = loc;
this.reference = reference;
this.alleles = new HashSet<Allele>(alleles);
this.genotypes = new HashSet<Genotype>(genotypes);
this.attributes = new HashMap<Object, Object>(attributes);
}
/**
* @param other another variant context
*
* throws an exception if there is a collision such that the same sample exists in both contexts
* @return a context representing the merge of this context and the other provided context
*/
public VariantContext merge(VariantContext other) {
return merge(other, false);
}
/**
* @param other another variant context
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
* the samples will be uniquified(based on their sources);
* otherwise, an exception will be thrown
*
* @return a context representing the merge of this context and the other provided context
*/
public VariantContext merge(VariantContext other, boolean uniquifySamples) {
if ( !loc.equals(other.getLocation()) )
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
Set<String> samples = getSampleNames();
Set<Genotype> Gs = new HashSet<Genotype>(genotypes);
for ( Genotype g : other.getGenotypes() ) {
if ( samples.contains(g.getSample()) ) {
if ( uniquifySamples )
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
else
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
}
Gs.add(g);
}
HashMap<Object, Object> attrs = new HashMap<Object, Object>(attributes);
attrs.putAll(other.getAttributes());
return createNewContext(Gs, attrs);
}
/** /**
* @return the location of this context * @return the location of this context
*/ */
public GenomeLoc getLocation(); public GenomeLoc getLocation() { return loc; }
/** /**
* @return the reference base or bases for this context, as a string * @return the reference allele for this context
*/ */
public String getReference(); public Allele getReference() { return reference; }
/** /**
* @return true if the context is variant (i.e. contains a non-reference allele) * @return true if the context is variant (i.e. contains a non-reference allele)
*/ */
public boolean isVariant(); public boolean isVariant() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() )
return true;
}
return false;
}
/** /**
* @return true if the context is strictly bi-allelic * @return true if the context is strictly bi-allelic
*/ */
public boolean isBiallelic(); public boolean isBiallelic() {
return getAlternateAlleles().size() == 1;
}
/** /**
* @return true if the context represents point mutations only (i.e. no indels or structural variants) * @return true if the context represents point alleles only (i.e. no indels or structural variants)
*/ */
public boolean isPointMutation(); public boolean isPointAllele() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() && !allele.isSNP() )
return false;
}
return true;
}
/** /**
* @param sample the sample name * @return the set of all sample names in this context
*
* @return true if the context contains data for the given sample
*/ */
public boolean hasSample(String sample); public Set<String> getSampleNames() {
Set<String> samples = new TreeSet<String>();
/** for ( Genotype g : genotypes )
* @param sample the sample name samples.add(g.getSample());
* return samples;
* @return true if the context contains data for only a single instance of the given sample }
*/
public boolean hasSingleInstanceOfSample(String sample);
/**
* @return the number of samples in this context.
* If a given sample has more than one instance in this context, each one is counted separately
*/
public int size();
/**
* @return the number of unique samples in this context
*/
public int numSamples();
/**
* @return the set of all unique sample names in this context
*/
public Set<String> getSampleNames();
/** /**
* @return true if the context represents variants with associated genotypes * @return true if the context represents variants with associated genotypes
*/ */
public boolean hasGenotypes(); public boolean hasGenotypes() { return genotypes.size() > 0; }
/** /**
* @return set of all Genotypes associated with this context * @return set of all Genotypes associated with this context
*/ */
public Set<Genotype> getGenotypes(); public Set<Genotype> getGenotypes() { return genotypes; }
/**
* @param sample the sample name
*
* @return set of all Genotypes associated with the given sample in this context
*/
public Set<Genotype> getGenotypes(String sample);
/** /**
* @param sample the sample name * @param sample the sample name
* *
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context * @return the Genotype associated with the given sample in this context or null if the sample is not in this context
* throws an exception if multiple instances of the sample exist in the context
*/ */
public Genotype getGenotype(String sample); public Genotype getGenotype(String sample) {
for ( Genotype g : genotypes ) {
/** if ( g.getSample().equals(sample) )
* @param sample the sample name return g;
* }
* @return the Variation associated with the given sample in this context or null if the sample is not in this context return null;
*/ }
public Variation toVariation(String sample);
/** /**
* @return set of all subclasses within this context * @return set of all subclasses within this context
*/ */
public Set<String> getSubclasses(); public Set<Object> getSubclasses() {
Set<Object> subclasses = new HashSet<Object>();
for ( Genotype g : genotypes )
subclasses.addAll(g.getAttributes().keySet());
return subclasses;
}
/** /**
* @param subclass the name of a subclass of variants to select * @param subclass the name of a subclass of variants to select
* *
* @return a subset of this context which selects based on the given subclass * @return a subset of this context which selects based on the given subclass
*/ */
public VariantContext select(String subclass); public VariantContext select(String subclass) {
HashSet<Genotype> Gs = new HashSet<Genotype>();
for ( Genotype g : genotypes ) {
if ( g.getAttribute(subclass) != null )
Gs.add(g);
}
return createNewContext(Gs, attributes);
}
/** /**
* @param expr a jexl expression describing how to filter this context * @param expr a jexl expression describing how to filter this context
* *
* @return a subset of this context which is filtered based on the given expression * @return a subset of this context which is filtered based on the given expression
*/ */
public VariantContext filter(String expr); public VariantContext filter(String expr) {
HashSet<Genotype> Gs = new HashSet<Genotype>();
try {
Expression filterExpression = ExpressionFactory.createExpression(expr);
for ( Genotype g : genotypes ) {
JexlContext jContext = JexlHelper.createContext();
jContext.setVars(g.getAttributes());
if ( (Boolean)filterExpression.evaluate(jContext) )
Gs.add(g);
}
} catch (Exception e) {
throw new StingException("JEXL error in VariantContext: " + e.getMessage());
}
return createNewContext(Gs, attributes);
}
/** /**
* @return a set of variant contexts where each one represents (all instances of) a single sample from this context * @return a set of new variant contexts, one for each sample from this context
*/ */
public Set<VariantContext> splitBySample(); public Set<VariantContext> splitBySample() {
Set<VariantContext> contexts = new HashSet<VariantContext>();
for ( Genotype g : genotypes ) {
HashSet<Genotype> gAsSet = new HashSet<Genotype>();
gAsSet.add(g);
contexts.add(createNewContext(gAsSet, attributes));
}
return contexts;
}
/** /**
* get the alternate allele frequency of this variant context, if the context is variable. * @param Gs the set of genotypes from which to create a new context
* If this context is not variable or a frequency cannot be provided, this method should return 0. * @param attrs the attributes for the new context
* *
* WARNING: This method is valid only for bi-allelic contexts; the contract is to check isBiallelic() * @return a new context based on the given genotypes
* before calling this method
*
* @return double the minor allele frequency
*/ */
public double getNonRefAlleleFrequency(); private VariantContext createNewContext(Set<Genotype> Gs, HashMap<Object, Object> attrs) {
HashSet<Allele> As = new HashSet<Allele>();
for ( Genotype g : Gs )
As.addAll(g.getAlleles());
return new VariantContext(loc, reference, As, Gs, attrs);
}
/** /**
* get the -1 * (log 10 of the error value) * @param allele the allele to be queried
* *
* @return the postive number space log based error estimate * @return the frequency of the given allele in this context
*/ */
public double getNegLog10PError(); public double getAlleleFrequency(Allele allele) {
int alleleCount = 0;
int totalCount = 0;
for ( Genotype g : genotypes ) {
for ( Allele a : g.getAlleles() ) {
totalCount++;
if ( allele.equals(a) )
alleleCount++;
}
}
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
}
/** /**
* gets the alleles. This method should return all of the alleles present at the location, * Gets the alleles. This method should return all of the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed * including the reference allele. There are no constraints imposed on the ordering of alleles
* by an unordered list of alternate alleles. If the reference base is not an allele in this varation * in the set. If the reference is not an allele in this context it will not be included.
* it will not be in the list (i.e. there is no guarantee that the reference base is in the list).
* *
* @return an alternate allele list * @return the set of alleles
*/ */
public List<String> getAlleles(); public Set<Allele> getAlleles() { return alleles; }
/** /**
* gets the alternate alleles. This method should return all the alleles present at the location, * Gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering * NOT including the reference allele. There are no constraints imposed on the ordering of alleles
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest * in the set.
* frequency).
* *
* @return an alternate allele list * @return the set of alternate alleles
*/ */
public List<String> getAlternateAlleleList(); public Set<Allele> getAlternateAlleles() {
HashSet<Allele> altAlleles = new HashSet<Allele>();
for ( Allele allele : alleles ) {
if ( !allele.equals(reference) )
altAlleles.add(allele);
}
return altAlleles;
}
/**
* Sets the given attribute
*
* @param key the attribute key
* @param value the attribute value
*/
public void setAttribute(Object key, Object value) {
attributes.put(key, value);
}
/**
* @param key the attribute key
*
* @return the attribute value for the given key (or null if not set)
*/
public Object getAttribute(Object key) {
return attributes.get(key);
}
/**
* @return the attribute map
*/
public Map<Object, Object> getAttributes() {
return attributes;
}
} }

View File

@ -0,0 +1,79 @@
package org.broadinstitute.sting.gatk.refdata;
/**
* @author ebanks
* <p/>
* Class Allele
* <p/>
* This class emcompasses all the basic information about an allele
*/
public class Allele {
private AlleleType type;
private String bases;
// the types of variants we currently allow
public enum AlleleType {
REFERENCE, SNP, INSERTION, DELETION, INVERSION
}
public Allele(AlleleType type, String bases) {
this.type = type;
if ( bases == null )
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
if ( type == AlleleType.DELETION && bases.length() > 0 )
throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
this.bases = bases.toUpperCase();
}
/**
* convenience method for switching over the allele type
*
* @return the AlleleType of this allele
**/
public AlleleType getType() { return type; }
/**
* convenience method for SNPs
*
* @return true if this is a SNP, false otherwise
*/
public boolean isSNP() { return type == AlleleType.SNP; }
/**
* convenience method for variants
*
* @return true if this is a variant allele, false if it's reference
*/
public boolean isVariant() { return type != AlleleType.REFERENCE; }
/**
* convenience method for indels
*
* @return true if this is an indel, false otherwise
*/
public boolean isIndel() { return type == AlleleType.INSERTION || type == AlleleType.DELETION; }
/**
* For deletions, this method returns an empty String.
* For everything else, observed bases for the allele are returned.
*
* @return the bases, as a string
*/
public String getBases() { return bases; }
/**
* @param other the other allele
*
* @return true if these alleles are equal
*/
public boolean equals(Allele other) {
return type == other.getType() && bases.equals(other.getBases());
}
}

View File

@ -1,172 +1,147 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc; import java.util.*;
import java.util.List;
/** /**
* Created by IntelliJ IDEA. * @author ebanks
* User: asivache * <p/>
* Date: Apr 1, 2009 * Class Genotype
* Time: 11:02:03 AM * <p/>
* To change this template use File | Settings | File Templates. * This class emcompasses all the basic information about a genotype
*/ */
@Deprecated public class Genotype {
public interface Genotype extends Comparable<ReferenceOrderedDatum> {
// ---------------------------------------------------------------------- private List<Allele> alleles;
//
// manipulating the SNP information private double negLog10PError;
//
// ---------------------------------------------------------------------- private String sample;
/** Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location private HashMap<Object, Object> attributes;
public Genotype(List<Allele> alleles, String sample, double negLog10PError) {
this.alleles = new ArrayList<Allele>(alleles);
this.sample = sample;
this.negLog10PError = negLog10PError;
attributes = new HashMap<Object, Object>();
}
/**
* @return the alleles for this genotype
*/
public List<Allele> getAlleles() { return alleles; }
/**
* @return the ploidy of this genotype
*/
public int getPloidy() { return alleles.size(); }
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt)
*/
public boolean isHom() {
// do not assume ploidy == 2
if ( alleles.size() == 0 )
return true;
Allele first = alleles.get(0);
for ( int i = 1; i < alleles.size(); i++ ) {
if ( !first.equals(alleles.get(i)) )
return false;
}
return true;
}
/**
* @return true if we're het (observed alleles differ)
*/
public boolean isHet() { return !isHom(); }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
*/
public boolean isNoCall() {
// TODO -- implement me
return false;
}
/**
* @return true if all alleles for this genotype are SNPs or reference
*/
public boolean isPointGenotype() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() && !allele.isSNP() )
return false;
}
return true;
}
/**
* @return true if this is a variant genotype, false if it's reference
*/
public boolean isVariant() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() )
return true;
}
return false;
}
/**
* @return the -1 * log10-based error estimate
*/
public double getNegLog10PError() { return negLog10PError; }
/**
* @return the sample name
*/
public String getSample() { return sample; }
/**
* Sets the sample name
* *
* @return position on the genome wrapped in GenomeLoc object * @param sample the sample name
*/ */
GenomeLoc getLocation(); public void setSample(String sample) {
this.sample = sample;
}
/** Returns actual observed alleles on the fwd strand. Allowed to return more than two alleles (@see #getPloidy()). /**
* For point genotypes (those that report themselves as isPointGenotype()), the alleles are observed bases. For indels * @param ref the reference base
* (i.e. if isIndelGenotype() is true), a no-event allele is represented by an empty string,
* while the event allele is represented by '+' (insertion) or '-' (deletion) followed by actual inserted or deleted bases.
* @return list of alleles
*/
List<String> getFWDAlleles() ;
/** Returns bases on the fwd strand in the true reference allele (regardless of whether the reference allele
* was observed at this site in this particular genotype!)
* as a String. String can be empty (if alt allele is insertion into
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
* (if alt is a longer deletion). For the actually observed alleles, see #getFWDAlleles()
* *
* @return reference allele, forward strand * @return this genotype as a variation
*/ */
String getFWDRefBases(); // TODO -- implement me
// public Variation toVariation(char ref);
/** Returns reference base (regardless of whether the reference allele was actually observed. This method /**
* is similar to getFWDRefBases() except it returns a single char. For non-point variants (i.e. indels), behavior * Sets the given attribute
* of this method is not specified and can be defined by implementing classes.
* @return
*/
char getRef() ;
/** Returns true if the object represents a point genotype (e.g. reference base vs. a SNP),
* regardless of what the actual observation is (see isReference() and isSNP()).
* @return
*/
boolean isPointGenotype() ;
/** Returns true if the object represents an extended (indel) genotype (e.g. insertion/deletion event vs. no event),
* regardless of what the actual observation is (see isReference(), isIndel(), isInsertion(), isDeletion()).
* @return
*/
boolean isIndelGenotype() ;
/** Is this variant a SNP? Returns true only if this is point genotype AND a non-reference base was observed, false otherwise.
* *
* @return true or false * @param key the attribute key
* @param value the attribute value
*/ */
boolean isSNP(); public void setAttribute(Object key, Object value) {
attributes.put(key, value);
}
/** Returns true if all observed alleles are reference ones (only reference base was observed if this is a point genotype, /**
* or no indel events were observed if this is an indel genotype). All is"Variant" methods (where Variant=SNP,Insertion, etc) should * @param key the attribute key
* return false at a site where isReference() is true to ensure consistency.
* @return
*/
boolean isReference();
/** Is this variant an insertion? Returns true if this is indel genotype AND only insertion event(s) was observed,
* false otherwise. The contract requires isIndel() to return true
* whenever isInsertion() method returns true.
* *
* @return true or false * @return the attribute value for the given key (or null if not set)
*/ */
boolean isInsertion(); public Object getAttribute(Object key) {
return attributes.get(key);
}
/**
/** Is this variant a deletion? Returns true if this is indel genotype AND only deletion event(s) was observed, * @return the attribute map
* false otherwise. The contract requires isIndel() to return true
* whenever isDeletion() method returns true.
*
* @return true or false
*/ */
boolean isDeletion(); public Map<Object, Object> getAttributes() {
return attributes;
}
/** Is this variant an insertion or a deletion? The contract requires
* this to be true if either isInsertion() or isDeletion() returns true. However,
* this method is also allowed to return true even if neither isInsertion()
* nor isDeletion() do (e.g. a het genotype with one insertion and one deletion). Always returns
* false if the object does not represent indel genotype.
* @return
*/
boolean isIndel();
/** Returns phred-scaled confidence for the variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
*
* @return
*/
double getVariantConfidence();
/** Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's
* best vs next-best.
* @return
*/
double getConsensusConfidence();
/** Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
* the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
* individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i> bi-allelic (since there are
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
* @return
*/
boolean isBiallelic() ;
/** Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
*
* @return
*/
boolean isHom() ;
/** Returns true if observed alleles differ (regardless of whether they are ref or alt)
*
* @return
*/
boolean isHet() ;
/** Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* @return reference base on the forward strand
*/
//char getRefSnpFWD() throws IllegalStateException;
/** Returns bases in the alternative allele as a String. String can be empty (as in deletion from
* the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
* (for longer indels).
*
* @return alternative allele, forward strand
*/
//String getAltBasesFWD();
/** Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* @return alternative allele base on the forward starnd
*/
// char getAltSnpFWD() throws IllegalStateException;
int length();
/** Return actual number of observed alleles (chromosomes) in the genotype.
* @return
*/
// int getPloidy() throws IllegalStateException;
} }

View File

@ -1,46 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
/** A simple wrapper interface that allows for keeping any of the "main" (point mutation) or "associated" (indel)
* genotypes, or both, at any given reference position. This is the abstraction of "complete" genotyping information:
* one might have or might have not genotyped independently a base at the given position (point) or an indel right after
* that position. The query and getter methods allow clients to figure out what pieces of the genotyping information
* are actually available and to retrieve them. Behavior of the getters when corresponding queries return false is
* not bound by a contract but it is suggested that they return null or a "trivial" (reference) genotype.
*
* @author asivache
*
*/
public interface GenotypeList {
/** Returns true if point mutation genotyping information is available at the
* current position.
* @return
*/
boolean hasPointGenotype();
/** Returns true if indel genotyping information is available at the
* current position.
* @return
*/
boolean hasIndelGenotype();
/** Returns point mutation genotyping information if available;
* behavior is not specified in the case when there is no such information.
* @return
*/
Genotype getPointGenotype();
/** Returns indel genotyping information if available;
* behavior is not specified in the case when there is no such information.
* @return
*/
Genotype getIndelGenotype();
/** Returns position on the reference the stored genotyping data are associated with.
*
* @return
*/
GenomeLoc getLocation();
}

View File

@ -29,7 +29,7 @@ import net.sf.samtools.util.StringUtil;
*/ */
public class SAMPileupRecord implements Genotype, GenotypeList { public class SAMPileupRecord {
private static final int NO_VARIANT = -1; private static final int NO_VARIANT = -1;
private static final int SNP_VARIANT = 0; private static final int SNP_VARIANT = 0;
private static final int INSERTION_VARIANT = 1; private static final int INSERTION_VARIANT = 1;
@ -428,13 +428,11 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* *
* @return reference allele, forward strand * @return reference allele, forward strand
*/ */
@Override
public String getFWDRefBases() { public String getFWDRefBases() {
return refBases; return refBases;
} }
@Override
public List<String> getFWDAlleles() { public List<String> getFWDAlleles() {
return observedAlleles; return observedAlleles;
} }
@ -500,34 +498,28 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
} }
@Override
public double getVariantConfidence() { public double getVariantConfidence() {
return variantScore; return variantScore;
} }
@Override
public boolean isBiallelic() { public boolean isBiallelic() {
return nNonref < 2; return nNonref < 2;
} }
@Override
public double getConsensusConfidence() { public double getConsensusConfidence() {
return consensusScore; return consensusScore;
} }
@Override
public int length() { public int length() {
return eventLength; return eventLength;
} }
@Override
public boolean isIndelGenotype() { public boolean isIndelGenotype() {
return refBaseChar == '*'; return refBaseChar == '*';
} }
@Override
public boolean isPointGenotype() { public boolean isPointGenotype() {
return ! isIndelGenotype(); return ! isIndelGenotype();
} }
@ -537,8 +529,7 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* point genotype, this method returns null. * point genotype, this method returns null.
* @return * @return
*/ */
@Override public SAMPileupRecord getIndelGenotype() {
public Genotype getIndelGenotype() {
if ( isIndelGenotype() ) return this; if ( isIndelGenotype() ) return this;
else return null; else return null;
} }
@ -548,8 +539,7 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* indel genotype, this method returns null. * indel genotype, this method returns null.
* @return * @return
*/ */
@Override public SAMPileupRecord getPointGenotype() {
public Genotype getPointGenotype() {
if ( isPointGenotype() ) return this; if ( isPointGenotype() ) return this;
else return null; else return null;
} }
@ -558,7 +548,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* indel genotype is what it only has). * indel genotype is what it only has).
* @return * @return
*/ */
@Override
public boolean hasIndelGenotype() { public boolean hasIndelGenotype() {
return isIndelGenotype(); return isIndelGenotype();
} }
@ -567,13 +556,10 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* point genotype is what it only has. * point genotype is what it only has.
* @return * @return
*/ */
@Override
public boolean hasPointGenotype() { public boolean hasPointGenotype() {
return isPointGenotype(); return isPointGenotype();
} }
@Override
public int compareTo(ReferenceOrderedDatum o) { public int compareTo(ReferenceOrderedDatum o) {
return getLocation().compareTo(o.getLocation()); return getLocation().compareTo(o.getLocation());
} }
@ -596,7 +582,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
rodName = name; rodName = name;
} }
@Override
public boolean hasNext() { public boolean hasNext() {
return parser.hasNext(); return parser.hasNext();
} }
@ -605,7 +590,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* @return the next element in the iteration. * @return the next element in the iteration.
* @throws NoSuchElementException - iterator has no more elements. * @throws NoSuchElementException - iterator has no more elements.
*/ */
@Override
public SAMPileupRecord next() { public SAMPileupRecord next() {
if (!this.hasNext()) throw new NoSuchElementException("SAMPileupRecord next called on iterator with no more elements"); if (!this.hasNext()) throw new NoSuchElementException("SAMPileupRecord next called on iterator with no more elements");
// if ( z == 0 ) t = System.currentTimeMillis(); // if ( z == 0 ) t = System.currentTimeMillis();
@ -621,7 +605,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
} }
@Override
public void remove() { public void remove() {
throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups"); throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups");
} }

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod { public class SimpleIndelROD extends TabularROD implements VariationRod {
private boolean KGENOMES_FORMAT = false, checkedFormat = false; private boolean KGENOMES_FORMAT = false, checkedFormat = false;
@ -24,7 +24,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
* *
* @return the reference base or bases, as a string * @return the reference base or bases, as a string
*/ */
@Override
public String getReference() { public String getReference() {
return String.valueOf(getRef()); return String.valueOf(getRef());
} }
@ -53,13 +52,11 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
* *
* @return VariantFrequency with the stored frequency * @return VariantFrequency with the stored frequency
*/ */
@Override
public double getNonRefAlleleFrequency() { public double getNonRefAlleleFrequency() {
return 0.0; return 0.0;
} }
/** @return the VARIANT_TYPE of the current variant */ /** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() { public VARIANT_TYPE getType() {
return isInsertion() ? VARIANT_TYPE.INSERTION : VARIANT_TYPE.DELETION; return isInsertion() ? VARIANT_TYPE.INSERTION : VARIANT_TYPE.DELETION;
} }
@ -85,7 +82,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
* *
* @return a char, representing the alternate base * @return a char, representing the alternate base
*/ */
@Override
public char getAlternativeBaseForSNP() { public char getAlternativeBaseForSNP() {
return getAltSnpFWD(); return getAltSnpFWD();
} }
@ -95,7 +91,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
* *
* @return a char, representing the alternate base * @return a char, representing the alternate base
*/ */
@Override
public char getReferenceForSNP() { public char getReferenceForSNP() {
return getRefSnpFWD(); return getRefSnpFWD();
} }
@ -110,7 +105,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
* *
* @return the log based error estimate * @return the log based error estimate
*/ */
@Override
public double getNegLog10PError() { public double getNegLog10PError() {
return getVariationConfidence(); return getVariationConfidence();
} }
@ -123,7 +117,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
* *
* @return an alternate allele list * @return an alternate allele list
*/ */
@Override
public List<String> getAlternateAlleleList() { public List<String> getAlternateAlleleList() {
List<String> ret = getAlleleList(); List<String> ret = getAlleleList();
for (String val : ret) { for (String val : ret) {
@ -139,7 +132,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
* *
* @return an alternate allele list * @return an alternate allele list
*/ */
@Override
public List<String> getAlleleList() { public List<String> getAlleleList() {
return this.getFWDAlleles(); return this.getFWDAlleles();
} }

View File

@ -30,7 +30,7 @@ import net.sf.picard.reference.ReferenceSequenceFileWalker;
public class rodSAMPileup extends BasicReferenceOrderedDatum implements GenotypeList { public class rodSAMPileup extends BasicReferenceOrderedDatum {
// ---------------------------------------------------------------------- // ----------------------------------------------------------------------
// //
// Constructors // Constructors
@ -71,22 +71,18 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
return b.toString(); return b.toString();
} }
@Override public SAMPileupRecord getIndelGenotype() {
public Genotype getIndelGenotype() {
return indelGenotype; return indelGenotype;
} }
@Override public SAMPileupRecord getPointGenotype() {
public Genotype getPointGenotype() {
return pointGenotype; return pointGenotype;
} }
@Override
public boolean hasIndelGenotype() { public boolean hasIndelGenotype() {
return indelGenotype != null; return indelGenotype != null;
} }
@Override
public boolean hasPointGenotype() { public boolean hasPointGenotype() {
return pointGenotype != null; return pointGenotype != null;
} }
@ -108,7 +104,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
rodName = name; rodName = name;
} }
@Override
public boolean hasNext() { public boolean hasNext() {
return parser.hasNext(); return parser.hasNext();
} }
@ -117,7 +112,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
* @return the next element in the iteration. * @return the next element in the iteration.
* @throws NoSuchElementException - iterator has no more elements. * @throws NoSuchElementException - iterator has no more elements.
*/ */
@Override
public rodSAMPileup next() { public rodSAMPileup next() {
if (!this.hasNext()) throw new NoSuchElementException("rodSAMPileup next called on iterator with no more elements"); if (!this.hasNext()) throw new NoSuchElementException("rodSAMPileup next called on iterator with no more elements");
rodSAMPileup result = new rodSAMPileup(rodName); rodSAMPileup result = new rodSAMPileup(rodName);
@ -170,7 +164,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
} }
@Override
public void remove() { public void remove() {
throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups"); throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups");
} }

View File

@ -24,7 +24,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
public boolean CONTINUE_AFTER_AN_ERROR = false; public boolean CONTINUE_AFTER_AN_ERROR = false;
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
ReadBackedPileup pileup = context.getPileup(); ReadBackedPileup pileup = context.getBasePileup();
SAMPileupRecord truePileup = getTruePileup( tracker ); SAMPileupRecord truePileup = getTruePileup( tracker );
if ( truePileup == null ) { if ( truePileup == null ) {
@ -105,9 +105,9 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
return null; return null;
if( pileup.hasPointGenotype() ) if( pileup.hasPointGenotype() )
return (SAMPileupRecord)pileup.getPointGenotype(); return pileup.getPointGenotype();
else if( pileup.hasIndelGenotype() ) else if( pileup.hasIndelGenotype() )
return (SAMPileupRecord)pileup.getIndelGenotype(); return pileup.getIndelGenotype();
else else
throw new StingException("Unsupported pileup type: " + pileup); throw new StingException("Unsupported pileup type: " + pileup);
} }

View File

@ -1,216 +0,0 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.refdata.Genotype;
import org.broadinstitute.sting.gatk.refdata.GenotypeList;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.Arrays;
import java.util.List;
/** Holds useful utility methods and auxiliary default classes for working with Genotype objects
*
* @author asivache
*
*/
public class GenotypeUtils {
public static enum VariantType {
POINT, INDEL;
// VariantType() {}
}
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
* (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself
* will be typecasted and returned back if it implements Genotype and represents the specified variant type. If the last argument is false, then
* null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call
* of the other type was made, or b) rod is a Genotype and its type is difference from variant_type, then it will be assumed that the implicit ref/ref call
* of the specified type also exists at this genomic position, and new object representing such default call will be returned. If rod argument
* is null, then this method safely (and silently) returns null.
*
* @param rod GenotypeList or Genotype to extract requested call from/upgrade to requested call
* @param variant_type type of the variant to extract (POINT mutations or INDEL)
* @param assume_implicit_ref_calls true if presence of only a call of different type means that ref/ref call of the requested type is implicitly present
* @return Genotyping call of the requested type or null if no explicit or implicit call was made.
*/
public static Genotype extractGenotype(ReferenceOrderedDatum rod, VariantType variant_type, boolean assume_implicit_ref_calls) {
if ( rod == null ) return null;
if ( rod instanceof GenotypeList ) {
GenotypeList rod_gl = (GenotypeList)rod;
switch ( variant_type ) {
case POINT :
if ( rod_gl.hasPointGenotype() ) return rod_gl.getPointGenotype();
else {
if ( assume_implicit_ref_calls && rod_gl.hasIndelGenotype() ) throw new StingException("Default (reference) implicit POINT genotype is not implemented yet");
else return null;
}
case INDEL:
if ( rod_gl.hasIndelGenotype() ) return rod_gl.getIndelGenotype();
else {
if ( assume_implicit_ref_calls && rod_gl.hasPointGenotype() ) return new DefaultIndelGenotype(rod_gl.getLocation());
else return null;
}
default: throw new StingException("Unrecognized variant type: "+variant_type);
}
} else if ( rod instanceof Genotype ) {
Genotype rod_g = (Genotype)rod;
switch ( variant_type ) {
case POINT:
if ( rod_g.isIndelGenotype() ) {
if ( assume_implicit_ref_calls ) throw new StingException("Default (reference) implicit POINT genotype is not implemented yet");
else return null;
} else return rod_g;
case INDEL:
if ( rod_g.isPointGenotype() ) {
if ( assume_implicit_ref_calls ) return new DefaultIndelGenotype(rod_g.getLocation());
else return null;
} else return rod_g;
default: throw new StingException("Unrecognized variant type: "+variant_type);
}
}
else throw new StingException("track "+rod.getName()+" is not a Genotype or GenotypeList");
}
public static boolean isHet(Variation var) {
if ( var instanceof Genotype )
return ((Genotype)var).isHet();
String genotype = Utils.join("",var.getAlleleList());
if ( genotype.length() < 1 )
return false;
char first = genotype.charAt(0);
for (char base : genotype.toCharArray()) {
if (base != first) return true;
}
return false;
}
/** This class represents a "default" indel-type genotype with homozygous reference (i.e. confidently no indel)
* call. All the interface methods are implemented and return consistent values. Use this class when working with
* genotyping data where absence of explicit indel call actually means that no evidence for an indel was observed
* (SAM pileup is one such example).
* @author asivache
*
*/
public static class DefaultIndelGenotype implements Genotype {
private GenomeLoc location;
private static final List<String> alleles = Arrays.asList("","") ;
private int confidence ;
/** Creates default indel genotype (ref/ref = no indel) at the specified position
* with default consensus confidence of 1000 (variant confidence is 0).
* @param l reference position to associate the genotyping call with
*/
public DefaultIndelGenotype(GenomeLoc l) {
this(l,1000);
}
/** Creates ref/ref (i.e. absense of indel) indel genotype at the specified position
* with the specified consensus confidence (variant confidence is 0).
* @param l reference position to associate the genotyping call with
*/
public DefaultIndelGenotype(GenomeLoc l, int confidence) {
location = l;
this.confidence = confidence;
}
@Override
public double getConsensusConfidence() {
return confidence;
}
@Override
public List<String> getFWDAlleles() {
return alleles;
}
@Override
public String getFWDRefBases() {
return alleles.get(0);
}
@Override
public GenomeLoc getLocation() {
return location;
}
@Override
public char getRef() {
return '*';
}
@Override
public double getVariantConfidence() {
return 0.0;
}
@Override
public boolean isBiallelic() {
return false;
}
@Override
public boolean isDeletion() {
return false;
}
@Override
public boolean isHet() {
return false;
}
@Override
public boolean isHom() {
return true;
}
@Override
public boolean isIndel() {
return false;
}
@Override
public boolean isIndelGenotype() {
return true;
}
@Override
public boolean isInsertion() {
return false;
}
@Override
public boolean isPointGenotype() {
return false;
}
@Override
public boolean isReference() {
return true;
}
@Override
public boolean isSNP() {
return false;
}
@Override
public int length() {
return (int)(location.getStop() - location.getStart() + 1);
}
@Override
public int compareTo(ReferenceOrderedDatum o) {
return location.compareTo(o.getLocation());
}
}
}

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.genotype.BasicVariation;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
/**
*
* @author aaron
*
* Class GenotypeUtilsTest
*
* a test class for the various function in the genotype utils class
*/
public class GenotypeUtilsTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
private static File vcfFile = new File(validationDataLocation + "vcfexample.vcf");
@BeforeClass
public static void beforeTests() {
try {
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
/**
* make sure that the variation is a het
*/
@Test
public void isHetTest() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,1,2);
BasicVariation var = new BasicVariation("AA", "A", 0, loc,0.0);
Assert.assertTrue(!GenotypeUtils.isHet(var));
BasicVariation var2 = new BasicVariation("AG", "A", 0, loc,0.0);
Assert.assertTrue(GenotypeUtils.isHet(var2));
BasicVariation var3 = new BasicVariation("GG", "A", 0, loc,0.0);
Assert.assertTrue(!GenotypeUtils.isHet(var3));
}
}