1. Removing an old version of the Genotype interface which is no longer being used. Needed to do this now so that the naming conflicts would cease.

2. Adding a preliminary version of the new Genotype/Allele interface (putting it into refdata/ as the VariantContext really only applies to rods) with updates to VariantContext.  This is by no means complete - further updates coming tomorrow.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2533 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-07 05:51:10 +00:00
parent a9245a58e2
commit 6c739e30e0
10 changed files with 465 additions and 611 deletions

View File

@ -1,8 +1,8 @@
package org.broadinstitute.sting.gatk.contexts;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.refdata.*;
import java.util.*;
import org.apache.commons.jexl.*;
@ -11,159 +11,305 @@ import org.apache.commons.jexl.*;
/**
* @author ebanks
* <p/>
* Interface VariantContext
* Class VariantContext
* <p/>
* This class represents a context that unifies one or more variants
*/
public interface VariantContext {
public class VariantContext {
private static final String UNIQUIFIED_SUFFIX = ".unique";
private Set<Allele> alleles;
private Set<Genotype> genotypes;
private Allele reference;
private GenomeLoc loc;
private HashMap<Object, Object> attributes;
public VariantContext(VariationRod rod) {
// TODO -- VariationRod should eventually require classes to implement toVariationContext()
// TODO -- (instead of using a temporary adapter class)
loc = rod.getLocation();
reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
// TODO -- populate the alleles and genotypes through an adapter
alleles = new HashSet<Allele>();
genotypes = new HashSet<Genotype>();
attributes = new HashMap<Object, Object>();
}
protected VariantContext(GenomeLoc loc, Allele reference, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
this.loc = loc;
this.reference = reference;
this.alleles = new HashSet<Allele>(alleles);
this.genotypes = new HashSet<Genotype>(genotypes);
this.attributes = new HashMap<Object, Object>(attributes);
}
/**
* @param other another variant context
*
* throws an exception if there is a collision such that the same sample exists in both contexts
* @return a context representing the merge of this context and the other provided context
*/
public VariantContext merge(VariantContext other) {
return merge(other, false);
}
/**
* @param other another variant context
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
* the samples will be uniquified(based on their sources);
* otherwise, an exception will be thrown
*
* @return a context representing the merge of this context and the other provided context
*/
public VariantContext merge(VariantContext other, boolean uniquifySamples) {
if ( !loc.equals(other.getLocation()) )
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
Set<String> samples = getSampleNames();
Set<Genotype> Gs = new HashSet<Genotype>(genotypes);
for ( Genotype g : other.getGenotypes() ) {
if ( samples.contains(g.getSample()) ) {
if ( uniquifySamples )
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
else
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
}
Gs.add(g);
}
HashMap<Object, Object> attrs = new HashMap<Object, Object>(attributes);
attrs.putAll(other.getAttributes());
return createNewContext(Gs, attrs);
}
/**
* @return the location of this context
*/
public GenomeLoc getLocation();
public GenomeLoc getLocation() { return loc; }
/**
* @return the reference base or bases for this context, as a string
* @return the reference allele for this context
*/
public String getReference();
public Allele getReference() { return reference; }
/**
* @return true if the context is variant (i.e. contains a non-reference allele)
*/
public boolean isVariant();
public boolean isVariant() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() )
return true;
}
return false;
}
/**
* @return true if the context is strictly bi-allelic
*/
public boolean isBiallelic();
public boolean isBiallelic() {
return getAlternateAlleles().size() == 1;
}
/**
* @return true if the context represents point mutations only (i.e. no indels or structural variants)
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
*/
public boolean isPointMutation();
public boolean isPointAllele() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() && !allele.isSNP() )
return false;
}
return true;
}
/**
* @param sample the sample name
*
* @return true if the context contains data for the given sample
* @return the set of all sample names in this context
*/
public boolean hasSample(String sample);
/**
* @param sample the sample name
*
* @return true if the context contains data for only a single instance of the given sample
*/
public boolean hasSingleInstanceOfSample(String sample);
/**
* @return the number of samples in this context.
* If a given sample has more than one instance in this context, each one is counted separately
*/
public int size();
/**
* @return the number of unique samples in this context
*/
public int numSamples();
/**
* @return the set of all unique sample names in this context
*/
public Set<String> getSampleNames();
public Set<String> getSampleNames() {
Set<String> samples = new TreeSet<String>();
for ( Genotype g : genotypes )
samples.add(g.getSample());
return samples;
}
/**
* @return true if the context represents variants with associated genotypes
*/
public boolean hasGenotypes();
public boolean hasGenotypes() { return genotypes.size() > 0; }
/**
* @return set of all Genotypes associated with this context
*/
public Set<Genotype> getGenotypes();
/**
* @param sample the sample name
*
* @return set of all Genotypes associated with the given sample in this context
*/
public Set<Genotype> getGenotypes(String sample);
public Set<Genotype> getGenotypes() { return genotypes; }
/**
* @param sample the sample name
*
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
* throws an exception if multiple instances of the sample exist in the context
*/
public Genotype getGenotype(String sample);
/**
* @param sample the sample name
*
* @return the Variation associated with the given sample in this context or null if the sample is not in this context
*/
public Variation toVariation(String sample);
public Genotype getGenotype(String sample) {
for ( Genotype g : genotypes ) {
if ( g.getSample().equals(sample) )
return g;
}
return null;
}
/**
* @return set of all subclasses within this context
*/
public Set<String> getSubclasses();
public Set<Object> getSubclasses() {
Set<Object> subclasses = new HashSet<Object>();
for ( Genotype g : genotypes )
subclasses.addAll(g.getAttributes().keySet());
return subclasses;
}
/**
* @param subclass the name of a subclass of variants to select
*
* @return a subset of this context which selects based on the given subclass
*/
public VariantContext select(String subclass);
public VariantContext select(String subclass) {
HashSet<Genotype> Gs = new HashSet<Genotype>();
for ( Genotype g : genotypes ) {
if ( g.getAttribute(subclass) != null )
Gs.add(g);
}
return createNewContext(Gs, attributes);
}
/**
* @param expr a jexl expression describing how to filter this context
*
* @return a subset of this context which is filtered based on the given expression
*/
public VariantContext filter(String expr);
public VariantContext filter(String expr) {
HashSet<Genotype> Gs = new HashSet<Genotype>();
try {
Expression filterExpression = ExpressionFactory.createExpression(expr);
for ( Genotype g : genotypes ) {
JexlContext jContext = JexlHelper.createContext();
jContext.setVars(g.getAttributes());
if ( (Boolean)filterExpression.evaluate(jContext) )
Gs.add(g);
}
} catch (Exception e) {
throw new StingException("JEXL error in VariantContext: " + e.getMessage());
}
return createNewContext(Gs, attributes);
}
/**
* @return a set of variant contexts where each one represents (all instances of) a single sample from this context
* @return a set of new variant contexts, one for each sample from this context
*/
public Set<VariantContext> splitBySample();
public Set<VariantContext> splitBySample() {
Set<VariantContext> contexts = new HashSet<VariantContext>();
for ( Genotype g : genotypes ) {
HashSet<Genotype> gAsSet = new HashSet<Genotype>();
gAsSet.add(g);
contexts.add(createNewContext(gAsSet, attributes));
}
return contexts;
}
/**
* get the alternate allele frequency of this variant context, if the context is variable.
* If this context is not variable or a frequency cannot be provided, this method should return 0.
* @param Gs the set of genotypes from which to create a new context
* @param attrs the attributes for the new context
*
* WARNING: This method is valid only for bi-allelic contexts; the contract is to check isBiallelic()
* before calling this method
*
* @return double the minor allele frequency
* @return a new context based on the given genotypes
*/
public double getNonRefAlleleFrequency();
private VariantContext createNewContext(Set<Genotype> Gs, HashMap<Object, Object> attrs) {
HashSet<Allele> As = new HashSet<Allele>();
for ( Genotype g : Gs )
As.addAll(g.getAlleles());
return new VariantContext(loc, reference, As, Gs, attrs);
}
/**
* get the -1 * (log 10 of the error value)
* @param allele the allele to be queried
*
* @return the postive number space log based error estimate
* @return the frequency of the given allele in this context
*/
public double getNegLog10PError();
public double getAlleleFrequency(Allele allele) {
int alleleCount = 0;
int totalCount = 0;
for ( Genotype g : genotypes ) {
for ( Allele a : g.getAlleles() ) {
totalCount++;
if ( allele.equals(a) )
alleleCount++;
}
}
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
}
/**
* gets the alleles. This method should return all of the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles. If the reference base is not an allele in this varation
* it will not be in the list (i.e. there is no guarantee that the reference base is in the list).
* Gets the alleles. This method should return all of the alleles present at the location,
* including the reference allele. There are no constraints imposed on the ordering of alleles
* in the set. If the reference is not an allele in this context it will not be included.
*
* @return an alternate allele list
* @return the set of alleles
*/
public List<String> getAlleles();
public Set<Allele> getAlleles() { return alleles; }
/**
* gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
* Gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference allele. There are no constraints imposed on the ordering of alleles
* in the set.
*
* @return an alternate allele list
* @return the set of alternate alleles
*/
public List<String> getAlternateAlleleList();
public Set<Allele> getAlternateAlleles() {
HashSet<Allele> altAlleles = new HashSet<Allele>();
for ( Allele allele : alleles ) {
if ( !allele.equals(reference) )
altAlleles.add(allele);
}
return altAlleles;
}
/**
* Sets the given attribute
*
* @param key the attribute key
* @param value the attribute value
*/
public void setAttribute(Object key, Object value) {
attributes.put(key, value);
}
/**
* @param key the attribute key
*
* @return the attribute value for the given key (or null if not set)
*/
public Object getAttribute(Object key) {
return attributes.get(key);
}
/**
* @return the attribute map
*/
public Map<Object, Object> getAttributes() {
return attributes;
}
}

View File

@ -0,0 +1,79 @@
package org.broadinstitute.sting.gatk.refdata;
/**
* @author ebanks
* <p/>
* Class Allele
* <p/>
* This class emcompasses all the basic information about an allele
*/
public class Allele {
private AlleleType type;
private String bases;
// the types of variants we currently allow
public enum AlleleType {
REFERENCE, SNP, INSERTION, DELETION, INVERSION
}
public Allele(AlleleType type, String bases) {
this.type = type;
if ( bases == null )
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
if ( type == AlleleType.DELETION && bases.length() > 0 )
throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
this.bases = bases.toUpperCase();
}
/**
* convenience method for switching over the allele type
*
* @return the AlleleType of this allele
**/
public AlleleType getType() { return type; }
/**
* convenience method for SNPs
*
* @return true if this is a SNP, false otherwise
*/
public boolean isSNP() { return type == AlleleType.SNP; }
/**
* convenience method for variants
*
* @return true if this is a variant allele, false if it's reference
*/
public boolean isVariant() { return type != AlleleType.REFERENCE; }
/**
* convenience method for indels
*
* @return true if this is an indel, false otherwise
*/
public boolean isIndel() { return type == AlleleType.INSERTION || type == AlleleType.DELETION; }
/**
* For deletions, this method returns an empty String.
* For everything else, observed bases for the allele are returned.
*
* @return the bases, as a string
*/
public String getBases() { return bases; }
/**
* @param other the other allele
*
* @return true if these alleles are equal
*/
public boolean equals(Allele other) {
return type == other.getType() && bases.equals(other.getBases());
}
}

View File

@ -1,172 +1,147 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: asivache
* Date: Apr 1, 2009
* Time: 11:02:03 AM
* To change this template use File | Settings | File Templates.
*/
@Deprecated
public interface Genotype extends Comparable<ReferenceOrderedDatum> {
// ----------------------------------------------------------------------
//
// manipulating the SNP information
//
// ----------------------------------------------------------------------
/** Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location
*
* @return position on the genome wrapped in GenomeLoc object
*/
GenomeLoc getLocation();
/** Returns actual observed alleles on the fwd strand. Allowed to return more than two alleles (@see #getPloidy()).
* For point genotypes (those that report themselves as isPointGenotype()), the alleles are observed bases. For indels
* (i.e. if isIndelGenotype() is true), a no-event allele is represented by an empty string,
* while the event allele is represented by '+' (insertion) or '-' (deletion) followed by actual inserted or deleted bases.
* @return list of alleles
*/
List<String> getFWDAlleles() ;
/** Returns bases on the fwd strand in the true reference allele (regardless of whether the reference allele
* was observed at this site in this particular genotype!)
* as a String. String can be empty (if alt allele is insertion into
* the reference), can contain a single character (as in SNP or one-base deletion), or multiple characters
* (if alt is a longer deletion). For the actually observed alleles, see #getFWDAlleles()
*
* @return reference allele, forward strand
*/
String getFWDRefBases();
/** Returns reference base (regardless of whether the reference allele was actually observed. This method
* is similar to getFWDRefBases() except it returns a single char. For non-point variants (i.e. indels), behavior
* of this method is not specified and can be defined by implementing classes.
* @return
*/
char getRef() ;
/** Returns true if the object represents a point genotype (e.g. reference base vs. a SNP),
* regardless of what the actual observation is (see isReference() and isSNP()).
* @return
*/
boolean isPointGenotype() ;
/** Returns true if the object represents an extended (indel) genotype (e.g. insertion/deletion event vs. no event),
* regardless of what the actual observation is (see isReference(), isIndel(), isInsertion(), isDeletion()).
* @return
*/
boolean isIndelGenotype() ;
/** Is this variant a SNP? Returns true only if this is point genotype AND a non-reference base was observed, false otherwise.
*
* @return true or false
*/
boolean isSNP();
/** Returns true if all observed alleles are reference ones (only reference base was observed if this is a point genotype,
* or no indel events were observed if this is an indel genotype). All is"Variant" methods (where Variant=SNP,Insertion, etc) should
* return false at a site where isReference() is true to ensure consistency.
* @return
*/
boolean isReference();
/** Is this variant an insertion? Returns true if this is indel genotype AND only insertion event(s) was observed,
* false otherwise. The contract requires isIndel() to return true
* whenever isInsertion() method returns true.
*
* @return true or false
*/
boolean isInsertion();
/** Is this variant a deletion? Returns true if this is indel genotype AND only deletion event(s) was observed,
* false otherwise. The contract requires isIndel() to return true
* whenever isDeletion() method returns true.
*
* @return true or false
*/
boolean isDeletion();
/** Is this variant an insertion or a deletion? The contract requires
* this to be true if either isInsertion() or isDeletion() returns true. However,
* this method is also allowed to return true even if neither isInsertion()
* nor isDeletion() do (e.g. a het genotype with one insertion and one deletion). Always returns
* false if the object does not represent indel genotype.
* @return
*/
boolean isIndel();
/** Returns phred-scaled confidence for the variation event (e.g. MAQ's SNP confidence, or AlleleCaller's best vs. ref).
*
* @return
*/
double getVariantConfidence();
/** Returns phred-mapped confidence in called genotype (e.g. MAQ's consensus confidence, or AlleleCaller's
* best vs next-best.
* @return
*/
double getConsensusConfidence();
/** Returns true if the site has at most two known or observed alleles (ref and non-ref), or false if there are > 2 allelic variants known or observed. When
* the implementing class is a genotype, alleles should be always counted including the reference allele whether it was observed in the particular
* individual or not: i.e. if the reference is 'C', then both 'CA' and 'AA' genotypes must be reported as bi-allelic, while 'AT' is <i>not</i> bi-allelic (since there are
* two allelic variants, 'A' and 'T' <i>in addition</i> to the (known) reference variant 'C').
* @return
*/
boolean isBiallelic() ;
/** Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
*
* @return
*/
boolean isHom() ;
/** Returns true if observed alleles differ (regardless of whether they are ref or alt)
*
* @return
*/
boolean isHet() ;
/** Returns reference (major) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* @return reference base on the forward strand
*/
//char getRefSnpFWD() throws IllegalStateException;
/** Returns bases in the alternative allele as a String. String can be empty (as in deletion from
* the reference), can contain a single character (as in SNP or one-base insertion), or multiple characters
* (for longer indels).
*
* @return alternative allele, forward strand
*/
//String getAltBasesFWD();
/** Returns alternative (minor) allele base for a SNP variant as a character; should throw IllegalStateException
* if variant is not a SNP.
*
* @return alternative allele base on the forward starnd
*/
// char getAltSnpFWD() throws IllegalStateException;
int length();
/** Return actual number of observed alleles (chromosomes) in the genotype.
* @return
*/
// int getPloidy() throws IllegalStateException;
}
package org.broadinstitute.sting.gatk.refdata;
import java.util.*;
/**
* @author ebanks
* <p/>
* Class Genotype
* <p/>
* This class emcompasses all the basic information about a genotype
*/
public class Genotype {
private List<Allele> alleles;
private double negLog10PError;
private String sample;
private HashMap<Object, Object> attributes;
public Genotype(List<Allele> alleles, String sample, double negLog10PError) {
this.alleles = new ArrayList<Allele>(alleles);
this.sample = sample;
this.negLog10PError = negLog10PError;
attributes = new HashMap<Object, Object>();
}
/**
* @return the alleles for this genotype
*/
public List<Allele> getAlleles() { return alleles; }
/**
* @return the ploidy of this genotype
*/
public int getPloidy() { return alleles.size(); }
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt)
*/
public boolean isHom() {
// do not assume ploidy == 2
if ( alleles.size() == 0 )
return true;
Allele first = alleles.get(0);
for ( int i = 1; i < alleles.size(); i++ ) {
if ( !first.equals(alleles.get(i)) )
return false;
}
return true;
}
/**
* @return true if we're het (observed alleles differ)
*/
public boolean isHet() { return !isHom(); }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
*/
public boolean isNoCall() {
// TODO -- implement me
return false;
}
/**
* @return true if all alleles for this genotype are SNPs or reference
*/
public boolean isPointGenotype() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() && !allele.isSNP() )
return false;
}
return true;
}
/**
* @return true if this is a variant genotype, false if it's reference
*/
public boolean isVariant() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() )
return true;
}
return false;
}
/**
* @return the -1 * log10-based error estimate
*/
public double getNegLog10PError() { return negLog10PError; }
/**
* @return the sample name
*/
public String getSample() { return sample; }
/**
* Sets the sample name
*
* @param sample the sample name
*/
public void setSample(String sample) {
this.sample = sample;
}
/**
* @param ref the reference base
*
* @return this genotype as a variation
*/
// TODO -- implement me
// public Variation toVariation(char ref);
/**
* Sets the given attribute
*
* @param key the attribute key
* @param value the attribute value
*/
public void setAttribute(Object key, Object value) {
attributes.put(key, value);
}
/**
* @param key the attribute key
*
* @return the attribute value for the given key (or null if not set)
*/
public Object getAttribute(Object key) {
return attributes.get(key);
}
/**
* @return the attribute map
*/
public Map<Object, Object> getAttributes() {
return attributes;
}
}

View File

@ -1,46 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.utils.GenomeLoc;
/** A simple wrapper interface that allows for keeping any of the "main" (point mutation) or "associated" (indel)
* genotypes, or both, at any given reference position. This is the abstraction of "complete" genotyping information:
* one might have or might have not genotyped independently a base at the given position (point) or an indel right after
* that position. The query and getter methods allow clients to figure out what pieces of the genotyping information
* are actually available and to retrieve them. Behavior of the getters when corresponding queries return false is
* not bound by a contract but it is suggested that they return null or a "trivial" (reference) genotype.
*
* @author asivache
*
*/
public interface GenotypeList {
/** Returns true if point mutation genotyping information is available at the
* current position.
* @return
*/
boolean hasPointGenotype();
/** Returns true if indel genotyping information is available at the
* current position.
* @return
*/
boolean hasIndelGenotype();
/** Returns point mutation genotyping information if available;
* behavior is not specified in the case when there is no such information.
* @return
*/
Genotype getPointGenotype();
/** Returns indel genotyping information if available;
* behavior is not specified in the case when there is no such information.
* @return
*/
Genotype getIndelGenotype();
/** Returns position on the reference the stored genotyping data are associated with.
*
* @return
*/
GenomeLoc getLocation();
}

View File

@ -29,7 +29,7 @@ import net.sf.samtools.util.StringUtil;
*/
public class SAMPileupRecord implements Genotype, GenotypeList {
public class SAMPileupRecord {
private static final int NO_VARIANT = -1;
private static final int SNP_VARIANT = 0;
private static final int INSERTION_VARIANT = 1;
@ -428,13 +428,11 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
*
* @return reference allele, forward strand
*/
@Override
public String getFWDRefBases() {
return refBases;
}
@Override
public List<String> getFWDAlleles() {
return observedAlleles;
}
@ -500,34 +498,28 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
}
@Override
public double getVariantConfidence() {
return variantScore;
}
@Override
public boolean isBiallelic() {
return nNonref < 2;
}
@Override
public double getConsensusConfidence() {
return consensusScore;
}
@Override
public int length() {
return eventLength;
}
@Override
public boolean isIndelGenotype() {
return refBaseChar == '*';
}
@Override
public boolean isPointGenotype() {
return ! isIndelGenotype();
}
@ -537,8 +529,7 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* point genotype, this method returns null.
* @return
*/
@Override
public Genotype getIndelGenotype() {
public SAMPileupRecord getIndelGenotype() {
if ( isIndelGenotype() ) return this;
else return null;
}
@ -548,8 +539,7 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* indel genotype, this method returns null.
* @return
*/
@Override
public Genotype getPointGenotype() {
public SAMPileupRecord getPointGenotype() {
if ( isPointGenotype() ) return this;
else return null;
}
@ -558,7 +548,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* indel genotype is what it only has).
* @return
*/
@Override
public boolean hasIndelGenotype() {
return isIndelGenotype();
}
@ -567,13 +556,10 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* point genotype is what it only has.
* @return
*/
@Override
public boolean hasPointGenotype() {
return isPointGenotype();
}
@Override
public int compareTo(ReferenceOrderedDatum o) {
return getLocation().compareTo(o.getLocation());
}
@ -596,7 +582,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
rodName = name;
}
@Override
public boolean hasNext() {
return parser.hasNext();
}
@ -605,7 +590,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
* @return the next element in the iteration.
* @throws NoSuchElementException - iterator has no more elements.
*/
@Override
public SAMPileupRecord next() {
if (!this.hasNext()) throw new NoSuchElementException("SAMPileupRecord next called on iterator with no more elements");
// if ( z == 0 ) t = System.currentTimeMillis();
@ -621,7 +605,6 @@ public class SAMPileupRecord implements Genotype, GenotypeList {
}
@Override
public void remove() {
throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups");
}

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Arrays;
import java.util.List;
public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod {
public class SimpleIndelROD extends TabularROD implements VariationRod {
private boolean KGENOMES_FORMAT = false, checkedFormat = false;
@ -24,7 +24,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
*
* @return the reference base or bases, as a string
*/
@Override
public String getReference() {
return String.valueOf(getRef());
}
@ -53,13 +52,11 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
*
* @return VariantFrequency with the stored frequency
*/
@Override
public double getNonRefAlleleFrequency() {
return 0.0;
}
/** @return the VARIANT_TYPE of the current variant */
@Override
public VARIANT_TYPE getType() {
return isInsertion() ? VARIANT_TYPE.INSERTION : VARIANT_TYPE.DELETION;
}
@ -85,7 +82,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
*
* @return a char, representing the alternate base
*/
@Override
public char getAlternativeBaseForSNP() {
return getAltSnpFWD();
}
@ -95,7 +91,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
*
* @return a char, representing the alternate base
*/
@Override
public char getReferenceForSNP() {
return getRefSnpFWD();
}
@ -110,7 +105,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
*
* @return the log based error estimate
*/
@Override
public double getNegLog10PError() {
return getVariationConfidence();
}
@ -123,7 +117,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
*
* @return an alternate allele list
*/
@Override
public List<String> getAlternateAlleleList() {
List<String> ret = getAlleleList();
for (String val : ret) {
@ -139,7 +132,6 @@ public class SimpleIndelROD extends TabularROD implements Genotype, VariationRod
*
* @return an alternate allele list
*/
@Override
public List<String> getAlleleList() {
return this.getFWDAlleles();
}

View File

@ -30,7 +30,7 @@ import net.sf.picard.reference.ReferenceSequenceFileWalker;
public class rodSAMPileup extends BasicReferenceOrderedDatum implements GenotypeList {
public class rodSAMPileup extends BasicReferenceOrderedDatum {
// ----------------------------------------------------------------------
//
// Constructors
@ -71,22 +71,18 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
return b.toString();
}
@Override
public Genotype getIndelGenotype() {
public SAMPileupRecord getIndelGenotype() {
return indelGenotype;
}
@Override
public Genotype getPointGenotype() {
public SAMPileupRecord getPointGenotype() {
return pointGenotype;
}
@Override
public boolean hasIndelGenotype() {
return indelGenotype != null;
}
@Override
public boolean hasPointGenotype() {
return pointGenotype != null;
}
@ -108,7 +104,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
rodName = name;
}
@Override
public boolean hasNext() {
return parser.hasNext();
}
@ -117,7 +112,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
* @return the next element in the iteration.
* @throws NoSuchElementException - iterator has no more elements.
*/
@Override
public rodSAMPileup next() {
if (!this.hasNext()) throw new NoSuchElementException("rodSAMPileup next called on iterator with no more elements");
rodSAMPileup result = new rodSAMPileup(rodName);
@ -170,7 +164,6 @@ public class rodSAMPileup extends BasicReferenceOrderedDatum implements Genotype
}
@Override
public void remove() {
throw new UnsupportedOperationException("'remove' operation is not supported for file-backed SAM pileups");
}

View File

@ -24,7 +24,7 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
public boolean CONTINUE_AFTER_AN_ERROR = false;
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
ReadBackedPileup pileup = context.getPileup();
ReadBackedPileup pileup = context.getBasePileup();
SAMPileupRecord truePileup = getTruePileup( tracker );
if ( truePileup == null ) {
@ -105,9 +105,9 @@ public class ValidatingPileupWalker extends LocusWalker<Integer, ValidationStats
return null;
if( pileup.hasPointGenotype() )
return (SAMPileupRecord)pileup.getPointGenotype();
return pileup.getPointGenotype();
else if( pileup.hasIndelGenotype() )
return (SAMPileupRecord)pileup.getIndelGenotype();
return pileup.getIndelGenotype();
else
throw new StingException("Unsupported pileup type: " + pileup);
}

View File

@ -1,216 +0,0 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.refdata.Genotype;
import org.broadinstitute.sting.gatk.refdata.GenotypeList;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.Arrays;
import java.util.List;
/** Holds useful utility methods and auxiliary default classes for working with Genotype objects
*
* @author asivache
*
*/
public class GenotypeUtils {
public static enum VariantType {
POINT, INDEL;
// VariantType() {}
}
/** This method accepts rods that implement either Genotype or GenotypeList interface (all others will result in an exception). Variant
* (Genotype object) of the specified type (point mutation or indel) will be extracted from GenotypeList rod if such variant exists, or the rod itself
* will be typecasted and returned back if it implements Genotype and represents the specified variant type. If the last argument is false, then
* null will be returned in all other cases. If the last argument is true and either a) rod is a GenotypeList that lacks a call of the specified type, but call
* of the other type was made, or b) rod is a Genotype and its type is difference from variant_type, then it will be assumed that the implicit ref/ref call
* of the specified type also exists at this genomic position, and new object representing such default call will be returned. If rod argument
* is null, then this method safely (and silently) returns null.
*
* @param rod GenotypeList or Genotype to extract requested call from/upgrade to requested call
* @param variant_type type of the variant to extract (POINT mutations or INDEL)
* @param assume_implicit_ref_calls true if presence of only a call of different type means that ref/ref call of the requested type is implicitly present
* @return Genotyping call of the requested type or null if no explicit or implicit call was made.
*/
public static Genotype extractGenotype(ReferenceOrderedDatum rod, VariantType variant_type, boolean assume_implicit_ref_calls) {
if ( rod == null ) return null;
if ( rod instanceof GenotypeList ) {
GenotypeList rod_gl = (GenotypeList)rod;
switch ( variant_type ) {
case POINT :
if ( rod_gl.hasPointGenotype() ) return rod_gl.getPointGenotype();
else {
if ( assume_implicit_ref_calls && rod_gl.hasIndelGenotype() ) throw new StingException("Default (reference) implicit POINT genotype is not implemented yet");
else return null;
}
case INDEL:
if ( rod_gl.hasIndelGenotype() ) return rod_gl.getIndelGenotype();
else {
if ( assume_implicit_ref_calls && rod_gl.hasPointGenotype() ) return new DefaultIndelGenotype(rod_gl.getLocation());
else return null;
}
default: throw new StingException("Unrecognized variant type: "+variant_type);
}
} else if ( rod instanceof Genotype ) {
Genotype rod_g = (Genotype)rod;
switch ( variant_type ) {
case POINT:
if ( rod_g.isIndelGenotype() ) {
if ( assume_implicit_ref_calls ) throw new StingException("Default (reference) implicit POINT genotype is not implemented yet");
else return null;
} else return rod_g;
case INDEL:
if ( rod_g.isPointGenotype() ) {
if ( assume_implicit_ref_calls ) return new DefaultIndelGenotype(rod_g.getLocation());
else return null;
} else return rod_g;
default: throw new StingException("Unrecognized variant type: "+variant_type);
}
}
else throw new StingException("track "+rod.getName()+" is not a Genotype or GenotypeList");
}
public static boolean isHet(Variation var) {
if ( var instanceof Genotype )
return ((Genotype)var).isHet();
String genotype = Utils.join("",var.getAlleleList());
if ( genotype.length() < 1 )
return false;
char first = genotype.charAt(0);
for (char base : genotype.toCharArray()) {
if (base != first) return true;
}
return false;
}
/** This class represents a "default" indel-type genotype with homozygous reference (i.e. confidently no indel)
* call. All the interface methods are implemented and return consistent values. Use this class when working with
* genotyping data where absence of explicit indel call actually means that no evidence for an indel was observed
* (SAM pileup is one such example).
* @author asivache
*
*/
public static class DefaultIndelGenotype implements Genotype {
private GenomeLoc location;
private static final List<String> alleles = Arrays.asList("","") ;
private int confidence ;
/** Creates default indel genotype (ref/ref = no indel) at the specified position
* with default consensus confidence of 1000 (variant confidence is 0).
* @param l reference position to associate the genotyping call with
*/
public DefaultIndelGenotype(GenomeLoc l) {
this(l,1000);
}
/** Creates ref/ref (i.e. absense of indel) indel genotype at the specified position
* with the specified consensus confidence (variant confidence is 0).
* @param l reference position to associate the genotyping call with
*/
public DefaultIndelGenotype(GenomeLoc l, int confidence) {
location = l;
this.confidence = confidence;
}
@Override
public double getConsensusConfidence() {
return confidence;
}
@Override
public List<String> getFWDAlleles() {
return alleles;
}
@Override
public String getFWDRefBases() {
return alleles.get(0);
}
@Override
public GenomeLoc getLocation() {
return location;
}
@Override
public char getRef() {
return '*';
}
@Override
public double getVariantConfidence() {
return 0.0;
}
@Override
public boolean isBiallelic() {
return false;
}
@Override
public boolean isDeletion() {
return false;
}
@Override
public boolean isHet() {
return false;
}
@Override
public boolean isHom() {
return true;
}
@Override
public boolean isIndel() {
return false;
}
@Override
public boolean isIndelGenotype() {
return true;
}
@Override
public boolean isInsertion() {
return false;
}
@Override
public boolean isPointGenotype() {
return false;
}
@Override
public boolean isReference() {
return true;
}
@Override
public boolean isSNP() {
return false;
}
@Override
public int length() {
return (int)(location.getStop() - location.getStart() + 1);
}
@Override
public int compareTo(ReferenceOrderedDatum o) {
return location.compareTo(o.getLocation());
}
}
}

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.genotype.BasicVariation;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
/**
*
* @author aaron
*
* Class GenotypeUtilsTest
*
* a test class for the various function in the genotype utils class
*/
public class GenotypeUtilsTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
private static File vcfFile = new File(validationDataLocation + "vcfexample.vcf");
@BeforeClass
public static void beforeTests() {
try {
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
/**
* make sure that the variation is a het
*/
@Test
public void isHetTest() {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(0,1,2);
BasicVariation var = new BasicVariation("AA", "A", 0, loc,0.0);
Assert.assertTrue(!GenotypeUtils.isHet(var));
BasicVariation var2 = new BasicVariation("AG", "A", 0, loc,0.0);
Assert.assertTrue(GenotypeUtils.isHet(var2));
BasicVariation var3 = new BasicVariation("GG", "A", 0, loc,0.0);
Assert.assertTrue(!GenotypeUtils.isHet(var3));
}
}