Finalizing new Genotype object and associated routines

-- Builder now provides a depreciated log10pError function to make a new GQ value
-- Genotype is an abstract class, with most of the associated functions implemented here and not in the derived Fast and Slow versions
-- Lots of contracts
-- Bugfixes throughout
This commit is contained in:
Mark DePristo 2012-06-02 20:10:17 -04:00
parent 8b0a629a31
commit cebd37609c
11 changed files with 599 additions and 827 deletions

View File

@ -326,7 +326,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
else {
originalAttributes.put("OG",".");
}
Genotype imputedGenotype = new GenotypeBuilder(g.getSampleName(), alleles).GQ(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
Genotype imputedGenotype = new GenotypeBuilder(g.getSampleName(), alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
beagleVarCounts++;
}

View File

@ -378,7 +378,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
log10Error = genotype.getLikelihoods().getLog10GQ(phasedGenotype.getType());
return new GenotypeBuilder(genotype).alleles(phasedAlleles)
.GQ(log10Error)
.log10PError(log10Error)
.attributes(genotypeAttributes)
.phased(phasedGenotype.isPhased()).make();
}

View File

@ -115,7 +115,7 @@ class PhasingUtils {
if (phaseQual.PQ != null)
mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ);
Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).GQ(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make();
Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).log10PError(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make();
mergedGenotypes.add(mergedGt);
}

View File

@ -181,10 +181,12 @@ public class VCF3Codec extends AbstractVCFCodec {
// add it to the list
try {
final Genotype g = new GenotypeBuilder(sampleName)
.alleles(parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap))
.GQ(GTQual).filters(genotypeFilters).attributes(gtAttributes).phased(phased).make();
genotypes.add(g);
final GenotypeBuilder gb = new GenotypeBuilder(sampleName);
gb.alleles(parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap));
gb.log10PError(GTQual);
if ( genotypeFilters != null ) gb.filters(genotypeFilters);
gb.attributes(gtAttributes).phased(phased);
genotypes.add(gb.make());
} catch (TribbleException e) {
throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos);
}

View File

@ -214,7 +214,7 @@ public class VCFCodec extends AbstractVCFCodec {
// don't add missing values to the map
} else {
if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) {
gb.GQ(Double.valueOf(GTValueArray[i]));
gb.GQ((int)Math.round(Double.valueOf(GTValueArray[i])));
} else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) {
gb.AD(decodeInts(GTValueArray[i]));
} else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) {

View File

@ -77,8 +77,7 @@ import java.util.*;
* @author Mark DePristo
* @since 05/12
*/
public final class FastGenotype implements Genotype {
private final String sampleName;
public final class FastGenotype extends Genotype {
private final List<Allele> alleles;
private final boolean isPhased;
private final int GQ;
@ -87,8 +86,6 @@ public final class FastGenotype implements Genotype {
private final int[] PL;
private final Map<String, Object> extendedAttributes;
private GenotypeType type = null;
/**
* The only way to make one of these, for use by GenotypeBuilder only
*
@ -118,7 +115,7 @@ public final class FastGenotype implements Genotype {
final int[] AD,
final int[] PL,
final Map<String, Object> extendedAttributes) {
this.sampleName = sampleName;
super(sampleName);
this.alleles = alleles;
this.isPhased = isPhased;
this.GQ = GQ;
@ -130,333 +127,45 @@ public final class FastGenotype implements Genotype {
// ---------------------------------------------------------------------------------------------------------
//
// Basic getters
// Implmenting the abstract methods
//
// ---------------------------------------------------------------------------------------------------------
/**
* @return the alleles for this genotype. Cannot be null. May be empty
*/
@Ensures("result != null")
public List<Allele> getAlleles() {
@Override public List<Allele> getAlleles() {
return alleles;
}
/**
* Returns how many times allele appears in this genotype object?
*
* @param allele
* @return a value >= 0 indicating how many times the allele occurred in this sample's genotype
*/
@Requires("allele != null")
@Ensures("result >= 0")
public int countAllele(final Allele allele) {
int c = 0;
for ( final Allele a : alleles )
if ( a.equals(allele) )
c++;
return c;
}
/**
* Get the ith allele in this genotype
*
* @param i the ith allele, must be < the ploidy, starting with 0
* @return the allele at position i, which cannot be null
*/
@Requires("i >=0 && i < getPloidy()")
@Ensures("result != null")
public Allele getAllele(int i) {
if ( getType() == GenotypeType.UNAVAILABLE )
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
@Override public Allele getAllele(int i) {
return alleles.get(i);
}
/**
* Are the alleles phased w.r.t. the global phasing system?
*
* @return true if yes
*/
public boolean isPhased() {
@Override public boolean isPhased() {
return isPhased;
}
/**
* What is the ploidy of this sample?
*
* @return the ploidy of this genotype. 0 if the site is no-called.
*/
@Ensures("result >= 0")
public int getPloidy() {
return alleles.size();
}
/**
* @return the sequencing depth of this sample, or -1 if this value is missing
*/
@Ensures("result >= -1")
public int getDP() {
@Override public int getDP() {
return DP;
}
/**
* @return the count of reads, one for each allele in the surrounding Variant context,
* matching the corresponding allele, or null if this value is missing. MUST
* NOT BE MODIFIED!
*/
public int[] getAD() {
@Override public int[] getAD() {
return AD;
}
@Ensures("result != null")
public String getSampleName() {
return sampleName;
}
/**
* Returns a phred-scaled quality score, or -1 if none is available
* @return
*/
@Ensures("result >= -1")
public int getGQ() {
@Override public int getGQ() {
return GQ;
}
// ---------------------------------------------------------------------------------------------------------
//
// The type of this genotype
//
// ---------------------------------------------------------------------------------------------------------
/**
* @return the high-level type of this sample's genotype
*/
@Ensures({"type != null", "result != null"})
public GenotypeType getType() {
if ( type == null ) {
type = determineType();
}
return type;
@Override public List<String> getFilters() {
return (List<String>)getAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
}
/**
* Internal code to determine the type of the genotype from the alleles vector
* @return the type
*/
protected GenotypeType determineType() {
// TODO -- this code is slow and could be optimized for the diploid case
if ( alleles.isEmpty() )
return GenotypeType.UNAVAILABLE;
boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
for ( Allele allele : alleles ) {
if ( allele.isNoCall() )
sawNoCall = true;
else if ( observedAllele == null )
observedAllele = allele;
else if ( !allele.equals(observedAllele) )
sawMultipleAlleles = true;
}
if ( sawNoCall ) {
if ( observedAllele == null )
return GenotypeType.NO_CALL;
return GenotypeType.MIXED;
}
if ( observedAllele == null )
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
}
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
*/
public boolean isHom() { return isHomRef() || isHomVar(); }
/**
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
*/
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
/**
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
*/
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHet() { return getType() == GenotypeType.HET; }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
*/
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
/**
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
*/
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
/**
* @return true if this genotype is comprised of both calls and no-calls.
*/
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
/**
* @return true if the type of this genotype is set.
*/
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
// ------------------------------------------------------------------------------
//
// methods for getting genotype likelihoods for a genotype object, if present
//
// ------------------------------------------------------------------------------
/**
* @return Returns true if this Genotype has PL field values
*/
public boolean hasLikelihoods() {
return PL != null;
}
/**
* Convenience function that returns a string representation of the PL field of this
* genotype, or . if none is available.
*
* @return a non-null String representation for the PL of this sample
*/
@Ensures("result != null")
public String getLikelihoodsString() {
return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4;
}
/**
* Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing
* @return null or a GenotypesLikelihood object for this sample's PL field
*/
@Ensures({"hasLikelihoods() && result != null", "! hasLikelihoods() && result == null"})
public GenotypeLikelihoods getLikelihoods() {
return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null;
}
/**
* Unsafe low-level accessor the PL field itself, may be null.
*
* @return a pointer to the underlying PL data. MUST NOT BE MODIFIED!
*/
public int[] getPL() {
return PL;
}
// ---------------------------------------------------------------------------------------------------------
//
// Many different string representations
//
// ---------------------------------------------------------------------------------------------------------
/**
* Return a VCF-like string representation for the alleles of this genotype.
*
* Does not append the reference * marker on the alleles.
*
* @return a string representing the genotypes, or null if the type is unavailable.
*/
@Ensures("result != null || ! isAvailable()")
public String getGenotypeString() {
return getGenotypeString(true);
}
private final static String PHASED_ALLELE_SEPARATOR = "|";
private final static String UNPHASED_ALLELE_SEPARATOR = "/";
/**
* Return a VCF-like string representation for the alleles of this genotype.
*
* If ignoreRefState is true, will not append the reference * marker on the alleles.
*
* @return a string representing the genotypes, or null if the type is unavailable.
*/
@Ensures("result != null || ! isAvailable()")
public String getGenotypeString(boolean ignoreRefState) {
if ( alleles.size() == 0 )
return null;
// Notes:
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
}
/**
* Utility that returns a list of allele strings corresponding to the alleles in this sample
* @return
*/
private List<String> getAlleleStrings() {
List<String> al = new ArrayList<String>();
for ( Allele a : alleles )
al.add(a.getBaseString());
return al;
}
public String toString() {
return String.format("[%s %s%s%s%s%s%s]",
getSampleName(),
getGenotypeString(false),
toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, GQ),
toStringIfExists(VCFConstants.DEPTH_KEY, DP),
toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD),
toStringIfExists(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, PL),
sortedString(getExtendedAttributes()));
}
public String toBriefString() {
return String.format("%s:Q%d", getGenotypeString(false), getGQ());
}
// ---------------------------------------------------------------------------------------------------------
//
// Comparison operations
//
// ---------------------------------------------------------------------------------------------------------
/**
* comparable genotypes -> compareTo on the sample names
* @param genotype
* @return
*/
@Override
public int compareTo(final Genotype genotype) {
return getSampleName().compareTo(genotype.getSampleName());
public boolean filtersWereApplied() {
return hasAttribute(VCFConstants.GENOTYPE_FILTER_KEY);
}
public boolean sameGenotype(final Genotype other) {
return sameGenotype(other, true);
}
public boolean sameGenotype(final Genotype other, boolean ignorePhase) {
if (getPloidy() != other.getPloidy())
return false; // gotta have the same number of allele to be equal
// By default, compare the elements in the lists of alleles, element-by-element
Collection<Allele> thisAlleles = this.getAlleles();
Collection<Allele> otherAlleles = other.getAlleles();
if (ignorePhase) { // do not care about order, only identity of Alleles
thisAlleles = new TreeSet<Allele>(thisAlleles); //implemented Allele.compareTo()
otherAlleles = new TreeSet<Allele>(otherAlleles);
}
return thisAlleles.equals(otherAlleles);
@Override public int[] getPL() {
return PL;
}
// ---------------------------------------------------------------------------------------------------------
@ -465,136 +174,10 @@ public final class FastGenotype implements Genotype {
//
// ---------------------------------------------------------------------------------------------------------
/**
* Returns the extended attributes for this object
* @return is never null, but is often isEmpty()
*/
@Ensures("result != null")
public Map<String, Object> getExtendedAttributes() {
return extendedAttributes;
}
/**
* Is key associated with a value (even a null one) in the extended attributes?
*
* Note this will not return true for the inline attributes DP, GQ, AD, or PL
*
* @param key a non-null string key to check for an association
* @return true if key has a value in the extendedAttributes
*/
@Requires("key != null")
public boolean hasAttribute(final String key) {
return extendedAttributes.containsKey(key);
}
/**
* Get the extended attribute value associated with key, if possible
*
* @param key a non-null string key to fetch a value for
* @param defaultValue the value to return if key isn't in the extended attributes
* @return a value (potentially) null associated with key, or defaultValue if no association exists
*/
@Requires("key != null")
@Ensures("hasAttribute(key) || result == defaultValue")
public Object getAttribute(final String key, final Object defaultValue) {
return hasAttribute(key) ? extendedAttributes.get(key) : defaultValue;
}
@Override
public Object getAttribute(final String key) {
return getAttribute(key, null);
}
// TODO -- add getAttributesAsX interface here
// ------------------------------------------------------------------------------
//
// private utilities
//
// ------------------------------------------------------------------------------
/**
* a utility method for generating sorted strings from a map key set.
* @param c the map
* @param <T> the key type
* @param <V> the value type
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
*/
@Requires("c != null")
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
final List<T> t = new ArrayList<T>(c.keySet());
Collections.sort(t);
final List<String> pairs = new ArrayList<String>();
for (final T k : t) {
pairs.add(k + "=" + c.get(k));
}
return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
}
/**
* Returns a display name for field name with value v if this isn't -1. Otherwise returns ""
* @param name of the field ("AD")
* @param v the value of the field, or -1 if missing
* @return a non-null string for display if the field is not missing
*/
@Requires("name != null")
@Ensures("result != null")
private final static String toStringIfExists(final String name, final int v) {
return v == -1 ? "" : " " + name + " " + v;
}
/**
* Returns a display name for field name with values vs if this isn't null. Otherwise returns ""
* @param name of the field ("AD")
* @param vs the value of the field, or null if missing
* @return a non-null string for display if the field is not missing
*/
@Requires("name != null")
@Ensures("result != null")
private final static String toStringIfExists(final String name, final int[] vs) {
if ( vs == null )
return "";
else {
StringBuilder b = new StringBuilder();
b.append(" ").append(name).append(" ");
for ( int i = 0; i < vs.length; i++ ) {
if ( i != 0 ) b.append(",");
b.append(vs[i]);
}
return b.toString();
}
}
/**
* A list of genotype field keys corresponding to values we
* manage inline in the Genotype object. They must not appear in the
* extended attributes map
*/
public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
VCFConstants.GENOTYPE_KEY,
VCFConstants.GENOTYPE_QUALITY_KEY,
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_ALLELE_DEPTHS,
VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
/**
* Does the attribute map have a mapping involving a forbidden key (i.e.,
* one that's managed inline by this Genotypes object?
*
* @param attributes the extended attributes key
* @return
*/
private final static boolean hasForbiddenKey(final Map<String, Object> attributes) {
for ( final String forbidden : PRIMARY_KEYS)
if ( attributes.containsKey(forbidden) )
return true;
return false;
}
/**
* Is values a valid AD or PL field
* @param values
@ -607,44 +190,4 @@ public final class FastGenotype implements Genotype {
return false;
return true;
}
@Override public boolean hasPL() { return PL != null; }
@Override public boolean hasAD() { return AD != null; }
@Override public boolean hasGQ() { return GQ != -1; }
@Override public boolean hasDP() { return DP != -1; }
// TODO -- remove me
@Override public List<String> getFilters() {
return (List<String>)getAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList());
}
@Override public boolean isFiltered() { return false; }
@Override public boolean filtersWereApplied() { return false; }
@Override public boolean hasLog10PError() { return hasGQ(); }
@Override public double getLog10PError() { return getGQ() / -10.0; }
@Override public int getPhredScaledQual() { return getGQ(); }
@Override
public String getAttributeAsString(String key, String defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof String ) return (String)x;
return String.valueOf(x); // throws an exception if this isn't a string
}
@Override
public int getAttributeAsInt(String key, int defaultValue) {
Object x = getAttribute(key);
if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue;
if ( x instanceof Integer ) return (Integer)x;
return Integer.valueOf((String)x); // throws an exception if this isn't a string
}
@Override
public double getAttributeAsDouble(String key, double defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof Double ) return (Double)x;
return Double.valueOf((String)x); // throws an exception if this isn't a string
}
}

View File

@ -1,6 +1,9 @@
package org.broadinstitute.sting.utils.variantcontext;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -12,99 +15,555 @@ import java.util.*;
*
* @author Mark DePristo
*/
public interface Genotype extends Comparable<Genotype> {
List<Allele> getAlleles();
@Invariant({
"getAlleles() != null",
"getSampleName() != null",
"getPloidy() >= 0",
"! hasForbiddenKey(getExtendedAttributes())"})
public abstract class Genotype implements Comparable<Genotype> {
/**
* A list of genotype field keys corresponding to values we
* manage inline in the Genotype object. They must not appear in the
* extended attributes map
*/
public final static Collection<String> PRIMARY_KEYS = Arrays.asList(
VCFConstants.GENOTYPE_KEY,
VCFConstants.GENOTYPE_QUALITY_KEY,
VCFConstants.DEPTH_KEY,
VCFConstants.GENOTYPE_ALLELE_DEPTHS,
VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY);
int countAllele(final Allele allele);
public final static String PHASED_ALLELE_SEPARATOR = "|";
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
Allele getAllele(int i);
private final String sampleName;
private GenotypeType type = null;
boolean isPhased();
protected Genotype(final String sampleName) {
this.sampleName = sampleName;
}
int getPloidy();
protected Genotype(final String sampleName, final GenotypeType type) {
this.sampleName = sampleName;
this.type = type;
}
GenotypeType getType();
/**
* @return the alleles for this genotype. Cannot be null. May be empty
*/
@Ensures("result != null")
public abstract List<Allele> getAlleles();
boolean isHom();
/**
* Returns how many times allele appears in this genotype object?
*
* @param allele
* @return a value >= 0 indicating how many times the allele occurred in this sample's genotype
*/
@Requires("allele != null")
@Ensures("result >= 0")
public int countAllele(final Allele allele) {
int c = 0;
for ( final Allele a : getAlleles() )
if ( a.equals(allele) )
c++;
boolean isHomRef();
return c;
}
boolean isHomVar();
/**
* Get the ith allele in this genotype
*
* @param i the ith allele, must be < the ploidy, starting with 0
* @return the allele at position i, which cannot be null
*/
@Requires({"i >=0 && i < getPloidy()", "getType() != GenotypeType.UNAVAILABLE"})
@Ensures("result != null")
public abstract Allele getAllele(int i);
boolean isHet();
/**
* Are the alleles phased w.r.t. the global phasing system?
*
* @return true if yes
*/
public abstract boolean isPhased();
boolean isNoCall();
/**
* What is the ploidy of this sample?
*
* @return the ploidy of this genotype. 0 if the site is no-called.
*/
@Ensures("result >= 0")
public int getPloidy() {
return getAlleles().size();
}
boolean isCalled();
/**
* @return the sequencing depth of this sample, or -1 if this value is missing
*/
@Ensures("result >= -1")
public abstract int getDP();
boolean isMixed();
/**
* @return the count of reads, one for each allele in the surrounding Variant context,
* matching the corresponding allele, or null if this value is missing. MUST
* NOT BE MODIFIED!
*/
public abstract int[] getAD();
boolean isAvailable();
/**
* Returns the name associated with this sample.
*
* @return a non-null String
*/
@Ensures("result != null")
public String getSampleName() {
return sampleName;
}
//
// Useful methods for getting genotype likelihoods for a genotype object, if present
//
boolean hasLikelihoods();
/**
* Returns a phred-scaled quality score, or -1 if none is available
* @return
*/
@Ensures("result >= -1")
public abstract int getGQ();
String getLikelihoodsString();
/**
* Does the PL field have a value?
* @return true if there's a PL field value
*/
@Ensures("(result == false && getPL() == null) || (result == true && getPL() != null)")
public boolean hasPL() {
return getPL() != null;
}
GenotypeLikelihoods getLikelihoods();
/**
* Does the AD field have a value?
* @return true if there's a AD field value
*/
@Ensures("(result == false && getAD() == null) || (result == true && getAD() != null)")
public boolean hasAD() {
return getAD() != null;
}
String getGenotypeString();
/**
* Does the GQ field have a value?
* @return true if there's a GQ field value
*/
@Ensures("(result == false && getGQ() == -1) || (result == true && getGQ() >= 0)")
public boolean hasGQ() {
return getGQ() != -1;
}
String getGenotypeString(boolean ignoreRefState);
String toBriefString();
boolean sameGenotype(Genotype other);
boolean sameGenotype(Genotype other, boolean ignorePhase);
/**
* Does the DP field have a value?
* @return true if there's a DP field value
*/
@Ensures("(result == false && getDP() == -1) || (result == true && getDP() >= 0)")
public boolean hasDP() {
return getDP() != -1;
}
// ---------------------------------------------------------------------------------------------------------
//
// get routines to access context info fields
// The type of this genotype
//
// ---------------------------------------------------------------------------------------------------------
String getSampleName();
public int[] getPL();
public boolean hasPL();
/**
* @return the high-level type of this sample's genotype
*/
@Ensures({"type != null", "result != null"})
public GenotypeType getType() {
if ( type == null ) {
type = determineType();
}
return type;
}
public int getDP();
public boolean hasDP();
/**
* Internal code to determine the type of the genotype from the alleles vector
* @return the type
*/
@Requires("type == null") // we should never call if already calculated
protected GenotypeType determineType() {
// TODO -- this code is slow and could be optimized for the diploid case
final List<Allele> alleles = getAlleles();
if ( alleles.isEmpty() )
return GenotypeType.UNAVAILABLE;
public int[] getAD();
public boolean hasAD();
boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
public int getGQ();
public boolean hasGQ();
for ( final Allele allele : alleles ) {
if ( allele.isNoCall() )
sawNoCall = true;
else if ( observedAllele == null )
observedAllele = allele;
else if ( !allele.equals(observedAllele) )
sawMultipleAlleles = true;
}
List<String> getFilters();
boolean isFiltered();
boolean filtersWereApplied();
if ( sawNoCall ) {
if ( observedAllele == null )
return GenotypeType.NO_CALL;
return GenotypeType.MIXED;
}
if ( observedAllele == null )
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
}
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
*/
public boolean isHom() { return isHomRef() || isHomVar(); }
/**
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
*/
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
/**
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
*/
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHet() { return getType() == GenotypeType.HET; }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
*/
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
/**
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
*/
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
/**
* @return true if this genotype is comprised of both calls and no-calls.
*/
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
/**
* @return true if the type of this genotype is set.
*/
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
// ------------------------------------------------------------------------------
//
// methods for getting genotype likelihoods for a genotype object, if present
//
// ------------------------------------------------------------------------------
/**
* @return Returns true if this Genotype has PL field values
*/
public boolean hasLikelihoods() {
return getPL() != null;
}
/**
* Convenience function that returns a string representation of the PL field of this
* genotype, or . if none is available.
*
* @return a non-null String representation for the PL of this sample
*/
@Ensures("result != null")
public String getLikelihoodsString() {
return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4;
}
/**
* Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing
* @return null or a GenotypesLikelihood object for this sample's PL field
*/
@Ensures({"hasLikelihoods() && result != null", "! hasLikelihoods() && result == null"})
public GenotypeLikelihoods getLikelihoods() {
return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null;
}
/**
* Unsafe low-level accessor the PL field itself, may be null.
*
* @return a pointer to the underlying PL data. MUST NOT BE MODIFIED!
*/
public abstract int[] getPL();
// ---------------------------------------------------------------------------------------------------------
//
// Many different string representations
//
// ---------------------------------------------------------------------------------------------------------
/**
* Return a VCF-like string representation for the alleles of this genotype.
*
* Does not append the reference * marker on the alleles.
*
* @return a string representing the genotypes, or null if the type is unavailable.
*/
@Ensures("result != null || ! isAvailable()")
public String getGenotypeString() {
return getGenotypeString(true);
}
/**
* Return a VCF-like string representation for the alleles of this genotype.
*
* If ignoreRefState is true, will not append the reference * marker on the alleles.
*
* @return a string representing the genotypes, or null if the type is unavailable.
*/
@Ensures("result != null || ! isAvailable()")
public String getGenotypeString(boolean ignoreRefState) {
if ( getPloidy() == 0 )
return null;
// Notes:
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
}
/**
* Utility that returns a list of allele strings corresponding to the alleles in this sample
* @return
*/
protected List<String> getAlleleStrings() {
final List<String> al = new ArrayList<String>(getPloidy());
for ( Allele a : getAlleles() )
al.add(a.getBaseString());
return al;
}
public String toString() {
return String.format("[%s %s%s%s%s%s%s]",
getSampleName(),
getGenotypeString(false),
toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()),
toStringIfExists(VCFConstants.DEPTH_KEY, getDP()),
toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()),
toStringIfExists(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, getPL()),
sortedString(getExtendedAttributes()));
}
public String toBriefString() {
return String.format("%s:Q%d", getGenotypeString(false), getGQ());
}
// ---------------------------------------------------------------------------------------------------------
//
// Comparison operations
//
// ---------------------------------------------------------------------------------------------------------
/**
* comparable genotypes -> compareTo on the sample names
* @param genotype
* @return
*/
@Override
public int compareTo(final Genotype genotype) {
return getSampleName().compareTo(genotype.getSampleName());
}
public boolean sameGenotype(final Genotype other) {
return sameGenotype(other, true);
}
public boolean sameGenotype(final Genotype other, boolean ignorePhase) {
if (getPloidy() != other.getPloidy())
return false; // gotta have the same number of allele to be equal
// By default, compare the elements in the lists of alleles, element-by-element
Collection<Allele> thisAlleles = this.getAlleles();
Collection<Allele> otherAlleles = other.getAlleles();
if (ignorePhase) { // do not care about order, only identity of Alleles
thisAlleles = new TreeSet<Allele>(thisAlleles); //implemented Allele.compareTo()
otherAlleles = new TreeSet<Allele>(otherAlleles);
}
return thisAlleles.equals(otherAlleles);
}
// ---------------------------------------------------------------------------------------------------------
//
// get routines for extended attributes
//
// ---------------------------------------------------------------------------------------------------------
/**
* Returns the extended attributes for this object
* @return is never null, but is often isEmpty()
*/
@Ensures({"result != null", "! hasForbiddenKey(result)"})
public abstract Map<String, Object> getExtendedAttributes();
/**
* Is key associated with a value (even a null one) in the extended attributes?
*
* Note this will not return true for the inline attributes DP, GQ, AD, or PL
*
* @param key a non-null string key to check for an association
* @return true if key has a value in the extendedAttributes
*/
@Requires("key != null")
public boolean hasAttribute(final String key) {
return getExtendedAttributes().containsKey(key);
}
/**
* Get the extended attribute value associated with key, if possible
*
* @param key a non-null string key to fetch a value for
* @param defaultValue the value to return if key isn't in the extended attributes
* @return a value (potentially) null associated with key, or defaultValue if no association exists
*/
@Requires("key != null")
@Ensures("hasAttribute(key) || result == defaultValue")
public Object getAttribute(final String key, final Object defaultValue) {
return hasAttribute(key) ? getExtendedAttributes().get(key) : defaultValue;
}
/**
* Same as #getAttribute with a null default
*
* @param key
* @return
*/
public Object getAttribute(final String key) {
return getAttribute(key, null);
}
/**
*
* @return
*/
@Ensures({"result != null", "filtersWereApplied() || result.isEmpty()"})
public abstract List<String> getFilters();
@Ensures({"result != getFilters().isEmpty()"})
public boolean isFiltered() {
return ! getFilters().isEmpty();
}
@Ensures("result == true || getFilters().isEmpty()")
public abstract boolean filtersWereApplied();
@Deprecated public boolean hasLog10PError() { return hasGQ(); }
@Deprecated public double getLog10PError() { return getGQ() / -10.0; }
@Deprecated public int getPhredScaledQual() { return getGQ(); }
@Deprecated
boolean hasLog10PError();
public String getAttributeAsString(String key, String defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof String ) return (String)x;
return String.valueOf(x); // throws an exception if this isn't a string
}
@Deprecated
double getLog10PError();
public int getAttributeAsInt(String key, int defaultValue) {
Object x = getAttribute(key);
if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue;
if ( x instanceof Integer ) return (Integer)x;
return Integer.valueOf((String)x); // throws an exception if this isn't a string
}
@Deprecated
int getPhredScaledQual();
public double getAttributeAsDouble(String key, double defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
if ( x instanceof Double ) return (Double)x;
return Double.valueOf((String)x); // throws an exception if this isn't a string
}
public boolean hasAttribute(final String key);
// TODO -- add getAttributesAsX interface here
Map<String, Object> getExtendedAttributes();
// ------------------------------------------------------------------------------
//
// private utilities
//
// ------------------------------------------------------------------------------
Object getAttribute(String key);
Object getAttribute(final String key, final Object defaultValue);
/**
* a utility method for generating sorted strings from a map key set.
* @param c the map
* @param <T> the key type
* @param <V> the value type
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
*/
@Requires("c != null")
protected static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
@Deprecated
String getAttributeAsString(String key, String defaultValue);
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
final List<T> t = new ArrayList<T>(c.keySet());
Collections.sort(t);
@Deprecated
int getAttributeAsInt(String key, int defaultValue);
final List<String> pairs = new ArrayList<String>();
for (final T k : t) {
pairs.add(k + "=" + c.get(k));
}
@Deprecated
double getAttributeAsDouble(String key, double defaultValue);
return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
}
/**
* Returns a display name for field name with value v if this isn't -1. Otherwise returns ""
* @param name of the field ("AD")
* @param v the value of the field, or -1 if missing
* @return a non-null string for display if the field is not missing
*/
@Requires("name != null")
@Ensures("result != null")
protected final static String toStringIfExists(final String name, final int v) {
return v == -1 ? "" : " " + name + " " + v;
}
/**
* Returns a display name for field name with values vs if this isn't null. Otherwise returns ""
* @param name of the field ("AD")
* @param vs the value of the field, or null if missing
* @return a non-null string for display if the field is not missing
*/
@Requires("name != null")
@Ensures("result != null")
protected final static String toStringIfExists(final String name, final int[] vs) {
if ( vs == null )
return "";
else {
StringBuilder b = new StringBuilder();
b.append(" ").append(name).append(" ");
for ( int i = 0; i < vs.length; i++ ) {
if ( i != 0 ) b.append(",");
b.append(vs[i]);
}
return b.toString();
}
}
/**
* Does the attribute map have a mapping involving a forbidden key (i.e.,
* one that's managed inline by this Genotypes object?
*
* @param attributes the extended attributes key
* @return
*/
protected final static boolean hasForbiddenKey(final Map<String, Object> attributes) {
for ( final String forbidden : PRIMARY_KEYS)
if ( attributes.containsKey(forbidden) )
return true;
return false;
}
}

View File

@ -168,11 +168,14 @@ public final class GenotypeBuilder {
} else {
final Map<String, Object> attributes = new LinkedHashMap<String, Object>();
if ( extendedAttributes != null ) attributes.putAll(extendedAttributes);
final double log10PError = GQ != -1 ? GQ / -10.0 : SlowGenotype.NO_LOG10_PERROR;
final double log10PError = GQ == -1 ? SlowGenotype.NO_LOG10_PERROR : (GQ == 0 ? 0 : GQ / -10.0);
Set<String> filters = Collections.emptySet();
if ( extendedAttributes != null && extendedAttributes.containsKey(VCFConstants.GENOTYPE_FILTER_KEY) ) {
filters = new LinkedHashSet<String>((List<String>)extendedAttributes.get(VCFConstants.GENOTYPE_FILTER_KEY));
Set<String> filters = null;
if ( extendedAttributes != null && extendedAttributes.containsKey(VCFConstants.GENOTYPE_FILTER_KEY) )
{
final Object f = extendedAttributes.get(VCFConstants.GENOTYPE_FILTER_KEY);
if ( f != null )
filters = new LinkedHashSet<String>((List<String>)f);
attributes.remove(VCFConstants.GENOTYPE_FILTER_KEY);
}
@ -216,8 +219,20 @@ public final class GenotypeBuilder {
return this;
}
public GenotypeBuilder GQ(final double GQ) {
return GQ((int)Math.round(GQ));
/**
* Adaptor interface from the pLog10Error system.
*
* Will be retired when
*
* @param pLog10Error
* @return
*/
@Deprecated
public GenotypeBuilder log10PError(final double pLog10Error) {
if ( pLog10Error == CommonInfo.NO_LOG10_PERROR )
return GQ(-1);
else
return GQ((int)Math.round(pLog10Error * -10));
}
public GenotypeBuilder noGQ() { GQ = -1; return this; }
@ -270,9 +285,17 @@ public final class GenotypeBuilder {
return this;
}
/**
* Tells this builder to make a Genotype object that has had filters applied,
* which may be empty (passes) or have some value indicating the reasons
* why it's been filtered.
*
* @param filters non-null list of filters. empty list => PASS
* @return this builder
*/
@Requires("filters != null")
public GenotypeBuilder filters(final List<String> filters) {
attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters);
// TODO
return this;
}

View File

@ -36,22 +36,16 @@ import java.util.*;
*
* @author Mark DePristo
*/
public class SlowGenotype implements Genotype {
public final static String PHASED_ALLELE_SEPARATOR = "|";
public final static String UNPHASED_ALLELE_SEPARATOR = "/";
@Deprecated
public class SlowGenotype extends Genotype {
protected CommonInfo commonInfo;
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
protected List<Allele> alleles = null; // new ArrayList<Allele>();
protected GenotypeType type = null;
protected List<Allele> alleles = null;
protected boolean isPhased = false;
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased) {
// this(sampleName, alleles, log10PError, filters, attributes, isPhased, null);
// }
protected SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, Set<String> filters, Map<String, Object> attributes, boolean isPhased, double[] log10Likelihoods) {
super(sampleName);
if ( alleles == null || alleles.isEmpty() )
this.alleles = Collections.emptyList();
else
@ -63,175 +57,27 @@ public class SlowGenotype implements Genotype {
validate();
}
// /**
// * Creates a new Genotype for sampleName with genotype according to alleles.
// * @param sampleName
// * @param alleles
// * @param log10PError the confidence in these alleles
// * @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known
// */
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError, double[] log10Likelihoods) {
// this(sampleName, alleles, log10PError, null, null, false, log10Likelihoods);
// }
//
// public SlowGenotype(String sampleName, List<Allele> alleles, double log10PError) {
// this(sampleName, alleles, log10PError, null, null, false);
// }
//
// public SlowGenotype(String sampleName, List<Allele> alleles) {
// this(sampleName, alleles, NO_LOG10_PERROR, null, null, false);
// }
//
// public SlowGenotype(String sampleName, SlowGenotype parent) {
// this(sampleName, parent.getAlleles(), parent.getLog10PError(), parent.getFilters(), parent.getAttributes(), parent.isPhased());
// }
//
//
//
// // ---------------------------------------------------------------------------------------------------------
// //
// // Partial-cloning routines (because Genotype is immutable).
// //
// // ---------------------------------------------------------------------------------------------------------
//
// public static SlowGenotype modifyName(SlowGenotype g, String name) {
// return new SlowGenotype(name, g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
// }
//
// public static SlowGenotype modifyAttributes(SlowGenotype g, Map<String, Object> attributes) {
// return new SlowGenotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased());
// }
//
// public static SlowGenotype modifyAlleles(SlowGenotype g, List<Allele> alleles) {
// return new SlowGenotype(g.getSampleName(), alleles, g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased());
// }
/**
* @return the alleles for this genotype
*/
public List<Allele> getAlleles() {
@Override public List<Allele> getAlleles() {
return alleles;
}
public int countAllele(final Allele allele) {
int c = 0;
for ( final Allele a : alleles )
if ( a.equals(allele) )
c++;
return c;
}
public Allele getAllele(int i) {
@Override public Allele getAllele(int i) {
if ( getType() == GenotypeType.UNAVAILABLE )
throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype");
return alleles.get(i);
}
public boolean isPhased() { return isPhased; }
/**
* @return the ploidy of this genotype. 0 if the site is no-called.
*/
public int getPloidy() {
return alleles.size();
}
public GenotypeType getType() {
if ( type == null ) {
type = determineType();
}
return type;
}
protected GenotypeType determineType() {
if ( alleles.size() == 0 )
return GenotypeType.UNAVAILABLE;
boolean sawNoCall = false, sawMultipleAlleles = false;
Allele observedAllele = null;
for ( Allele allele : alleles ) {
if ( allele.isNoCall() )
sawNoCall = true;
else if ( observedAllele == null )
observedAllele = allele;
else if ( !allele.equals(observedAllele) )
sawMultipleAlleles = true;
}
if ( sawNoCall ) {
if ( observedAllele == null )
return GenotypeType.NO_CALL;
return GenotypeType.MIXED;
}
if ( observedAllele == null )
throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null");
return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR;
}
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false.
*/
public boolean isHom() { return isHomRef() || isHomVar(); }
/**
* @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false.
*/
public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; }
/**
* @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false.
*/
public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false.
*/
public boolean isHet() { return getType() == GenotypeType.HET; }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false.
*/
public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; }
/**
* @return true if this genotype is comprised of any alleles that are not no-calls (even if some are).
*/
public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; }
/**
* @return true if this genotype is comprised of both calls and no-calls.
*/
public boolean isMixed() { return getType() == GenotypeType.MIXED; }
/**
* @return true if the type of this genotype is set.
*/
public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; }
@Override public boolean isPhased() { return isPhased; }
//
// Useful methods for getting genotype likelihoods for a genotype object, if present
//
public boolean hasLikelihoods() {
@Override public boolean hasLikelihoods() {
return (hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) ||
(hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4));
}
/**
* Convenience function that returns a string representation of the PL field of this
* genotype, or . if none is available.
*
* @return
*/
public String getLikelihoodsString() {
GenotypeLikelihoods gl = getLikelihoods();
return gl == null ? VCFConstants.MISSING_VALUE_v4 : gl.toString();
}
public GenotypeLikelihoods getLikelihoods() {
@Override public GenotypeLikelihoods getLikelihoods() {
GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true);
if ( x != null )
return x;
@ -253,144 +99,40 @@ public class SlowGenotype implements Genotype {
else return null;
}
public void validate() {
private final void validate() {
if ( alleles.size() == 0) return;
// int nNoCalls = 0;
for ( Allele allele : alleles ) {
if ( allele == null )
throw new IllegalArgumentException("BUG: allele cannot be null in Genotype");
// nNoCalls += allele.isNoCall() ? 1 : 0;
}
// Technically, the spec does allow for the below case so this is not an illegal state
//if ( nNoCalls > 0 && nNoCalls != alleles.size() )
// throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
}
public String getGenotypeString() {
return getGenotypeString(true);
}
public String getGenotypeString(boolean ignoreRefState) {
if ( alleles.size() == 0 )
return null;
// Notes:
// 1. Make sure to use the appropriate separator depending on whether the genotype is phased
// 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele)
// 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course)
return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR,
ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles())));
}
private List<String> getAlleleStrings() {
List<String> al = new ArrayList<String>();
for ( Allele a : alleles )
al.add(a.getBaseString());
return al;
}
public String toString() {
int Q = getPhredScaledQual();
return String.format("[%s %s Q%s %s]", getSampleName(), getGenotypeString(false),
Q == -1 ? "." : String.format("%2d",Q), sortedString(getExtendedAttributes()));
}
public String toBriefString() {
return String.format("%s:Q%d", getGenotypeString(false), getPhredScaledQual());
}
public boolean sameGenotype(Genotype other) {
return sameGenotype(other, true);
}
public boolean sameGenotype(Genotype other, boolean ignorePhase) {
if (getPloidy() != other.getPloidy())
return false; // gotta have the same number of allele to be equal
// By default, compare the elements in the lists of alleles, element-by-element
Collection<Allele> thisAlleles = this.getAlleles();
Collection<Allele> otherAlleles = other.getAlleles();
if (ignorePhase) { // do not care about order, only identity of Alleles
thisAlleles = new TreeSet<Allele>(thisAlleles); //implemented Allele.compareTo()
otherAlleles = new TreeSet<Allele>(otherAlleles);
}
return thisAlleles.equals(otherAlleles);
}
/**
* a utility method for generating sorted strings from a map key set.
* @param c the map
* @param <T> the key type
* @param <V> the value type
* @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys
*/
private static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
// NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS
List<T> t = new ArrayList<T>(c.keySet());
Collections.sort(t);
List<String> pairs = new ArrayList<String>();
for (T k : t) {
pairs.add(k + "=" + c.get(k));
}
return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}";
}
// ---------------------------------------------------------------------------------------------------------
//
// get routines to access context info fields
//
// ---------------------------------------------------------------------------------------------------------
public String getSampleName() { return commonInfo.getName(); }
public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
public boolean isFiltered() { return commonInfo.isFiltered(); }
public boolean isNotFiltered() { return commonInfo.isNotFiltered(); }
public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
public double getLog10PError() { return commonInfo.getLog10PError(); }
/**
* Returns a phred-scaled quality score, or -1 if none is available
* @return
*/
public int getPhredScaledQual() {
if ( commonInfo.hasLog10PError() )
return (int)Math.round(commonInfo.getPhredScaledQual());
else
return -1;
}
@Override public List<String> getFilters() { return new ArrayList<String>(commonInfo.getFilters()); }
@Override public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); }
@Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); }
@Override public double getLog10PError() { return commonInfo.getLog10PError(); }
@Override
public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); }
@Override
public Object getAttribute(String key) { return commonInfo.getAttribute(key); }
@Override
public Object getAttribute(String key, Object defaultValue) {
return commonInfo.getAttribute(key, defaultValue);
}
public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
/**
* comparable genotypes -> compareTo on the sample names
* @param genotype
* @return
*/
@Override
public int compareTo(final Genotype genotype) {
return getSampleName().compareTo(genotype.getSampleName());
}
// public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); }
// public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); }
// public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); }
// public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); }
@Override
public int[] getPL() {
@ -427,12 +169,15 @@ public class SlowGenotype implements Genotype {
@Override
public int getGQ() {
return getPhredScaledQual();
if ( commonInfo.hasLog10PError() )
return (int)Math.round(commonInfo.getPhredScaledQual());
else
return -1;
}
@Override
public boolean hasGQ() {
return hasLikelihoods();
return hasLog10PError();
}
@Override

View File

@ -1287,7 +1287,7 @@ public class VariantContextUtils {
GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2)));
if ( numNewAltAlleles != 0 ) gb.GQ(-10 * GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods));
}
newGTs.add(gb.make());
}

View File

@ -68,12 +68,12 @@ public class VariantContextUtilsUnitTest extends BaseTest {
}
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double... pls) {
return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).GQ(log10pError).PL(pls).make();
return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).log10PError(log10pError).PL(pls).make();
}
private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) {
return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).GQ(log10pError).make();
return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).log10PError(log10pError).make();
}
private VariantContext makeVC(String source, List<Allele> alleles) {
@ -524,7 +524,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
Genotype expectedValue = expected.get(value.getSampleName());
Assert.assertEquals(value.getAlleles(), expectedValue.getAlleles(), "Alleles in Genotype aren't equal");
Assert.assertEquals(value.getLog10PError(), expectedValue.getLog10PError(), "GQ values aren't equal");
Assert.assertEquals(value.getGQ(), expectedValue.getGQ(), "GQ values aren't equal");
Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods(), "Either both have likelihoods or both not");
if ( value.hasLikelihoods() )
Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector(), "Genotype likelihoods aren't equal");