diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java index 0288a954b..f9ef52857 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java @@ -326,7 +326,7 @@ public class BeagleOutputToVCFWalker extends RodWalker { else { originalAttributes.put("OG","."); } - Genotype imputedGenotype = new GenotypeBuilder(g.getSampleName(), alleles).GQ(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make(); + Genotype imputedGenotype = new GenotypeBuilder(g.getSampleName(), alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make(); if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) { beagleVarCounts++; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index dd4c3ac70..1f9e07382 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -378,7 +378,7 @@ public class PhaseByTransmission extends RodWalker, HashMa log10Error = genotype.getLikelihoods().getLog10GQ(phasedGenotype.getType()); return new GenotypeBuilder(genotype).alleles(phasedAlleles) - .GQ(log10Error) + .log10PError(log10Error) .attributes(genotypeAttributes) .phased(phasedGenotype.isPhased()).make(); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java index 142b99344..4bf24cf86 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingUtils.java @@ -115,7 +115,7 @@ class PhasingUtils { if (phaseQual.PQ != null) mergedGtAttribs.put(ReadBackedPhasingWalker.PQ_KEY, phaseQual.PQ); - Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).GQ(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make(); + Genotype mergedGt = new GenotypeBuilder(gt1.getSampleName(), mergedAllelesForSample).log10PError(mergedGQ).attributes(mergedGtAttribs).phased(phaseQual.isPhased).make(); mergedGenotypes.add(mergedGt); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java index 50abd5d6a..5bf8d4557 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCF3Codec.java @@ -181,10 +181,12 @@ public class VCF3Codec extends AbstractVCFCodec { // add it to the list try { - final Genotype g = new GenotypeBuilder(sampleName) - .alleles(parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)) - .GQ(GTQual).filters(genotypeFilters).attributes(gtAttributes).phased(phased).make(); - genotypes.add(g); + final GenotypeBuilder gb = new GenotypeBuilder(sampleName); + gb.alleles(parseGenotypeAlleles(GTValueArray[genotypeAlleleLocation], alleles, alleleMap)); + gb.log10PError(GTQual); + if ( genotypeFilters != null ) gb.filters(genotypeFilters); + gb.attributes(gtAttributes).phased(phased); + genotypes.add(gb.make()); } catch (TribbleException e) { throw new TribbleException.InternalCodecException(e.getMessage() + ", at position " + chr+":"+pos); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index 935d401a3..79feb13ed 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -214,7 +214,7 @@ public class VCFCodec extends AbstractVCFCodec { // don't add missing values to the map } else { if (gtKey.equals(VCFConstants.GENOTYPE_QUALITY_KEY)) { - gb.GQ(Double.valueOf(GTValueArray[i])); + gb.GQ((int)Math.round(Double.valueOf(GTValueArray[i]))); } else if (gtKey.equals(VCFConstants.GENOTYPE_ALLELE_DEPTHS)) { gb.AD(decodeInts(GTValueArray[i])); } else if (gtKey.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY)) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java index d76682a10..80d5c6d7d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/FastGenotype.java @@ -77,8 +77,7 @@ import java.util.*; * @author Mark DePristo * @since 05/12 */ -public final class FastGenotype implements Genotype { - private final String sampleName; +public final class FastGenotype extends Genotype { private final List alleles; private final boolean isPhased; private final int GQ; @@ -87,8 +86,6 @@ public final class FastGenotype implements Genotype { private final int[] PL; private final Map extendedAttributes; - private GenotypeType type = null; - /** * The only way to make one of these, for use by GenotypeBuilder only * @@ -118,7 +115,7 @@ public final class FastGenotype implements Genotype { final int[] AD, final int[] PL, final Map extendedAttributes) { - this.sampleName = sampleName; + super(sampleName); this.alleles = alleles; this.isPhased = isPhased; this.GQ = GQ; @@ -130,333 +127,45 @@ public final class FastGenotype implements Genotype { // --------------------------------------------------------------------------------------------------------- // - // Basic getters + // Implmenting the abstract methods // // --------------------------------------------------------------------------------------------------------- - /** - * @return the alleles for this genotype. Cannot be null. May be empty - */ - @Ensures("result != null") - public List getAlleles() { + @Override public List getAlleles() { return alleles; } - /** - * Returns how many times allele appears in this genotype object? - * - * @param allele - * @return a value >= 0 indicating how many times the allele occurred in this sample's genotype - */ - @Requires("allele != null") - @Ensures("result >= 0") - public int countAllele(final Allele allele) { - int c = 0; - for ( final Allele a : alleles ) - if ( a.equals(allele) ) - c++; - - return c; - } - - /** - * Get the ith allele in this genotype - * - * @param i the ith allele, must be < the ploidy, starting with 0 - * @return the allele at position i, which cannot be null - */ - @Requires("i >=0 && i < getPloidy()") - @Ensures("result != null") - public Allele getAllele(int i) { - if ( getType() == GenotypeType.UNAVAILABLE ) - throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype"); + @Override public Allele getAllele(int i) { return alleles.get(i); } - /** - * Are the alleles phased w.r.t. the global phasing system? - * - * @return true if yes - */ - public boolean isPhased() { + @Override public boolean isPhased() { return isPhased; } - /** - * What is the ploidy of this sample? - * - * @return the ploidy of this genotype. 0 if the site is no-called. - */ - @Ensures("result >= 0") - public int getPloidy() { - return alleles.size(); - } - - /** - * @return the sequencing depth of this sample, or -1 if this value is missing - */ - @Ensures("result >= -1") - public int getDP() { + @Override public int getDP() { return DP; } - /** - * @return the count of reads, one for each allele in the surrounding Variant context, - * matching the corresponding allele, or null if this value is missing. MUST - * NOT BE MODIFIED! - */ - public int[] getAD() { + @Override public int[] getAD() { return AD; } - @Ensures("result != null") - public String getSampleName() { - return sampleName; - } - - /** - * Returns a phred-scaled quality score, or -1 if none is available - * @return - */ - @Ensures("result >= -1") - public int getGQ() { + @Override public int getGQ() { return GQ; } - // --------------------------------------------------------------------------------------------------------- - // - // The type of this genotype - // - // --------------------------------------------------------------------------------------------------------- - - /** - * @return the high-level type of this sample's genotype - */ - @Ensures({"type != null", "result != null"}) - public GenotypeType getType() { - if ( type == null ) { - type = determineType(); - } - return type; + @Override public List getFilters() { + return (List)getAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList()); } - /** - * Internal code to determine the type of the genotype from the alleles vector - * @return the type - */ - protected GenotypeType determineType() { - // TODO -- this code is slow and could be optimized for the diploid case - if ( alleles.isEmpty() ) - return GenotypeType.UNAVAILABLE; - - boolean sawNoCall = false, sawMultipleAlleles = false; - Allele observedAllele = null; - - for ( Allele allele : alleles ) { - if ( allele.isNoCall() ) - sawNoCall = true; - else if ( observedAllele == null ) - observedAllele = allele; - else if ( !allele.equals(observedAllele) ) - sawMultipleAlleles = true; - } - - if ( sawNoCall ) { - if ( observedAllele == null ) - return GenotypeType.NO_CALL; - return GenotypeType.MIXED; - } - - if ( observedAllele == null ) - throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null"); - - return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR; - } - - /** - * @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false. - */ - public boolean isHom() { return isHomRef() || isHomVar(); } - - /** - * @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false. - */ - public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; } - - /** - * @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false. - */ - public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; } - - /** - * @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false. - */ - public boolean isHet() { return getType() == GenotypeType.HET; } - - /** - * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false. - */ - public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; } - - /** - * @return true if this genotype is comprised of any alleles that are not no-calls (even if some are). - */ - public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; } - - /** - * @return true if this genotype is comprised of both calls and no-calls. - */ - public boolean isMixed() { return getType() == GenotypeType.MIXED; } - - /** - * @return true if the type of this genotype is set. - */ - public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; } - - // ------------------------------------------------------------------------------ - // - // methods for getting genotype likelihoods for a genotype object, if present - // - // ------------------------------------------------------------------------------ - - /** - * @return Returns true if this Genotype has PL field values - */ - public boolean hasLikelihoods() { - return PL != null; - } - - /** - * Convenience function that returns a string representation of the PL field of this - * genotype, or . if none is available. - * - * @return a non-null String representation for the PL of this sample - */ - @Ensures("result != null") - public String getLikelihoodsString() { - return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4; - } - - /** - * Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing - * @return null or a GenotypesLikelihood object for this sample's PL field - */ - @Ensures({"hasLikelihoods() && result != null", "! hasLikelihoods() && result == null"}) - public GenotypeLikelihoods getLikelihoods() { - return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null; - } - - /** - * Unsafe low-level accessor the PL field itself, may be null. - * - * @return a pointer to the underlying PL data. MUST NOT BE MODIFIED! - */ - public int[] getPL() { - return PL; - } - - // --------------------------------------------------------------------------------------------------------- - // - // Many different string representations - // - // --------------------------------------------------------------------------------------------------------- - - /** - * Return a VCF-like string representation for the alleles of this genotype. - * - * Does not append the reference * marker on the alleles. - * - * @return a string representing the genotypes, or null if the type is unavailable. - */ - @Ensures("result != null || ! isAvailable()") - public String getGenotypeString() { - return getGenotypeString(true); - } - - private final static String PHASED_ALLELE_SEPARATOR = "|"; - private final static String UNPHASED_ALLELE_SEPARATOR = "/"; - - /** - * Return a VCF-like string representation for the alleles of this genotype. - * - * If ignoreRefState is true, will not append the reference * marker on the alleles. - * - * @return a string representing the genotypes, or null if the type is unavailable. - */ - @Ensures("result != null || ! isAvailable()") - public String getGenotypeString(boolean ignoreRefState) { - if ( alleles.size() == 0 ) - return null; - - // Notes: - // 1. Make sure to use the appropriate separator depending on whether the genotype is phased - // 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele) - // 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course) - return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR, - ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles()))); - } - - /** - * Utility that returns a list of allele strings corresponding to the alleles in this sample - * @return - */ - private List getAlleleStrings() { - List al = new ArrayList(); - for ( Allele a : alleles ) - al.add(a.getBaseString()); - - return al; - } - - public String toString() { - return String.format("[%s %s%s%s%s%s%s]", - getSampleName(), - getGenotypeString(false), - toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, GQ), - toStringIfExists(VCFConstants.DEPTH_KEY, DP), - toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, AD), - toStringIfExists(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, PL), - sortedString(getExtendedAttributes())); - } - - public String toBriefString() { - return String.format("%s:Q%d", getGenotypeString(false), getGQ()); - } - - // --------------------------------------------------------------------------------------------------------- - // - // Comparison operations - // - // --------------------------------------------------------------------------------------------------------- - - /** - * comparable genotypes -> compareTo on the sample names - * @param genotype - * @return - */ @Override - public int compareTo(final Genotype genotype) { - return getSampleName().compareTo(genotype.getSampleName()); + public boolean filtersWereApplied() { + return hasAttribute(VCFConstants.GENOTYPE_FILTER_KEY); } - public boolean sameGenotype(final Genotype other) { - return sameGenotype(other, true); - } - - public boolean sameGenotype(final Genotype other, boolean ignorePhase) { - if (getPloidy() != other.getPloidy()) - return false; // gotta have the same number of allele to be equal - - // By default, compare the elements in the lists of alleles, element-by-element - Collection thisAlleles = this.getAlleles(); - Collection otherAlleles = other.getAlleles(); - - if (ignorePhase) { // do not care about order, only identity of Alleles - thisAlleles = new TreeSet(thisAlleles); //implemented Allele.compareTo() - otherAlleles = new TreeSet(otherAlleles); - } - - return thisAlleles.equals(otherAlleles); + @Override public int[] getPL() { + return PL; } // --------------------------------------------------------------------------------------------------------- @@ -465,136 +174,10 @@ public final class FastGenotype implements Genotype { // // --------------------------------------------------------------------------------------------------------- - /** - * Returns the extended attributes for this object - * @return is never null, but is often isEmpty() - */ - @Ensures("result != null") public Map getExtendedAttributes() { return extendedAttributes; } - /** - * Is key associated with a value (even a null one) in the extended attributes? - * - * Note this will not return true for the inline attributes DP, GQ, AD, or PL - * - * @param key a non-null string key to check for an association - * @return true if key has a value in the extendedAttributes - */ - @Requires("key != null") - public boolean hasAttribute(final String key) { - return extendedAttributes.containsKey(key); - } - - /** - * Get the extended attribute value associated with key, if possible - * - * @param key a non-null string key to fetch a value for - * @param defaultValue the value to return if key isn't in the extended attributes - * @return a value (potentially) null associated with key, or defaultValue if no association exists - */ - @Requires("key != null") - @Ensures("hasAttribute(key) || result == defaultValue") - public Object getAttribute(final String key, final Object defaultValue) { - return hasAttribute(key) ? extendedAttributes.get(key) : defaultValue; - } - - @Override - public Object getAttribute(final String key) { - return getAttribute(key, null); - } - - // TODO -- add getAttributesAsX interface here - - // ------------------------------------------------------------------------------ - // - // private utilities - // - // ------------------------------------------------------------------------------ - - /** - * a utility method for generating sorted strings from a map key set. - * @param c the map - * @param the key type - * @param the value type - * @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys - */ - @Requires("c != null") - private static , V> String sortedString(Map c) { - - // NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS - final List t = new ArrayList(c.keySet()); - Collections.sort(t); - - final List pairs = new ArrayList(); - for (final T k : t) { - pairs.add(k + "=" + c.get(k)); - } - - return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}"; - } - - /** - * Returns a display name for field name with value v if this isn't -1. Otherwise returns "" - * @param name of the field ("AD") - * @param v the value of the field, or -1 if missing - * @return a non-null string for display if the field is not missing - */ - @Requires("name != null") - @Ensures("result != null") - private final static String toStringIfExists(final String name, final int v) { - return v == -1 ? "" : " " + name + " " + v; - } - - /** - * Returns a display name for field name with values vs if this isn't null. Otherwise returns "" - * @param name of the field ("AD") - * @param vs the value of the field, or null if missing - * @return a non-null string for display if the field is not missing - */ - @Requires("name != null") - @Ensures("result != null") - private final static String toStringIfExists(final String name, final int[] vs) { - if ( vs == null ) - return ""; - else { - StringBuilder b = new StringBuilder(); - b.append(" ").append(name).append(" "); - for ( int i = 0; i < vs.length; i++ ) { - if ( i != 0 ) b.append(","); - b.append(vs[i]); - } - return b.toString(); - } - } - - /** - * A list of genotype field keys corresponding to values we - * manage inline in the Genotype object. They must not appear in the - * extended attributes map - */ - public final static Collection PRIMARY_KEYS = Arrays.asList( - VCFConstants.GENOTYPE_KEY, - VCFConstants.GENOTYPE_QUALITY_KEY, - VCFConstants.DEPTH_KEY, - VCFConstants.GENOTYPE_ALLELE_DEPTHS, - VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); - - /** - * Does the attribute map have a mapping involving a forbidden key (i.e., - * one that's managed inline by this Genotypes object? - * - * @param attributes the extended attributes key - * @return - */ - private final static boolean hasForbiddenKey(final Map attributes) { - for ( final String forbidden : PRIMARY_KEYS) - if ( attributes.containsKey(forbidden) ) - return true; - return false; - } - /** * Is values a valid AD or PL field * @param values @@ -607,44 +190,4 @@ public final class FastGenotype implements Genotype { return false; return true; } - - @Override public boolean hasPL() { return PL != null; } - @Override public boolean hasAD() { return AD != null; } - @Override public boolean hasGQ() { return GQ != -1; } - @Override public boolean hasDP() { return DP != -1; } - - // TODO -- remove me - @Override public List getFilters() { - return (List)getAttribute(VCFConstants.GENOTYPE_FILTER_KEY, Collections.emptyList()); - } - @Override public boolean isFiltered() { return false; } - @Override public boolean filtersWereApplied() { return false; } - @Override public boolean hasLog10PError() { return hasGQ(); } - @Override public double getLog10PError() { return getGQ() / -10.0; } - - @Override public int getPhredScaledQual() { return getGQ(); } - - @Override - public String getAttributeAsString(String key, String defaultValue) { - Object x = getAttribute(key); - if ( x == null ) return defaultValue; - if ( x instanceof String ) return (String)x; - return String.valueOf(x); // throws an exception if this isn't a string - } - - @Override - public int getAttributeAsInt(String key, int defaultValue) { - Object x = getAttribute(key); - if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue; - if ( x instanceof Integer ) return (Integer)x; - return Integer.valueOf((String)x); // throws an exception if this isn't a string - } - - @Override - public double getAttributeAsDouble(String key, double defaultValue) { - Object x = getAttribute(key); - if ( x == null ) return defaultValue; - if ( x instanceof Double ) return (Double)x; - return Double.valueOf((String)x); // throws an exception if this isn't a string - } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java index 59b6f4092..52cd6fb77 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/Genotype.java @@ -1,6 +1,9 @@ package org.broadinstitute.sting.utils.variantcontext; +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -12,99 +15,555 @@ import java.util.*; * * @author Mark DePristo */ -public interface Genotype extends Comparable { - List getAlleles(); +@Invariant({ + "getAlleles() != null", + "getSampleName() != null", + "getPloidy() >= 0", + "! hasForbiddenKey(getExtendedAttributes())"}) +public abstract class Genotype implements Comparable { + /** + * A list of genotype field keys corresponding to values we + * manage inline in the Genotype object. They must not appear in the + * extended attributes map + */ + public final static Collection PRIMARY_KEYS = Arrays.asList( + VCFConstants.GENOTYPE_KEY, + VCFConstants.GENOTYPE_QUALITY_KEY, + VCFConstants.DEPTH_KEY, + VCFConstants.GENOTYPE_ALLELE_DEPTHS, + VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); - int countAllele(final Allele allele); + public final static String PHASED_ALLELE_SEPARATOR = "|"; + public final static String UNPHASED_ALLELE_SEPARATOR = "/"; - Allele getAllele(int i); + private final String sampleName; + private GenotypeType type = null; - boolean isPhased(); + protected Genotype(final String sampleName) { + this.sampleName = sampleName; + } - int getPloidy(); + protected Genotype(final String sampleName, final GenotypeType type) { + this.sampleName = sampleName; + this.type = type; + } - GenotypeType getType(); + /** + * @return the alleles for this genotype. Cannot be null. May be empty + */ + @Ensures("result != null") + public abstract List getAlleles(); - boolean isHom(); + /** + * Returns how many times allele appears in this genotype object? + * + * @param allele + * @return a value >= 0 indicating how many times the allele occurred in this sample's genotype + */ + @Requires("allele != null") + @Ensures("result >= 0") + public int countAllele(final Allele allele) { + int c = 0; + for ( final Allele a : getAlleles() ) + if ( a.equals(allele) ) + c++; - boolean isHomRef(); + return c; + } - boolean isHomVar(); + /** + * Get the ith allele in this genotype + * + * @param i the ith allele, must be < the ploidy, starting with 0 + * @return the allele at position i, which cannot be null + */ + @Requires({"i >=0 && i < getPloidy()", "getType() != GenotypeType.UNAVAILABLE"}) + @Ensures("result != null") + public abstract Allele getAllele(int i); - boolean isHet(); + /** + * Are the alleles phased w.r.t. the global phasing system? + * + * @return true if yes + */ + public abstract boolean isPhased(); - boolean isNoCall(); + /** + * What is the ploidy of this sample? + * + * @return the ploidy of this genotype. 0 if the site is no-called. + */ + @Ensures("result >= 0") + public int getPloidy() { + return getAlleles().size(); + } - boolean isCalled(); + /** + * @return the sequencing depth of this sample, or -1 if this value is missing + */ + @Ensures("result >= -1") + public abstract int getDP(); - boolean isMixed(); + /** + * @return the count of reads, one for each allele in the surrounding Variant context, + * matching the corresponding allele, or null if this value is missing. MUST + * NOT BE MODIFIED! + */ + public abstract int[] getAD(); - boolean isAvailable(); + /** + * Returns the name associated with this sample. + * + * @return a non-null String + */ + @Ensures("result != null") + public String getSampleName() { + return sampleName; + } - // - // Useful methods for getting genotype likelihoods for a genotype object, if present - // - boolean hasLikelihoods(); + /** + * Returns a phred-scaled quality score, or -1 if none is available + * @return + */ + @Ensures("result >= -1") + public abstract int getGQ(); - String getLikelihoodsString(); + /** + * Does the PL field have a value? + * @return true if there's a PL field value + */ + @Ensures("(result == false && getPL() == null) || (result == true && getPL() != null)") + public boolean hasPL() { + return getPL() != null; + } - GenotypeLikelihoods getLikelihoods(); + /** + * Does the AD field have a value? + * @return true if there's a AD field value + */ + @Ensures("(result == false && getAD() == null) || (result == true && getAD() != null)") + public boolean hasAD() { + return getAD() != null; + } - String getGenotypeString(); + /** + * Does the GQ field have a value? + * @return true if there's a GQ field value + */ + @Ensures("(result == false && getGQ() == -1) || (result == true && getGQ() >= 0)") + public boolean hasGQ() { + return getGQ() != -1; + } - String getGenotypeString(boolean ignoreRefState); - - String toBriefString(); - - boolean sameGenotype(Genotype other); - - boolean sameGenotype(Genotype other, boolean ignorePhase); + /** + * Does the DP field have a value? + * @return true if there's a DP field value + */ + @Ensures("(result == false && getDP() == -1) || (result == true && getDP() >= 0)") + public boolean hasDP() { + return getDP() != -1; + } // --------------------------------------------------------------------------------------------------------- // - // get routines to access context info fields + // The type of this genotype // // --------------------------------------------------------------------------------------------------------- - String getSampleName(); - public int[] getPL(); - public boolean hasPL(); + /** + * @return the high-level type of this sample's genotype + */ + @Ensures({"type != null", "result != null"}) + public GenotypeType getType() { + if ( type == null ) { + type = determineType(); + } + return type; + } - public int getDP(); - public boolean hasDP(); + /** + * Internal code to determine the type of the genotype from the alleles vector + * @return the type + */ + @Requires("type == null") // we should never call if already calculated + protected GenotypeType determineType() { + // TODO -- this code is slow and could be optimized for the diploid case + final List alleles = getAlleles(); + if ( alleles.isEmpty() ) + return GenotypeType.UNAVAILABLE; - public int[] getAD(); - public boolean hasAD(); + boolean sawNoCall = false, sawMultipleAlleles = false; + Allele observedAllele = null; - public int getGQ(); - public boolean hasGQ(); + for ( final Allele allele : alleles ) { + if ( allele.isNoCall() ) + sawNoCall = true; + else if ( observedAllele == null ) + observedAllele = allele; + else if ( !allele.equals(observedAllele) ) + sawMultipleAlleles = true; + } - List getFilters(); - boolean isFiltered(); - boolean filtersWereApplied(); + if ( sawNoCall ) { + if ( observedAllele == null ) + return GenotypeType.NO_CALL; + return GenotypeType.MIXED; + } + + if ( observedAllele == null ) + throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null"); + + return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR; + } + + /** + * @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false. + */ + public boolean isHom() { return isHomRef() || isHomVar(); } + + /** + * @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false. + */ + public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; } + + /** + * @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false. + */ + public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; } + + /** + * @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false. + */ + public boolean isHet() { return getType() == GenotypeType.HET; } + + /** + * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false. + */ + public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; } + + /** + * @return true if this genotype is comprised of any alleles that are not no-calls (even if some are). + */ + public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; } + + /** + * @return true if this genotype is comprised of both calls and no-calls. + */ + public boolean isMixed() { return getType() == GenotypeType.MIXED; } + + /** + * @return true if the type of this genotype is set. + */ + public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; } + + // ------------------------------------------------------------------------------ + // + // methods for getting genotype likelihoods for a genotype object, if present + // + // ------------------------------------------------------------------------------ + + /** + * @return Returns true if this Genotype has PL field values + */ + public boolean hasLikelihoods() { + return getPL() != null; + } + + /** + * Convenience function that returns a string representation of the PL field of this + * genotype, or . if none is available. + * + * @return a non-null String representation for the PL of this sample + */ + @Ensures("result != null") + public String getLikelihoodsString() { + return hasLikelihoods() ? getLikelihoods().toString() : VCFConstants.MISSING_VALUE_v4; + } + + /** + * Returns the GenotypesLikelihoods data associated with this Genotype, or null if missing + * @return null or a GenotypesLikelihood object for this sample's PL field + */ + @Ensures({"hasLikelihoods() && result != null", "! hasLikelihoods() && result == null"}) + public GenotypeLikelihoods getLikelihoods() { + return hasLikelihoods() ? GenotypeLikelihoods.fromPLs(getPL()) : null; + } + + /** + * Unsafe low-level accessor the PL field itself, may be null. + * + * @return a pointer to the underlying PL data. MUST NOT BE MODIFIED! + */ + public abstract int[] getPL(); + + // --------------------------------------------------------------------------------------------------------- + // + // Many different string representations + // + // --------------------------------------------------------------------------------------------------------- + + /** + * Return a VCF-like string representation for the alleles of this genotype. + * + * Does not append the reference * marker on the alleles. + * + * @return a string representing the genotypes, or null if the type is unavailable. + */ + @Ensures("result != null || ! isAvailable()") + public String getGenotypeString() { + return getGenotypeString(true); + } + + /** + * Return a VCF-like string representation for the alleles of this genotype. + * + * If ignoreRefState is true, will not append the reference * marker on the alleles. + * + * @return a string representing the genotypes, or null if the type is unavailable. + */ + @Ensures("result != null || ! isAvailable()") + public String getGenotypeString(boolean ignoreRefState) { + if ( getPloidy() == 0 ) + return null; + + // Notes: + // 1. Make sure to use the appropriate separator depending on whether the genotype is phased + // 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele) + // 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course) + return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR, + ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles()))); + } + + /** + * Utility that returns a list of allele strings corresponding to the alleles in this sample + * @return + */ + protected List getAlleleStrings() { + final List al = new ArrayList(getPloidy()); + for ( Allele a : getAlleles() ) + al.add(a.getBaseString()); + + return al; + } + + public String toString() { + return String.format("[%s %s%s%s%s%s%s]", + getSampleName(), + getGenotypeString(false), + toStringIfExists(VCFConstants.GENOTYPE_QUALITY_KEY, getGQ()), + toStringIfExists(VCFConstants.DEPTH_KEY, getDP()), + toStringIfExists(VCFConstants.GENOTYPE_ALLELE_DEPTHS, getAD()), + toStringIfExists(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, getPL()), + sortedString(getExtendedAttributes())); + } + + public String toBriefString() { + return String.format("%s:Q%d", getGenotypeString(false), getGQ()); + } + + // --------------------------------------------------------------------------------------------------------- + // + // Comparison operations + // + // --------------------------------------------------------------------------------------------------------- + + /** + * comparable genotypes -> compareTo on the sample names + * @param genotype + * @return + */ + @Override + public int compareTo(final Genotype genotype) { + return getSampleName().compareTo(genotype.getSampleName()); + } + + public boolean sameGenotype(final Genotype other) { + return sameGenotype(other, true); + } + + public boolean sameGenotype(final Genotype other, boolean ignorePhase) { + if (getPloidy() != other.getPloidy()) + return false; // gotta have the same number of allele to be equal + + // By default, compare the elements in the lists of alleles, element-by-element + Collection thisAlleles = this.getAlleles(); + Collection otherAlleles = other.getAlleles(); + + if (ignorePhase) { // do not care about order, only identity of Alleles + thisAlleles = new TreeSet(thisAlleles); //implemented Allele.compareTo() + otherAlleles = new TreeSet(otherAlleles); + } + + return thisAlleles.equals(otherAlleles); + } + + // --------------------------------------------------------------------------------------------------------- + // + // get routines for extended attributes + // + // --------------------------------------------------------------------------------------------------------- + + /** + * Returns the extended attributes for this object + * @return is never null, but is often isEmpty() + */ + @Ensures({"result != null", "! hasForbiddenKey(result)"}) + public abstract Map getExtendedAttributes(); + + /** + * Is key associated with a value (even a null one) in the extended attributes? + * + * Note this will not return true for the inline attributes DP, GQ, AD, or PL + * + * @param key a non-null string key to check for an association + * @return true if key has a value in the extendedAttributes + */ + @Requires("key != null") + public boolean hasAttribute(final String key) { + return getExtendedAttributes().containsKey(key); + } + + /** + * Get the extended attribute value associated with key, if possible + * + * @param key a non-null string key to fetch a value for + * @param defaultValue the value to return if key isn't in the extended attributes + * @return a value (potentially) null associated with key, or defaultValue if no association exists + */ + @Requires("key != null") + @Ensures("hasAttribute(key) || result == defaultValue") + public Object getAttribute(final String key, final Object defaultValue) { + return hasAttribute(key) ? getExtendedAttributes().get(key) : defaultValue; + } + + /** + * Same as #getAttribute with a null default + * + * @param key + * @return + */ + public Object getAttribute(final String key) { + return getAttribute(key, null); + } + + /** + * + * @return + */ + @Ensures({"result != null", "filtersWereApplied() || result.isEmpty()"}) + public abstract List getFilters(); + + @Ensures({"result != getFilters().isEmpty()"}) + public boolean isFiltered() { + return ! getFilters().isEmpty(); + } + + @Ensures("result == true || getFilters().isEmpty()") + public abstract boolean filtersWereApplied(); + + @Deprecated public boolean hasLog10PError() { return hasGQ(); } + @Deprecated public double getLog10PError() { return getGQ() / -10.0; } + @Deprecated public int getPhredScaledQual() { return getGQ(); } @Deprecated - boolean hasLog10PError(); + public String getAttributeAsString(String key, String defaultValue) { + Object x = getAttribute(key); + if ( x == null ) return defaultValue; + if ( x instanceof String ) return (String)x; + return String.valueOf(x); // throws an exception if this isn't a string + } @Deprecated - double getLog10PError(); + public int getAttributeAsInt(String key, int defaultValue) { + Object x = getAttribute(key); + if ( x == null || x == VCFConstants.MISSING_VALUE_v4 ) return defaultValue; + if ( x instanceof Integer ) return (Integer)x; + return Integer.valueOf((String)x); // throws an exception if this isn't a string + } @Deprecated - int getPhredScaledQual(); + public double getAttributeAsDouble(String key, double defaultValue) { + Object x = getAttribute(key); + if ( x == null ) return defaultValue; + if ( x instanceof Double ) return (Double)x; + return Double.valueOf((String)x); // throws an exception if this isn't a string + } - public boolean hasAttribute(final String key); + // TODO -- add getAttributesAsX interface here - Map getExtendedAttributes(); + // ------------------------------------------------------------------------------ + // + // private utilities + // + // ------------------------------------------------------------------------------ - Object getAttribute(String key); - Object getAttribute(final String key, final Object defaultValue); + /** + * a utility method for generating sorted strings from a map key set. + * @param c the map + * @param the key type + * @param the value type + * @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys + */ + @Requires("c != null") + protected static , V> String sortedString(Map c) { - @Deprecated - String getAttributeAsString(String key, String defaultValue); + // NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS + final List t = new ArrayList(c.keySet()); + Collections.sort(t); - @Deprecated - int getAttributeAsInt(String key, int defaultValue); + final List pairs = new ArrayList(); + for (final T k : t) { + pairs.add(k + "=" + c.get(k)); + } - @Deprecated - double getAttributeAsDouble(String key, double defaultValue); + return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}"; + } + + /** + * Returns a display name for field name with value v if this isn't -1. Otherwise returns "" + * @param name of the field ("AD") + * @param v the value of the field, or -1 if missing + * @return a non-null string for display if the field is not missing + */ + @Requires("name != null") + @Ensures("result != null") + protected final static String toStringIfExists(final String name, final int v) { + return v == -1 ? "" : " " + name + " " + v; + } + + /** + * Returns a display name for field name with values vs if this isn't null. Otherwise returns "" + * @param name of the field ("AD") + * @param vs the value of the field, or null if missing + * @return a non-null string for display if the field is not missing + */ + @Requires("name != null") + @Ensures("result != null") + protected final static String toStringIfExists(final String name, final int[] vs) { + if ( vs == null ) + return ""; + else { + StringBuilder b = new StringBuilder(); + b.append(" ").append(name).append(" "); + for ( int i = 0; i < vs.length; i++ ) { + if ( i != 0 ) b.append(","); + b.append(vs[i]); + } + return b.toString(); + } + } + + /** + * Does the attribute map have a mapping involving a forbidden key (i.e., + * one that's managed inline by this Genotypes object? + * + * @param attributes the extended attributes key + * @return + */ + protected final static boolean hasForbiddenKey(final Map attributes) { + for ( final String forbidden : PRIMARY_KEYS) + if ( attributes.containsKey(forbidden) ) + return true; + return false; + } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java index 17bb03b2d..90f0381c9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeBuilder.java @@ -168,11 +168,14 @@ public final class GenotypeBuilder { } else { final Map attributes = new LinkedHashMap(); if ( extendedAttributes != null ) attributes.putAll(extendedAttributes); - final double log10PError = GQ != -1 ? GQ / -10.0 : SlowGenotype.NO_LOG10_PERROR; + final double log10PError = GQ == -1 ? SlowGenotype.NO_LOG10_PERROR : (GQ == 0 ? 0 : GQ / -10.0); - Set filters = Collections.emptySet(); - if ( extendedAttributes != null && extendedAttributes.containsKey(VCFConstants.GENOTYPE_FILTER_KEY) ) { - filters = new LinkedHashSet((List)extendedAttributes.get(VCFConstants.GENOTYPE_FILTER_KEY)); + Set filters = null; + if ( extendedAttributes != null && extendedAttributes.containsKey(VCFConstants.GENOTYPE_FILTER_KEY) ) + { + final Object f = extendedAttributes.get(VCFConstants.GENOTYPE_FILTER_KEY); + if ( f != null ) + filters = new LinkedHashSet((List)f); attributes.remove(VCFConstants.GENOTYPE_FILTER_KEY); } @@ -216,8 +219,20 @@ public final class GenotypeBuilder { return this; } - public GenotypeBuilder GQ(final double GQ) { - return GQ((int)Math.round(GQ)); + /** + * Adaptor interface from the pLog10Error system. + * + * Will be retired when + * + * @param pLog10Error + * @return + */ + @Deprecated + public GenotypeBuilder log10PError(final double pLog10Error) { + if ( pLog10Error == CommonInfo.NO_LOG10_PERROR ) + return GQ(-1); + else + return GQ((int)Math.round(pLog10Error * -10)); } public GenotypeBuilder noGQ() { GQ = -1; return this; } @@ -270,9 +285,17 @@ public final class GenotypeBuilder { return this; } + /** + * Tells this builder to make a Genotype object that has had filters applied, + * which may be empty (passes) or have some value indicating the reasons + * why it's been filtered. + * + * @param filters non-null list of filters. empty list => PASS + * @return this builder + */ + @Requires("filters != null") public GenotypeBuilder filters(final List filters) { attribute(VCFConstants.GENOTYPE_FILTER_KEY, filters); - // TODO return this; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java index 5786f7e40..db480de45 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/SlowGenotype.java @@ -36,22 +36,16 @@ import java.util.*; * * @author Mark DePristo */ -public class SlowGenotype implements Genotype { - public final static String PHASED_ALLELE_SEPARATOR = "|"; - public final static String UNPHASED_ALLELE_SEPARATOR = "/"; - +@Deprecated +public class SlowGenotype extends Genotype { protected CommonInfo commonInfo; public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; - protected List alleles = null; // new ArrayList(); - protected GenotypeType type = null; - + protected List alleles = null; protected boolean isPhased = false; -// public SlowGenotype(String sampleName, List alleles, double log10PError, Set filters, Map attributes, boolean isPhased) { -// this(sampleName, alleles, log10PError, filters, attributes, isPhased, null); -// } - protected SlowGenotype(String sampleName, List alleles, double log10PError, Set filters, Map attributes, boolean isPhased, double[] log10Likelihoods) { + super(sampleName); + if ( alleles == null || alleles.isEmpty() ) this.alleles = Collections.emptyList(); else @@ -63,175 +57,27 @@ public class SlowGenotype implements Genotype { validate(); } -// /** -// * Creates a new Genotype for sampleName with genotype according to alleles. -// * @param sampleName -// * @param alleles -// * @param log10PError the confidence in these alleles -// * @param log10Likelihoods a log10 likelihoods for each of the genotype combinations possible for alleles, in the standard VCF ordering, or null if not known -// */ -// public SlowGenotype(String sampleName, List alleles, double log10PError, double[] log10Likelihoods) { -// this(sampleName, alleles, log10PError, null, null, false, log10Likelihoods); -// } -// -// public SlowGenotype(String sampleName, List alleles, double log10PError) { -// this(sampleName, alleles, log10PError, null, null, false); -// } -// -// public SlowGenotype(String sampleName, List alleles) { -// this(sampleName, alleles, NO_LOG10_PERROR, null, null, false); -// } -// -// public SlowGenotype(String sampleName, SlowGenotype parent) { -// this(sampleName, parent.getAlleles(), parent.getLog10PError(), parent.getFilters(), parent.getAttributes(), parent.isPhased()); -// } -// -// -// -// // --------------------------------------------------------------------------------------------------------- -// // -// // Partial-cloning routines (because Genotype is immutable). -// // -// // --------------------------------------------------------------------------------------------------------- -// -// public static SlowGenotype modifyName(SlowGenotype g, String name) { -// return new SlowGenotype(name, g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); -// } -// -// public static SlowGenotype modifyAttributes(SlowGenotype g, Map attributes) { -// return new SlowGenotype(g.getSampleName(), g.getAlleles(), g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, attributes, g.isPhased()); -// } -// -// public static SlowGenotype modifyAlleles(SlowGenotype g, List alleles) { -// return new SlowGenotype(g.getSampleName(), alleles, g.getLog10PError(), g.filtersWereApplied() ? g.getFilters() : null, g.getAttributes(), g.isPhased()); -// } - - /** - * @return the alleles for this genotype - */ - public List getAlleles() { + @Override public List getAlleles() { return alleles; } - public int countAllele(final Allele allele) { - int c = 0; - for ( final Allele a : alleles ) - if ( a.equals(allele) ) - c++; - - return c; - } - - public Allele getAllele(int i) { + @Override public Allele getAllele(int i) { if ( getType() == GenotypeType.UNAVAILABLE ) throw new ReviewedStingException("Requesting alleles for an UNAVAILABLE genotype"); return alleles.get(i); } - public boolean isPhased() { return isPhased; } - - /** - * @return the ploidy of this genotype. 0 if the site is no-called. - */ - public int getPloidy() { - return alleles.size(); - } - - public GenotypeType getType() { - if ( type == null ) { - type = determineType(); - } - return type; - } - - protected GenotypeType determineType() { - if ( alleles.size() == 0 ) - return GenotypeType.UNAVAILABLE; - - boolean sawNoCall = false, sawMultipleAlleles = false; - Allele observedAllele = null; - - for ( Allele allele : alleles ) { - if ( allele.isNoCall() ) - sawNoCall = true; - else if ( observedAllele == null ) - observedAllele = allele; - else if ( !allele.equals(observedAllele) ) - sawMultipleAlleles = true; - } - - if ( sawNoCall ) { - if ( observedAllele == null ) - return GenotypeType.NO_CALL; - return GenotypeType.MIXED; - } - - if ( observedAllele == null ) - throw new ReviewedStingException("BUG: there are no alleles present in this genotype but the alleles list is not null"); - - return sawMultipleAlleles ? GenotypeType.HET : observedAllele.isReference() ? GenotypeType.HOM_REF : GenotypeType.HOM_VAR; - } - - /** - * @return true if all observed alleles are the same (regardless of whether they are ref or alt); if any alleles are no-calls, this method will return false. - */ - public boolean isHom() { return isHomRef() || isHomVar(); } - - /** - * @return true if all observed alleles are ref; if any alleles are no-calls, this method will return false. - */ - public boolean isHomRef() { return getType() == GenotypeType.HOM_REF; } - - /** - * @return true if all observed alleles are alt; if any alleles are no-calls, this method will return false. - */ - public boolean isHomVar() { return getType() == GenotypeType.HOM_VAR; } - - /** - * @return true if we're het (observed alleles differ); if the ploidy is less than 2 or if any alleles are no-calls, this method will return false. - */ - public boolean isHet() { return getType() == GenotypeType.HET; } - - /** - * @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF); if any alleles are not no-calls (even if some are), this method will return false. - */ - public boolean isNoCall() { return getType() == GenotypeType.NO_CALL; } - - /** - * @return true if this genotype is comprised of any alleles that are not no-calls (even if some are). - */ - public boolean isCalled() { return getType() != GenotypeType.NO_CALL && getType() != GenotypeType.UNAVAILABLE; } - - /** - * @return true if this genotype is comprised of both calls and no-calls. - */ - public boolean isMixed() { return getType() == GenotypeType.MIXED; } - - /** - * @return true if the type of this genotype is set. - */ - public boolean isAvailable() { return getType() != GenotypeType.UNAVAILABLE; } + @Override public boolean isPhased() { return isPhased; } // // Useful methods for getting genotype likelihoods for a genotype object, if present // - public boolean hasLikelihoods() { + @Override public boolean hasLikelihoods() { return (hasAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)) || (hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) && !getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).equals(VCFConstants.MISSING_VALUE_v4)); } - /** - * Convenience function that returns a string representation of the PL field of this - * genotype, or . if none is available. - * - * @return - */ - public String getLikelihoodsString() { - GenotypeLikelihoods gl = getLikelihoods(); - return gl == null ? VCFConstants.MISSING_VALUE_v4 : gl.toString(); - } - - public GenotypeLikelihoods getLikelihoods() { + @Override public GenotypeLikelihoods getLikelihoods() { GenotypeLikelihoods x = getLikelihoods(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, true); if ( x != null ) return x; @@ -253,144 +99,40 @@ public class SlowGenotype implements Genotype { else return null; } - public void validate() { + private final void validate() { if ( alleles.size() == 0) return; - // int nNoCalls = 0; for ( Allele allele : alleles ) { if ( allele == null ) throw new IllegalArgumentException("BUG: allele cannot be null in Genotype"); - // nNoCalls += allele.isNoCall() ? 1 : 0; } - - // Technically, the spec does allow for the below case so this is not an illegal state - //if ( nNoCalls > 0 && nNoCalls != alleles.size() ) - // throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this); } - public String getGenotypeString() { - return getGenotypeString(true); - } - - public String getGenotypeString(boolean ignoreRefState) { - if ( alleles.size() == 0 ) - return null; - - // Notes: - // 1. Make sure to use the appropriate separator depending on whether the genotype is phased - // 2. If ignoreRefState is true, then we want just the bases of the Alleles (ignoring the '*' indicating a ref Allele) - // 3. So that everything is deterministic with regards to integration tests, we sort Alleles (when the genotype isn't phased, of course) - return ParsingUtils.join(isPhased() ? PHASED_ALLELE_SEPARATOR : UNPHASED_ALLELE_SEPARATOR, - ignoreRefState ? getAlleleStrings() : (isPhased() ? getAlleles() : ParsingUtils.sortList(getAlleles()))); - } - - private List getAlleleStrings() { - List al = new ArrayList(); - for ( Allele a : alleles ) - al.add(a.getBaseString()); - - return al; - } - - public String toString() { - int Q = getPhredScaledQual(); - return String.format("[%s %s Q%s %s]", getSampleName(), getGenotypeString(false), - Q == -1 ? "." : String.format("%2d",Q), sortedString(getExtendedAttributes())); - } - - public String toBriefString() { - return String.format("%s:Q%d", getGenotypeString(false), getPhredScaledQual()); - } - - public boolean sameGenotype(Genotype other) { - return sameGenotype(other, true); - } - - public boolean sameGenotype(Genotype other, boolean ignorePhase) { - if (getPloidy() != other.getPloidy()) - return false; // gotta have the same number of allele to be equal - - // By default, compare the elements in the lists of alleles, element-by-element - Collection thisAlleles = this.getAlleles(); - Collection otherAlleles = other.getAlleles(); - - if (ignorePhase) { // do not care about order, only identity of Alleles - thisAlleles = new TreeSet(thisAlleles); //implemented Allele.compareTo() - otherAlleles = new TreeSet(otherAlleles); - } - - return thisAlleles.equals(otherAlleles); - } - - /** - * a utility method for generating sorted strings from a map key set. - * @param c the map - * @param the key type - * @param the value type - * @return a sting, enclosed in {}, with comma seperated key value pairs in order of the keys - */ - private static , V> String sortedString(Map c) { - // NOTE -- THIS IS COPIED FROM GATK UTILS TO ALLOW US TO KEEP A SEPARATION BETWEEN THE GATK AND VCF CODECS - List t = new ArrayList(c.keySet()); - Collections.sort(t); - - List pairs = new ArrayList(); - for (T k : t) { - pairs.add(k + "=" + c.get(k)); - } - - return "{" + ParsingUtils.join(", ", pairs.toArray(new String[pairs.size()])) + "}"; - } - - // --------------------------------------------------------------------------------------------------------- // // get routines to access context info fields // // --------------------------------------------------------------------------------------------------------- - public String getSampleName() { return commonInfo.getName(); } - public List getFilters() { return new ArrayList(commonInfo.getFilters()); } - public boolean isFiltered() { return commonInfo.isFiltered(); } - public boolean isNotFiltered() { return commonInfo.isNotFiltered(); } - public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); } - public boolean hasLog10PError() { return commonInfo.hasLog10PError(); } - public double getLog10PError() { return commonInfo.getLog10PError(); } - - /** - * Returns a phred-scaled quality score, or -1 if none is available - * @return - */ - public int getPhredScaledQual() { - if ( commonInfo.hasLog10PError() ) - return (int)Math.round(commonInfo.getPhredScaledQual()); - else - return -1; - } + @Override public List getFilters() { return new ArrayList(commonInfo.getFilters()); } + @Override public boolean filtersWereApplied() { return commonInfo.filtersWereApplied(); } + @Override public boolean hasLog10PError() { return commonInfo.hasLog10PError(); } + @Override public double getLog10PError() { return commonInfo.getLog10PError(); } @Override public boolean hasAttribute(String key) { return commonInfo.hasAttribute(key); } @Override public Object getAttribute(String key) { return commonInfo.getAttribute(key); } - + + @Override public Object getAttribute(String key, Object defaultValue) { return commonInfo.getAttribute(key, defaultValue); } - public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); } - public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); } - public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); } - public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); } - - /** - * comparable genotypes -> compareTo on the sample names - * @param genotype - * @return - */ - @Override - public int compareTo(final Genotype genotype) { - return getSampleName().compareTo(genotype.getSampleName()); - } +// public String getAttributeAsString(String key, String defaultValue) { return commonInfo.getAttributeAsString(key, defaultValue); } +// public int getAttributeAsInt(String key, int defaultValue) { return commonInfo.getAttributeAsInt(key, defaultValue); } +// public double getAttributeAsDouble(String key, double defaultValue) { return commonInfo.getAttributeAsDouble(key, defaultValue); } +// public boolean getAttributeAsBoolean(String key, boolean defaultValue) { return commonInfo.getAttributeAsBoolean(key, defaultValue); } @Override public int[] getPL() { @@ -427,12 +169,15 @@ public class SlowGenotype implements Genotype { @Override public int getGQ() { - return getPhredScaledQual(); + if ( commonInfo.hasLog10PError() ) + return (int)Math.round(commonInfo.getPhredScaledQual()); + else + return -1; } @Override public boolean hasGQ() { - return hasLikelihoods(); + return hasLog10PError(); } @Override diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index a62b721c5..547f0425d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -1287,7 +1287,7 @@ public class VariantContextUtils { GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); gb.alleles(Arrays.asList(allelesToUse.get(alleles.alleleIndex1), allelesToUse.get(alleles.alleleIndex2))); - if ( numNewAltAlleles != 0 ) gb.GQ(-10 * GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); + if ( numNewAltAlleles != 0 ) gb.log10PError(GenotypeLikelihoods.getGQLog10FromLikelihoods(PLindex, newLikelihoods)); } newGTs.add(gb.make()); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java index e6a5f964f..b09a10d07 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java @@ -68,12 +68,12 @@ public class VariantContextUtilsUnitTest extends BaseTest { } private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError, double... pls) { - return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).GQ(log10pError).PL(pls).make(); + return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).log10PError(log10pError).PL(pls).make(); } private Genotype makeG(String sample, Allele a1, Allele a2, double log10pError) { - return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).GQ(log10pError).make(); + return new GenotypeBuilder(sample, Arrays.asList(a1, a2)).log10PError(log10pError).make(); } private VariantContext makeVC(String source, List alleles) { @@ -524,7 +524,7 @@ public class VariantContextUtilsUnitTest extends BaseTest { Genotype expectedValue = expected.get(value.getSampleName()); Assert.assertEquals(value.getAlleles(), expectedValue.getAlleles(), "Alleles in Genotype aren't equal"); - Assert.assertEquals(value.getLog10PError(), expectedValue.getLog10PError(), "GQ values aren't equal"); + Assert.assertEquals(value.getGQ(), expectedValue.getGQ(), "GQ values aren't equal"); Assert.assertEquals(value.hasLikelihoods(), expectedValue.hasLikelihoods(), "Either both have likelihoods or both not"); if ( value.hasLikelihoods() ) Assert.assertEquals(value.getLikelihoods().getAsVector(), expectedValue.getLikelihoods().getAsVector(), "Genotype likelihoods aren't equal");