diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java index c2e093e46..c508a845e 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Allele.java @@ -64,16 +64,23 @@ import java.util.Arrays; * If you know where allele is the reference, you can determine whether the variant is an insertion or deletion */ public class Allele { + private static final byte[] NULL_ALLELE_BASES = new byte[0]; + private boolean isRef = false; private byte[] bases = null; public Allele(byte[] bases, boolean isRef) { - bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance - this.isRef = isRef; - if ( bases == null ) throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele"); + // standardize our representation of null allele and bases + if ( (bases.length == 1 && bases[0] == '-') || bases.length == 0) + bases = NULL_ALLELE_BASES; + else + bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance + + this.isRef = isRef; + this.bases = bases; for ( byte b : bases ) { if ( ! BaseUtils.isRegularBase(b) ) { @@ -91,6 +98,10 @@ public class Allele { this(bases.getBytes(), isRef); } + public Allele() { this(false); } + public Allele(String bases) { this(bases, false); } + public Allele(byte[] bases) { this(bases, false); } + // // // accessor routines @@ -102,6 +113,9 @@ public class Allele { public boolean isReference() { return isRef; } public boolean isNonReference() { return ! isReference(); } + public String toString() { + return isNullAllele() ? "-" : new String(getBases()) + ( isReference() ? "*" : ""); + } /** * Return the DNA bases segregating in this allele. 
Note this isn't reference polarized, @@ -117,9 +131,14 @@ public class Allele { * @return true if these alleles are equal */ public boolean equals(Allele other) { - return Arrays.equals(bases, other.getBases()); + return isRef == other.isRef && this.basesMatch(other.getBases()); } + // todo -- notice case insensitivity + public boolean basesMatch(byte[] test) { return bases == test || Arrays.equals(bases, test); } + public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); } + public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); } + public int length() { return bases.length; } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/AttributedObject.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/AttributedObject.java new file mode 100755 index 000000000..8aa3cbfd2 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/AttributedObject.java @@ -0,0 +1,112 @@ +package org.broadinstitute.sting.oneoffprojects.variantcontext; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.StingException; + +import java.util.*; + + +/** + * @author ebanks + *

+ * Class AttributedObject + *

+ * This class represents a context that unifies one or more variants + */ +public class AttributedObject { + private Map attributes = new HashMap(); + + public AttributedObject() { + ; + } + + public AttributedObject(Map attributes) { + setAttributes(attributes); + } + + // --------------------------------------------------------------------------------------------------------- + // + // Working with attributes + // + // --------------------------------------------------------------------------------------------------------- + public void clearAttributes() { + this.attributes.clear(); + } + + /** + * @return the attribute map + */ + public Map getAttributes() { + return attributes; + } + + // todo -- define common attributes as enum + + public void setAttributes(Map map) { + this.attributes.clear(); + putAttributes(attributes); + } + + public void putAttribute(Object key, Object value) { + putAttribute(key, value, false); + } + + public void putAttribute(Object key, Object value, boolean allowOverwrites) { + if ( hasAttribute(key) && ! 
allowOverwrites ) + throw new StingException("Attempting to overwrite key->value binding: key = " + key + " this = " + this); + + this.attributes.put(key, value); + } + + public void removeAttribute(Object key) { + this.attributes.remove(key); + } + + public void putAttributes(Map map) { + for ( Map.Entry elt : attributes.entrySet() ) { + putAttribute(elt.getKey(), elt.getValue()); + } + } + + public boolean hasAttribute(Object key) { + return attributes.containsKey(key); + } + + public int getNumAttributes() { + return attributes.size(); + } + + /** + * @param key the attribute key + * + * @return the attribute value for the given key (or null if not set) + */ + public Object getAttribute(Object key) { + return attributes.get(key); + } + + public Object getAttribute(Object key, Object defaultValue) { + if ( hasAttribute(key) ) + return attributes.get(key); + else + return defaultValue; + } + + public AttributedObject getAttributes(Collection keys) { + AttributedObject selected = new AttributedObject(); + + for ( Object key : keys ) + selected.putAttribute(key, this.getAttribute(key)); + + return selected; + } + + + public String getAttributeAsString(Object key) { return (String)getAttribute(key); } + public int getAttributeAsInt(Object key) { return (Integer)getAttribute(key); } + public double getAttributeAsDouble(Object key) { return (Double)getAttribute(key); } + + public String getAttributeAsString(Object key, String defaultValue) { return (String)getAttribute(key, defaultValue); } + public int getAttributeAsInt(Object key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); } + public double getAttributeAsDouble(Object key, double defaultValue) { return (Double)getAttribute(key, defaultValue); } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java index 5bfd2dbdc..f508ebfcc 100755 --- 
a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/Genotype.java @@ -9,21 +9,17 @@ import java.util.*; *

* This class emcompasses all the basic information about a genotype */ -public class Genotype { +public class Genotype extends AttributedObject { private List alleles; private double negLog10PError; private String sample; - private HashMap attributes; - - public Genotype(List alleles, String sample, double negLog10PError) { this.alleles = new ArrayList(alleles); this.sample = sample; this.negLog10PError = negLog10PError; - attributes = new HashMap(); } /** @@ -71,22 +67,22 @@ public class Genotype { /** * @return true if all alleles for this genotype are SNPs or reference */ - public boolean isPointGenotype() { -// for ( Allele allele : alleles ) { -// if ( allele.isVariant() && !allele.isSNP() ) -// return false; -// } - return true; - } +// public boolean isPointGenotype() { +//// for ( Allele allele : alleles ) { +//// if ( allele.isVariant() && !allele.isSNP() ) +//// return false; +//// } +// return true; +// } /** * @return true if this is a variant genotype, false if it's reference */ public boolean isVariant() { -// for ( Allele allele : alleles ) { -// if ( allele.isVariant() ) -// return true; -// } + for ( Allele allele : alleles ) { + if ( allele.isNonReference() ) + return true; + } return false; } @@ -116,31 +112,4 @@ public class Genotype { */ // TODO -- implement me // public Variation toVariation(char ref); - - /** - * Sets the given attribute - * - * @param key the attribute key - * @param value the attribute value - */ - public void setAttribute(Object key, Object value) { - attributes.put(key, value); - } - - /** - * @param key the attribute key - * - * @return the attribute value for the given key (or null if not set) - */ - public Object getAttribute(Object key) { - return attributes.get(key); - } - - /** - * @return the attribute map - */ - public Map getAttributes() { - return attributes; - } - } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/TestVariantContextWalker.java 
b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/TestVariantContextWalker.java new file mode 100755 index 000000000..0a47ccd33 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/TestVariantContextWalker.java @@ -0,0 +1,50 @@ +package org.broadinstitute.sting.oneoffprojects.variantcontext; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RODRecordList; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.*; + +/** + * Test routine for new VariantContext object + */ +public class TestVariantContextWalker extends RodWalker { + + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + GenomeLoc cur = context.getLocation(); + + if ( ref == null ) + return 0; + else { + RODRecordList dbsnpList = tracker.getTrackData("dbsnp", null); + + if (dbsnpList == null) + return 0; + else { + int n = 0; + for (ReferenceOrderedDatum d : dbsnpList) { + rodDbSNP dbsnpRecord = (rodDbSNP)d; + VariantContext vc = VariantContextAdaptors.dbsnp2VariantContext(dbsnpRecord); + if ( vc != null ) { + n++; + System.out.printf("%s%n", vc); + } + } + + return n; + } + } + } + + public Integer reduceInit() { + return 0; + } + + public Integer reduce(Integer point, Integer sum) { + return point + sum; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java index 0fc9727aa..f4de1b8f8 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java +++ 
b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContext.java @@ -15,21 +15,18 @@ import org.apache.commons.jexl.*; *

* This class represents a context that unifies one or more variants */ -public class VariantContext { +public class VariantContext extends AttributedObject { private GenomeLoc loc; private Set alleles = new HashSet(); - private Set genotypes = new HashSet(); - - private HashMap attributes = new HashMap(); + private Map genotypes = new HashMap(); Type type = null; - private double negLog10PError = 0.0; // todo - fixme + // todo -- add QUAL and FILTER - /** Have we checked this VariantContext already? */ - private boolean validatedP = false; + //private double negLog10PError = 0.0; // todo - fixme // public VariantContext(VariationRod rod) { // @@ -62,16 +59,47 @@ public class VariantContext { this(parent.getLocation(), parent.getAlleles(), genotypes, attributes); } - public VariantContext(GenomeLoc loc, Set alleles, Set genotypes, HashMap attributes) { + // todo -- add more convenience methods + public VariantContext(GenomeLoc loc, Set alleles) { this(loc, alleles, null); } + public VariantContext(GenomeLoc loc, List alleles ) { this(loc, alleles, null); } + + public VariantContext(GenomeLoc loc, List alleles, Map attributes) { this(loc); - // todo -- add extensive testing here + HashSet alleleSet = new HashSet(); + for ( Allele a : alleles ) { + if ( alleleSet.contains(a) ) + throw new IllegalArgumentException("List contains duplicate elements " + loc + " " + alleles ); + alleleSet.add(a); + } - // todo -- check that exactly one allele is tagged as reference + setAlleles(alleleSet); + setAttributes(attributes); + validate(); + } - this.alleles = new HashSet(alleles); - this.genotypes = new HashSet(genotypes); - this.attributes = new HashMap(attributes); + public VariantContext(GenomeLoc loc, Set alleles, Map attributes) { + this(loc); + setAlleles(alleles); + setAttributes(attributes); + validate(); + } + + public VariantContext(GenomeLoc loc, Set alleles, Set genotypes, Map attributes) { + this(loc); + setAlleles(alleles); + setGenotypes(genotypes); + 
setAttributes(attributes); + validate(); + } + + + public VariantContext(GenomeLoc loc, Set alleles, Map genotypes, Map attributes) { + this(loc); + setAlleles(alleles); + setGenotypes(genotypes); + setAttributes(attributes); + validate(); } // --------------------------------------------------------------------------------------------------------- @@ -167,7 +195,6 @@ public class VariantContext { */ public boolean isVariant() { return getType() != Type.NO_VARIATION; } - /** * convenience method for indels * @@ -175,6 +202,9 @@ public class VariantContext { */ public boolean isIndel() { return getType() == Type.INDEL; } + // todo -- implement, looking at reference allele + //public boolean isInsertion() { return getType() == Type.INDEL; } + //public boolean isDeletion() { return getType() == Type.INDEL; } /** * convenience method for indels @@ -205,21 +235,32 @@ public class VariantContext { * @return the reference allele for this context */ public Allele getReference() { + Allele ref = getReferenceWithoutError(); + if ( ref == null ) + throw new StingException("BUG: no reference allele found at " + this); + return ref; + } + + private Allele getReferenceWithoutError() { for ( Allele allele : getAlleles() ) if ( allele.isReference() ) return allele; - - throw new StingException("BUG: no reference allele found at " + this); + return null; } + /** * @return true if the context is strictly bi-allelic */ public boolean isBiallelic() { - //return getAlternateAlleles().size() == 1; - return getAlleles().size() == 2; + return getNAlleles() == 2; } + public int getNAlleles() { + return alleles.size(); + } + + /** * Gets the alleles. This method should return all of the alleles present at the location, * including the reference allele. 
There are no constraints imposed on the ordering of alleles @@ -246,6 +287,39 @@ public class VariantContext { return altAlleles; } + public Allele getAlternateAllele(int count) { + int n = 0; + + for ( Allele allele : alleles ) { + if ( allele.isNonReference() && n++ == count ) + return allele; + } + + throw new IllegalArgumentException("Requested " + count + " alternative allele but there are only " + n + " alternative alleles " + this); + } + + + public void setAlleles(Set alleles) { + this.alleles.clear(); + for ( Allele a : alleles ) + addAllele(a); + } + + public void addAllele(Allele allele) { + addAllele(allele, false); + } + + public void addAllele(Allele allele, boolean allowDuplicates) { + for ( Allele a : alleles ) { + if ( a.basesMatch(allele) && ! allowDuplicates ) + throw new IllegalArgumentException("Duplicate allele added to VariantContext" + this); + } + + // we are a novel allele + alleles.add(allele); + } + + // --------------------------------------------------------------------------------------------------------- // // Working with genotypes @@ -260,32 +334,109 @@ public class VariantContext { /** * @return set of all Genotypes associated with this context */ - - // todo -- genotypes should really be stored as map, not set - public Set getGenotypes() { return genotypes; } - - public Map getGenotypeMap() { - HashMap map = new HashMap(); - for ( Genotype g : genotypes ) - map.put(g.getSample(), g); - return map; - } - + public Map getGenotypes() { return genotypes; } /** * @return the set of all sample names in this context */ public Set getSampleNames() { - return getGenotypeMap().keySet(); + return getGenotypes().keySet(); } + /** + * Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS + * + * @return + */ + public int getChromosomeCount() { + // todo -- return the number of ! 
no_call alleles + return 0; + } + + /** + * Returns the number of chromosomes carrying allele A in the genotypes + * + * @param a + * @return + */ + public int getChromosomeCount(Allele a) { + // todo -- walk through genotypes and count genotypes with allele + return 0; + } + + /** + * These are genotype-specific functions + * + * @return + */ + public boolean isMonomorphic() { + return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount(); + } + + public boolean isPolymorphic() { + return ! isMonomorphic(); + } + + /** * @param sample the sample name * * @return the Genotype associated with the given sample in this context or null if the sample is not in this context */ public Genotype getGenotype(String sample) { - return getGenotypeMap().get(sample); + return getGenotypes().get(sample); + } + + public boolean hasGenotype(String sample) { + return getGenotypes().containsKey(sample); + } + + public void setGenotypes(Genotype genotype) { + this.genotypes.clear(); + addGenotype(genotype); + } + + public void setGenotypes(Collection genotypes) { + this.genotypes.clear(); + + for ( Genotype g : genotypes ) { + addGenotype(g.getSample(), g); + } + } + + public void setGenotypes(Map genotypes) { + this.genotypes.clear(); + + for ( Map.Entry elt : genotypes.entrySet() ) { + addGenotype(elt.getKey(), elt.getValue()); + } + } + + public void addGenotype(Genotype genotype) { + addGenotype(genotype.getSample(), genotype, false); + } + + + public void addGenotype(String sampleName, Genotype genotype) { + addGenotype(sampleName, genotype, false); + } + + public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) { + if ( hasGenotype(sampleName) && ! allowOverwrites ) + throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this); + + if ( ! 
sampleName.equals(genotype.getSample()) ) + throw new StingException("Sample name doesn't equal genotype.getSample(): " + sampleName + " genotype=" + genotype); + + this.genotypes.put(sampleName, genotype); + } + + public void removeGenotype(String sampleName) { + this.genotypes.remove(sampleName); + } + + public void removeGenotype(Genotype genotype) { + removeGenotype(genotype.getSample()); } // --------------------------------------------------------------------------------------------------------- @@ -294,62 +445,8 @@ public class VariantContext { // // --------------------------------------------------------------------------------------------------------- - // todo -- refactor into AttributedObject and have VariantContext and Genotype inherit from them - // todo -- define common attributes as enum - /** - * Sets the given attribute - * - * @param key the attribute key - * @param value the attribute value - */ - public void putAttribute(Object key, Object value) { - attributes.put(key, value); - } - - public void putAttributes(Map map) { - attributes.putAll(map); - } - - public boolean hasAttribute(Object key) { - return attributes.containsKey(key); - } - - public int getNumAttributes() { - return attributes.size(); - } - - /** - * @param key the attribute key - * - * @return the attribute value for the given key (or null if not set) - */ - public Object getAttribute(Object key) { - return attributes.get(key); - } - - public Object getAttribute(Object key, Object defaultValue) { - if ( hasAttribute(key) ) - return attributes.get(key); - else - return defaultValue; - } - - public String getAttributeAsString(Object key) { return (String)getAttribute(key); } - public int getAttributeAsInt(Object key) { return (Integer)getAttribute(key); } - public double getAttributeAsDouble(Object key) { return (Double)getAttribute(key); } - - public String getAttributeAsString(Object key, String defaultValue) { return (String)getAttribute(key, defaultValue); } - public int 
getAttributeAsInt(Object key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); } - public double getAttributeAsDouble(Object key, double defaultValue) { return (Double)getAttribute(key, defaultValue); } - - /** - * @return the attribute map - */ - public Map getAttributes() { - return attributes; - } // --------------------------------------------------------------------------------------------------------- // @@ -360,26 +457,42 @@ public class VariantContext { /** * To be called by any modifying routines */ - private void invalidate() { validatedP = false; } + //private void invalidate() { validatedP = false; } public boolean validate() { return validate(true); } public boolean validate(boolean throwException) { - if ( ! validatedP ) { - boolean valid = false; - // todo -- add extensive validation checking here - if ( valid ) { - validatedP = valid; - } else if ( throwException ) { - throw new StingException(this + " failed validation"); + // todo -- add extensive testing here + // todo -- check that exactly one allele is tagged as reference + // todo -- check that there's only one null allele + + try { + // check alleles + boolean alreadySeenRef = false, alreadySeenNull = false; + for ( Allele allele : alleles ) { + if ( allele.isReference() ) { + if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); + alreadySeenRef = true; + } + + if ( allele.isNullAllele() ) { + if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this); + alreadySeenNull = true; + } } - return valid; - } else { - return validatedP; + if ( ! 
alreadySeenRef ) + throw new IllegalArgumentException("No reference allele found in VariantContext"); + } catch ( IllegalArgumentException e ) { + if ( throwException ) + throw e; + else + return false; } + + return true; } // --------------------------------------------------------------------------------------------------------- @@ -390,11 +503,49 @@ public class VariantContext { private void determineType() { if ( type == null ) { - // todo -- figure out the variation type + if ( alleles.size() == 0 ) { + throw new StingException("Unexpected requested type of VariantContext with no alleles!" + this); + } else if ( alleles.size() == 1 ) { + type = Type.NO_VARIATION; + // note that this doesn't require a reference allele. You can be monomorphic independent of having a + // reference allele + } else if ( isSNPAllele(alleles) ) { + type = Type.SNP; + } else if ( isDIPAllele(alleles) ) { + type = Type.INDEL; + } else { + type = Type.MIXED; + } } } - // todo -- toString() method + private static boolean isSNPAllele(Set alleles) { + if ( alleles.size() < 2 ) + return false; + + for ( Allele allele : alleles ) { + if ( allele.length() != 1 ) + return false; + } + + return true; + } + + private static boolean isDIPAllele(Set alleles) { + if ( alleles.size() != 2 ) + return false; + + Iterator it = alleles.iterator(); + Allele a1 = it.next(); + Allele a2 = it.next(); + return a1.length() != a2.length(); + } + + + public String toString() { + return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s", + getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes()); + } /** * @return true if the context represents point alleles only (i.e. 
no indels or structural variants) @@ -418,6 +569,7 @@ public class VariantContext { // return subclasses; // } + // todo -- move to utils /** * @param allele the allele to be queried * @@ -427,7 +579,7 @@ public class VariantContext { int alleleCount = 0; int totalCount = 0; - for ( Genotype g : genotypes ) { + for ( Genotype g : getGenotypes().values() ) { for ( Allele a : g.getAlleles() ) { totalCount++; if ( allele.equals(a) ) diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java new file mode 100755 index 000000000..66df6eeee --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextAdaptors.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.oneoffprojects.variantcontext; + +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; + + +public class VariantContextAdaptors { + public static VariantContext dbsnp2VariantContext(rodDbSNP dbsnp) { + VariantContext vc = new VariantContext(dbsnp.getLocation()); + + // add the reference allele + Allele refAllele = new Allele(dbsnp.getReference(), true); + vc.addAllele(refAllele); + + // add all of the alt alleles + for ( String alt : dbsnp.getAlternateAlleleList() ) + vc.addAllele(new Allele(alt, false)); + + return vc; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java index f3842377b..1b8dcf4f5 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextUtils.java @@ -32,9 +32,9 @@ public class VariantContextUtils { throw new IllegalArgumentException("The 
locations must be identical for two contexts to be merged"); Set samples = left.getSampleNames(); - Set Gs = new HashSet(left.getGenotypes()); + Set Gs = new HashSet(left.getGenotypes().values()); - for ( Genotype g : other.getGenotypes() ) { + for ( Genotype g : other.getGenotypes().values() ) { if ( samples.contains(g.getSample()) ) { if ( uniquifySamples ) g.setSample(g.getSample() + UNIQUIFIED_SUFFIX); diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/AlleleTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/AlleleTest.java new file mode 100755 index 000000000..11f8b95ef --- /dev/null +++ b/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/AlleleTest.java @@ -0,0 +1,188 @@ +// our package +package org.broadinstitute.sting.oneoffprojects.variantcontext; + + +// the imports for unit testing. + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.ExpandingArrayList; +import org.broadinstitute.sting.utils.StingException; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +// public Allele(byte[] bases, boolean isRef) { +// public Allele(boolean isRef) { +// public Allele(String bases, boolean isRef) { +// public boolean isNullAllele() { return length() == 0; } +// public boolean isNonNullAllele() { return ! isNullAllele(); } +// public boolean isReference() { return isRef; } +// public boolean isNonReference() { return ! 
isReference(); } +// public byte[] getBases() { return bases; } +// public boolean equals(Allele other) { +// public int length() { + +/** + * Basic unit test for RecalData + */ +public class AlleleTest extends BaseTest { + Allele ARef, del, delRef, A, T, ATIns, ATCIns; + + @Before + public void before() { + del = new Allele("-"); + delRef = new Allele("-", true); + + A = new Allele("A"); + ARef = new Allele("A", true); + T = new Allele("T"); + + ATIns = new Allele("AT"); + ATCIns = new Allele("ATC"); + } + + @Test + public void testCreatingSNPAlleles() { + logger.warn("testCreatingSNPAlleles"); + + Assert.assertTrue(A.isNonReference()); + Assert.assertFalse(A.isReference()); + Assert.assertTrue(A.basesMatch("A")); + Assert.assertEquals(A.length(), 1); + Assert.assertTrue(A.isNonNullAllele()); + Assert.assertFalse(A.isNullAllele()); + + Assert.assertTrue(ARef.isReference()); + Assert.assertFalse(ARef.isNonReference()); + Assert.assertTrue(ARef.basesMatch("A")); + Assert.assertFalse(ARef.basesMatch("T")); + + Assert.assertTrue(T.isNonReference()); + Assert.assertFalse(T.isReference()); + Assert.assertTrue(T.basesMatch("T")); + Assert.assertFalse(T.basesMatch("A")); + } + + @Test + public void testCreatingIndelAlleles() { + logger.warn("testCreatingIndelAlleles"); + + Assert.assertEquals(ATIns.length(), 2); + Assert.assertEquals(ATCIns.length(), 3); + Assert.assertArrayEquals(ATIns.getBases(), "AT".getBytes()); + Assert.assertArrayEquals(ATCIns.getBases(), "ATC".getBytes()); + + Assert.assertTrue(del.isNonReference()); + Assert.assertFalse(delRef.isNonReference()); + Assert.assertFalse(del.isReference()); + Assert.assertTrue(delRef.isReference()); + Assert.assertFalse(del.basesMatch("-")); + Assert.assertTrue(del.basesMatch("")); + Assert.assertEquals(del.length(), 0); + Assert.assertFalse(del.isNonNullAllele()); + Assert.assertTrue(del.isNullAllele()); + } + + + @Test + public void testConstructors1() { + logger.warn("testConstructors1"); + + Allele a1 = new 
Allele("A"); + Allele a2 = new Allele("A".getBytes()); + Allele a3 = new Allele("a"); + Allele a4 = new Allele("A", true); + + Assert.assertTrue(a1.equals(a2)); + Assert.assertTrue(a1.equals(a3)); + Assert.assertFalse(a1.equals(a4)); + } + + @Test + public void testDelConstructors() { + logger.warn("testDelConstructors"); + + Allele a1 = new Allele("-"); + Allele a2 = new Allele("-".getBytes()); + Allele a3 = new Allele(""); + Allele a4 = new Allele("", true); + + Assert.assertTrue(a1.equals(a2)); + Assert.assertTrue(a1.equals(a3)); + Assert.assertFalse(a1.equals(a4)); + } + + @Test + public void testInsConstructors() { + logger.warn("testInsConstructors"); + + Allele a1 = new Allele("AC"); + Allele a2 = new Allele("AC".getBytes()); + Allele a3 = new Allele("Ac"); + Allele a4 = new Allele("AC", true); + + Assert.assertTrue(a1.equals(a2)); + Assert.assertTrue(a1.equals(a3)); + Assert.assertFalse(a1.equals(a4)); + } + + @Test + public void testEquals() { + logger.warn("testEquals"); + Assert.assertTrue(ARef.basesMatch(A)); + Assert.assertFalse(ARef.equals(A)); + Assert.assertFalse(ARef.equals(del)); + Assert.assertFalse(ARef.equals(ATIns)); + Assert.assertFalse(ARef.equals(ATCIns)); + + Assert.assertTrue(T.basesMatch(T)); + Assert.assertFalse(T.basesMatch(A)); + Assert.assertFalse(T.equals(A)); + + Assert.assertTrue(del.basesMatch(del)); + Assert.assertTrue(del.basesMatch(delRef)); + Assert.assertTrue(del.equals(del)); + Assert.assertFalse(del.equals(delRef)); + + Assert.assertTrue(ATIns.equals(ATIns)); + Assert.assertFalse(ATIns.equals(ATCIns)); + Assert.assertTrue(ATIns.basesMatch("AT")); + Assert.assertFalse(ATIns.basesMatch("A")); + Assert.assertFalse(ATIns.basesMatch("ATC")); + + Assert.assertTrue(ATIns.basesMatch("at")); + Assert.assertFalse(ATIns.basesMatch("atc")); + } + + @Test (expected = IllegalArgumentException.class) + public void testBadConstructorArgs1() { + logger.warn("testBadConstructorArgs1"); + byte[] foo = null; + new Allele(foo); + } + + @Test 
(expected = IllegalArgumentException.class) + public void testBadConstructorArgs2() { + logger.warn("testBadConstructorArgs2"); + new Allele("x"); + } + + @Test (expected = IllegalArgumentException.class) + public void testBadConstructorArgs3() { + logger.warn("testBadConstructorArgs3"); + new Allele("--"); + } + + @Test (expected = IllegalArgumentException.class) + public void testBadConstructorArgs4() { + logger.warn("testBadConstructorArgs4"); + new Allele("-A"); + } + + @Test (expected = IllegalArgumentException.class) + public void testBadConstructorArgs5() { + logger.warn("testBadConstructorArgs5"); + new Allele("A A"); + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextTest.java new file mode 100755 index 000000000..4b23eaa4a --- /dev/null +++ b/java/test/org/broadinstitute/sting/oneoffprojects/variantcontext/VariantContextTest.java @@ -0,0 +1,241 @@ +// our package +package org.broadinstitute.sting.oneoffprojects.variantcontext; + + +// the imports for unit testing. 
+ +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.BeforeClass; + +import java.util.Map; +import java.util.Arrays; +import java.util.Set; +import java.util.List; +import java.io.FileNotFoundException; +import java.io.File; + +import net.sf.picard.reference.ReferenceSequenceFile; + +/** + * Basic unit tests for VariantContext: construction from allele lists, type classification, and rejection of invalid allele sets + */ +public class VariantContextTest extends BaseTest { + private static ReferenceSequenceFile seq; + + @BeforeClass /* opens the Homo_sapiens_assembly18 FASTA under seqLocation and hands it to GenomeLocParser.setupRefContigOrdering */ + public static void init() throws FileNotFoundException { + // sequence + seq = new IndexedFastaSequenceFile(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta")); + GenomeLocParser.setupRefContigOrdering(seq); + } + + Allele A, Aref, T, Tref; + Allele del, delRef, ATC, ATCref; /* fixtures whose name ends in "ref" are built with isRef = true in before() */ + + // A [ref] / T at 10 + GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 11); + + // - / ATC [ref] from 20-23 + GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 23); + + // - [ref] / ATC from 20-20 + GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20); + + // - / A / T / ATC [ref] from 20-23 + GenomeLoc mixedLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 23); /* mixedLoc and Tref are not referenced by any test in this class */ + + @Before /* rebuilds every allele fixture ahead of each test */ + public void before() { + del = new Allele("-"); + delRef = new Allele("-", true); + + A = new Allele("A"); + Aref = new Allele("A", true); + T = new Allele("T"); + Tref = new Allele("T", true); + + ATC = new Allele("ATC"); + ATCref = new Allele("ATC", true); + } + + // todo -- create reference context + + @Test /* one reference and one alternate single-base allele must classify as a biallelic SNP */ + public void testCreatingSNPVariantContext() { + logger.warn("testCreatingSNPVariantContext"); + + List alleles = Arrays.asList(Aref, T); /* NOTE(review): raw List here and in the tests below, presumably a List of Allele; confirm against the VariantContext constructor */ + VariantContext vc = new VariantContext(snpLoc, alleles); + logger.warn("vc = " + vc); + + 
Assert.assertEquals(vc.getLocation(), snpLoc); /* NOTE(review): JUnit's assertEquals takes (expected, actual); the arguments are reversed throughout this class, which should only affect failure messages */ + Assert.assertEquals(vc.getType(), VariantContext.Type.SNP); + Assert.assertTrue(vc.isSNP()); + Assert.assertFalse(vc.isIndel()); + //Assert.assertFalse(vc.isInsertion()); + //Assert.assertFalse(vc.isDeletion()); + Assert.assertFalse(vc.isMixed()); + Assert.assertTrue(vc.isBiallelic()); + Assert.assertEquals(vc.getNAlleles(), 2); + + Assert.assertEquals(vc.getReference(), Aref); /* NOTE(review): Allele appears to declare equals(Allele) rather than override equals(Object), so assertEquals may be comparing by identity here; confirm */ + Assert.assertEquals(vc.getAlleles().size(), 2); + Assert.assertEquals(vc.getAlternateAlleles().size(), 1); + Assert.assertEquals(vc.getAlternateAllele(0), T); + + Assert.assertFalse(vc.hasGenotypes()); + + Assert.assertEquals(vc.getSampleNames().size(), 0); + } + + @Test /* a lone reference allele must classify as NO_VARIATION with no alternate alleles */ + public void testCreatingRefVariantContext() { + logger.warn("testCreatingRefVariantContext"); + + List alleles = Arrays.asList(Aref); + VariantContext vc = new VariantContext(snpLoc, alleles); + logger.warn("vc = " + vc); + + Assert.assertEquals(vc.getLocation(), snpLoc); + Assert.assertEquals(vc.getType(), VariantContext.Type.NO_VARIATION); + Assert.assertFalse(vc.isSNP()); + Assert.assertFalse(vc.isIndel()); + //Assert.assertFalse(vc.isInsertion()); + //Assert.assertFalse(vc.isDeletion()); + Assert.assertFalse(vc.isMixed()); + Assert.assertFalse(vc.isBiallelic()); + Assert.assertEquals(vc.getNAlleles(), 1); + + Assert.assertEquals(vc.getReference(), Aref); + Assert.assertEquals(vc.getAlleles().size(), 1); + Assert.assertEquals(vc.getAlternateAlleles().size(), 0); + //Assert.assertEquals(vc.getAlternateAllele(0), T); + + Assert.assertFalse(vc.hasGenotypes()); + Assert.assertEquals(vc.getSampleNames().size(), 0); + } + + @Test /* a multi-base reference with the null allele as alternate must classify as a biallelic INDEL */ + public void testCreatingDeletionVariantContext() { + logger.warn("testCreatingDeletionVariantContext"); + + List alleles = Arrays.asList(ATCref, del); + VariantContext vc = new VariantContext(delLoc, alleles); + logger.warn("vc = " + vc); + + Assert.assertEquals(vc.getLocation(), delLoc); + Assert.assertEquals(vc.getType(), 
VariantContext.Type.INDEL); + Assert.assertFalse(vc.isSNP()); + Assert.assertTrue(vc.isIndel()); + //Assert.assertFalse(vc.isInsertion()); + //Assert.assertFalse(vc.isDeletion()); + Assert.assertFalse(vc.isMixed()); + Assert.assertTrue(vc.isBiallelic()); + Assert.assertEquals(vc.getNAlleles(), 2); + + Assert.assertEquals(vc.getReference(), ATCref); + Assert.assertEquals(vc.getAlleles().size(), 2); + Assert.assertEquals(vc.getAlternateAlleles().size(), 1); + Assert.assertEquals(vc.getAlternateAllele(0), del); + + Assert.assertFalse(vc.hasGenotypes()); + + Assert.assertEquals(vc.getSampleNames().size(), 0); + } + + @Test /* the null allele as reference with a multi-base alternate must classify as a biallelic INDEL */ + public void testCreatingInsertionVariantContext() { + logger.warn("testCreatingInsertionVariantContext"); + + List alleles = Arrays.asList(delRef, ATC); + VariantContext vc = new VariantContext(insLoc, alleles); + logger.warn("vc = " + vc); + + Assert.assertEquals(vc.getLocation(), insLoc); + Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL); + Assert.assertFalse(vc.isSNP()); + Assert.assertTrue(vc.isIndel()); + //Assert.assertFalse(vc.isInsertion()); + //Assert.assertFalse(vc.isDeletion()); + Assert.assertFalse(vc.isMixed()); + Assert.assertTrue(vc.isBiallelic()); + Assert.assertEquals(vc.getNAlleles(), 2); + + Assert.assertEquals(vc.getReference(), delRef); + Assert.assertEquals(vc.getAlleles().size(), 2); + Assert.assertEquals(vc.getAlternateAlleles().size(), 1); + Assert.assertEquals(vc.getAlternateAllele(0), ATC); + + Assert.assertFalse(vc.hasGenotypes()); + + Assert.assertEquals(vc.getSampleNames().size(), 0); + } + + @Test (expected = IllegalArgumentException.class) /* two reference alleles in one context */ + public void testBadConstructorArgs1() { + logger.warn("testBadConstructorArgs1"); + new VariantContext(insLoc, Arrays.asList(delRef, ATCref)); + } + + @Test (expected = IllegalArgumentException.class) /* reference and alternate both carry the null allele's bases */ + public void testBadConstructorArgs2() { + logger.warn("testBadConstructorArgs2"); + new VariantContext(insLoc, Arrays.asList(delRef, del)); + } + + @Test (expected = 
IllegalArgumentException.class) /* no reference allele at all */ + public void testBadConstructorArgs3() { + logger.warn("testBadConstructorArgs3"); + new VariantContext(insLoc, Arrays.asList(del)); + } + + @Test (expected = IllegalArgumentException.class) /* alternate T listed twice */ + public void testBadConstructorArgsDuplicateAlleles1() { + logger.warn("testBadConstructorArgsDuplicateAlleles1"); + new VariantContext(insLoc, Arrays.asList(Aref, T, T)); + } + + @Test (expected = IllegalArgumentException.class) /* base A appears both as the reference and as an alternate */ + public void testBadConstructorArgsDuplicateAlleles2() { + logger.warn("testBadConstructorArgsDuplicateAlleles2"); + new VariantContext(insLoc, Arrays.asList(Aref, A)); + } +} + +// public Type getType() { +// public boolean isSNP() { return getType() == Type.SNP; } +// public boolean isVariant() { return getType() != Type.NO_VARIATION; } +// public boolean isIndel() { return getType() == Type.INDEL; } +// public boolean isMixed() { return getType() == Type.MIXED; } +// public GenomeLoc getLocation() { return loc; } +// public Allele getReference() { +// public boolean isBiallelic() { +// public boolean isMonomorphic() { +// public boolean isPolymorphic() { +// public int getNAlleles() { +// public Set getAlleles() { return alleles; } +// public Set getAlternateAlleles() { +// public Allele getAlternateAllele(int i) { +// public void setAlleles(Set alleles) { +// public void addAllele(Allele allele) { +// public void addAllele(Allele allele, boolean allowDuplicates) { +// public boolean hasGenotypes() { return genotypes.size() > 0; } +// public Map getGenotypes() { return genotypes; } +// public Set getSampleNames() { +// public Genotype getGenotype(String sample) { +// public boolean hasGenotype(String sample) { +// public void setGenotypes(Genotype genotype) { +// public void setGenotypes(Collection genotypes) { +// public void setGenotypes(Map genotypes) { +// public void addGenotype(Genotype genotype) { +// public void addGenotype(String sampleName, Genotype genotype) { +// public void addGenotype(String 
sampleName, Genotype genotype, boolean allowOverwrites) { +// public void removeGenotype(String sampleName) { +// public void removeGenotype(Genotype genotype) { +// public boolean validate() {