Incremental update 2 -- refined allele and VariantContext classes; support for AttributedObject class; extensive testing for Allele class, and partial for VariantContext. Now possible to easily convert dbSNP to VariantContext.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2705 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-01-27 17:19:37 +00:00
parent 3edcefb7fb
commit 3399ad9691
9 changed files with 903 additions and 148 deletions

View File

@ -64,16 +64,23 @@ import java.util.Arrays;
* If you know which allele is the reference, you can determine whether the variant is an insertion or deletion
*/
public class Allele {
private static final byte[] NULL_ALLELE_BASES = new byte[0];
private boolean isRef = false;
private byte[] bases = null;
public Allele(byte[] bases, boolean isRef) {
bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
this.isRef = isRef;
if ( bases == null )
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
// standardize our representation of null allele and bases
if ( (bases.length == 1 && bases[0] == '-') || bases.length == 0)
bases = NULL_ALLELE_BASES;
else
bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
this.isRef = isRef;
this.bases = bases;
for ( byte b : bases ) {
if ( ! BaseUtils.isRegularBase(b) ) {
@ -91,6 +98,10 @@ public class Allele {
this(bases.getBytes(), isRef);
}
public Allele() { this(false); }
public Allele(String bases) { this(bases, false); }
public Allele(byte[] bases) { this(bases, false); }
//
//
// accessor routines
@ -102,6 +113,9 @@ public class Allele {
public boolean isReference() { return isRef; }
public boolean isNonReference() { return ! isReference(); }
public String toString() {
return isNullAllele() ? "-" : new String(getBases()) + ( isReference() ? "*" : "");
}
/**
* Return the DNA bases segregating in this allele. Note this isn't reference polarized,
@ -117,9 +131,14 @@ public class Allele {
* @return true if these alleles are equal
*/
public boolean equals(Allele other) {
return Arrays.equals(bases, other.getBases());
return isRef == other.isRef && this.basesMatch(other.getBases());
}
// todo -- notice case insensitivity
public boolean basesMatch(byte[] test) { return bases == test || Arrays.equals(bases, test); }
public boolean basesMatch(String test) { return basesMatch(test.toUpperCase().getBytes()); }
public boolean basesMatch(Allele test) { return basesMatch(test.getBases()); }
public int length() {
return bases.length;
}

View File

@ -0,0 +1,112 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import java.util.*;
/**
 * Class AttributedObject
 * <p/>
 * Base class for objects that carry an untyped key -> value attribute map.
 * Keys and values are plain {@code Object}s; the {@code getAttributeAsXxx}
 * helpers simply cast the stored value.
 */
public class AttributedObject {
    private final Map<Object, Object> attributes = new HashMap<Object, Object>();

    /** Creates an object with no attributes. */
    public AttributedObject() {
    }

    /**
     * Creates an object holding a copy of the given bindings.
     *
     * @param attributes the initial bindings; may be null, which yields no attributes
     */
    public AttributedObject(Map<Object, Object> attributes) {
        setAttributes(attributes);
    }

    // ---------------------------------------------------------------------------------------------------------
    //
    // Working with attributes
    //
    // ---------------------------------------------------------------------------------------------------------

    /** Removes every attribute binding. */
    public void clearAttributes() {
        this.attributes.clear();
    }

    /**
     * @return the attribute map (the live internal map, not a copy)
     */
    public Map<Object, Object> getAttributes() {
        return attributes;
    }

    // todo -- define common attributes as enum

    /**
     * Replaces all current bindings with those in {@code map}.
     *
     * @param map the new bindings; null is treated as an empty map
     */
    public void setAttributes(Map<? extends Object, Object> map) {
        // Snapshot before clearing so that passing in our own map (e.g. the result of
        // getAttributes()) does not wipe the bindings we are about to copy.
        final Map<Object, Object> snapshot = new HashMap<Object, Object>();
        if ( map != null )
            snapshot.putAll(map);

        this.attributes.clear();
        putAttributes(snapshot);
    }

    /**
     * Binds {@code key} to {@code value}; overwriting an existing binding is an error.
     *
     * @param key   the attribute key
     * @param value the attribute value
     * @throws StingException if {@code key} is already bound
     */
    public void putAttribute(Object key, Object value) {
        putAttribute(key, value, false);
    }

    /**
     * Binds {@code key} to {@code value}.
     *
     * @param key             the attribute key
     * @param value           the attribute value
     * @param allowOverwrites if false, an existing binding for {@code key} is an error
     * @throws StingException if {@code key} is already bound and overwrites are not allowed
     */
    public void putAttribute(Object key, Object value, boolean allowOverwrites) {
        if ( hasAttribute(key) && ! allowOverwrites )
            throw new StingException("Attempting to overwrite key->value binding: key = " + key + " this = " + this);

        this.attributes.put(key, value);
    }

    /**
     * Removes the binding for {@code key}, if any.
     *
     * @param key the attribute key
     */
    public void removeAttribute(Object key) {
        this.attributes.remove(key);
    }

    /**
     * Adds every binding in {@code map}, without allowing overwrites.
     *
     * @param map the bindings to add; null is treated as an empty map
     * @throws StingException if any key in {@code map} is already bound
     */
    public void putAttributes(Map<? extends Object, Object> map) {
        if ( map == null )
            return;

        // iterate the ARGUMENT, not our own store
        for ( Map.Entry<? extends Object, Object> elt : map.entrySet() ) {
            putAttribute(elt.getKey(), elt.getValue());
        }
    }

    /**
     * @param key the attribute key
     *
     * @return true if {@code key} is bound
     */
    public boolean hasAttribute(Object key) {
        return attributes.containsKey(key);
    }

    /**
     * @return the number of bound attributes
     */
    public int getNumAttributes() {
        return attributes.size();
    }

    /**
     * @param key the attribute key
     *
     * @return the attribute value for the given key (or null if not set)
     */
    public Object getAttribute(Object key) {
        return attributes.get(key);
    }

    /**
     * @param key          the attribute key
     * @param defaultValue the value to return when {@code key} is not bound
     *
     * @return the bound value if {@code key} is present, otherwise {@code defaultValue}
     */
    public Object getAttribute(Object key, Object defaultValue) {
        if ( hasAttribute(key) )
            return attributes.get(key);
        else
            return defaultValue;
    }

    /**
     * Builds a new AttributedObject holding only the requested keys.
     * A requested key that is not bound here is bound to null in the result.
     *
     * @param keys the keys to select
     *
     * @return a new object with one binding per requested key
     * @throws StingException if {@code keys} contains the same key twice
     */
    public AttributedObject getAttributes(Collection<Object> keys) {
        AttributedObject selected = new AttributedObject();

        for ( Object key : keys )
            selected.putAttribute(key, this.getAttribute(key));

        return selected;
    }

    // Typed accessors: each casts the stored value. The int/double variants without a default
    // unbox the result, so they throw NullPointerException when the key is not bound.
    public String getAttributeAsString(Object key)      { return (String)getAttribute(key); }
    public int getAttributeAsInt(Object key)            { return (Integer)getAttribute(key); }
    public double getAttributeAsDouble(Object key)      { return (Double)getAttribute(key); }

    public String getAttributeAsString(Object key, String defaultValue)   { return (String)getAttribute(key, defaultValue); }
    public int getAttributeAsInt(Object key, int defaultValue)            { return (Integer)getAttribute(key, defaultValue); }
    public double getAttributeAsDouble(Object key, double defaultValue)   { return (Double)getAttribute(key, defaultValue); }
}

View File

@ -9,21 +9,17 @@ import java.util.*;
* <p/>
* This class encompasses all the basic information about a genotype
*/
public class Genotype {
public class Genotype extends AttributedObject {
private List<Allele> alleles;
private double negLog10PError;
private String sample;
private HashMap<Object, Object> attributes;
public Genotype(List<Allele> alleles, String sample, double negLog10PError) {
this.alleles = new ArrayList<Allele>(alleles);
this.sample = sample;
this.negLog10PError = negLog10PError;
attributes = new HashMap<Object, Object>();
}
/**
@ -71,22 +67,22 @@ public class Genotype {
/**
* @return true if all alleles for this genotype are SNPs or reference
*/
public boolean isPointGenotype() {
// for ( Allele allele : alleles ) {
// if ( allele.isVariant() && !allele.isSNP() )
// return false;
// }
return true;
}
// public boolean isPointGenotype() {
//// for ( Allele allele : alleles ) {
//// if ( allele.isVariant() && !allele.isSNP() )
//// return false;
//// }
// return true;
// }
/**
* @return true if this is a variant genotype, false if it's reference
*/
public boolean isVariant() {
// for ( Allele allele : alleles ) {
// if ( allele.isVariant() )
// return true;
// }
for ( Allele allele : alleles ) {
if ( allele.isNonReference() )
return true;
}
return false;
}
@ -116,31 +112,4 @@ public class Genotype {
*/
// TODO -- implement me
// public Variation toVariation(char ref);
/**
* Sets the given attribute
*
* @param key the attribute key
* @param value the attribute value
*/
public void setAttribute(Object key, Object value) {
attributes.put(key, value);
}
/**
* @param key the attribute key
*
* @return the attribute value for the given key (or null if not set)
*/
public Object getAttribute(Object key) {
return attributes.get(key);
}
/**
* @return the attribute map
*/
public Map<Object, Object> getAttributes() {
return attributes;
}
}

View File

@ -0,0 +1,50 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.*;
/**
 * Test routine for the new VariantContext object: converts each dbSNP record found
 * in the "dbsnp" track into a VariantContext and prints it.
 */
public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
    /**
     * Converts the dbSNP records at the current position and prints each resulting context.
     *
     * @return the number of records that produced a non-null VariantContext; 0 when there is
     *         no reference context or no "dbsnp" track data
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        // the location is fetched up front but is not consulted below
        GenomeLoc cur = context.getLocation();

        if ( ref == null )
            return 0;

        RODRecordList<ReferenceOrderedDatum> dbsnpList = tracker.getTrackData("dbsnp", null);
        if ( dbsnpList == null )
            return 0;

        int converted = 0;
        for ( ReferenceOrderedDatum datum : dbsnpList ) {
            VariantContext vc = VariantContextAdaptors.dbsnp2VariantContext((rodDbSNP)datum);
            if ( vc == null )
                continue;

            converted++;
            System.out.printf("%s%n", vc);
        }

        return converted;
    }

    /** @return the initial running total, zero */
    public Integer reduceInit() {
        return 0;
    }

    /** @return the running total plus the count produced by one map call */
    public Integer reduce(Integer point, Integer sum) {
        return point + sum;
    }
}

View File

@ -15,21 +15,18 @@ import org.apache.commons.jexl.*;
* <p/>
* This class represents a context that unifies one or more variants
*/
public class VariantContext {
public class VariantContext extends AttributedObject {
private GenomeLoc loc;
private Set<Allele> alleles = new HashSet<Allele>();
private Set<Genotype> genotypes = new HashSet<Genotype>();
private HashMap<Object, Object> attributes = new HashMap<Object, Object>();
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
Type type = null;
private double negLog10PError = 0.0; // todo - fixme
// todo -- add QUAL and FILTER
/** Have we checked this VariantContext already? */
private boolean validatedP = false;
//private double negLog10PError = 0.0; // todo - fixme
// public VariantContext(VariationRod rod) {
//
@ -62,16 +59,47 @@ public class VariantContext {
this(parent.getLocation(), parent.getAlleles(), genotypes, attributes);
}
public VariantContext(GenomeLoc loc, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
// todo -- add more convenience methods
public VariantContext(GenomeLoc loc, Set<Allele> alleles) { this(loc, alleles, null); }
public VariantContext(GenomeLoc loc, List<Allele> alleles ) { this(loc, alleles, null); }
public VariantContext(GenomeLoc loc, List<Allele> alleles, Map<Object, Object> attributes) {
this(loc);
// todo -- add extensive testing here
HashSet<Allele> alleleSet = new HashSet<Allele>();
for ( Allele a : alleles ) {
if ( alleleSet.contains(a) )
throw new IllegalArgumentException("List<Alleles> contains duplicate elements " + loc + " " + alleles );
alleleSet.add(a);
}
// todo -- check that exactly one allele is tagged as reference
setAlleles(alleleSet);
setAttributes(attributes);
validate();
}
this.alleles = new HashSet<Allele>(alleles);
this.genotypes = new HashSet<Genotype>(genotypes);
this.attributes = new HashMap<Object, Object>(attributes);
public VariantContext(GenomeLoc loc, Set<Allele> alleles, Map<Object, Object> attributes) {
this(loc);
setAlleles(alleles);
setAttributes(attributes);
validate();
}
public VariantContext(GenomeLoc loc, Set<Allele> alleles, Set<Genotype> genotypes, Map<Object, Object> attributes) {
this(loc);
setAlleles(alleles);
setGenotypes(genotypes);
setAttributes(attributes);
validate();
}
public VariantContext(GenomeLoc loc, Set<Allele> alleles, Map<String, Genotype> genotypes, Map<Object, Object> attributes) {
this(loc);
setAlleles(alleles);
setGenotypes(genotypes);
setAttributes(attributes);
validate();
}
// ---------------------------------------------------------------------------------------------------------
@ -167,7 +195,6 @@ public class VariantContext {
*/
public boolean isVariant() { return getType() != Type.NO_VARIATION; }
/**
* convenience method for indels
*
@ -175,6 +202,9 @@ public class VariantContext {
*/
public boolean isIndel() { return getType() == Type.INDEL; }
// todo -- implement, looking at reference allele
//public boolean isInsertion() { return getType() == Type.INDEL; }
//public boolean isDeletion() { return getType() == Type.INDEL; }
/**
* convenience method for indels
@ -205,21 +235,32 @@ public class VariantContext {
* @return the reference allele for this context
*/
public Allele getReference() {
Allele ref = getReferenceWithoutError();
if ( ref == null )
throw new StingException("BUG: no reference allele found at " + this);
return ref;
}
private Allele getReferenceWithoutError() {
for ( Allele allele : getAlleles() )
if ( allele.isReference() )
return allele;
throw new StingException("BUG: no reference allele found at " + this);
return null;
}
/**
* @return true if the context is strictly bi-allelic
*/
public boolean isBiallelic() {
//return getAlternateAlleles().size() == 1;
return getAlleles().size() == 2;
return getNAlleles() == 2;
}
public int getNAlleles() {
return alleles.size();
}
/**
* Gets the alleles. This method should return all of the alleles present at the location,
* including the reference allele. There are no constraints imposed on the ordering of alleles
@ -246,6 +287,39 @@ public class VariantContext {
return altAlleles;
}
public Allele getAlternateAllele(int count) {
int n = 0;
for ( Allele allele : alleles ) {
if ( allele.isNonReference() && n++ == count )
return allele;
}
throw new IllegalArgumentException("Requested " + count + " alternative allele but there are only " + n + " alternative alleles " + this);
}
public void setAlleles(Set<Allele> alleles) {
this.alleles.clear();
for ( Allele a : alleles )
addAllele(a);
}
public void addAllele(Allele allele) {
addAllele(allele, false);
}
/**
 * Adds an allele to this context.
 *
 * @param allele          the allele to add
 * @param allowDuplicates if false, an allele whose bases match an allele already present is rejected
 * @throws IllegalArgumentException if duplicates are not allowed and the bases of {@code allele}
 *                                  match those of an allele already in this context
 */
public void addAllele(Allele allele, boolean allowDuplicates) {
    // the scan is only needed when duplicates are forbidden
    if ( ! allowDuplicates ) {
        for ( Allele a : alleles ) {
            if ( a.basesMatch(allele) )
                throw new IllegalArgumentException("Duplicate allele " + allele + " added to VariantContext " + this);
        }
    }

    // we are a novel allele
    alleles.add(allele);
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with genotypes
@ -260,32 +334,109 @@ public class VariantContext {
/**
* @return set of all Genotypes associated with this context
*/
// todo -- genotypes should really be stored as map, not set
public Set<Genotype> getGenotypes() { return genotypes; }
public Map<String, Genotype> getGenotypeMap() {
HashMap<String, Genotype> map = new HashMap<String, Genotype>();
for ( Genotype g : genotypes )
map.put(g.getSample(), g);
return map;
}
public Map<String, Genotype> getGenotypes() { return genotypes; }
/**
* @return the set of all sample names in this context
*/
public Set<String> getSampleNames() {
return getGenotypeMap().keySet();
return getGenotypes().keySet();
}
/**
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
*
* @return
*/
public int getChromosomeCount() {
// todo -- return the number of ! no_call alleles
return 0;
}
/**
* Returns the number of chromosomes carrying allele A in the genotypes
*
* @param a
* @return
*/
public int getChromosomeCount(Allele a) {
// todo -- walk through genotypes and count genotypes with allele
return 0;
}
/**
* These are genotype-specific functions
*
* @return
*/
public boolean isMonomorphic() {
return ! isVariant() || getChromosomeCount(getReference()) == getChromosomeCount();
}
public boolean isPolymorphic() {
return ! isMonomorphic();
}
/**
* @param sample the sample name
*
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
*/
public Genotype getGenotype(String sample) {
return getGenotypeMap().get(sample);
return getGenotypes().get(sample);
}
public boolean hasGenotype(String sample) {
return getGenotypes().containsKey(sample);
}
public void setGenotypes(Genotype genotype) {
this.genotypes.clear();
addGenotype(genotype);
}
public void setGenotypes(Collection<Genotype> genotypes) {
this.genotypes.clear();
for ( Genotype g : genotypes ) {
addGenotype(g.getSample(), g);
}
}
public void setGenotypes(Map<String, Genotype> genotypes) {
this.genotypes.clear();
for ( Map.Entry<String, Genotype> elt : genotypes.entrySet() ) {
addGenotype(elt.getKey(), elt.getValue());
}
}
public void addGenotype(Genotype genotype) {
addGenotype(genotype.getSample(), genotype, false);
}
public void addGenotype(String sampleName, Genotype genotype) {
addGenotype(sampleName, genotype, false);
}
public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
if ( hasGenotype(sampleName) && ! allowOverwrites )
throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this);
if ( ! sampleName.equals(genotype.getSample()) )
throw new StingException("Sample name doesn't equal genotype.getSample(): " + sampleName + " genotype=" + genotype);
this.genotypes.put(sampleName, genotype);
}
public void removeGenotype(String sampleName) {
this.genotypes.remove(sampleName);
}
public void removeGenotype(Genotype genotype) {
removeGenotype(genotype.getSample());
}
// ---------------------------------------------------------------------------------------------------------
@ -294,62 +445,8 @@ public class VariantContext {
//
// ---------------------------------------------------------------------------------------------------------
// todo -- refactor into AttributedObject and have VariantContext and Genotype inherit from them
// todo -- define common attributes as enum
/**
* Sets the given attribute
*
* @param key the attribute key
* @param value the attribute value
*/
public void putAttribute(Object key, Object value) {
attributes.put(key, value);
}
public void putAttributes(Map<? extends Object, Object> map) {
attributes.putAll(map);
}
public boolean hasAttribute(Object key) {
return attributes.containsKey(key);
}
public int getNumAttributes() {
return attributes.size();
}
/**
* @param key the attribute key
*
* @return the attribute value for the given key (or null if not set)
*/
public Object getAttribute(Object key) {
return attributes.get(key);
}
public Object getAttribute(Object key, Object defaultValue) {
if ( hasAttribute(key) )
return attributes.get(key);
else
return defaultValue;
}
public String getAttributeAsString(Object key) { return (String)getAttribute(key); }
public int getAttributeAsInt(Object key) { return (Integer)getAttribute(key); }
public double getAttributeAsDouble(Object key) { return (Double)getAttribute(key); }
public String getAttributeAsString(Object key, String defaultValue) { return (String)getAttribute(key, defaultValue); }
public int getAttributeAsInt(Object key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); }
public double getAttributeAsDouble(Object key, double defaultValue) { return (Double)getAttribute(key, defaultValue); }
/**
* @return the attribute map
*/
public Map<Object, Object> getAttributes() {
return attributes;
}
// ---------------------------------------------------------------------------------------------------------
//
@ -360,26 +457,42 @@ public class VariantContext {
/**
* To be called by any modifying routines
*/
private void invalidate() { validatedP = false; }
//private void invalidate() { validatedP = false; }
public boolean validate() {
return validate(true);
}
public boolean validate(boolean throwException) {
if ( ! validatedP ) {
boolean valid = false;
// todo -- add extensive validation checking here
if ( valid ) {
validatedP = valid;
} else if ( throwException ) {
throw new StingException(this + " failed validation");
// todo -- add extensive testing here
// todo -- check that exactly one allele is tagged as reference
// todo -- check that there's only one null allele
try {
// check alleles
boolean alreadySeenRef = false, alreadySeenNull = false;
for ( Allele allele : alleles ) {
if ( allele.isReference() ) {
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
alreadySeenRef = true;
}
if ( allele.isNullAllele() ) {
if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
alreadySeenNull = true;
}
}
return valid;
} else {
return validatedP;
if ( ! alreadySeenRef )
throw new IllegalArgumentException("No reference allele found in VariantContext");
} catch ( IllegalArgumentException e ) {
if ( throwException )
throw e;
else
return false;
}
return true;
}
// ---------------------------------------------------------------------------------------------------------
@ -390,11 +503,49 @@ public class VariantContext {
private void determineType() {
if ( type == null ) {
// todo -- figure out the variation type
if ( alleles.size() == 0 ) {
throw new StingException("Unexpected requested type of VariantContext with no alleles!" + this);
} else if ( alleles.size() == 1 ) {
type = Type.NO_VARIATION;
// note that this doesn't require a reference allele. You can be monomorphic independent of having a
// reference allele
} else if ( isSNPAllele(alleles) ) {
type = Type.SNP;
} else if ( isDIPAllele(alleles) ) {
type = Type.INDEL;
} else {
type = Type.MIXED;
}
}
}
// todo -- toString() method
private static boolean isSNPAllele(Set<Allele> alleles) {
if ( alleles.size() < 2 )
return false;
for ( Allele allele : alleles ) {
if ( allele.length() != 1 )
return false;
}
return true;
}
private static boolean isDIPAllele(Set<Allele> alleles) {
if ( alleles.size() != 2 )
return false;
Iterator<Allele> it = alleles.iterator();
Allele a1 = it.next();
Allele a2 = it.next();
return a1.length() != a2.length();
}
/**
 * @return a one-line, bracketed summary of this context: location, type, alleles,
 *         attributes and genotypes
 */
@Override
public String toString() {
    // the closing bracket balances the leading "[VC"
    return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s]",
            getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes());
}
/**
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
@ -418,6 +569,7 @@ public class VariantContext {
// return subclasses;
// }
// todo -- move to utils
/**
* @param allele the allele to be queried
*
@ -427,7 +579,7 @@ public class VariantContext {
int alleleCount = 0;
int totalCount = 0;
for ( Genotype g : genotypes ) {
for ( Genotype g : getGenotypes().values() ) {
for ( Allele a : g.getAlleles() ) {
totalCount++;
if ( allele.equals(a) )

View File

@ -0,0 +1,24 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
/**
 * Converters from existing record types to VariantContext.
 */
public class VariantContextAdaptors {
    /**
     * Builds a VariantContext at the dbSNP record's location holding its reference allele
     * (tagged as reference) followed by each of its alternate alleles.
     *
     * @param dbsnp the dbSNP record to convert
     *
     * @return the newly built context
     */
    public static VariantContext dbsnp2VariantContext(rodDbSNP dbsnp) {
        VariantContext context = new VariantContext(dbsnp.getLocation());

        // reference allele goes in first
        context.addAllele(new Allele(dbsnp.getReference(), true));

        // then every alternate allele, tagged as non-reference
        for ( String altBases : dbsnp.getAlternateAlleleList() ) {
            Allele altAllele = new Allele(altBases, false);
            context.addAllele(altAllele);
        }

        return context;
    }
}

View File

@ -32,9 +32,9 @@ public class VariantContextUtils {
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
Set<String> samples = left.getSampleNames();
Set<Genotype> Gs = new HashSet<Genotype>(left.getGenotypes());
Set<Genotype> Gs = new HashSet<Genotype>(left.getGenotypes().values());
for ( Genotype g : other.getGenotypes() ) {
for ( Genotype g : other.getGenotypes().values() ) {
if ( samples.contains(g.getSample()) ) {
if ( uniquifySamples )
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);

View File

@ -0,0 +1,188 @@
// our package
package org.broadinstitute.sting.oneoffprojects.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ExpandingArrayList;
import org.broadinstitute.sting.utils.StingException;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
// public Allele(byte[] bases, boolean isRef) {
// public Allele(boolean isRef) {
// public Allele(String bases, boolean isRef) {
// public boolean isNullAllele() { return length() == 0; }
// public boolean isNonNullAllele() { return ! isNullAllele(); }
// public boolean isReference() { return isRef; }
// public boolean isNonReference() { return ! isReference(); }
// public byte[] getBases() { return bases; }
// public boolean equals(Allele other) {
// public int length() {
/**
 * Basic unit test for Allele
 */
public class AlleleTest extends BaseTest {
    Allele ARef, del, delRef, A, T, ATIns, ATCIns;

    /** Builds the fixture alleles used by every test. */
    @Before
    public void before() {
        del = new Allele("-");
        delRef = new Allele("-", true);

        A = new Allele("A");
        ARef = new Allele("A", true);
        T = new Allele("T");

        ATIns = new Allele("AT");
        ATCIns = new Allele("ATC");
    }

    @Test
    public void testCreatingSNPAlleles() {
        logger.warn("testCreatingSNPAlleles");

        Assert.assertTrue(A.isNonReference());
        Assert.assertFalse(A.isReference());
        Assert.assertTrue(A.basesMatch("A"));
        Assert.assertEquals(1, A.length());   // JUnit order is (expected, actual)
        Assert.assertTrue(A.isNonNullAllele());
        Assert.assertFalse(A.isNullAllele());

        Assert.assertTrue(ARef.isReference());
        Assert.assertFalse(ARef.isNonReference());
        Assert.assertTrue(ARef.basesMatch("A"));
        Assert.assertFalse(ARef.basesMatch("T"));

        Assert.assertTrue(T.isNonReference());
        Assert.assertFalse(T.isReference());
        Assert.assertTrue(T.basesMatch("T"));
        Assert.assertFalse(T.basesMatch("A"));
    }

    @Test
    public void testCreatingIndelAlleles() {
        logger.warn("testCreatingIndelAlleles");

        Assert.assertEquals(2, ATIns.length());
        Assert.assertEquals(3, ATCIns.length());
        Assert.assertArrayEquals("AT".getBytes(), ATIns.getBases());
        Assert.assertArrayEquals("ATC".getBytes(), ATCIns.getBases());

        Assert.assertTrue(del.isNonReference());
        Assert.assertFalse(delRef.isNonReference());
        Assert.assertFalse(del.isReference());
        Assert.assertTrue(delRef.isReference());
        // "-" is normalized away by the constructor, so the stored bases are empty
        Assert.assertFalse(del.basesMatch("-"));
        Assert.assertTrue(del.basesMatch(""));
        Assert.assertEquals(0, del.length());
        Assert.assertFalse(del.isNonNullAllele());
        Assert.assertTrue(del.isNullAllele());
    }

    @Test
    public void testConstructors1() {
        logger.warn("testConstructors1");

        Allele a1 = new Allele("A");
        Allele a2 = new Allele("A".getBytes());
        Allele a3 = new Allele("a");
        Allele a4 = new Allele("A", true);

        Assert.assertTrue(a1.equals(a2));
        Assert.assertTrue(a1.equals(a3));
        Assert.assertFalse(a1.equals(a4));
    }

    @Test
    public void testDelConstructors() {
        logger.warn("testDelConstructors");

        Allele a1 = new Allele("-");
        Allele a2 = new Allele("-".getBytes());
        Allele a3 = new Allele("");
        Allele a4 = new Allele("", true);

        Assert.assertTrue(a1.equals(a2));
        Assert.assertTrue(a1.equals(a3));
        Assert.assertFalse(a1.equals(a4));
    }

    @Test
    public void testInsConstructors() {
        logger.warn("testInsConstructors");

        Allele a1 = new Allele("AC");
        Allele a2 = new Allele("AC".getBytes());
        Allele a3 = new Allele("Ac");
        Allele a4 = new Allele("AC", true);

        Assert.assertTrue(a1.equals(a2));
        Assert.assertTrue(a1.equals(a3));
        Assert.assertFalse(a1.equals(a4));
    }

    @Test
    public void testEquals() {
        logger.warn("testEquals");

        Assert.assertTrue(ARef.basesMatch(A));
        Assert.assertFalse(ARef.equals(A));
        Assert.assertFalse(ARef.equals(del));
        Assert.assertFalse(ARef.equals(ATIns));
        Assert.assertFalse(ARef.equals(ATCIns));

        Assert.assertTrue(T.basesMatch(T));
        Assert.assertFalse(T.basesMatch(A));
        Assert.assertFalse(T.equals(A));

        Assert.assertTrue(del.basesMatch(del));
        Assert.assertTrue(del.basesMatch(delRef));
        Assert.assertTrue(del.equals(del));
        Assert.assertFalse(del.equals(delRef));

        Assert.assertTrue(ATIns.equals(ATIns));
        Assert.assertFalse(ATIns.equals(ATCIns));
        Assert.assertTrue(ATIns.basesMatch("AT"));
        Assert.assertFalse(ATIns.basesMatch("A"));
        Assert.assertFalse(ATIns.basesMatch("ATC"));

        Assert.assertTrue(ATIns.basesMatch("at"));
        Assert.assertFalse(ATIns.basesMatch("atc"));
    }

    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgs1() {
        logger.warn("testBadConstructorArgs1");
        byte[] foo = null;
        new Allele(foo);
    }

    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgs2() {
        logger.warn("testBadConstructorArgs2");
        new Allele("x");
    }

    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgs3() {
        logger.warn("testBadConstructorArgs3");
        new Allele("--");
    }

    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgs4() {
        logger.warn("testBadConstructorArgs4");
        new Allele("-A");
    }

    @Test (expected = IllegalArgumentException.class)
    public void testBadConstructorArgs5() {
        logger.warn("testBadConstructorArgs5");
        new Allele("A A");
    }
}

View File

@ -0,0 +1,241 @@
// our package
package org.broadinstitute.sting.oneoffprojects.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.BeforeClass;
import java.util.Map;
import java.util.Arrays;
import java.util.Set;
import java.util.List;
import java.io.FileNotFoundException;
import java.io.File;
import net.sf.picard.reference.ReferenceSequenceFile;
/**
* Basic unit test for VariantContext
*/
public class VariantContextTest extends BaseTest {
private static ReferenceSequenceFile seq;
/**
 * Opens the hg18 reference under {@code seqLocation} and hands it to
 * {@code GenomeLocParser.setupRefContigOrdering}; presumably required before the
 * GenomeLoc fields of this test can be created -- TODO confirm.
 */
@BeforeClass
public static void init() throws FileNotFoundException {
    final File referenceFasta = new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta");
    seq = new IndexedFastaSequenceFile(referenceFasta);
    GenomeLocParser.setupRefContigOrdering(seq);
}
Allele A, Aref, T, Tref;
Allele del, delRef, ATC, ATCref;
// A [ref] / T at 10
GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 11);
// - / ATC [ref] from 20-23
GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 23);
// - [ref] / ATC from 20-20
GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20);
// - / A / T / ATC [ref] from 20-23
GenomeLoc mixedLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 23);
/** Rebuilds the fixture alleles before each test: non-reference forms first, then their reference twins. */
@Before
public void before() {
    // non-reference alleles
    del = new Allele("-");
    A = new Allele("A");
    T = new Allele("T");
    ATC = new Allele("ATC");

    // the same bases tagged as reference
    delRef = new Allele("-", true);
    Aref = new Allele("A", true);
    Tref = new Allele("T", true);
    ATCref = new Allele("ATC", true);
}
// todo -- create reference context
/** A [ref] / T context must be typed as a bi-allelic SNP with no genotypes. */
@Test
public void testCreatingSNPVariantContext() {
    logger.warn("testCreatingSNPVariantContext");

    List<Allele> alleles = Arrays.asList(Aref, T);
    VariantContext vc = new VariantContext(snpLoc, alleles);
    logger.warn("vc = " + vc);

    // JUnit order is (expected, actual)
    Assert.assertEquals(snpLoc, vc.getLocation());
    Assert.assertEquals(VariantContext.Type.SNP, vc.getType());
    Assert.assertTrue(vc.isSNP());
    Assert.assertFalse(vc.isIndel());
    //Assert.assertFalse(vc.isInsertion());
    //Assert.assertFalse(vc.isDeletion());
    Assert.assertFalse(vc.isMixed());
    Assert.assertTrue(vc.isBiallelic());
    Assert.assertEquals(2, vc.getNAlleles());

    Assert.assertEquals(Aref, vc.getReference());
    Assert.assertEquals(2, vc.getAlleles().size());
    Assert.assertEquals(1, vc.getAlternateAlleles().size());
    Assert.assertEquals(T, vc.getAlternateAllele(0));

    Assert.assertFalse(vc.hasGenotypes());
    Assert.assertEquals(0, vc.getSampleNames().size());
}
/** A context holding only the reference allele must be typed NO_VARIATION. */
@Test
public void testCreatingRefVariantContext() {
    logger.warn("testCreatingRefVariantContext");

    List<Allele> alleles = Arrays.asList(Aref);
    VariantContext vc = new VariantContext(snpLoc, alleles);
    logger.warn("vc = " + vc);

    // JUnit order is (expected, actual)
    Assert.assertEquals(snpLoc, vc.getLocation());
    Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType());
    Assert.assertFalse(vc.isSNP());
    Assert.assertFalse(vc.isIndel());
    //Assert.assertFalse(vc.isInsertion());
    //Assert.assertFalse(vc.isDeletion());
    Assert.assertFalse(vc.isMixed());
    Assert.assertFalse(vc.isBiallelic());
    Assert.assertEquals(1, vc.getNAlleles());

    Assert.assertEquals(Aref, vc.getReference());
    Assert.assertEquals(1, vc.getAlleles().size());
    Assert.assertEquals(0, vc.getAlternateAlleles().size());
    //Assert.assertEquals(vc.getAlternateAllele(0), T);

    Assert.assertFalse(vc.hasGenotypes());
    Assert.assertEquals(0, vc.getSampleNames().size());
}
/** ATC [ref] / - must be typed as a bi-allelic INDEL whose alternate is the null allele. */
@Test
public void testCreatingDeletionVariantContext() {
    logger.warn("testCreatingDeletionVariantContext");

    List<Allele> alleles = Arrays.asList(ATCref, del);
    VariantContext vc = new VariantContext(delLoc, alleles);
    logger.warn("vc = " + vc);

    // JUnit order is (expected, actual)
    Assert.assertEquals(delLoc, vc.getLocation());
    Assert.assertEquals(VariantContext.Type.INDEL, vc.getType());
    Assert.assertFalse(vc.isSNP());
    Assert.assertTrue(vc.isIndel());
    //Assert.assertFalse(vc.isInsertion());
    //Assert.assertFalse(vc.isDeletion());
    Assert.assertFalse(vc.isMixed());
    Assert.assertTrue(vc.isBiallelic());
    Assert.assertEquals(2, vc.getNAlleles());

    Assert.assertEquals(ATCref, vc.getReference());
    Assert.assertEquals(2, vc.getAlleles().size());
    Assert.assertEquals(1, vc.getAlternateAlleles().size());
    Assert.assertEquals(del, vc.getAlternateAllele(0));

    Assert.assertFalse(vc.hasGenotypes());
    Assert.assertEquals(0, vc.getSampleNames().size());
}
/**
 * Builds a context from the null ("-") reference allele and the alternate
 * allele ATC at {@code insLoc} and checks that it is classified as a
 * biallelic INDEL that carries no genotypes.
 */
@Test
public void testCreatingInsertionVariantContext() {
    logger.warn("testCreatingInsertionVariantContext");

    List<Allele> alleles = Arrays.asList(delRef, ATC);
    VariantContext vc = new VariantContext(insLoc, alleles);
    logger.warn("vc = " + vc);

    // JUnit's assertEquals signature is (expected, actual); keeping that order
    // makes the "expected:<..> but was:<..>" failure message read correctly.
    Assert.assertEquals(insLoc, vc.getLocation());
    Assert.assertEquals(VariantContext.Type.INDEL, vc.getType());

    // variant-type predicates
    Assert.assertFalse(vc.isSNP());
    Assert.assertTrue(vc.isIndel());
    // NOTE(review): insertion/deletion checks were already disabled here -- confirm
    // whether isInsertion()/isDeletion() are available before re-enabling them.
    //Assert.assertFalse(vc.isInsertion());
    //Assert.assertFalse(vc.isDeletion());
    Assert.assertFalse(vc.isMixed());
    Assert.assertTrue(vc.isBiallelic());

    // allele bookkeeping: one reference plus one alternate
    Assert.assertEquals(2, vc.getNAlleles());
    Assert.assertEquals(delRef, vc.getReference());
    Assert.assertEquals(2, vc.getAlleles().size());
    Assert.assertEquals(1, vc.getAlternateAlleles().size());
    Assert.assertEquals(ATC, vc.getAlternateAllele(0));

    // no genotypes were supplied, so no samples should be reported
    Assert.assertFalse(vc.hasGenotypes());
    Assert.assertEquals(0, vc.getSampleNames().size());
}
/**
 * Expects the constructor to throw IllegalArgumentException when given two
 * alleles that are both flagged as reference ({@code delRef} and {@code ATCref}).
 */
@Test (expected = IllegalArgumentException.class)
public void testBadConstructorArgs1() {
    logger.warn("testBadConstructorArgs1");

    // both alleles were created with isRef = true
    List<Allele> twoReferenceAlleles = Arrays.asList(delRef, ATCref);
    new VariantContext(insLoc, twoReferenceAlleles);
}
/**
 * Expects the constructor to throw IllegalArgumentException when given the
 * allele pair {@code delRef} and {@code del}.
 */
@Test (expected = IllegalArgumentException.class)
public void testBadConstructorArgs2() {
    logger.warn("testBadConstructorArgs2");

    // NOTE(review): del is declared outside this excerpt; presumably it is the
    // non-reference null allele, making both entries null alleles -- confirm.
    List<Allele> rejectedAlleles = Arrays.asList(delRef, del);
    new VariantContext(insLoc, rejectedAlleles);
}
/**
 * Expects the constructor to throw IllegalArgumentException when the allele
 * list contains only {@code del}.
 */
@Test (expected = IllegalArgumentException.class)
public void testBadConstructorArgs3() {
    logger.warn("testBadConstructorArgs3");

    // NOTE(review): del is declared outside this excerpt; presumably it is a
    // non-reference allele, so the list has no reference allele -- confirm.
    List<Allele> rejectedAlleles = Arrays.asList(del);
    new VariantContext(insLoc, rejectedAlleles);
}
/**
 * Expects the constructor to throw IllegalArgumentException when the same
 * alternate allele ({@code T}) is listed twice.
 */
@Test (expected = IllegalArgumentException.class)
public void testBadConstructorArgsDuplicateAlleles1() {
    logger.warn("testBadConstructorArgsDuplicateAlleles1");

    // T appears twice after the reference allele
    List<Allele> repeatedAlternate = Arrays.asList(Aref, T, T);
    new VariantContext(insLoc, repeatedAlternate);
}
/**
 * Expects the constructor to throw IllegalArgumentException when the bases
 * "A" are supplied both as the reference allele ({@code Aref}) and as a
 * non-reference allele ({@code A}).
 */
@Test (expected = IllegalArgumentException.class)
public void testBadConstructorArgsDuplicateAlleles2() {
    logger.warn("testBadConstructorArgsDuplicateAlleles2");

    // same bases, differing only in the reference flag
    List<Allele> sameBasesTwice = Arrays.asList(Aref, A);
    new VariantContext(insLoc, sameBasesTwice);
}
}
// public Type getType() {
// public boolean isSNP() { return getType() == Type.SNP; }
// public boolean isVariant() { return getType() != Type.NO_VARIATION; }
// public boolean isIndel() { return getType() == Type.INDEL; }
// public boolean isMixed() { return getType() == Type.MIXED; }
// public GenomeLoc getLocation() { return loc; }
// public Allele getReference() {
// public boolean isBiallelic() {
// public boolean isMonomorphic() {
// public boolean isPolymorphic() {
// public int getNAlleles() {
// public Set<Allele> getAlleles() { return alleles; }
// public Set<Allele> getAlternateAlleles() {
// public Allele getAlternateAllele(int i) {
// public void setAlleles(Set<Allele> alleles) {
// public void addAllele(Allele allele) {
// public void addAllele(Allele allele, boolean allowDuplicates) {
// public boolean hasGenotypes() { return genotypes.size() > 0; }
// public Map<String, Genotype> getGenotypes() { return genotypes; }
// public Set<String> getSampleNames() {
// public Genotype getGenotype(String sample) {
// public boolean hasGenotype(String sample) {
// public void setGenotypes(Genotype genotype) {
// public void setGenotypes(Collection<Genotype> genotypes) {
// public void setGenotypes(Map<String, Genotype> genotypes) {
// public void addGenotype(Genotype genotype) {
// public void addGenotype(String sampleName, Genotype genotype) {
// public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
// public void removeGenotype(String sampleName) {
// public void removeGenotype(Genotype genotype) {
// public boolean validate() {