Refactoring and migration of new allele/variantcontext/genotype code into oneoffprojects. NOT FOR USE. PlinkRod commented out due to dependence on this new, rapidly changing interface.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2687 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-01-26 13:53:29 +00:00
parent 3380b6ebe8
commit c231547204
9 changed files with 1604 additions and 1335 deletions

View File

@ -1,315 +0,0 @@
package org.broadinstitute.sting.gatk.contexts;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.refdata.*;
import java.util.*;
import org.apache.commons.jexl.*;
/**
* @author ebanks
* <p/>
* Class VariantContext
* <p/>
* This class represents a context that unifies one or more variants
*/
public class VariantContext {
private static final String UNIQUIFIED_SUFFIX = ".unique";
private Set<Allele> alleles;
private Set<Genotype> genotypes;
private Allele reference;
private GenomeLoc loc;
private HashMap<Object, Object> attributes;
public VariantContext(VariationRod rod) {
// TODO -- VariationRod should eventually require classes to implement toVariationContext()
// TODO -- (instead of using a temporary adapter class)
loc = rod.getLocation();
reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
// TODO -- populate the alleles and genotypes through an adapter
alleles = new HashSet<Allele>();
genotypes = new HashSet<Genotype>();
attributes = new HashMap<Object, Object>();
}
protected VariantContext(GenomeLoc loc, Allele reference, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
this.loc = loc;
this.reference = reference;
this.alleles = new HashSet<Allele>(alleles);
this.genotypes = new HashSet<Genotype>(genotypes);
this.attributes = new HashMap<Object, Object>(attributes);
}
/**
* @param other another variant context
*
* throws an exception if there is a collision such that the same sample exists in both contexts
* @return a context representing the merge of this context and the other provided context
*/
public VariantContext merge(VariantContext other) {
return merge(other, false);
}
/**
* @param other another variant context
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
* the samples will be uniquified(based on their sources);
* otherwise, an exception will be thrown
*
* @return a context representing the merge of this context and the other provided context
*/
public VariantContext merge(VariantContext other, boolean uniquifySamples) {
if ( !loc.equals(other.getLocation()) )
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
Set<String> samples = getSampleNames();
Set<Genotype> Gs = new HashSet<Genotype>(genotypes);
for ( Genotype g : other.getGenotypes() ) {
if ( samples.contains(g.getSample()) ) {
if ( uniquifySamples )
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
else
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
}
Gs.add(g);
}
HashMap<Object, Object> attrs = new HashMap<Object, Object>(attributes);
attrs.putAll(other.getAttributes());
return createNewContext(Gs, attrs);
}
/**
* @return the location of this context
*/
public GenomeLoc getLocation() { return loc; }
/**
* @return the reference allele for this context
*/
public Allele getReference() { return reference; }
/**
* @return true if the context is variant (i.e. contains a non-reference allele)
*/
public boolean isVariant() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() )
return true;
}
return false;
}
/**
* @return true if the context is strictly bi-allelic
*/
public boolean isBiallelic() {
return getAlternateAlleles().size() == 1;
}
/**
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
*/
public boolean isPointAllele() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() && !allele.isSNP() )
return false;
}
return true;
}
/**
* @return the set of all sample names in this context
*/
public Set<String> getSampleNames() {
Set<String> samples = new TreeSet<String>();
for ( Genotype g : genotypes )
samples.add(g.getSample());
return samples;
}
/**
* @return true if the context represents variants with associated genotypes
*/
public boolean hasGenotypes() { return genotypes.size() > 0; }
/**
* @return set of all Genotypes associated with this context
*/
public Set<Genotype> getGenotypes() { return genotypes; }
/**
* @param sample the sample name
*
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
*/
public Genotype getGenotype(String sample) {
for ( Genotype g : genotypes ) {
if ( g.getSample().equals(sample) )
return g;
}
return null;
}
/**
* @return set of all subclasses within this context
*/
public Set<Object> getSubclasses() {
Set<Object> subclasses = new HashSet<Object>();
for ( Genotype g : genotypes )
subclasses.addAll(g.getAttributes().keySet());
return subclasses;
}
/**
* @param subclass the name of a subclass of variants to select
*
* @return a subset of this context which selects based on the given subclass
*/
public VariantContext select(String subclass) {
HashSet<Genotype> Gs = new HashSet<Genotype>();
for ( Genotype g : genotypes ) {
if ( g.getAttribute(subclass) != null )
Gs.add(g);
}
return createNewContext(Gs, attributes);
}
/**
* @param expr a jexl expression describing how to filter this context
*
* @return a subset of this context which is filtered based on the given expression
*/
public VariantContext filter(String expr) {
HashSet<Genotype> Gs = new HashSet<Genotype>();
try {
Expression filterExpression = ExpressionFactory.createExpression(expr);
for ( Genotype g : genotypes ) {
JexlContext jContext = JexlHelper.createContext();
jContext.setVars(g.getAttributes());
if ( (Boolean)filterExpression.evaluate(jContext) )
Gs.add(g);
}
} catch (Exception e) {
throw new StingException("JEXL error in VariantContext: " + e.getMessage());
}
return createNewContext(Gs, attributes);
}
/**
* @return a set of new variant contexts, one for each sample from this context
*/
public Set<VariantContext> splitBySample() {
Set<VariantContext> contexts = new HashSet<VariantContext>();
for ( Genotype g : genotypes ) {
HashSet<Genotype> gAsSet = new HashSet<Genotype>();
gAsSet.add(g);
contexts.add(createNewContext(gAsSet, attributes));
}
return contexts;
}
/**
* @param Gs the set of genotypes from which to create a new context
* @param attrs the attributes for the new context
*
* @return a new context based on the given genotypes
*/
private VariantContext createNewContext(Set<Genotype> Gs, HashMap<Object, Object> attrs) {
HashSet<Allele> As = new HashSet<Allele>();
for ( Genotype g : Gs )
As.addAll(g.getAlleles());
return new VariantContext(loc, reference, As, Gs, attrs);
}
/**
* @param allele the allele to be queried
*
* @return the frequency of the given allele in this context
*/
public double getAlleleFrequency(Allele allele) {
int alleleCount = 0;
int totalCount = 0;
for ( Genotype g : genotypes ) {
for ( Allele a : g.getAlleles() ) {
totalCount++;
if ( allele.equals(a) )
alleleCount++;
}
}
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
}
/**
* Gets the alleles. This method should return all of the alleles present at the location,
* including the reference allele. There are no constraints imposed on the ordering of alleles
* in the set. If the reference is not an allele in this context it will not be included.
*
* @return the set of alleles
*/
public Set<Allele> getAlleles() { return alleles; }
/**
* Gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference allele. There are no constraints imposed on the ordering of alleles
* in the set.
*
* @return the set of alternate alleles
*/
public Set<Allele> getAlternateAlleles() {
HashSet<Allele> altAlleles = new HashSet<Allele>();
for ( Allele allele : alleles ) {
if ( !allele.equals(reference) )
altAlleles.add(allele);
}
return altAlleles;
}
/**
* Sets the given attribute
*
* @param key the attribute key
* @param value the attribute value
*/
public void setAttribute(Object key, Object value) {
attributes.put(key, value);
}
/**
* @param key the attribute key
*
* @return the attribute value for the given key (or null if not set)
*/
public Object getAttribute(Object key) {
return attributes.get(key);
}
/**
* @return the attribute map
*/
public Map<Object, Object> getAttributes() {
return attributes;
}
}

View File

@ -1,81 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
/**
* @author ebanks
* <p/>
* Class Allele
* <p/>
* This class emcompasses all the basic information about an allele
*/
public class Allele {
private AlleleType type;
private String bases;
// the types of variants we currently allow
public enum AlleleType {
REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE, DELETION_REFERENCE
}
public Allele(AlleleType type, String bases) {
this.type = type;
if ( bases == null )
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
if ( type == AlleleType.DELETION && bases.length() > 0 )
throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP || type == AlleleType.UNKNOWN_POINT_ALLELE) && bases.length() > 1 )
throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base");
this.bases = bases.toUpperCase();
}
/**
* convenience method for switching over the allele type
*
* @return the AlleleType of this allele
**/
public AlleleType getType() { return type; }
/**
* convenience method for SNPs
*
* @return true if this is a SNP, false otherwise
*/
public boolean isSNP() { return type == AlleleType.SNP; }
/**
* convenience method for variants
*
* @return true if this is a variant allele, false if it's reference
*/
public boolean isVariant() { return type != AlleleType.REFERENCE; }
/**
* convenience method for indels
*
* @return true if this is an indel, false otherwise
*/
public boolean isIndel() { return type == AlleleType.INSERTION || type == AlleleType.DELETION; }
/**
* For deletions, this method returns an empty String.
* For everything else, observed bases for the allele are returned.
*
* @return the bases, as a string
*/
public String getBases() { return bases; }
/**
* @param other the other allele
*
* @return true if these alleles are equal
*/
public boolean equals(Allele other) {
return type == other.getType() && bases.equals(other.getBases());
}
}

File diff suppressed because it is too large Load Diff

View File

@ -60,11 +60,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
public boolean alwaysShowSecondBase = false;
@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
public boolean qualsAsInts = false;
//@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
//public boolean qualsAsInts = false;
@Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
public boolean IGNORE_UNCOVERED_BASES = false;
//@Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
//public boolean IGNORE_UNCOVERED_BASES = false;
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
public boolean SHOW_INDEL_PILEUPS = false;

View File

@ -0,0 +1,126 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.utils.BaseUtils;
import java.util.Arrays;
/**
* @author ebanks, depristo
* Types of alleles:
*
* Ref: a t C g a // C is the reference base
*
* : a t G g a // C base is a G in some individuals
*
* : a t - g a // C base is deleted w.r.t. the reference
*
* : a t CAg a // A base is inserted w.r.t. the reference sequence
*
* In these cases, where are the alleles?
*
* SNP polymorphism of C/G -> { C , G } -> C is the reference allele
* 1 base deletion of C -> { C , - } -> C is the reference allele
* 1 base insertion of A -> { - ; A } -> NULL is the reference allele
*
* Suppose I see a the following in the population:
*
* Ref: a t C g a // C is the reference base
* : a t G g a // C base is a G in some individuals
* : a t - g a // C base is deleted w.r.t. the reference
*
* How do I represent this? There are three segregating alleles:
*
* { C , G , - }
*
* Now suppose I have this more complex example:
*
* Ref: a t C g a // C is the reference base
* : a t - g a
* : a t - - a
* : a t CAg a
*
* There are actually four segregating alleles:
*
* { C g , - g, - -, and CAg } over bases 2-4
*
* However, the molecular equivalence explicitly listed above is usually discarded, so the actual
* segregating alleles are:
*
* { C g, g, -, C a g }
*
* Critically, it should be possible to apply an allele to a reference sequence to create the
* correct haplotype sequence:
*
* Allele + reference => haplotype
*
* For convenience, we are going to create Alleles where the GenomeLoc of the allele is stored outside of the
* Allele object itself. So there's an idea of an A/C polymorphism independent of it's surrounding context.
*
* Given list of alleles it's possible to determine the "type" of the variation
*
* A / C @ loc => SNP with
* - / A => INDEL
*
* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion
*/
public class Allele {
private boolean isRef = false;
private byte[] bases = null;
public Allele(byte[] bases, boolean isRef) {
bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
this.isRef = isRef;
if ( bases == null )
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
this.bases = bases;
for ( byte b : bases ) {
if ( ! BaseUtils.isRegularBase(b) ) {
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
}
}
}
/** null allele creation method */
public Allele(boolean isRef) {
this("", isRef);
}
public Allele(String bases, boolean isRef) {
this(bases.getBytes(), isRef);
}
//
//
// accessor routines
//
//
public boolean isNullAllele() { return length() == 0; }
public boolean isNonNullAllele() { return ! isNullAllele(); }
public boolean isReference() { return isRef; }
public boolean isNonReference() { return ! isReference(); }
/**
* Return the DNA bases segregating in this allele. Note this isn't reference polarized,
* so the Null allele is represented by a vector of length 0
*
* @return the segregating bases
*/
public byte[] getBases() { return bases; }
/**
* @param other the other allele
*
* @return true if these alleles are equal
*/
public boolean equals(Allele other) {
return Arrays.equals(bases, other.getBases());
}
public int length() {
return bases.length;
}
}

View File

@ -1,4 +1,4 @@
package org.broadinstitute.sting.gatk.refdata;
package org.broadinstitute.sting.oneoffprojects.variantcontext;
import java.util.*;
@ -10,11 +10,6 @@ import java.util.*;
* This class emcompasses all the basic information about a genotype
*/
public class Genotype {
public enum StandardAttributes {
DELETION_LENGTH, INVERSION_LENGTH
}
private List<Allele> alleles;
private double negLog10PError;
@ -32,9 +27,9 @@ public class Genotype {
}
/**
* @return the alleles for this genotype
*/
public List<Allele> getAlleles() { return alleles; }
* @return the alleles for this genotype
*/
public List<Allele> getAlleles() { return alleles; }
/**
* @return the ploidy of this genotype
@ -77,10 +72,10 @@ public class Genotype {
* @return true if all alleles for this genotype are SNPs or reference
*/
public boolean isPointGenotype() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() && !allele.isSNP() )
return false;
}
// for ( Allele allele : alleles ) {
// if ( allele.isVariant() && !allele.isSNP() )
// return false;
// }
return true;
}
@ -88,21 +83,21 @@ public class Genotype {
* @return true if this is a variant genotype, false if it's reference
*/
public boolean isVariant() {
for ( Allele allele : alleles ) {
if ( allele.isVariant() )
return true;
}
// for ( Allele allele : alleles ) {
// if ( allele.isVariant() )
// return true;
// }
return false;
}
/**
* @return the -1 * log10-based error estimate
*/
* @return the -1 * log10-based error estimate
*/
public double getNegLog10PError() { return negLog10PError; }
/**
* @return the sample name
*/
* @return the sample name
*/
public String getSample() { return sample; }
/**

View File

@ -0,0 +1,440 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.refdata.*;
import java.util.*;
import org.apache.commons.jexl.*;
/**
* @author ebanks
* <p/>
* Class VariantContext
* <p/>
* This class represents a context that unifies one or more variants
*/
public class VariantContext {
private GenomeLoc loc;
private Set<Allele> alleles = new HashSet<Allele>();
private Set<Genotype> genotypes = new HashSet<Genotype>();
private HashMap<Object, Object> attributes = new HashMap<Object, Object>();
Type type = null;
private double negLog10PError = 0.0; // todo - fixme
/** Have we checked this VariantContext already? */
private boolean validatedP = false;
// public VariantContext(VariationRod rod) {
//
// // TODO -- VariationRod should eventually require classes to implement toVariationContext()
// // TODO -- (instead of using a temporary adapter class)
//
// loc = rod.getLocation();
// reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
//
// // TODO -- populate the alleles and genotypes through an adapter
// alleles = new HashSet<Allele>();
// genotypes = new HashSet<Genotype>();
//
// attributes = new HashMap<Object, Object>();
// }
// ---------------------------------------------------------------------------------------------------------
//
// constructors
//
// ---------------------------------------------------------------------------------------------------------
public VariantContext(GenomeLoc loc) {
if ( loc == null ) { throw new StingException("GenomeLoc cannot be null"); }
this.loc = loc;
}
protected VariantContext(VariantContext parent, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
this(parent.getLocation(), parent.getAlleles(), genotypes, attributes);
}
public VariantContext(GenomeLoc loc, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
this(loc);
// todo -- add extensive testing here
// todo -- check that exactly one allele is tagged as reference
this.alleles = new HashSet<Allele>(alleles);
this.genotypes = new HashSet<Genotype>(genotypes);
this.attributes = new HashMap<Object, Object>(attributes);
}
// ---------------------------------------------------------------------------------------------------------
//
// type operations
//
// ---------------------------------------------------------------------------------------------------------
/**
* see: http://www.ncbi.nlm.nih.gov/bookshelf/br.fcgi?book=handbook&part=ch5&rendertype=table&id=ch5.ch5_t3
*
* Format:
* dbSNP variation class
* Rules for assigning allele classes
* Sample allele definition
*
* Single Nucleotide Polymorphisms (SNPs)a
* Strictly defined as single base substitutions involving A, T, C, or G.
* A/T
*
* Deletion/Insertion Polymorphisms (DIPs)
* Designated using the full sequence of the insertion as one allele, and either a fully
* defined string for the variant allele or a Ò-Ó character to specify the deleted allele.
* This class will be assigned to a variation if the variation alleles are of different lengths or
* if one of the alleles is deleted (Ò-Ó).
* T/-/CCTA/G
*
* No-variation
* Reports may be submitted for segments of sequence that are assayed and determined to be invariant
* in the sample.
* (NoVariation)
*
* Mixed
* Mix of other classes
*
*
* Not currently supported:
*
* Heterozygous sequencea
* The term heterozygous is used to specify a region detected by certain methods that do not
* resolve the polymorphism into a specific sequence motif. In these cases, a unique flanking
* sequence must be provided to define a sequence context for the variation.
* (heterozygous)
*
* Microsatellite or short tandem repeat (STR)
* Alleles are designated by providing the repeat motif and the copy number for each allele.
* Expansion of the allele repeat motif designated in dbSNP into full-length sequence will
* be only an approximation of the true genomic sequence because many microsatellite markers are
* not fully sequenced and are resolved as size variants only.
* (CAC)8/9/10/11
*
* Named variant
* Applies to insertion/deletion polymorphisms of longer sequence features, such as retroposon
* dimorphism for Alu or line elements. These variations frequently include a deletion Ò-Ó indicator
* for the absent allele.
* (alu) / -
*
* Multi-Nucleotide Polymorphism (MNP)
* Assigned to variations that are multi-base variations of a single, common length
* GGA/AGT
*/
public enum Type {
NO_VARIATION,
SNP,
INDEL,
MIXED
}
/**
* convenience method for switching over the allele type
*
* @return the AlleleType of this allele
**/
public Type getType() {
if ( type == null )
determineType();
return type;
}
/**
* convenience method for SNPs
*
* @return true if this is a SNP, false otherwise
*/
public boolean isSNP() { return getType() == Type.SNP; }
/**
* convenience method for variants
*
* @return true if this is a variant allele, false if it's reference
*/
public boolean isVariant() { return getType() != Type.NO_VARIATION; }
/**
* convenience method for indels
*
* @return true if this is an indel, false otherwise
*/
public boolean isIndel() { return getType() == Type.INDEL; }
/**
* convenience method for indels
*
* @return true if this is an mixed variation, false otherwise
*/
public boolean isMixed() { return getType() == Type.MIXED; }
// ---------------------------------------------------------------------------------------------------------
//
// Generic accessors
//
// ---------------------------------------------------------------------------------------------------------
/**
* @return the location of this context
*/
public GenomeLoc getLocation() { return loc; }
// ---------------------------------------------------------------------------------------------------------
//
// Working with alleles
//
// ---------------------------------------------------------------------------------------------------------
/**
* @return the reference allele for this context
*/
public Allele getReference() {
for ( Allele allele : getAlleles() )
if ( allele.isReference() )
return allele;
throw new StingException("BUG: no reference allele found at " + this);
}
/**
* @return true if the context is strictly bi-allelic
*/
public boolean isBiallelic() {
//return getAlternateAlleles().size() == 1;
return getAlleles().size() == 2;
}
/**
* Gets the alleles. This method should return all of the alleles present at the location,
* including the reference allele. There are no constraints imposed on the ordering of alleles
* in the set. If the reference is not an allele in this context it will not be included.
*
* @return the set of alleles
*/
public Set<Allele> getAlleles() { return alleles; }
/**
* Gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference allele. There are no constraints imposed on the ordering of alleles
* in the set.
*
* @return the set of alternate alleles
*/
public Set<Allele> getAlternateAlleles() {
HashSet<Allele> altAlleles = new HashSet<Allele>();
for ( Allele allele : alleles ) {
if ( allele.isNonReference() )
altAlleles.add(allele);
}
return altAlleles;
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with genotypes
//
// ---------------------------------------------------------------------------------------------------------
/**
* @return true if the context represents variants with associated genotypes
*/
public boolean hasGenotypes() { return genotypes.size() > 0; }
/**
* @return set of all Genotypes associated with this context
*/
// todo -- genotypes should really be stored as map, not set
public Set<Genotype> getGenotypes() { return genotypes; }
public Map<String, Genotype> getGenotypeMap() {
HashMap<String, Genotype> map = new HashMap<String, Genotype>();
for ( Genotype g : genotypes )
map.put(g.getSample(), g);
return map;
}
/**
* @return the set of all sample names in this context
*/
public Set<String> getSampleNames() {
return getGenotypeMap().keySet();
}
/**
* @param sample the sample name
*
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
*/
public Genotype getGenotype(String sample) {
return getGenotypeMap().get(sample);
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with attributes
//
// ---------------------------------------------------------------------------------------------------------
// todo -- refactor into AttributedObject and have VariantContext and Genotype inherit from them
// todo -- define common attributes as enum
/**
* Sets the given attribute
*
* @param key the attribute key
* @param value the attribute value
*/
public void putAttribute(Object key, Object value) {
attributes.put(key, value);
}
public void putAttributes(Map<? extends Object, Object> map) {
attributes.putAll(map);
}
public boolean hasAttribute(Object key) {
return attributes.containsKey(key);
}
public int getNumAttributes() {
return attributes.size();
}
/**
* @param key the attribute key
*
* @return the attribute value for the given key (or null if not set)
*/
public Object getAttribute(Object key) {
return attributes.get(key);
}
public Object getAttribute(Object key, Object defaultValue) {
if ( hasAttribute(key) )
return attributes.get(key);
else
return defaultValue;
}
public String getAttributeAsString(Object key) { return (String)getAttribute(key); }
public int getAttributeAsInt(Object key) { return (Integer)getAttribute(key); }
public double getAttributeAsDouble(Object key) { return (Double)getAttribute(key); }
public String getAttributeAsString(Object key, String defaultValue) { return (String)getAttribute(key, defaultValue); }
public int getAttributeAsInt(Object key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); }
public double getAttributeAsDouble(Object key, double defaultValue) { return (Double)getAttribute(key, defaultValue); }
/**
* @return the attribute map
*/
public Map<Object, Object> getAttributes() {
return attributes;
}
// ---------------------------------------------------------------------------------------------------------
//
// validation
//
// ---------------------------------------------------------------------------------------------------------
/**
* To be called by any modifying routines
*/
private void invalidate() { validatedP = false; }
public boolean validate() {
return validate(true);
}
public boolean validate(boolean throwException) {
if ( ! validatedP ) {
boolean valid = false;
// todo -- add extensive validation checking here
if ( valid ) {
validatedP = valid;
} else if ( throwException ) {
throw new StingException(this + " failed validation");
}
return valid;
} else {
return validatedP;
}
}
// ---------------------------------------------------------------------------------------------------------
//
// utility routines
//
// ---------------------------------------------------------------------------------------------------------
private void determineType() {
if ( type == null ) {
// todo -- figure out the variation type
}
}
// todo -- toString() method
/**
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
*/
// public boolean isPointAllele() {
// for ( Allele allele : alleles ) {
// if ( allele.isVariant() && !allele.isSNP() )
// return false;
// }
// return true;
// }
//
// /**
// * @return set of all subclasses within this context
// */
// public Set<Object> getSubclasses() {
// Set<Object> subclasses = new HashSet<Object>();
// for ( Genotype g : genotypes )
// subclasses.addAll(g.getAttributes().keySet());
// return subclasses;
// }
/**
* @param allele the allele to be queried
*
* @return the frequency of the given allele in this context
*/
public double getAlleleFrequency(Allele allele) {
int alleleCount = 0;
int totalCount = 0;
for ( Genotype g : genotypes ) {
for ( Allele a : g.getAlleles() ) {
totalCount++;
if ( allele.equals(a) )
alleleCount++;
}
}
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
}
}

View File

@ -0,0 +1,104 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext;
import java.util.*;
import org.apache.commons.jexl.*;
public class VariantContextUtils {
private static final String UNIQUIFIED_SUFFIX = ".unique";
/**
* @param other another variant context
*
* throws an exception if there is a collision such that the same sample exists in both contexts
* @return a context representing the merge of this context and the other provided context
*/
public VariantContext merge(VariantContext left, VariantContext other) {
return merge(left, other, false);
}
/**
* @param other another variant context
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
* the samples will be uniquified(based on their sources);
* otherwise, an exception will be thrown
*
* @return a context representing the merge of this context and the other provided context
*/
public VariantContext merge(VariantContext left, VariantContext other, boolean uniquifySamples) {
// todo -- make functional
if ( !left.getLocation().equals(other.getLocation()) )
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
Set<String> samples = left.getSampleNames();
Set<Genotype> Gs = new HashSet<Genotype>(left.getGenotypes());
for ( Genotype g : other.getGenotypes() ) {
if ( samples.contains(g.getSample()) ) {
if ( uniquifySamples )
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
else
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
}
Gs.add(g);
}
HashMap<Object, Object> attrs = new HashMap<Object, Object>(left.getAttributes());
attrs.putAll(other.getAttributes());
return new VariantContext(left, Gs, attrs);
}
/**
* @param subclass the name of a subclass of variants to select
*
* @return a subset of this context which selects based on the given subclass
*/
// public VariantContextUtils select(String subclass) {
// HashSet<Genotype> Gs = new HashSet<Genotype>();
// for ( Genotype g : genotypes ) {
// if ( g.getAttribute(subclass) != null )
// Gs.add(g);
// }
// return createNewContext(Gs, attributes);
// }
/**
* @param expr a jexl expression describing how to filter this context
*
* @return a subset of this context which is filtered based on the given expression
*/
// public VariantContextUtils filter(String expr) {
// HashSet<Genotype> Gs = new HashSet<Genotype>();
// try {
// Expression filterExpression = ExpressionFactory.createExpression(expr);
//
// for ( Genotype g : genotypes ) {
// JexlContext jContext = JexlHelper.createContext();
// jContext.setVars(g.getAttributes());
// if ( (Boolean)filterExpression.evaluate(jContext) )
// Gs.add(g);
// }
//
// } catch (Exception e) {
// throw new StingException("JEXL error in VariantContext: " + e.getMessage());
// }
//
// return createNewContext(Gs, attributes);
// }
/**
* @return a set of new variant contexts, one for each sample from this context
*/
// public Set<VariantContextUtils> splitBySample() {
// Set<VariantContextUtils> contexts = new HashSet<VariantContextUtils>();
// for ( Genotype g : genotypes ) {
// HashSet<Genotype> gAsSet = new HashSet<Genotype>();
// gAsSet.add(g);
// contexts.add(createNewContext(gAsSet, attributes));
// }
// return contexts;
// }
}

View File

@ -1,243 +1,243 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.Assert;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: Ghost
* Date: Jan 22, 2010
* Time: 11:27:33 PM
* To change this template use File | Settings | File Templates.
*/
public class PlinkRodTest extends BaseTest {
private static IndexedFastaSequenceFile seq;
@BeforeClass
public static void beforeTests() {
try {
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
} catch (FileNotFoundException e) {
throw new StingException("unable to load the sequence dictionary");
}
GenomeLocParser.setupRefContigOrdering(seq);
}
public BufferedReader openFile(String filename) {
try {
return new BufferedReader(new FileReader(filename));
} catch (FileNotFoundException e) {
throw new StingException("Couldn't open file " + filename);
}
}
@Test
public void testStandardPedFile() {
PlinkRod rod = new PlinkRod("test");
try {
rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
} catch ( FileNotFoundException e ) {
throw new StingException("test file for testStandardPedFile() does not exist",e);
}
// test that the sample names are correct
List<String> rodNames = rod.getVariantSampleNames();
List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
boolean namesCorrect = true;
for ( int i = 0; i < expectedNames.size(); i++ ) {
namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
}
Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
// test that rod can be iterated over
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
do {
genotypesInRod.add(rod.getGenotypes());
sampleNamesInRod.add(rod.getVariantSampleNames());
lociInRod.add(rod.getLocation());
} while ( rod.parseLine(null,null) );
Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
ArrayList<Genotype> snp1 = genotypesInRod.get(0);
ArrayList<Genotype> snp3 = genotypesInRod.get(2);
Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
boolean name2isSame = true;
for ( ArrayList<String> names : sampleNamesInRod ) {
name2isSame = name2isSame && names.get(1).equals("NAMELY");
}
Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
// test that the loci are correctly parsed and in order
List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
boolean lociCorrect = true;
for ( int i = 0; i < 3; i ++ ) {
lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
}
}
@Test
public void testStandardPedFileWithIndels() {
PlinkRod rod = new PlinkRod("test");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
} catch ( FileNotFoundException e) {
throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
}
// Iterate through the rod
List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
do {
genotypesInRod.add(rod.getGenotypes());
sampleNamesInRod.add(rod.getVariantSampleNames());
lociInRod.add(rod.getLocation());
snpSites.add(rod.variantIsSNP());
} while ( rod.parseLine(null,null) );
boolean snpOrder = true;
List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
for ( int i = 0; i < 4; i ++ ) {
snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
}
Assert.assertTrue("That the variant type order is as expected", snpOrder);
Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
}
@Test
public void testBinaryPedFileNoIndels() {
PlinkRod rod = new PlinkRod("binaryTest1");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
} catch (FileNotFoundException e) {
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
}
// iterate through the ROD and get stuff
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
do {
lociInRod.add(rod.getLocation());
genotypesInRod.add(rod.getGenotypes());
samplesInRod.add(rod.getVariantSampleNames());
} while ( rod.parseLine(null,null) );
List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
for ( int i = 0; i < expecLoc.size(); i ++ ) {
Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
}
List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
"GG","GG","AG","TT","CC","CT","TG","GG","GG");
List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
false,false,false,true,false,false,true,true,false,false);
List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
"NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
"NA12878","NA12890","NA07000");
int snpNo = 1;
int indiv = 1;
int alleleOffset = 0;
for ( ArrayList<Genotype> snp : genotypesInRod ) {
for ( Genotype gen : snp ) {
String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
"(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
indiv++;
alleleOffset++;
}
indiv = 1;
snpNo++;
}
}
@Test
public void testIndelBinary() {
PlinkRod rod = new PlinkRod("binaryTest2");
try {
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
} catch (FileNotFoundException e) {
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
}
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
do {
genotypesInRod.add(rod.getGenotypes());
} while ( rod.parseLine(null,null) );
List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
"TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
false,false,false,false,false,false,true,false,true,false,false,false);
List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
false,false,false,false,false,false,false,false,false,false,false,false);
int al = 0;
for ( ArrayList<Genotype> snp : genotypesInRod ) {
for ( Genotype gen : snp ) {
String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
Allele firstAl = gen.getAlleles().get(0);
Allele secondAl = gen.getAlleles().get(1);
boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
al++;
}
}
}
}
//package org.broadinstitute.sting.gatk.refdata;
//
//import org.broadinstitute.sting.BaseTest;
//import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
//import org.broadinstitute.sting.utils.StingException;
//import org.broadinstitute.sting.utils.GenomeLocParser;
//import org.broadinstitute.sting.utils.GenomeLoc;
//import org.junit.BeforeClass;
//import org.junit.Test;
//import org.junit.Assert;
//
//import java.io.File;
//import java.io.FileNotFoundException;
//import java.io.BufferedReader;
//import java.io.FileReader;
//import java.util.*;
//
///**
// * Created by IntelliJ IDEA.
// * User: Ghost
// * Date: Jan 22, 2010
// * Time: 11:27:33 PM
// * To change this template use File | Settings | File Templates.
// */
//public class PlinkRodTest extends BaseTest {
// private static IndexedFastaSequenceFile seq;
//
// @BeforeClass
// public static void beforeTests() {
// try {
// seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
// } catch (FileNotFoundException e) {
// throw new StingException("unable to load the sequence dictionary");
// }
// GenomeLocParser.setupRefContigOrdering(seq);
//
// }
//
// public BufferedReader openFile(String filename) {
// try {
// return new BufferedReader(new FileReader(filename));
// } catch (FileNotFoundException e) {
// throw new StingException("Couldn't open file " + filename);
// }
//
// }
//
// @Test
// public void testStandardPedFile() {
// PlinkRod rod = new PlinkRod("test");
// try {
// rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
// } catch ( FileNotFoundException e ) {
// throw new StingException("test file for testStandardPedFile() does not exist",e);
// }
//
// // test that the sample names are correct
//
// List<String> rodNames = rod.getVariantSampleNames();
// List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
//
// Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
//
// boolean namesCorrect = true;
// for ( int i = 0; i < expectedNames.size(); i++ ) {
// namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
// }
//
// Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
//
// // test that rod can be iterated over
//
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
// ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
// do {
// genotypesInRod.add(rod.getGenotypes());
// sampleNamesInRod.add(rod.getVariantSampleNames());
// lociInRod.add(rod.getLocation());
// } while ( rod.parseLine(null,null) );
//
// Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
//
// ArrayList<Genotype> snp1 = genotypesInRod.get(0);
// ArrayList<Genotype> snp3 = genotypesInRod.get(2);
//
// Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
// Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
// Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
//
//
// List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
// List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
//
// String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
// String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
//
// Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
// Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
//
// boolean name2isSame = true;
//
// for ( ArrayList<String> names : sampleNamesInRod ) {
// name2isSame = name2isSame && names.get(1).equals("NAMELY");
// }
//
// Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
//
// // test that the loci are correctly parsed and in order
//
// List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
// boolean lociCorrect = true;
// for ( int i = 0; i < 3; i ++ ) {
// lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
// }
// }
//
// @Test
// public void testStandardPedFileWithIndels() {
// PlinkRod rod = new PlinkRod("test");
// try {
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
// } catch ( FileNotFoundException e) {
// throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
// }
//
// // Iterate through the rod
//
// List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
// ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
// ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
// do {
// genotypesInRod.add(rod.getGenotypes());
// sampleNamesInRod.add(rod.getVariantSampleNames());
// lociInRod.add(rod.getLocation());
// snpSites.add(rod.variantIsSNP());
// } while ( rod.parseLine(null,null) );
//
// boolean snpOrder = true;
// List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
// for ( int i = 0; i < 4; i ++ ) {
// snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
// }
//
// Assert.assertTrue("That the variant type order is as expected", snpOrder);
// Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
// Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
// Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
// Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
// Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
// Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
// Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
// }
//
// @Test
// public void testBinaryPedFileNoIndels() {
// PlinkRod rod = new PlinkRod("binaryTest1");
// try {
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
// } catch (FileNotFoundException e) {
// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
// }
//
// // iterate through the ROD and get stuff
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
// ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
//
// do {
// lociInRod.add(rod.getLocation());
// genotypesInRod.add(rod.getGenotypes());
// samplesInRod.add(rod.getVariantSampleNames());
// } while ( rod.parseLine(null,null) );
//
// List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
//
// for ( int i = 0; i < expecLoc.size(); i ++ ) {
// Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
// }
//
// List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
// "GG","GG","AG","TT","CC","CT","TG","GG","GG");
// List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
// false,false,false,true,false,false,true,true,false,false);
// List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
// "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
// "NA12878","NA12890","NA07000");
// int snpNo = 1;
// int indiv = 1;
// int alleleOffset = 0;
// for ( ArrayList<Genotype> snp : genotypesInRod ) {
// for ( Genotype gen : snp ) {
// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
// Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
// "(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
// Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
// Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
// indiv++;
// alleleOffset++;
// }
// indiv = 1;
// snpNo++;
// }
// }
//
// @Test
// public void testIndelBinary() {
// PlinkRod rod = new PlinkRod("binaryTest2");
// try {
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
// } catch (FileNotFoundException e) {
// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
// }
//
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
// do {
// genotypesInRod.add(rod.getGenotypes());
// } while ( rod.parseLine(null,null) );
//
// List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
// "TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
// List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
// false,false,false,false,false,false,true,false,true,false,false,false);
// List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
// false,false,false,false,false,false,false,false,false,false,false,false);
//
// int al = 0;
// for ( ArrayList<Genotype> snp : genotypesInRod ) {
// for ( Genotype gen : snp ) {
// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
// Allele firstAl = gen.getAlleles().get(0);
// Allele secondAl = gen.getAlleles().get(1);
// boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
// boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
// Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
// Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
// Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
// al++;
// }
// }
// }
//}