Refactoring and migration of new allele/variantcontext/genotype code into oneoffprojects. NOT FOR USE. PlinkRod commented out due to dependence on this new, rapidly changing interface.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2687 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3380b6ebe8
commit
c231547204
|
|
@ -1,315 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.contexts;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
import org.apache.commons.jexl.*;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author ebanks
|
|
||||||
* <p/>
|
|
||||||
* Class VariantContext
|
|
||||||
* <p/>
|
|
||||||
* This class represents a context that unifies one or more variants
|
|
||||||
*/
|
|
||||||
public class VariantContext {
|
|
||||||
|
|
||||||
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
|
||||||
|
|
||||||
private Set<Allele> alleles;
|
|
||||||
|
|
||||||
private Set<Genotype> genotypes;
|
|
||||||
|
|
||||||
private Allele reference;
|
|
||||||
|
|
||||||
private GenomeLoc loc;
|
|
||||||
|
|
||||||
private HashMap<Object, Object> attributes;
|
|
||||||
|
|
||||||
|
|
||||||
public VariantContext(VariationRod rod) {
|
|
||||||
|
|
||||||
// TODO -- VariationRod should eventually require classes to implement toVariationContext()
|
|
||||||
// TODO -- (instead of using a temporary adapter class)
|
|
||||||
|
|
||||||
loc = rod.getLocation();
|
|
||||||
reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
|
|
||||||
|
|
||||||
// TODO -- populate the alleles and genotypes through an adapter
|
|
||||||
alleles = new HashSet<Allele>();
|
|
||||||
genotypes = new HashSet<Genotype>();
|
|
||||||
|
|
||||||
attributes = new HashMap<Object, Object>();
|
|
||||||
}
|
|
||||||
|
|
||||||
protected VariantContext(GenomeLoc loc, Allele reference, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
|
|
||||||
this.loc = loc;
|
|
||||||
this.reference = reference;
|
|
||||||
this.alleles = new HashSet<Allele>(alleles);
|
|
||||||
this.genotypes = new HashSet<Genotype>(genotypes);
|
|
||||||
this.attributes = new HashMap<Object, Object>(attributes);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param other another variant context
|
|
||||||
*
|
|
||||||
* throws an exception if there is a collision such that the same sample exists in both contexts
|
|
||||||
* @return a context representing the merge of this context and the other provided context
|
|
||||||
*/
|
|
||||||
public VariantContext merge(VariantContext other) {
|
|
||||||
return merge(other, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param other another variant context
|
|
||||||
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
|
|
||||||
* the samples will be uniquified(based on their sources);
|
|
||||||
* otherwise, an exception will be thrown
|
|
||||||
*
|
|
||||||
* @return a context representing the merge of this context and the other provided context
|
|
||||||
*/
|
|
||||||
public VariantContext merge(VariantContext other, boolean uniquifySamples) {
|
|
||||||
if ( !loc.equals(other.getLocation()) )
|
|
||||||
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
|
|
||||||
|
|
||||||
Set<String> samples = getSampleNames();
|
|
||||||
Set<Genotype> Gs = new HashSet<Genotype>(genotypes);
|
|
||||||
|
|
||||||
for ( Genotype g : other.getGenotypes() ) {
|
|
||||||
if ( samples.contains(g.getSample()) ) {
|
|
||||||
if ( uniquifySamples )
|
|
||||||
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
|
|
||||||
else
|
|
||||||
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
|
|
||||||
}
|
|
||||||
Gs.add(g);
|
|
||||||
}
|
|
||||||
|
|
||||||
HashMap<Object, Object> attrs = new HashMap<Object, Object>(attributes);
|
|
||||||
attrs.putAll(other.getAttributes());
|
|
||||||
|
|
||||||
return createNewContext(Gs, attrs);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the location of this context
|
|
||||||
*/
|
|
||||||
public GenomeLoc getLocation() { return loc; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the reference allele for this context
|
|
||||||
*/
|
|
||||||
public Allele getReference() { return reference; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return true if the context is variant (i.e. contains a non-reference allele)
|
|
||||||
*/
|
|
||||||
public boolean isVariant() {
|
|
||||||
for ( Allele allele : alleles ) {
|
|
||||||
if ( allele.isVariant() )
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return true if the context is strictly bi-allelic
|
|
||||||
*/
|
|
||||||
public boolean isBiallelic() {
|
|
||||||
return getAlternateAlleles().size() == 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
|
|
||||||
*/
|
|
||||||
public boolean isPointAllele() {
|
|
||||||
for ( Allele allele : alleles ) {
|
|
||||||
if ( allele.isVariant() && !allele.isSNP() )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the set of all sample names in this context
|
|
||||||
*/
|
|
||||||
public Set<String> getSampleNames() {
|
|
||||||
Set<String> samples = new TreeSet<String>();
|
|
||||||
for ( Genotype g : genotypes )
|
|
||||||
samples.add(g.getSample());
|
|
||||||
return samples;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return true if the context represents variants with associated genotypes
|
|
||||||
*/
|
|
||||||
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return set of all Genotypes associated with this context
|
|
||||||
*/
|
|
||||||
public Set<Genotype> getGenotypes() { return genotypes; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param sample the sample name
|
|
||||||
*
|
|
||||||
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
|
|
||||||
*/
|
|
||||||
public Genotype getGenotype(String sample) {
|
|
||||||
for ( Genotype g : genotypes ) {
|
|
||||||
if ( g.getSample().equals(sample) )
|
|
||||||
return g;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return set of all subclasses within this context
|
|
||||||
*/
|
|
||||||
public Set<Object> getSubclasses() {
|
|
||||||
Set<Object> subclasses = new HashSet<Object>();
|
|
||||||
for ( Genotype g : genotypes )
|
|
||||||
subclasses.addAll(g.getAttributes().keySet());
|
|
||||||
return subclasses;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param subclass the name of a subclass of variants to select
|
|
||||||
*
|
|
||||||
* @return a subset of this context which selects based on the given subclass
|
|
||||||
*/
|
|
||||||
public VariantContext select(String subclass) {
|
|
||||||
HashSet<Genotype> Gs = new HashSet<Genotype>();
|
|
||||||
for ( Genotype g : genotypes ) {
|
|
||||||
if ( g.getAttribute(subclass) != null )
|
|
||||||
Gs.add(g);
|
|
||||||
}
|
|
||||||
return createNewContext(Gs, attributes);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param expr a jexl expression describing how to filter this context
|
|
||||||
*
|
|
||||||
* @return a subset of this context which is filtered based on the given expression
|
|
||||||
*/
|
|
||||||
public VariantContext filter(String expr) {
|
|
||||||
HashSet<Genotype> Gs = new HashSet<Genotype>();
|
|
||||||
try {
|
|
||||||
Expression filterExpression = ExpressionFactory.createExpression(expr);
|
|
||||||
|
|
||||||
for ( Genotype g : genotypes ) {
|
|
||||||
JexlContext jContext = JexlHelper.createContext();
|
|
||||||
jContext.setVars(g.getAttributes());
|
|
||||||
if ( (Boolean)filterExpression.evaluate(jContext) )
|
|
||||||
Gs.add(g);
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new StingException("JEXL error in VariantContext: " + e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
return createNewContext(Gs, attributes);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return a set of new variant contexts, one for each sample from this context
|
|
||||||
*/
|
|
||||||
public Set<VariantContext> splitBySample() {
|
|
||||||
Set<VariantContext> contexts = new HashSet<VariantContext>();
|
|
||||||
for ( Genotype g : genotypes ) {
|
|
||||||
HashSet<Genotype> gAsSet = new HashSet<Genotype>();
|
|
||||||
gAsSet.add(g);
|
|
||||||
contexts.add(createNewContext(gAsSet, attributes));
|
|
||||||
}
|
|
||||||
return contexts;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param Gs the set of genotypes from which to create a new context
|
|
||||||
* @param attrs the attributes for the new context
|
|
||||||
*
|
|
||||||
* @return a new context based on the given genotypes
|
|
||||||
*/
|
|
||||||
private VariantContext createNewContext(Set<Genotype> Gs, HashMap<Object, Object> attrs) {
|
|
||||||
HashSet<Allele> As = new HashSet<Allele>();
|
|
||||||
for ( Genotype g : Gs )
|
|
||||||
As.addAll(g.getAlleles());
|
|
||||||
return new VariantContext(loc, reference, As, Gs, attrs);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param allele the allele to be queried
|
|
||||||
*
|
|
||||||
* @return the frequency of the given allele in this context
|
|
||||||
*/
|
|
||||||
public double getAlleleFrequency(Allele allele) {
|
|
||||||
|
|
||||||
int alleleCount = 0;
|
|
||||||
int totalCount = 0;
|
|
||||||
|
|
||||||
for ( Genotype g : genotypes ) {
|
|
||||||
for ( Allele a : g.getAlleles() ) {
|
|
||||||
totalCount++;
|
|
||||||
if ( allele.equals(a) )
|
|
||||||
alleleCount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the alleles. This method should return all of the alleles present at the location,
|
|
||||||
* including the reference allele. There are no constraints imposed on the ordering of alleles
|
|
||||||
* in the set. If the reference is not an allele in this context it will not be included.
|
|
||||||
*
|
|
||||||
* @return the set of alleles
|
|
||||||
*/
|
|
||||||
public Set<Allele> getAlleles() { return alleles; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the alternate alleles. This method should return all the alleles present at the location,
|
|
||||||
* NOT including the reference allele. There are no constraints imposed on the ordering of alleles
|
|
||||||
* in the set.
|
|
||||||
*
|
|
||||||
* @return the set of alternate alleles
|
|
||||||
*/
|
|
||||||
public Set<Allele> getAlternateAlleles() {
|
|
||||||
HashSet<Allele> altAlleles = new HashSet<Allele>();
|
|
||||||
for ( Allele allele : alleles ) {
|
|
||||||
if ( !allele.equals(reference) )
|
|
||||||
altAlleles.add(allele);
|
|
||||||
}
|
|
||||||
return altAlleles;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the given attribute
|
|
||||||
*
|
|
||||||
* @param key the attribute key
|
|
||||||
* @param value the attribute value
|
|
||||||
*/
|
|
||||||
public void setAttribute(Object key, Object value) {
|
|
||||||
attributes.put(key, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @param key the attribute key
|
|
||||||
*
|
|
||||||
* @return the attribute value for the given key (or null if not set)
|
|
||||||
*/
|
|
||||||
public Object getAttribute(Object key) {
|
|
||||||
return attributes.get(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the attribute map
|
|
||||||
*/
|
|
||||||
public Map<Object, Object> getAttributes() {
|
|
||||||
return attributes;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,81 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author ebanks
|
|
||||||
* <p/>
|
|
||||||
* Class Allele
|
|
||||||
* <p/>
|
|
||||||
* This class encompasses all the basic information about an allele
|
|
||||||
*/
|
|
||||||
public class Allele {

    /** The kind of allele this object represents. */
    private final AlleleType type;

    /** The observed bases, stored upper-cased; empty for deletions. */
    private final String bases;

    // the types of variants we currently allow
    public enum AlleleType {
        REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE, DELETION_REFERENCE
    }

    /**
     * Creates an allele of the given type with the given observed bases.
     *
     * @param type  the allele type
     * @param bases the observed bases; stored upper-cased
     *
     * @throws IllegalArgumentException if bases is null, if a DELETION has any bases, or if a
     *         REFERENCE, SNP or UNKNOWN_POINT_ALLELE has more than one base
     */
    public Allele(AlleleType type, String bases) {
        if ( bases == null )
            throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
        if ( type == AlleleType.DELETION && bases.length() > 0 )
            throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
        if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP || type == AlleleType.UNKNOWN_POINT_ALLELE) && bases.length() > 1 )
            throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base");

        this.type = type;
        // fixed locale: the default-locale toUpperCase() maps 'i' to a dotted capital in e.g. Turkish
        this.bases = bases.toUpperCase(java.util.Locale.ENGLISH);
    }

    /**
     * convenience method for switching over the allele type
     *
     * @return the AlleleType of this allele
     **/
    public AlleleType getType() { return type; }

    /**
     * convenience method for SNPs
     *
     * @return true if this is a SNP, false otherwise
     */
    public boolean isSNP() { return type == AlleleType.SNP; }

    /**
     * convenience method for variants
     *
     * @return true if this is a variant allele (any type other than REFERENCE), false if it's reference
     */
    public boolean isVariant() { return type != AlleleType.REFERENCE; }

    /**
     * convenience method for indels
     *
     * @return true if this is an insertion or a deletion, false otherwise
     */
    public boolean isIndel() { return type == AlleleType.INSERTION || type == AlleleType.DELETION; }

    /**
     * For deletions, this method returns an empty String.
     * For everything else, observed bases for the allele are returned.
     *
     * @return the bases, as an upper-cased string
     */
    public String getBases() { return bases; }

    /**
     * @param other  the other allele
     *
     * @return true if these alleles are equal, i.e. have the same type and the same bases;
     *         false if other is null
     */
    public boolean equals(Allele other) {
        return other != null && type == other.getType() && bases.equals(other.getBases());
    }

    /**
     * Overrides Object.equals so that hash-based collections (e.g. a HashSet of alleles)
     * use the same notion of equality as {@link #equals(Allele)}.
     *
     * @param other  the object to compare against
     *
     * @return true if other is an Allele with the same type and bases
     */
    @Override
    public boolean equals(Object other) {
        return other instanceof Allele && equals((Allele)other);
    }

    /**
     * @return a hash code consistent with equals: derived from the type and the bases
     */
    @Override
    public int hashCode() {
        final int typeHash = (type == null) ? 0 : type.hashCode();
        return 31 * typeHash + bases.hashCode();
    }

}
|
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -60,11 +60,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
|
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
|
||||||
public boolean alwaysShowSecondBase = false;
|
public boolean alwaysShowSecondBase = false;
|
||||||
|
|
||||||
@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
|
//@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
|
||||||
public boolean qualsAsInts = false;
|
//public boolean qualsAsInts = false;
|
||||||
|
|
||||||
@Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
|
//@Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
|
||||||
public boolean IGNORE_UNCOVERED_BASES = false;
|
//public boolean IGNORE_UNCOVERED_BASES = false;
|
||||||
|
|
||||||
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
|
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
|
||||||
public boolean SHOW_INDEL_PILEUPS = false;
|
public boolean SHOW_INDEL_PILEUPS = false;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,126 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author ebanks, depristo
|
||||||
|
* Types of alleles:
|
||||||
|
*
|
||||||
|
* Ref: a t C g a // C is the reference base
|
||||||
|
*
|
||||||
|
* : a t G g a // C base is a G in some individuals
|
||||||
|
*
|
||||||
|
* : a t - g a // C base is deleted w.r.t. the reference
|
||||||
|
*
|
||||||
|
* : a t CAg a // A base is inserted w.r.t. the reference sequence
|
||||||
|
*
|
||||||
|
* In these cases, where are the alleles?
|
||||||
|
*
|
||||||
|
* SNP polymorphism of C/G -> { C , G } -> C is the reference allele
|
||||||
|
* 1 base deletion of C -> { C , - } -> C is the reference allele
|
||||||
|
* 1 base insertion of A -> { - ; A } -> NULL is the reference allele
|
||||||
|
*
|
||||||
|
* Suppose I see the following in the population:
|
||||||
|
*
|
||||||
|
* Ref: a t C g a // C is the reference base
|
||||||
|
* : a t G g a // C base is a G in some individuals
|
||||||
|
* : a t - g a // C base is deleted w.r.t. the reference
|
||||||
|
*
|
||||||
|
* How do I represent this? There are three segregating alleles:
|
||||||
|
*
|
||||||
|
* { C , G , - }
|
||||||
|
*
|
||||||
|
* Now suppose I have this more complex example:
|
||||||
|
*
|
||||||
|
* Ref: a t C g a // C is the reference base
|
||||||
|
* : a t - g a
|
||||||
|
* : a t - - a
|
||||||
|
* : a t CAg a
|
||||||
|
*
|
||||||
|
* There are actually four segregating alleles:
|
||||||
|
*
|
||||||
|
* { C g , - g, - -, and CAg } over bases 2-4
|
||||||
|
*
|
||||||
|
* However, the molecular equivalence explicitly listed above is usually discarded, so the actual
|
||||||
|
* segregating alleles are:
|
||||||
|
*
|
||||||
|
* { C g, g, -, C a g }
|
||||||
|
*
|
||||||
|
* Critically, it should be possible to apply an allele to a reference sequence to create the
|
||||||
|
* correct haplotype sequence:
|
||||||
|
*
|
||||||
|
* Allele + reference => haplotype
|
||||||
|
*
|
||||||
|
* For convenience, we are going to create Alleles where the GenomeLoc of the allele is stored outside of the
|
||||||
|
* Allele object itself. So there's an idea of an A/C polymorphism independent of its surrounding context.
|
||||||
|
*
|
||||||
|
* Given a list of alleles, it's possible to determine the "type" of the variation
|
||||||
|
*
|
||||||
|
* A / C @ loc => SNP with
|
||||||
|
* - / A => INDEL
|
||||||
|
*
|
||||||
|
* If you know which allele is the reference, you can determine whether the variant is an insertion or deletion
|
||||||
|
*/
|
||||||
|
public class Allele {
|
||||||
|
private boolean isRef = false;
|
||||||
|
private byte[] bases = null;
|
||||||
|
|
||||||
|
public Allele(byte[] bases, boolean isRef) {
|
||||||
|
bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
|
||||||
|
this.isRef = isRef;
|
||||||
|
|
||||||
|
if ( bases == null )
|
||||||
|
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
|
||||||
|
|
||||||
|
this.bases = bases;
|
||||||
|
for ( byte b : bases ) {
|
||||||
|
if ( ! BaseUtils.isRegularBase(b) ) {
|
||||||
|
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** null allele creation method */
|
||||||
|
public Allele(boolean isRef) {
|
||||||
|
this("", isRef);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Allele(String bases, boolean isRef) {
|
||||||
|
this(bases.getBytes(), isRef);
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// accessor routines
|
||||||
|
//
|
||||||
|
//
|
||||||
|
public boolean isNullAllele() { return length() == 0; }
|
||||||
|
public boolean isNonNullAllele() { return ! isNullAllele(); }
|
||||||
|
|
||||||
|
public boolean isReference() { return isRef; }
|
||||||
|
public boolean isNonReference() { return ! isReference(); }
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the DNA bases segregating in this allele. Note this isn't reference polarized,
|
||||||
|
* so the Null allele is represented by a vector of length 0
|
||||||
|
*
|
||||||
|
* @return the segregating bases
|
||||||
|
*/
|
||||||
|
public byte[] getBases() { return bases; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param other the other allele
|
||||||
|
*
|
||||||
|
* @return true if these alleles are equal
|
||||||
|
*/
|
||||||
|
public boolean equals(Allele other) {
|
||||||
|
return Arrays.equals(bases, other.getBases());
|
||||||
|
}
|
||||||
|
|
||||||
|
public int length() {
|
||||||
|
return bases.length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -10,11 +10,6 @@ import java.util.*;
|
||||||
* This class encompasses all the basic information about a genotype
|
* This class encompasses all the basic information about a genotype
|
||||||
*/
|
*/
|
||||||
public class Genotype {
|
public class Genotype {
|
||||||
|
|
||||||
public enum StandardAttributes {
|
|
||||||
DELETION_LENGTH, INVERSION_LENGTH
|
|
||||||
}
|
|
||||||
|
|
||||||
private List<Allele> alleles;
|
private List<Allele> alleles;
|
||||||
|
|
||||||
private double negLog10PError;
|
private double negLog10PError;
|
||||||
|
|
@ -32,9 +27,9 @@ public class Genotype {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the alleles for this genotype
|
* @return the alleles for this genotype
|
||||||
*/
|
*/
|
||||||
public List<Allele> getAlleles() { return alleles; }
|
public List<Allele> getAlleles() { return alleles; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the ploidy of this genotype
|
* @return the ploidy of this genotype
|
||||||
|
|
@ -77,10 +72,10 @@ public class Genotype {
|
||||||
* @return true if all alleles for this genotype are SNPs or reference
|
* @return true if all alleles for this genotype are SNPs or reference
|
||||||
*/
|
*/
|
||||||
public boolean isPointGenotype() {
|
public boolean isPointGenotype() {
|
||||||
for ( Allele allele : alleles ) {
|
// for ( Allele allele : alleles ) {
|
||||||
if ( allele.isVariant() && !allele.isSNP() )
|
// if ( allele.isVariant() && !allele.isSNP() )
|
||||||
return false;
|
// return false;
|
||||||
}
|
// }
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -88,21 +83,21 @@ public class Genotype {
|
||||||
* @return true if this is a variant genotype, false if it's reference
|
* @return true if this is a variant genotype, false if it's reference
|
||||||
*/
|
*/
|
||||||
public boolean isVariant() {
|
public boolean isVariant() {
|
||||||
for ( Allele allele : alleles ) {
|
// for ( Allele allele : alleles ) {
|
||||||
if ( allele.isVariant() )
|
// if ( allele.isVariant() )
|
||||||
return true;
|
// return true;
|
||||||
}
|
// }
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the -1 * log10-based error estimate
|
* @return the -1 * log10-based error estimate
|
||||||
*/
|
*/
|
||||||
public double getNegLog10PError() { return negLog10PError; }
|
public double getNegLog10PError() { return negLog10PError; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the sample name
|
* @return the sample name
|
||||||
*/
|
*/
|
||||||
public String getSample() { return sample; }
|
public String getSample() { return sample; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -0,0 +1,440 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import org.apache.commons.jexl.*;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author ebanks
|
||||||
|
* <p/>
|
||||||
|
* Class VariantContext
|
||||||
|
* <p/>
|
||||||
|
* This class represents a context that unifies one or more variants
|
||||||
|
*/
|
||||||
|
public class VariantContext {
|
||||||
|
private GenomeLoc loc;
|
||||||
|
|
||||||
|
private Set<Allele> alleles = new HashSet<Allele>();
|
||||||
|
|
||||||
|
private Set<Genotype> genotypes = new HashSet<Genotype>();
|
||||||
|
|
||||||
|
private HashMap<Object, Object> attributes = new HashMap<Object, Object>();
|
||||||
|
|
||||||
|
Type type = null;
|
||||||
|
|
||||||
|
private double negLog10PError = 0.0; // todo - fixme
|
||||||
|
|
||||||
|
/** Have we checked this VariantContext already? */
|
||||||
|
private boolean validatedP = false;
|
||||||
|
|
||||||
|
// public VariantContext(VariationRod rod) {
|
||||||
|
//
|
||||||
|
// // TODO -- VariationRod should eventually require classes to implement toVariationContext()
|
||||||
|
// // TODO -- (instead of using a temporary adapter class)
|
||||||
|
//
|
||||||
|
// loc = rod.getLocation();
|
||||||
|
// reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
|
||||||
|
//
|
||||||
|
// // TODO -- populate the alleles and genotypes through an adapter
|
||||||
|
// alleles = new HashSet<Allele>();
|
||||||
|
// genotypes = new HashSet<Genotype>();
|
||||||
|
//
|
||||||
|
// attributes = new HashMap<Object, Object>();
|
||||||
|
// }
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// constructors
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public VariantContext(GenomeLoc loc) {
|
||||||
|
if ( loc == null ) { throw new StingException("GenomeLoc cannot be null"); }
|
||||||
|
|
||||||
|
this.loc = loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a context that shares the parent's location and alleles but carries
 * the given genotypes and attributes.
 *
 * @param parent     the context supplying the location and the alleles
 * @param genotypes  the genotypes for the new context
 * @param attributes the attributes for the new context
 */
protected VariantContext(VariantContext parent,
                         Set<Genotype> genotypes,
                         HashMap<Object, Object> attributes) {
    this(parent.getLocation(),
         parent.getAlleles(),
         genotypes,
         attributes);
}
|
||||||
|
|
||||||
|
public VariantContext(GenomeLoc loc, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
|
||||||
|
this(loc);
|
||||||
|
|
||||||
|
// todo -- add extensive testing here
|
||||||
|
|
||||||
|
// todo -- check that exactly one allele is tagged as reference
|
||||||
|
|
||||||
|
this.alleles = new HashSet<Allele>(alleles);
|
||||||
|
this.genotypes = new HashSet<Genotype>(genotypes);
|
||||||
|
this.attributes = new HashMap<Object, Object>(attributes);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// type operations
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* see: http://www.ncbi.nlm.nih.gov/bookshelf/br.fcgi?book=handbook&part=ch5&rendertype=table&id=ch5.ch5_t3
|
||||||
|
*
|
||||||
|
* Format:
|
||||||
|
* dbSNP variation class
|
||||||
|
* Rules for assigning allele classes
|
||||||
|
* Sample allele definition
|
||||||
|
*
|
||||||
|
* Single Nucleotide Polymorphisms (SNPs)
|
||||||
|
* Strictly defined as single base substitutions involving A, T, C, or G.
|
||||||
|
* A/T
|
||||||
|
*
|
||||||
|
* Deletion/Insertion Polymorphisms (DIPs)
|
||||||
|
* Designated using the full sequence of the insertion as one allele, and either a fully
|
||||||
|
* defined string for the variant allele or a "-" character to specify the deleted allele.
|
||||||
|
* This class will be assigned to a variation if the variation alleles are of different lengths or
|
||||||
|
* if one of the alleles is deleted ("-").
|
||||||
|
* T/-/CCTA/G
|
||||||
|
*
|
||||||
|
* No-variation
|
||||||
|
* Reports may be submitted for segments of sequence that are assayed and determined to be invariant
|
||||||
|
* in the sample.
|
||||||
|
* (NoVariation)
|
||||||
|
*
|
||||||
|
* Mixed
|
||||||
|
* Mix of other classes
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* Not currently supported:
|
||||||
|
*
|
||||||
|
* Heterozygous sequence
|
||||||
|
* The term heterozygous is used to specify a region detected by certain methods that do not
|
||||||
|
* resolve the polymorphism into a specific sequence motif. In these cases, a unique flanking
|
||||||
|
* sequence must be provided to define a sequence context for the variation.
|
||||||
|
* (heterozygous)
|
||||||
|
*
|
||||||
|
* Microsatellite or short tandem repeat (STR)
|
||||||
|
* Alleles are designated by providing the repeat motif and the copy number for each allele.
|
||||||
|
* Expansion of the allele repeat motif designated in dbSNP into full-length sequence will
|
||||||
|
* be only an approximation of the true genomic sequence because many microsatellite markers are
|
||||||
|
* not fully sequenced and are resolved as size variants only.
|
||||||
|
* (CAC)8/9/10/11
|
||||||
|
*
|
||||||
|
* Named variant
|
||||||
|
* Applies to insertion/deletion polymorphisms of longer sequence features, such as retroposon
|
||||||
|
* dimorphism for Alu or line elements. These variations frequently include a deletion "-" indicator
|
||||||
|
* for the absent allele.
|
||||||
|
* (alu) / -
|
||||||
|
*
|
||||||
|
* Multi-Nucleotide Polymorphism (MNP)
|
||||||
|
* Assigned to variations that are multi-base variations of a single, common length
|
||||||
|
* GGA/AGT
|
||||||
|
*/
|
||||||
|
|
||||||
|
public enum Type {
    /** The site was assayed and found to be invariant. */
    NO_VARIATION,
    /** A single base substitution. */
    SNP,
    /** A deletion/insertion polymorphism. */
    INDEL,
    /** A mix of the other classes. */
    MIXED
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* convenience method for switching over the allele type
|
||||||
|
*
|
||||||
|
* @return the AlleleType of this allele
|
||||||
|
**/
|
||||||
|
public Type getType() {
|
||||||
|
if ( type == null )
|
||||||
|
determineType();
|
||||||
|
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* convenience method for SNPs
|
||||||
|
*
|
||||||
|
* @return true if this is a SNP, false otherwise
|
||||||
|
*/
|
||||||
|
public boolean isSNP() { return getType() == Type.SNP; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* convenience method for variants
|
||||||
|
*
|
||||||
|
* @return true if this is a variant allele, false if it's reference
|
||||||
|
*/
|
||||||
|
public boolean isVariant() { return getType() != Type.NO_VARIATION; }
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* convenience method for indels
|
||||||
|
*
|
||||||
|
* @return true if this is an indel, false otherwise
|
||||||
|
*/
|
||||||
|
public boolean isIndel() { return getType() == Type.INDEL; }
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* convenience method for indels
|
||||||
|
*
|
||||||
|
* @return true if this is an mixed variation, false otherwise
|
||||||
|
*/
|
||||||
|
public boolean isMixed() { return getType() == Type.MIXED; }
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Generic accessors
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the location of this context
|
||||||
|
*/
|
||||||
|
public GenomeLoc getLocation() { return loc; }
|
||||||
|
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Working with alleles
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the reference allele for this context
|
||||||
|
*/
|
||||||
|
public Allele getReference() {
|
||||||
|
for ( Allele allele : getAlleles() )
|
||||||
|
if ( allele.isReference() )
|
||||||
|
return allele;
|
||||||
|
|
||||||
|
throw new StingException("BUG: no reference allele found at " + this);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if the context is strictly bi-allelic
|
||||||
|
*/
|
||||||
|
public boolean isBiallelic() {
|
||||||
|
//return getAlternateAlleles().size() == 1;
|
||||||
|
return getAlleles().size() == 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the alleles. This method should return all of the alleles present at the location,
|
||||||
|
* including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||||
|
* in the set. If the reference is not an allele in this context it will not be included.
|
||||||
|
*
|
||||||
|
* @return the set of alleles
|
||||||
|
*/
|
||||||
|
public Set<Allele> getAlleles() { return alleles; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the alternate alleles. This method should return all the alleles present at the location,
|
||||||
|
* NOT including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||||
|
* in the set.
|
||||||
|
*
|
||||||
|
* @return the set of alternate alleles
|
||||||
|
*/
|
||||||
|
public Set<Allele> getAlternateAlleles() {
|
||||||
|
HashSet<Allele> altAlleles = new HashSet<Allele>();
|
||||||
|
for ( Allele allele : alleles ) {
|
||||||
|
if ( allele.isNonReference() )
|
||||||
|
altAlleles.add(allele);
|
||||||
|
}
|
||||||
|
|
||||||
|
return altAlleles;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Working with genotypes
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if the context represents variants with associated genotypes
|
||||||
|
*/
|
||||||
|
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return set of all Genotypes associated with this context
|
||||||
|
*/
|
||||||
|
|
||||||
|
// todo -- genotypes should really be stored as map, not set
|
||||||
|
public Set<Genotype> getGenotypes() { return genotypes; }
|
||||||
|
|
||||||
|
public Map<String, Genotype> getGenotypeMap() {
|
||||||
|
HashMap<String, Genotype> map = new HashMap<String, Genotype>();
|
||||||
|
for ( Genotype g : genotypes )
|
||||||
|
map.put(g.getSample(), g);
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the set of all sample names in this context
|
||||||
|
*/
|
||||||
|
public Set<String> getSampleNames() {
|
||||||
|
return getGenotypeMap().keySet();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param sample the sample name
|
||||||
|
*
|
||||||
|
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
|
||||||
|
*/
|
||||||
|
public Genotype getGenotype(String sample) {
|
||||||
|
return getGenotypeMap().get(sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Working with attributes
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// todo -- refactor into AttributedObject and have VariantContext and Genotype inherit from them
|
||||||
|
|
||||||
|
// todo -- define common attributes as enum
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the given attribute
|
||||||
|
*
|
||||||
|
* @param key the attribute key
|
||||||
|
* @param value the attribute value
|
||||||
|
*/
|
||||||
|
public void putAttribute(Object key, Object value) {
|
||||||
|
attributes.put(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void putAttributes(Map<? extends Object, Object> map) {
|
||||||
|
attributes.putAll(map);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasAttribute(Object key) {
|
||||||
|
return attributes.containsKey(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getNumAttributes() {
|
||||||
|
return attributes.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param key the attribute key
|
||||||
|
*
|
||||||
|
* @return the attribute value for the given key (or null if not set)
|
||||||
|
*/
|
||||||
|
public Object getAttribute(Object key) {
|
||||||
|
return attributes.get(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getAttribute(Object key, Object defaultValue) {
|
||||||
|
if ( hasAttribute(key) )
|
||||||
|
return attributes.get(key);
|
||||||
|
else
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @param key  the attribute key
 *
 * @return the value of getAttribute(key) cast to String (null if not set)
 * @throws ClassCastException if the stored value is not a String
 */
public String getAttributeAsString(Object key) {
    final Object raw = getAttribute(key);
    return (String)raw;
}
|
||||||
|
/**
 * @param key  the attribute key
 *
 * @return the value of getAttribute(key) cast to Integer and unboxed
 * @throws ClassCastException   if the stored value is not an Integer
 * @throws NullPointerException if getAttribute(key) returns null (unboxing a null Integer)
 */
public int getAttributeAsInt(Object key) {
    final Integer boxed = (Integer)getAttribute(key);
    return boxed.intValue();
}
|
||||||
|
/**
 * @param key  the attribute key
 *
 * @return the value of getAttribute(key) cast to Double and unboxed
 * @throws ClassCastException   if the stored value is not a Double
 * @throws NullPointerException if getAttribute(key) returns null (unboxing a null Double)
 */
public double getAttributeAsDouble(Object key) {
    final Double boxed = (Double)getAttribute(key);
    return boxed.doubleValue();
}
|
||||||
|
|
||||||
|
/**
 * @param key           the attribute key
 * @param defaultValue  the value to use when the key is not set
 *
 * @return the value of getAttribute(key, defaultValue) cast to String
 * @throws ClassCastException if the stored value is not a String
 */
public String getAttributeAsString(Object key, String defaultValue) {
    final Object raw = getAttribute(key, defaultValue);
    return (String)raw;
}
|
||||||
|
/**
 * @param key           the attribute key
 * @param defaultValue  the value to use when the key is not set
 *
 * @return the value of getAttribute(key, defaultValue) cast to Integer and unboxed
 * @throws ClassCastException   if the stored value is not an Integer
 * @throws NullPointerException if the key is set but maps to null (unboxing a null Integer)
 */
public int getAttributeAsInt(Object key, int defaultValue) {
    final Integer boxed = (Integer)getAttribute(key, defaultValue);
    return boxed.intValue();
}
|
||||||
|
/**
 * @param key           the attribute key
 * @param defaultValue  the value to use when the key is not set
 *
 * @return the value of getAttribute(key, defaultValue) cast to Double and unboxed
 * @throws ClassCastException   if the stored value is not a Double
 * @throws NullPointerException if the key is set but maps to null (unboxing a null Double)
 */
public double getAttributeAsDouble(Object key, double defaultValue) {
    final Double boxed = (Double)getAttribute(key, defaultValue);
    return boxed.doubleValue();
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the attribute map
|
||||||
|
*/
|
||||||
|
public Map<Object, Object> getAttributes() {
|
||||||
|
return attributes;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// validation
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* To be called by any modifying routines
|
||||||
|
*/
|
||||||
|
private void invalidate() { validatedP = false; }
|
||||||
|
|
||||||
|
public boolean validate() {
|
||||||
|
return validate(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean validate(boolean throwException) {
|
||||||
|
if ( ! validatedP ) {
|
||||||
|
boolean valid = false;
|
||||||
|
// todo -- add extensive validation checking here
|
||||||
|
if ( valid ) {
|
||||||
|
validatedP = valid;
|
||||||
|
} else if ( throwException ) {
|
||||||
|
throw new StingException(this + " failed validation");
|
||||||
|
}
|
||||||
|
|
||||||
|
return valid;
|
||||||
|
} else {
|
||||||
|
return validatedP;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// utility routines
|
||||||
|
//
|
||||||
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private void determineType() {
|
||||||
|
if ( type == null ) {
|
||||||
|
// todo -- figure out the variation type
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// todo -- toString() method
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
|
||||||
|
*/
|
||||||
|
// public boolean isPointAllele() {
|
||||||
|
// for ( Allele allele : alleles ) {
|
||||||
|
// if ( allele.isVariant() && !allele.isSNP() )
|
||||||
|
// return false;
|
||||||
|
// }
|
||||||
|
// return true;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
|
||||||
|
// /**
|
||||||
|
// * @return set of all subclasses within this context
|
||||||
|
// */
|
||||||
|
// public Set<Object> getSubclasses() {
|
||||||
|
// Set<Object> subclasses = new HashSet<Object>();
|
||||||
|
// for ( Genotype g : genotypes )
|
||||||
|
// subclasses.addAll(g.getAttributes().keySet());
|
||||||
|
// return subclasses;
|
||||||
|
// }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param allele the allele to be queried
|
||||||
|
*
|
||||||
|
* @return the frequency of the given allele in this context
|
||||||
|
*/
|
||||||
|
public double getAlleleFrequency(Allele allele) {
|
||||||
|
int alleleCount = 0;
|
||||||
|
int totalCount = 0;
|
||||||
|
|
||||||
|
for ( Genotype g : genotypes ) {
|
||||||
|
for ( Allele a : g.getAlleles() ) {
|
||||||
|
totalCount++;
|
||||||
|
if ( allele.equals(a) )
|
||||||
|
alleleCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,104 @@
|
||||||
|
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
import org.apache.commons.jexl.*;
|
||||||
|
|
||||||
|
|
||||||
|
public class VariantContextUtils {
|
||||||
|
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param other another variant context
|
||||||
|
*
|
||||||
|
* throws an exception if there is a collision such that the same sample exists in both contexts
|
||||||
|
* @return a context representing the merge of this context and the other provided context
|
||||||
|
*/
|
||||||
|
public VariantContext merge(VariantContext left, VariantContext other) {
|
||||||
|
return merge(left, other, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param other another variant context
|
||||||
|
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
|
||||||
|
* the samples will be uniquified(based on their sources);
|
||||||
|
* otherwise, an exception will be thrown
|
||||||
|
*
|
||||||
|
* @return a context representing the merge of this context and the other provided context
|
||||||
|
*/
|
||||||
|
public VariantContext merge(VariantContext left, VariantContext other, boolean uniquifySamples) {
|
||||||
|
// todo -- make functional
|
||||||
|
|
||||||
|
if ( !left.getLocation().equals(other.getLocation()) )
|
||||||
|
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
|
||||||
|
|
||||||
|
Set<String> samples = left.getSampleNames();
|
||||||
|
Set<Genotype> Gs = new HashSet<Genotype>(left.getGenotypes());
|
||||||
|
|
||||||
|
for ( Genotype g : other.getGenotypes() ) {
|
||||||
|
if ( samples.contains(g.getSample()) ) {
|
||||||
|
if ( uniquifySamples )
|
||||||
|
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
|
||||||
|
else
|
||||||
|
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
|
||||||
|
}
|
||||||
|
Gs.add(g);
|
||||||
|
}
|
||||||
|
|
||||||
|
HashMap<Object, Object> attrs = new HashMap<Object, Object>(left.getAttributes());
|
||||||
|
attrs.putAll(other.getAttributes());
|
||||||
|
|
||||||
|
return new VariantContext(left, Gs, attrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param subclass the name of a subclass of variants to select
|
||||||
|
*
|
||||||
|
* @return a subset of this context which selects based on the given subclass
|
||||||
|
*/
|
||||||
|
// public VariantContextUtils select(String subclass) {
|
||||||
|
// HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||||
|
// for ( Genotype g : genotypes ) {
|
||||||
|
// if ( g.getAttribute(subclass) != null )
|
||||||
|
// Gs.add(g);
|
||||||
|
// }
|
||||||
|
// return createNewContext(Gs, attributes);
|
||||||
|
// }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param expr a jexl expression describing how to filter this context
|
||||||
|
*
|
||||||
|
* @return a subset of this context which is filtered based on the given expression
|
||||||
|
*/
|
||||||
|
// public VariantContextUtils filter(String expr) {
|
||||||
|
// HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||||
|
// try {
|
||||||
|
// Expression filterExpression = ExpressionFactory.createExpression(expr);
|
||||||
|
//
|
||||||
|
// for ( Genotype g : genotypes ) {
|
||||||
|
// JexlContext jContext = JexlHelper.createContext();
|
||||||
|
// jContext.setVars(g.getAttributes());
|
||||||
|
// if ( (Boolean)filterExpression.evaluate(jContext) )
|
||||||
|
// Gs.add(g);
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// } catch (Exception e) {
|
||||||
|
// throw new StingException("JEXL error in VariantContext: " + e.getMessage());
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return createNewContext(Gs, attributes);
|
||||||
|
// }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return a set of new variant contexts, one for each sample from this context
|
||||||
|
*/
|
||||||
|
// public Set<VariantContextUtils> splitBySample() {
|
||||||
|
// Set<VariantContextUtils> contexts = new HashSet<VariantContextUtils>();
|
||||||
|
// for ( Genotype g : genotypes ) {
|
||||||
|
// HashSet<Genotype> gAsSet = new HashSet<Genotype>();
|
||||||
|
// gAsSet.add(g);
|
||||||
|
// contexts.add(createNewContext(gAsSet, attributes));
|
||||||
|
// }
|
||||||
|
// return contexts;
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
@ -1,243 +1,243 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
//package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
//
|
||||||
import org.broadinstitute.sting.BaseTest;
|
//import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
//import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
//import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
//import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
//import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.junit.BeforeClass;
|
//import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
//import org.junit.Test;
|
||||||
import org.junit.Assert;
|
//import org.junit.Assert;
|
||||||
|
//
|
||||||
import java.io.File;
|
//import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
//import java.io.FileNotFoundException;
|
||||||
import java.io.BufferedReader;
|
//import java.io.BufferedReader;
|
||||||
import java.io.FileReader;
|
//import java.io.FileReader;
|
||||||
import java.util.*;
|
//import java.util.*;
|
||||||
|
//
|
||||||
/**
|
///**
|
||||||
* Created by IntelliJ IDEA.
|
// * Created by IntelliJ IDEA.
|
||||||
* User: Ghost
|
// * User: Ghost
|
||||||
* Date: Jan 22, 2010
|
// * Date: Jan 22, 2010
|
||||||
* Time: 11:27:33 PM
|
// * Time: 11:27:33 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
// * To change this template use File | Settings | File Templates.
|
||||||
*/
|
// */
|
||||||
public class PlinkRodTest extends BaseTest {
|
//public class PlinkRodTest extends BaseTest {
|
||||||
private static IndexedFastaSequenceFile seq;
|
// private static IndexedFastaSequenceFile seq;
|
||||||
|
//
|
||||||
@BeforeClass
|
// @BeforeClass
|
||||||
public static void beforeTests() {
|
// public static void beforeTests() {
|
||||||
try {
|
// try {
|
||||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
// seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||||
} catch (FileNotFoundException e) {
|
// } catch (FileNotFoundException e) {
|
||||||
throw new StingException("unable to load the sequence dictionary");
|
// throw new StingException("unable to load the sequence dictionary");
|
||||||
}
|
// }
|
||||||
GenomeLocParser.setupRefContigOrdering(seq);
|
// GenomeLocParser.setupRefContigOrdering(seq);
|
||||||
|
//
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
public BufferedReader openFile(String filename) {
|
// public BufferedReader openFile(String filename) {
|
||||||
try {
|
// try {
|
||||||
return new BufferedReader(new FileReader(filename));
|
// return new BufferedReader(new FileReader(filename));
|
||||||
} catch (FileNotFoundException e) {
|
// } catch (FileNotFoundException e) {
|
||||||
throw new StingException("Couldn't open file " + filename);
|
// throw new StingException("Couldn't open file " + filename);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
@Test
|
// @Test
|
||||||
public void testStandardPedFile() {
|
// public void testStandardPedFile() {
|
||||||
PlinkRod rod = new PlinkRod("test");
|
// PlinkRod rod = new PlinkRod("test");
|
||||||
try {
|
// try {
|
||||||
rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
|
// rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
|
||||||
} catch ( FileNotFoundException e ) {
|
// } catch ( FileNotFoundException e ) {
|
||||||
throw new StingException("test file for testStandardPedFile() does not exist",e);
|
// throw new StingException("test file for testStandardPedFile() does not exist",e);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
// test that the sample names are correct
|
// // test that the sample names are correct
|
||||||
|
//
|
||||||
List<String> rodNames = rod.getVariantSampleNames();
|
// List<String> rodNames = rod.getVariantSampleNames();
|
||||||
List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
|
// List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
|
||||||
|
//
|
||||||
Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
|
// Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
|
||||||
|
//
|
||||||
boolean namesCorrect = true;
|
// boolean namesCorrect = true;
|
||||||
for ( int i = 0; i < expectedNames.size(); i++ ) {
|
// for ( int i = 0; i < expectedNames.size(); i++ ) {
|
||||||
namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
|
// namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
|
// Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
|
||||||
|
//
|
||||||
// test that rod can be iterated over
|
// // test that rod can be iterated over
|
||||||
|
//
|
||||||
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||||
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
|
// ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
|
||||||
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||||
do {
|
// do {
|
||||||
genotypesInRod.add(rod.getGenotypes());
|
// genotypesInRod.add(rod.getGenotypes());
|
||||||
sampleNamesInRod.add(rod.getVariantSampleNames());
|
// sampleNamesInRod.add(rod.getVariantSampleNames());
|
||||||
lociInRod.add(rod.getLocation());
|
// lociInRod.add(rod.getLocation());
|
||||||
} while ( rod.parseLine(null,null) );
|
// } while ( rod.parseLine(null,null) );
|
||||||
|
//
|
||||||
Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
|
// Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
|
||||||
|
//
|
||||||
ArrayList<Genotype> snp1 = genotypesInRod.get(0);
|
// ArrayList<Genotype> snp1 = genotypesInRod.get(0);
|
||||||
ArrayList<Genotype> snp3 = genotypesInRod.get(2);
|
// ArrayList<Genotype> snp3 = genotypesInRod.get(2);
|
||||||
|
//
|
||||||
Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
|
// Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
|
||||||
Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
|
// Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
|
||||||
Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
|
// Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
|
||||||
|
//
|
||||||
|
//
|
||||||
List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
|
// List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
|
||||||
List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
|
// List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
|
||||||
|
//
|
||||||
String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
|
// String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
|
||||||
String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
|
// String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
|
||||||
|
//
|
||||||
Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
|
// Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
|
||||||
Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
|
// Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
|
||||||
|
//
|
||||||
boolean name2isSame = true;
|
// boolean name2isSame = true;
|
||||||
|
//
|
||||||
for ( ArrayList<String> names : sampleNamesInRod ) {
|
// for ( ArrayList<String> names : sampleNamesInRod ) {
|
||||||
name2isSame = name2isSame && names.get(1).equals("NAMELY");
|
// name2isSame = name2isSame && names.get(1).equals("NAMELY");
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
|
// Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
|
||||||
|
//
|
||||||
// test that the loci are correctly parsed and in order
|
// // test that the loci are correctly parsed and in order
|
||||||
|
//
|
||||||
List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
|
// List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
|
||||||
boolean lociCorrect = true;
|
// boolean lociCorrect = true;
|
||||||
for ( int i = 0; i < 3; i ++ ) {
|
// for ( int i = 0; i < 3; i ++ ) {
|
||||||
lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
|
// lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
@Test
|
// @Test
|
||||||
public void testStandardPedFileWithIndels() {
|
// public void testStandardPedFileWithIndels() {
|
||||||
PlinkRod rod = new PlinkRod("test");
|
// PlinkRod rod = new PlinkRod("test");
|
||||||
try {
|
// try {
|
||||||
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
|
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
|
||||||
} catch ( FileNotFoundException e) {
|
// } catch ( FileNotFoundException e) {
|
||||||
throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
|
// throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
// Iterate through the rod
|
// // Iterate through the rod
|
||||||
|
//
|
||||||
List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
// List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||||
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
|
// ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
|
||||||
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||||
ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
|
// ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
|
||||||
do {
|
// do {
|
||||||
genotypesInRod.add(rod.getGenotypes());
|
// genotypesInRod.add(rod.getGenotypes());
|
||||||
sampleNamesInRod.add(rod.getVariantSampleNames());
|
// sampleNamesInRod.add(rod.getVariantSampleNames());
|
||||||
lociInRod.add(rod.getLocation());
|
// lociInRod.add(rod.getLocation());
|
||||||
snpSites.add(rod.variantIsSNP());
|
// snpSites.add(rod.variantIsSNP());
|
||||||
} while ( rod.parseLine(null,null) );
|
// } while ( rod.parseLine(null,null) );
|
||||||
|
//
|
||||||
boolean snpOrder = true;
|
// boolean snpOrder = true;
|
||||||
List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
|
// List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
|
||||||
for ( int i = 0; i < 4; i ++ ) {
|
// for ( int i = 0; i < 4; i ++ ) {
|
||||||
snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
|
// snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
Assert.assertTrue("That the variant type order is as expected", snpOrder);
|
// Assert.assertTrue("That the variant type order is as expected", snpOrder);
|
||||||
Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
|
// Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
|
||||||
Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
|
// Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
|
||||||
Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
|
// Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
|
||||||
Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
|
// Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
|
||||||
Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
|
// Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
|
||||||
Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
|
// Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
|
||||||
Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
|
// Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
@Test
|
// @Test
|
||||||
public void testBinaryPedFileNoIndels() {
|
// public void testBinaryPedFileNoIndels() {
|
||||||
PlinkRod rod = new PlinkRod("binaryTest1");
|
// PlinkRod rod = new PlinkRod("binaryTest1");
|
||||||
try {
|
// try {
|
||||||
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
|
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
|
||||||
} catch (FileNotFoundException e) {
|
// } catch (FileNotFoundException e) {
|
||||||
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
// iterate through the ROD and get stuff
|
// // iterate through the ROD and get stuff
|
||||||
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||||
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||||
ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
|
// ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
|
||||||
|
//
|
||||||
do {
|
// do {
|
||||||
lociInRod.add(rod.getLocation());
|
// lociInRod.add(rod.getLocation());
|
||||||
genotypesInRod.add(rod.getGenotypes());
|
// genotypesInRod.add(rod.getGenotypes());
|
||||||
samplesInRod.add(rod.getVariantSampleNames());
|
// samplesInRod.add(rod.getVariantSampleNames());
|
||||||
} while ( rod.parseLine(null,null) );
|
// } while ( rod.parseLine(null,null) );
|
||||||
|
//
|
||||||
List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
|
// List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
|
||||||
|
//
|
||||||
for ( int i = 0; i < expecLoc.size(); i ++ ) {
|
// for ( int i = 0; i < expecLoc.size(); i ++ ) {
|
||||||
Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
|
// Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
|
// List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
|
||||||
"GG","GG","AG","TT","CC","CT","TG","GG","GG");
|
// "GG","GG","AG","TT","CC","CT","TG","GG","GG");
|
||||||
List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
|
// List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
|
||||||
false,false,false,true,false,false,true,true,false,false);
|
// false,false,false,true,false,false,true,true,false,false);
|
||||||
List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
// List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
||||||
"NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
// "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
||||||
"NA12878","NA12890","NA07000");
|
// "NA12878","NA12890","NA07000");
|
||||||
int snpNo = 1;
|
// int snpNo = 1;
|
||||||
int indiv = 1;
|
// int indiv = 1;
|
||||||
int alleleOffset = 0;
|
// int alleleOffset = 0;
|
||||||
for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
// for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
||||||
for ( Genotype gen : snp ) {
|
// for ( Genotype gen : snp ) {
|
||||||
String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
||||||
Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
|
// Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
|
||||||
"(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
|
// "(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
|
||||||
Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
|
// Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
|
||||||
Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
|
// Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
|
||||||
indiv++;
|
// indiv++;
|
||||||
alleleOffset++;
|
// alleleOffset++;
|
||||||
}
|
// }
|
||||||
indiv = 1;
|
// indiv = 1;
|
||||||
snpNo++;
|
// snpNo++;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
@Test
|
// @Test
|
||||||
public void testIndelBinary() {
|
// public void testIndelBinary() {
|
||||||
PlinkRod rod = new PlinkRod("binaryTest2");
|
// PlinkRod rod = new PlinkRod("binaryTest2");
|
||||||
try {
|
// try {
|
||||||
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
|
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
|
||||||
} catch (FileNotFoundException e) {
|
// } catch (FileNotFoundException e) {
|
||||||
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
||||||
}
|
// }
|
||||||
|
//
|
||||||
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||||
do {
|
// do {
|
||||||
genotypesInRod.add(rod.getGenotypes());
|
// genotypesInRod.add(rod.getGenotypes());
|
||||||
} while ( rod.parseLine(null,null) );
|
// } while ( rod.parseLine(null,null) );
|
||||||
|
//
|
||||||
List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
|
// List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
|
||||||
"TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
|
// "TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
|
||||||
List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
|
// List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
|
||||||
false,false,false,false,false,false,true,false,true,false,false,false);
|
// false,false,false,false,false,false,true,false,true,false,false,false);
|
||||||
List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
|
// List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
|
||||||
false,false,false,false,false,false,false,false,false,false,false,false);
|
// false,false,false,false,false,false,false,false,false,false,false,false);
|
||||||
|
//
|
||||||
int al = 0;
|
// int al = 0;
|
||||||
for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
// for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
||||||
for ( Genotype gen : snp ) {
|
// for ( Genotype gen : snp ) {
|
||||||
String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
||||||
Allele firstAl = gen.getAlleles().get(0);
|
// Allele firstAl = gen.getAlleles().get(0);
|
||||||
Allele secondAl = gen.getAlleles().get(1);
|
// Allele secondAl = gen.getAlleles().get(1);
|
||||||
boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
|
// boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
|
||||||
boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
|
// boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
|
||||||
Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
|
// Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
|
||||||
Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
|
// Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
|
||||||
Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
|
// Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
|
||||||
al++;
|
// al++;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
//}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue