Refactoring and migration of new allele/variantcontext/genotype code into oneoffprojects. NOT FOR USE. PlinkRod commented out due to dependence on this new, rapidly changing interface.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2687 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3380b6ebe8
commit
c231547204
|
|
@ -1,315 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.contexts;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
||||
import java.util.*;
|
||||
import org.apache.commons.jexl.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VariantContext
|
||||
* <p/>
|
||||
* This class represents a context that unifies one or more variants
|
||||
*/
|
||||
public class VariantContext {
|
||||
|
||||
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||
|
||||
private Set<Allele> alleles;
|
||||
|
||||
private Set<Genotype> genotypes;
|
||||
|
||||
private Allele reference;
|
||||
|
||||
private GenomeLoc loc;
|
||||
|
||||
private HashMap<Object, Object> attributes;
|
||||
|
||||
|
||||
public VariantContext(VariationRod rod) {
|
||||
|
||||
// TODO -- VariationRod should eventually require classes to implement toVariationContext()
|
||||
// TODO -- (instead of using a temporary adapter class)
|
||||
|
||||
loc = rod.getLocation();
|
||||
reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
|
||||
|
||||
// TODO -- populate the alleles and genotypes through an adapter
|
||||
alleles = new HashSet<Allele>();
|
||||
genotypes = new HashSet<Genotype>();
|
||||
|
||||
attributes = new HashMap<Object, Object>();
|
||||
}
|
||||
|
||||
protected VariantContext(GenomeLoc loc, Allele reference, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
|
||||
this.loc = loc;
|
||||
this.reference = reference;
|
||||
this.alleles = new HashSet<Allele>(alleles);
|
||||
this.genotypes = new HashSet<Genotype>(genotypes);
|
||||
this.attributes = new HashMap<Object, Object>(attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param other another variant context
|
||||
*
|
||||
* throws an exception if there is a collision such that the same sample exists in both contexts
|
||||
* @return a context representing the merge of this context and the other provided context
|
||||
*/
|
||||
public VariantContext merge(VariantContext other) {
|
||||
return merge(other, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param other another variant context
|
||||
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
|
||||
* the samples will be uniquified(based on their sources);
|
||||
* otherwise, an exception will be thrown
|
||||
*
|
||||
* @return a context representing the merge of this context and the other provided context
|
||||
*/
|
||||
public VariantContext merge(VariantContext other, boolean uniquifySamples) {
|
||||
if ( !loc.equals(other.getLocation()) )
|
||||
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
|
||||
|
||||
Set<String> samples = getSampleNames();
|
||||
Set<Genotype> Gs = new HashSet<Genotype>(genotypes);
|
||||
|
||||
for ( Genotype g : other.getGenotypes() ) {
|
||||
if ( samples.contains(g.getSample()) ) {
|
||||
if ( uniquifySamples )
|
||||
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
|
||||
else
|
||||
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
|
||||
}
|
||||
Gs.add(g);
|
||||
}
|
||||
|
||||
HashMap<Object, Object> attrs = new HashMap<Object, Object>(attributes);
|
||||
attrs.putAll(other.getAttributes());
|
||||
|
||||
return createNewContext(Gs, attrs);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the location of this context
|
||||
*/
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
/**
|
||||
* @return the reference allele for this context
|
||||
*/
|
||||
public Allele getReference() { return reference; }
|
||||
|
||||
/**
|
||||
* @return true if the context is variant (i.e. contains a non-reference allele)
|
||||
*/
|
||||
public boolean isVariant() {
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isVariant() )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the context is strictly bi-allelic
|
||||
*/
|
||||
public boolean isBiallelic() {
|
||||
return getAlternateAlleles().size() == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
|
||||
*/
|
||||
public boolean isPointAllele() {
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isVariant() && !allele.isSNP() )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the set of all sample names in this context
|
||||
*/
|
||||
public Set<String> getSampleNames() {
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
for ( Genotype g : genotypes )
|
||||
samples.add(g.getSample());
|
||||
return samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the context represents variants with associated genotypes
|
||||
*/
|
||||
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
||||
|
||||
/**
|
||||
* @return set of all Genotypes associated with this context
|
||||
*/
|
||||
public Set<Genotype> getGenotypes() { return genotypes; }
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
|
||||
*/
|
||||
public Genotype getGenotype(String sample) {
|
||||
for ( Genotype g : genotypes ) {
|
||||
if ( g.getSample().equals(sample) )
|
||||
return g;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return set of all subclasses within this context
|
||||
*/
|
||||
public Set<Object> getSubclasses() {
|
||||
Set<Object> subclasses = new HashSet<Object>();
|
||||
for ( Genotype g : genotypes )
|
||||
subclasses.addAll(g.getAttributes().keySet());
|
||||
return subclasses;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param subclass the name of a subclass of variants to select
|
||||
*
|
||||
* @return a subset of this context which selects based on the given subclass
|
||||
*/
|
||||
public VariantContext select(String subclass) {
|
||||
HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||
for ( Genotype g : genotypes ) {
|
||||
if ( g.getAttribute(subclass) != null )
|
||||
Gs.add(g);
|
||||
}
|
||||
return createNewContext(Gs, attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param expr a jexl expression describing how to filter this context
|
||||
*
|
||||
* @return a subset of this context which is filtered based on the given expression
|
||||
*/
|
||||
public VariantContext filter(String expr) {
|
||||
HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||
try {
|
||||
Expression filterExpression = ExpressionFactory.createExpression(expr);
|
||||
|
||||
for ( Genotype g : genotypes ) {
|
||||
JexlContext jContext = JexlHelper.createContext();
|
||||
jContext.setVars(g.getAttributes());
|
||||
if ( (Boolean)filterExpression.evaluate(jContext) )
|
||||
Gs.add(g);
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new StingException("JEXL error in VariantContext: " + e.getMessage());
|
||||
}
|
||||
|
||||
return createNewContext(Gs, attributes);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a set of new variant contexts, one for each sample from this context
|
||||
*/
|
||||
public Set<VariantContext> splitBySample() {
|
||||
Set<VariantContext> contexts = new HashSet<VariantContext>();
|
||||
for ( Genotype g : genotypes ) {
|
||||
HashSet<Genotype> gAsSet = new HashSet<Genotype>();
|
||||
gAsSet.add(g);
|
||||
contexts.add(createNewContext(gAsSet, attributes));
|
||||
}
|
||||
return contexts;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param Gs the set of genotypes from which to create a new context
|
||||
* @param attrs the attributes for the new context
|
||||
*
|
||||
* @return a new context based on the given genotypes
|
||||
*/
|
||||
private VariantContext createNewContext(Set<Genotype> Gs, HashMap<Object, Object> attrs) {
|
||||
HashSet<Allele> As = new HashSet<Allele>();
|
||||
for ( Genotype g : Gs )
|
||||
As.addAll(g.getAlleles());
|
||||
return new VariantContext(loc, reference, As, Gs, attrs);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param allele the allele to be queried
|
||||
*
|
||||
* @return the frequency of the given allele in this context
|
||||
*/
|
||||
public double getAlleleFrequency(Allele allele) {
|
||||
|
||||
int alleleCount = 0;
|
||||
int totalCount = 0;
|
||||
|
||||
for ( Genotype g : genotypes ) {
|
||||
for ( Allele a : g.getAlleles() ) {
|
||||
totalCount++;
|
||||
if ( allele.equals(a) )
|
||||
alleleCount++;
|
||||
}
|
||||
}
|
||||
|
||||
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the alleles. This method should return all of the alleles present at the location,
|
||||
* including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||
* in the set. If the reference is not an allele in this context it will not be included.
|
||||
*
|
||||
* @return the set of alleles
|
||||
*/
|
||||
public Set<Allele> getAlleles() { return alleles; }
|
||||
|
||||
/**
|
||||
* Gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||
* in the set.
|
||||
*
|
||||
* @return the set of alternate alleles
|
||||
*/
|
||||
public Set<Allele> getAlternateAlleles() {
|
||||
HashSet<Allele> altAlleles = new HashSet<Allele>();
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( !allele.equals(reference) )
|
||||
altAlleles.add(allele);
|
||||
}
|
||||
return altAlleles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the given attribute
|
||||
*
|
||||
* @param key the attribute key
|
||||
* @param value the attribute value
|
||||
*/
|
||||
public void setAttribute(Object key, Object value) {
|
||||
attributes.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the attribute key
|
||||
*
|
||||
* @return the attribute value for the given key (or null if not set)
|
||||
*/
|
||||
public Object getAttribute(Object key) {
|
||||
return attributes.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the attribute map
|
||||
*/
|
||||
public Map<Object, Object> getAttributes() {
|
||||
return attributes;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class Allele
|
||||
* <p/>
|
||||
* This class emcompasses all the basic information about an allele
|
||||
*/
|
||||
public class Allele {
|
||||
|
||||
private AlleleType type;
|
||||
|
||||
private String bases;
|
||||
|
||||
|
||||
// the types of variants we currently allow
|
||||
public enum AlleleType {
|
||||
REFERENCE, SNP, INSERTION, DELETION, INVERSION, UNKNOWN_POINT_ALLELE, DELETION_REFERENCE
|
||||
}
|
||||
|
||||
public Allele(AlleleType type, String bases) {
|
||||
this.type = type;
|
||||
if ( bases == null )
|
||||
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null");
|
||||
if ( type == AlleleType.DELETION && bases.length() > 0 )
|
||||
throw new IllegalArgumentException("Constructor: deletions cannot have observed bases");
|
||||
if ( (type == AlleleType.REFERENCE || type == AlleleType.SNP || type == AlleleType.UNKNOWN_POINT_ALLELE) && bases.length() > 1 )
|
||||
throw new IllegalArgumentException("Constructor: point alleles cannot have more than one observed base");
|
||||
this.bases = bases.toUpperCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* convenience method for switching over the allele type
|
||||
*
|
||||
* @return the AlleleType of this allele
|
||||
**/
|
||||
public AlleleType getType() { return type; }
|
||||
|
||||
/**
|
||||
* convenience method for SNPs
|
||||
*
|
||||
* @return true if this is a SNP, false otherwise
|
||||
*/
|
||||
public boolean isSNP() { return type == AlleleType.SNP; }
|
||||
|
||||
/**
|
||||
* convenience method for variants
|
||||
*
|
||||
* @return true if this is a variant allele, false if it's reference
|
||||
*/
|
||||
public boolean isVariant() { return type != AlleleType.REFERENCE; }
|
||||
|
||||
|
||||
/**
|
||||
* convenience method for indels
|
||||
*
|
||||
* @return true if this is an indel, false otherwise
|
||||
*/
|
||||
public boolean isIndel() { return type == AlleleType.INSERTION || type == AlleleType.DELETION; }
|
||||
|
||||
|
||||
/**
|
||||
* For deletions, this method returns an empty String.
|
||||
* For everything else, observed bases for the allele are returned.
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
public String getBases() { return bases; }
|
||||
|
||||
/**
|
||||
* @param other the other allele
|
||||
*
|
||||
* @return true if these alleles are equal
|
||||
*/
|
||||
public boolean equals(Allele other) {
|
||||
return type == other.getType() && bases.equals(other.getBases());
|
||||
}
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -60,11 +60,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="alwaysShowSecondBase",doc="If true, prints dummy bases for the second bases in the BAM file where they are missing",required=false)
|
||||
public boolean alwaysShowSecondBase = false;
|
||||
|
||||
@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
|
||||
public boolean qualsAsInts = false;
|
||||
//@Argument(fullName="qualsAsInts",doc="If true, prints out qualities in the pileup as comma-separated integers",required=false)
|
||||
//public boolean qualsAsInts = false;
|
||||
|
||||
@Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
|
||||
public boolean IGNORE_UNCOVERED_BASES = false;
|
||||
//@Argument(fullName="ignore_uncovered_bases",shortName="skip_uncov",doc="Output nothing when a base is uncovered")
|
||||
//public boolean IGNORE_UNCOVERED_BASES = false;
|
||||
|
||||
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
|
||||
public boolean SHOW_INDEL_PILEUPS = false;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,126 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* @author ebanks, depristo
|
||||
* Types of alleles:
|
||||
*
|
||||
* Ref: a t C g a // C is the reference base
|
||||
*
|
||||
* : a t G g a // C base is a G in some individuals
|
||||
*
|
||||
* : a t - g a // C base is deleted w.r.t. the reference
|
||||
*
|
||||
* : a t CAg a // A base is inserted w.r.t. the reference sequence
|
||||
*
|
||||
* In these cases, where are the alleles?
|
||||
*
|
||||
* SNP polymorphism of C/G -> { C , G } -> C is the reference allele
|
||||
* 1 base deletion of C -> { C , - } -> C is the reference allele
|
||||
* 1 base insertion of A -> { - ; A } -> NULL is the reference allele
|
||||
*
|
||||
* Suppose I see a the following in the population:
|
||||
*
|
||||
* Ref: a t C g a // C is the reference base
|
||||
* : a t G g a // C base is a G in some individuals
|
||||
* : a t - g a // C base is deleted w.r.t. the reference
|
||||
*
|
||||
* How do I represent this? There are three segregating alleles:
|
||||
*
|
||||
* { C , G , - }
|
||||
*
|
||||
* Now suppose I have this more complex example:
|
||||
*
|
||||
* Ref: a t C g a // C is the reference base
|
||||
* : a t - g a
|
||||
* : a t - - a
|
||||
* : a t CAg a
|
||||
*
|
||||
* There are actually four segregating alleles:
|
||||
*
|
||||
* { C g , - g, - -, and CAg } over bases 2-4
|
||||
*
|
||||
* However, the molecular equivalence explicitly listed above is usually discarded, so the actual
|
||||
* segregating alleles are:
|
||||
*
|
||||
* { C g, g, -, C a g }
|
||||
*
|
||||
* Critically, it should be possible to apply an allele to a reference sequence to create the
|
||||
* correct haplotype sequence:
|
||||
*
|
||||
* Allele + reference => haplotype
|
||||
*
|
||||
* For convenience, we are going to create Alleles where the GenomeLoc of the allele is stored outside of the
|
||||
* Allele object itself. So there's an idea of an A/C polymorphism independent of it's surrounding context.
|
||||
*
|
||||
* Given list of alleles it's possible to determine the "type" of the variation
|
||||
*
|
||||
* A / C @ loc => SNP with
|
||||
* - / A => INDEL
|
||||
*
|
||||
* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion
|
||||
*/
|
||||
public class Allele {
|
||||
private boolean isRef = false;
|
||||
private byte[] bases = null;
|
||||
|
||||
public Allele(byte[] bases, boolean isRef) {
|
||||
bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
|
||||
this.isRef = isRef;
|
||||
|
||||
if ( bases == null )
|
||||
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
|
||||
|
||||
this.bases = bases;
|
||||
for ( byte b : bases ) {
|
||||
if ( ! BaseUtils.isRegularBase(b) ) {
|
||||
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** null allele creation method */
|
||||
public Allele(boolean isRef) {
|
||||
this("", isRef);
|
||||
}
|
||||
|
||||
public Allele(String bases, boolean isRef) {
|
||||
this(bases.getBytes(), isRef);
|
||||
}
|
||||
|
||||
//
|
||||
//
|
||||
// accessor routines
|
||||
//
|
||||
//
|
||||
public boolean isNullAllele() { return length() == 0; }
|
||||
public boolean isNonNullAllele() { return ! isNullAllele(); }
|
||||
|
||||
public boolean isReference() { return isRef; }
|
||||
public boolean isNonReference() { return ! isReference(); }
|
||||
|
||||
|
||||
/**
|
||||
* Return the DNA bases segregating in this allele. Note this isn't reference polarized,
|
||||
* so the Null allele is represented by a vector of length 0
|
||||
*
|
||||
* @return the segregating bases
|
||||
*/
|
||||
public byte[] getBases() { return bases; }
|
||||
|
||||
/**
|
||||
* @param other the other allele
|
||||
*
|
||||
* @return true if these alleles are equal
|
||||
*/
|
||||
public boolean equals(Allele other) {
|
||||
return Arrays.equals(bases, other.getBases());
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return bases.length;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -10,11 +10,6 @@ import java.util.*;
|
|||
* This class emcompasses all the basic information about a genotype
|
||||
*/
|
||||
public class Genotype {
|
||||
|
||||
public enum StandardAttributes {
|
||||
DELETION_LENGTH, INVERSION_LENGTH
|
||||
}
|
||||
|
||||
private List<Allele> alleles;
|
||||
|
||||
private double negLog10PError;
|
||||
|
|
@ -32,9 +27,9 @@ public class Genotype {
|
|||
}
|
||||
|
||||
/**
|
||||
* @return the alleles for this genotype
|
||||
*/
|
||||
public List<Allele> getAlleles() { return alleles; }
|
||||
* @return the alleles for this genotype
|
||||
*/
|
||||
public List<Allele> getAlleles() { return alleles; }
|
||||
|
||||
/**
|
||||
* @return the ploidy of this genotype
|
||||
|
|
@ -77,10 +72,10 @@ public class Genotype {
|
|||
* @return true if all alleles for this genotype are SNPs or reference
|
||||
*/
|
||||
public boolean isPointGenotype() {
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isVariant() && !allele.isSNP() )
|
||||
return false;
|
||||
}
|
||||
// for ( Allele allele : alleles ) {
|
||||
// if ( allele.isVariant() && !allele.isSNP() )
|
||||
// return false;
|
||||
// }
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -88,21 +83,21 @@ public class Genotype {
|
|||
* @return true if this is a variant genotype, false if it's reference
|
||||
*/
|
||||
public boolean isVariant() {
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isVariant() )
|
||||
return true;
|
||||
}
|
||||
// for ( Allele allele : alleles ) {
|
||||
// if ( allele.isVariant() )
|
||||
// return true;
|
||||
// }
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the -1 * log10-based error estimate
|
||||
*/
|
||||
* @return the -1 * log10-based error estimate
|
||||
*/
|
||||
public double getNegLog10PError() { return negLog10PError; }
|
||||
|
||||
/**
|
||||
* @return the sample name
|
||||
*/
|
||||
* @return the sample name
|
||||
*/
|
||||
public String getSample() { return sample; }
|
||||
|
||||
/**
|
||||
|
|
@ -0,0 +1,440 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
|
||||
import java.util.*;
|
||||
import org.apache.commons.jexl.*;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VariantContext
|
||||
* <p/>
|
||||
* This class represents a context that unifies one or more variants
|
||||
*/
|
||||
public class VariantContext {
|
||||
private GenomeLoc loc;
|
||||
|
||||
private Set<Allele> alleles = new HashSet<Allele>();
|
||||
|
||||
private Set<Genotype> genotypes = new HashSet<Genotype>();
|
||||
|
||||
private HashMap<Object, Object> attributes = new HashMap<Object, Object>();
|
||||
|
||||
Type type = null;
|
||||
|
||||
private double negLog10PError = 0.0; // todo - fixme
|
||||
|
||||
/** Have we checked this VariantContext already? */
|
||||
private boolean validatedP = false;
|
||||
|
||||
// public VariantContext(VariationRod rod) {
|
||||
//
|
||||
// // TODO -- VariationRod should eventually require classes to implement toVariationContext()
|
||||
// // TODO -- (instead of using a temporary adapter class)
|
||||
//
|
||||
// loc = rod.getLocation();
|
||||
// reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
|
||||
//
|
||||
// // TODO -- populate the alleles and genotypes through an adapter
|
||||
// alleles = new HashSet<Allele>();
|
||||
// genotypes = new HashSet<Genotype>();
|
||||
//
|
||||
// attributes = new HashMap<Object, Object>();
|
||||
// }
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
public VariantContext(GenomeLoc loc) {
|
||||
if ( loc == null ) { throw new StingException("GenomeLoc cannot be null"); }
|
||||
|
||||
this.loc = loc;
|
||||
}
|
||||
|
||||
protected VariantContext(VariantContext parent, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
|
||||
this(parent.getLocation(), parent.getAlleles(), genotypes, attributes);
|
||||
}
|
||||
|
||||
public VariantContext(GenomeLoc loc, Set<Allele> alleles, Set<Genotype> genotypes, HashMap<Object, Object> attributes) {
|
||||
this(loc);
|
||||
|
||||
// todo -- add extensive testing here
|
||||
|
||||
// todo -- check that exactly one allele is tagged as reference
|
||||
|
||||
this.alleles = new HashSet<Allele>(alleles);
|
||||
this.genotypes = new HashSet<Genotype>(genotypes);
|
||||
this.attributes = new HashMap<Object, Object>(attributes);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// type operations
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* see: http://www.ncbi.nlm.nih.gov/bookshelf/br.fcgi?book=handbook&part=ch5&rendertype=table&id=ch5.ch5_t3
|
||||
*
|
||||
* Format:
|
||||
* dbSNP variation class
|
||||
* Rules for assigning allele classes
|
||||
* Sample allele definition
|
||||
*
|
||||
* Single Nucleotide Polymorphisms (SNPs)a
|
||||
* Strictly defined as single base substitutions involving A, T, C, or G.
|
||||
* A/T
|
||||
*
|
||||
* Deletion/Insertion Polymorphisms (DIPs)
|
||||
* Designated using the full sequence of the insertion as one allele, and either a fully
|
||||
* defined string for the variant allele or a Ò-Ó character to specify the deleted allele.
|
||||
* This class will be assigned to a variation if the variation alleles are of different lengths or
|
||||
* if one of the alleles is deleted (Ò-Ó).
|
||||
* T/-/CCTA/G
|
||||
*
|
||||
* No-variation
|
||||
* Reports may be submitted for segments of sequence that are assayed and determined to be invariant
|
||||
* in the sample.
|
||||
* (NoVariation)
|
||||
*
|
||||
* Mixed
|
||||
* Mix of other classes
|
||||
*
|
||||
*
|
||||
* Not currently supported:
|
||||
*
|
||||
* Heterozygous sequencea
|
||||
* The term heterozygous is used to specify a region detected by certain methods that do not
|
||||
* resolve the polymorphism into a specific sequence motif. In these cases, a unique flanking
|
||||
* sequence must be provided to define a sequence context for the variation.
|
||||
* (heterozygous)
|
||||
*
|
||||
* Microsatellite or short tandem repeat (STR)
|
||||
* Alleles are designated by providing the repeat motif and the copy number for each allele.
|
||||
* Expansion of the allele repeat motif designated in dbSNP into full-length sequence will
|
||||
* be only an approximation of the true genomic sequence because many microsatellite markers are
|
||||
* not fully sequenced and are resolved as size variants only.
|
||||
* (CAC)8/9/10/11
|
||||
*
|
||||
* Named variant
|
||||
* Applies to insertion/deletion polymorphisms of longer sequence features, such as retroposon
|
||||
* dimorphism for Alu or line elements. These variations frequently include a deletion Ò-Ó indicator
|
||||
* for the absent allele.
|
||||
* (alu) / -
|
||||
*
|
||||
* Multi-Nucleotide Polymorphism (MNP)
|
||||
* Assigned to variations that are multi-base variations of a single, common length
|
||||
* GGA/AGT
|
||||
*/
|
||||
|
||||
public enum Type {
|
||||
NO_VARIATION,
|
||||
SNP,
|
||||
INDEL,
|
||||
MIXED
|
||||
}
|
||||
|
||||
/**
|
||||
* convenience method for switching over the allele type
|
||||
*
|
||||
* @return the AlleleType of this allele
|
||||
**/
|
||||
public Type getType() {
|
||||
if ( type == null )
|
||||
determineType();
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
* convenience method for SNPs
|
||||
*
|
||||
* @return true if this is a SNP, false otherwise
|
||||
*/
|
||||
public boolean isSNP() { return getType() == Type.SNP; }
|
||||
|
||||
/**
|
||||
* convenience method for variants
|
||||
*
|
||||
* @return true if this is a variant allele, false if it's reference
|
||||
*/
|
||||
public boolean isVariant() { return getType() != Type.NO_VARIATION; }
|
||||
|
||||
|
||||
/**
|
||||
* convenience method for indels
|
||||
*
|
||||
* @return true if this is an indel, false otherwise
|
||||
*/
|
||||
public boolean isIndel() { return getType() == Type.INDEL; }
|
||||
|
||||
|
||||
/**
|
||||
* convenience method for indels
|
||||
*
|
||||
* @return true if this is an mixed variation, false otherwise
|
||||
*/
|
||||
public boolean isMixed() { return getType() == Type.MIXED; }
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Generic accessors
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @return the location of this context
|
||||
*/
|
||||
public GenomeLoc getLocation() { return loc; }
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with alleles
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @return the reference allele for this context
|
||||
*/
|
||||
public Allele getReference() {
|
||||
for ( Allele allele : getAlleles() )
|
||||
if ( allele.isReference() )
|
||||
return allele;
|
||||
|
||||
throw new StingException("BUG: no reference allele found at " + this);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the context is strictly bi-allelic
|
||||
*/
|
||||
public boolean isBiallelic() {
|
||||
//return getAlternateAlleles().size() == 1;
|
||||
return getAlleles().size() == 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the alleles. This method should return all of the alleles present at the location,
|
||||
* including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||
* in the set. If the reference is not an allele in this context it will not be included.
|
||||
*
|
||||
* @return the set of alleles
|
||||
*/
|
||||
public Set<Allele> getAlleles() { return alleles; }
|
||||
|
||||
/**
|
||||
* Gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference allele. There are no constraints imposed on the ordering of alleles
|
||||
* in the set.
|
||||
*
|
||||
* @return the set of alternate alleles
|
||||
*/
|
||||
public Set<Allele> getAlternateAlleles() {
|
||||
HashSet<Allele> altAlleles = new HashSet<Allele>();
|
||||
for ( Allele allele : alleles ) {
|
||||
if ( allele.isNonReference() )
|
||||
altAlleles.add(allele);
|
||||
}
|
||||
|
||||
return altAlleles;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with genotypes
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @return true if the context represents variants with associated genotypes
|
||||
*/
|
||||
public boolean hasGenotypes() { return genotypes.size() > 0; }
|
||||
|
||||
/**
|
||||
* @return set of all Genotypes associated with this context
|
||||
*/
|
||||
|
||||
// todo -- genotypes should really be stored as map, not set
|
||||
public Set<Genotype> getGenotypes() { return genotypes; }
|
||||
|
||||
public Map<String, Genotype> getGenotypeMap() {
|
||||
HashMap<String, Genotype> map = new HashMap<String, Genotype>();
|
||||
for ( Genotype g : genotypes )
|
||||
map.put(g.getSample(), g);
|
||||
return map;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return the set of all sample names in this context
|
||||
*/
|
||||
public Set<String> getSampleNames() {
|
||||
return getGenotypeMap().keySet();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param sample the sample name
|
||||
*
|
||||
* @return the Genotype associated with the given sample in this context or null if the sample is not in this context
|
||||
*/
|
||||
public Genotype getGenotype(String sample) {
|
||||
return getGenotypeMap().get(sample);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with attributes
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
// todo -- refactor into AttributedObject and have VariantContext and Genotype inherit from them
|
||||
|
||||
// todo -- define common attributes as enum
|
||||
|
||||
/**
|
||||
* Sets the given attribute
|
||||
*
|
||||
* @param key the attribute key
|
||||
* @param value the attribute value
|
||||
*/
|
||||
public void putAttribute(Object key, Object value) {
|
||||
attributes.put(key, value);
|
||||
}
|
||||
|
||||
public void putAttributes(Map<? extends Object, Object> map) {
|
||||
attributes.putAll(map);
|
||||
}
|
||||
|
||||
public boolean hasAttribute(Object key) {
|
||||
return attributes.containsKey(key);
|
||||
}
|
||||
|
||||
public int getNumAttributes() {
|
||||
return attributes.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the attribute key
|
||||
*
|
||||
* @return the attribute value for the given key (or null if not set)
|
||||
*/
|
||||
public Object getAttribute(Object key) {
|
||||
return attributes.get(key);
|
||||
}
|
||||
|
||||
public Object getAttribute(Object key, Object defaultValue) {
|
||||
if ( hasAttribute(key) )
|
||||
return attributes.get(key);
|
||||
else
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
public String getAttributeAsString(Object key) { return (String)getAttribute(key); }
|
||||
public int getAttributeAsInt(Object key) { return (Integer)getAttribute(key); }
|
||||
public double getAttributeAsDouble(Object key) { return (Double)getAttribute(key); }
|
||||
|
||||
public String getAttributeAsString(Object key, String defaultValue) { return (String)getAttribute(key, defaultValue); }
|
||||
public int getAttributeAsInt(Object key, int defaultValue) { return (Integer)getAttribute(key, defaultValue); }
|
||||
public double getAttributeAsDouble(Object key, double defaultValue) { return (Double)getAttribute(key, defaultValue); }
|
||||
|
||||
/**
|
||||
* @return the attribute map
|
||||
*/
|
||||
public Map<Object, Object> getAttributes() {
|
||||
return attributes;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// validation
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* To be called by any modifying routines
|
||||
*/
|
||||
private void invalidate() { validatedP = false; }
|
||||
|
||||
public boolean validate() {
|
||||
return validate(true);
|
||||
}
|
||||
|
||||
public boolean validate(boolean throwException) {
|
||||
if ( ! validatedP ) {
|
||||
boolean valid = false;
|
||||
// todo -- add extensive validation checking here
|
||||
if ( valid ) {
|
||||
validatedP = valid;
|
||||
} else if ( throwException ) {
|
||||
throw new StingException(this + " failed validation");
|
||||
}
|
||||
|
||||
return valid;
|
||||
} else {
|
||||
return validatedP;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// utility routines
|
||||
//
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
private void determineType() {
|
||||
if ( type == null ) {
|
||||
// todo -- figure out the variation type
|
||||
}
|
||||
}
|
||||
|
||||
// todo -- toString() method
|
||||
|
||||
/**
|
||||
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
|
||||
*/
|
||||
// public boolean isPointAllele() {
|
||||
// for ( Allele allele : alleles ) {
|
||||
// if ( allele.isVariant() && !allele.isSNP() )
|
||||
// return false;
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
|
||||
// /**
|
||||
// * @return set of all subclasses within this context
|
||||
// */
|
||||
// public Set<Object> getSubclasses() {
|
||||
// Set<Object> subclasses = new HashSet<Object>();
|
||||
// for ( Genotype g : genotypes )
|
||||
// subclasses.addAll(g.getAttributes().keySet());
|
||||
// return subclasses;
|
||||
// }
|
||||
|
||||
/**
|
||||
* @param allele the allele to be queried
|
||||
*
|
||||
* @return the frequency of the given allele in this context
|
||||
*/
|
||||
public double getAlleleFrequency(Allele allele) {
|
||||
int alleleCount = 0;
|
||||
int totalCount = 0;
|
||||
|
||||
for ( Genotype g : genotypes ) {
|
||||
for ( Allele a : g.getAlleles() ) {
|
||||
totalCount++;
|
||||
if ( allele.equals(a) )
|
||||
alleleCount++;
|
||||
}
|
||||
}
|
||||
|
||||
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.variantcontext;
|
||||
|
||||
import java.util.*;
|
||||
import org.apache.commons.jexl.*;
|
||||
|
||||
|
||||
public class VariantContextUtils {
|
||||
private static final String UNIQUIFIED_SUFFIX = ".unique";
|
||||
|
||||
/**
|
||||
* @param other another variant context
|
||||
*
|
||||
* throws an exception if there is a collision such that the same sample exists in both contexts
|
||||
* @return a context representing the merge of this context and the other provided context
|
||||
*/
|
||||
public VariantContext merge(VariantContext left, VariantContext other) {
|
||||
return merge(left, other, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param other another variant context
|
||||
* @param uniquifySamples if true and there is a collision such that the same sample exists in both contexts,
|
||||
* the samples will be uniquified(based on their sources);
|
||||
* otherwise, an exception will be thrown
|
||||
*
|
||||
* @return a context representing the merge of this context and the other provided context
|
||||
*/
|
||||
public VariantContext merge(VariantContext left, VariantContext other, boolean uniquifySamples) {
|
||||
// todo -- make functional
|
||||
|
||||
if ( !left.getLocation().equals(other.getLocation()) )
|
||||
throw new IllegalArgumentException("The locations must be identical for two contexts to be merged");
|
||||
|
||||
Set<String> samples = left.getSampleNames();
|
||||
Set<Genotype> Gs = new HashSet<Genotype>(left.getGenotypes());
|
||||
|
||||
for ( Genotype g : other.getGenotypes() ) {
|
||||
if ( samples.contains(g.getSample()) ) {
|
||||
if ( uniquifySamples )
|
||||
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
|
||||
else
|
||||
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
|
||||
}
|
||||
Gs.add(g);
|
||||
}
|
||||
|
||||
HashMap<Object, Object> attrs = new HashMap<Object, Object>(left.getAttributes());
|
||||
attrs.putAll(other.getAttributes());
|
||||
|
||||
return new VariantContext(left, Gs, attrs);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @param subclass the name of a subclass of variants to select
|
||||
*
|
||||
* @return a subset of this context which selects based on the given subclass
|
||||
*/
|
||||
// public VariantContextUtils select(String subclass) {
|
||||
// HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||
// for ( Genotype g : genotypes ) {
|
||||
// if ( g.getAttribute(subclass) != null )
|
||||
// Gs.add(g);
|
||||
// }
|
||||
// return createNewContext(Gs, attributes);
|
||||
// }
|
||||
|
||||
/**
|
||||
* @param expr a jexl expression describing how to filter this context
|
||||
*
|
||||
* @return a subset of this context which is filtered based on the given expression
|
||||
*/
|
||||
// public VariantContextUtils filter(String expr) {
|
||||
// HashSet<Genotype> Gs = new HashSet<Genotype>();
|
||||
// try {
|
||||
// Expression filterExpression = ExpressionFactory.createExpression(expr);
|
||||
//
|
||||
// for ( Genotype g : genotypes ) {
|
||||
// JexlContext jContext = JexlHelper.createContext();
|
||||
// jContext.setVars(g.getAttributes());
|
||||
// if ( (Boolean)filterExpression.evaluate(jContext) )
|
||||
// Gs.add(g);
|
||||
// }
|
||||
//
|
||||
// } catch (Exception e) {
|
||||
// throw new StingException("JEXL error in VariantContext: " + e.getMessage());
|
||||
// }
|
||||
//
|
||||
// return createNewContext(Gs, attributes);
|
||||
// }
|
||||
|
||||
/**
|
||||
* @return a set of new variant contexts, one for each sample from this context
|
||||
*/
|
||||
// public Set<VariantContextUtils> splitBySample() {
|
||||
// Set<VariantContextUtils> contexts = new HashSet<VariantContextUtils>();
|
||||
// for ( Genotype g : genotypes ) {
|
||||
// HashSet<Genotype> gAsSet = new HashSet<Genotype>();
|
||||
// gAsSet.add(g);
|
||||
// contexts.add(createNewContext(gAsSet, attributes));
|
||||
// }
|
||||
// return contexts;
|
||||
// }
|
||||
}
|
||||
|
|
@ -1,243 +1,243 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.junit.Assert;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: Ghost
|
||||
* Date: Jan 22, 2010
|
||||
* Time: 11:27:33 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class PlinkRodTest extends BaseTest {
|
||||
private static IndexedFastaSequenceFile seq;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeTests() {
|
||||
try {
|
||||
seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("unable to load the sequence dictionary");
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(seq);
|
||||
|
||||
}
|
||||
|
||||
public BufferedReader openFile(String filename) {
|
||||
try {
|
||||
return new BufferedReader(new FileReader(filename));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Couldn't open file " + filename);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStandardPedFile() {
|
||||
PlinkRod rod = new PlinkRod("test");
|
||||
try {
|
||||
rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new StingException("test file for testStandardPedFile() does not exist",e);
|
||||
}
|
||||
|
||||
// test that the sample names are correct
|
||||
|
||||
List<String> rodNames = rod.getVariantSampleNames();
|
||||
List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
|
||||
|
||||
Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
|
||||
|
||||
boolean namesCorrect = true;
|
||||
for ( int i = 0; i < expectedNames.size(); i++ ) {
|
||||
namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
|
||||
}
|
||||
|
||||
Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
|
||||
|
||||
// test that rod can be iterated over
|
||||
|
||||
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
|
||||
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||
do {
|
||||
genotypesInRod.add(rod.getGenotypes());
|
||||
sampleNamesInRod.add(rod.getVariantSampleNames());
|
||||
lociInRod.add(rod.getLocation());
|
||||
} while ( rod.parseLine(null,null) );
|
||||
|
||||
Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
|
||||
|
||||
ArrayList<Genotype> snp1 = genotypesInRod.get(0);
|
||||
ArrayList<Genotype> snp3 = genotypesInRod.get(2);
|
||||
|
||||
Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
|
||||
Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
|
||||
Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
|
||||
|
||||
|
||||
List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
|
||||
List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
|
||||
|
||||
String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
|
||||
String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
|
||||
|
||||
Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
|
||||
Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
|
||||
|
||||
boolean name2isSame = true;
|
||||
|
||||
for ( ArrayList<String> names : sampleNamesInRod ) {
|
||||
name2isSame = name2isSame && names.get(1).equals("NAMELY");
|
||||
}
|
||||
|
||||
Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
|
||||
|
||||
// test that the loci are correctly parsed and in order
|
||||
|
||||
List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
|
||||
boolean lociCorrect = true;
|
||||
for ( int i = 0; i < 3; i ++ ) {
|
||||
lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStandardPedFileWithIndels() {
|
||||
PlinkRod rod = new PlinkRod("test");
|
||||
try {
|
||||
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
|
||||
} catch ( FileNotFoundException e) {
|
||||
throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
|
||||
}
|
||||
|
||||
// Iterate through the rod
|
||||
|
||||
List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
|
||||
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||
ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
|
||||
do {
|
||||
genotypesInRod.add(rod.getGenotypes());
|
||||
sampleNamesInRod.add(rod.getVariantSampleNames());
|
||||
lociInRod.add(rod.getLocation());
|
||||
snpSites.add(rod.variantIsSNP());
|
||||
} while ( rod.parseLine(null,null) );
|
||||
|
||||
boolean snpOrder = true;
|
||||
List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
|
||||
for ( int i = 0; i < 4; i ++ ) {
|
||||
snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
|
||||
}
|
||||
|
||||
Assert.assertTrue("That the variant type order is as expected", snpOrder);
|
||||
Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
|
||||
Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
|
||||
Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
|
||||
Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
|
||||
Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
|
||||
Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
|
||||
Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBinaryPedFileNoIndels() {
|
||||
PlinkRod rod = new PlinkRod("binaryTest1");
|
||||
try {
|
||||
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
||||
}
|
||||
|
||||
// iterate through the ROD and get stuff
|
||||
ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
|
||||
|
||||
do {
|
||||
lociInRod.add(rod.getLocation());
|
||||
genotypesInRod.add(rod.getGenotypes());
|
||||
samplesInRod.add(rod.getVariantSampleNames());
|
||||
} while ( rod.parseLine(null,null) );
|
||||
|
||||
List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
|
||||
|
||||
for ( int i = 0; i < expecLoc.size(); i ++ ) {
|
||||
Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
|
||||
}
|
||||
|
||||
List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
|
||||
"GG","GG","AG","TT","CC","CT","TG","GG","GG");
|
||||
List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
|
||||
false,false,false,true,false,false,true,true,false,false);
|
||||
List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
||||
"NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
||||
"NA12878","NA12890","NA07000");
|
||||
int snpNo = 1;
|
||||
int indiv = 1;
|
||||
int alleleOffset = 0;
|
||||
for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
||||
for ( Genotype gen : snp ) {
|
||||
String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
||||
Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
|
||||
"(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
|
||||
Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
|
||||
Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
|
||||
indiv++;
|
||||
alleleOffset++;
|
||||
}
|
||||
indiv = 1;
|
||||
snpNo++;
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIndelBinary() {
|
||||
PlinkRod rod = new PlinkRod("binaryTest2");
|
||||
try {
|
||||
rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
||||
}
|
||||
|
||||
ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
do {
|
||||
genotypesInRod.add(rod.getGenotypes());
|
||||
} while ( rod.parseLine(null,null) );
|
||||
|
||||
List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
|
||||
"TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
|
||||
List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
|
||||
false,false,false,false,false,false,true,false,true,false,false,false);
|
||||
List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
|
||||
false,false,false,false,false,false,false,false,false,false,false,false);
|
||||
|
||||
int al = 0;
|
||||
for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
||||
for ( Genotype gen : snp ) {
|
||||
String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
||||
Allele firstAl = gen.getAlleles().get(0);
|
||||
Allele secondAl = gen.getAlleles().get(1);
|
||||
boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
|
||||
boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
|
||||
Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
|
||||
Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
|
||||
Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
|
||||
al++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//package org.broadinstitute.sting.gatk.refdata;
|
||||
//
|
||||
//import org.broadinstitute.sting.BaseTest;
|
||||
//import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
|
||||
//import org.broadinstitute.sting.utils.StingException;
|
||||
//import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
//import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
//import org.junit.BeforeClass;
|
||||
//import org.junit.Test;
|
||||
//import org.junit.Assert;
|
||||
//
|
||||
//import java.io.File;
|
||||
//import java.io.FileNotFoundException;
|
||||
//import java.io.BufferedReader;
|
||||
//import java.io.FileReader;
|
||||
//import java.util.*;
|
||||
//
|
||||
///**
|
||||
// * Created by IntelliJ IDEA.
|
||||
// * User: Ghost
|
||||
// * Date: Jan 22, 2010
|
||||
// * Time: 11:27:33 PM
|
||||
// * To change this template use File | Settings | File Templates.
|
||||
// */
|
||||
//public class PlinkRodTest extends BaseTest {
|
||||
// private static IndexedFastaSequenceFile seq;
|
||||
//
|
||||
// @BeforeClass
|
||||
// public static void beforeTests() {
|
||||
// try {
|
||||
// seq = new IndexedFastaSequenceFile(new File(oneKGLocation + "reference/human_b36_both.fasta"));
|
||||
// } catch (FileNotFoundException e) {
|
||||
// throw new StingException("unable to load the sequence dictionary");
|
||||
// }
|
||||
// GenomeLocParser.setupRefContigOrdering(seq);
|
||||
//
|
||||
// }
|
||||
//
|
||||
// public BufferedReader openFile(String filename) {
|
||||
// try {
|
||||
// return new BufferedReader(new FileReader(filename));
|
||||
// } catch (FileNotFoundException e) {
|
||||
// throw new StingException("Couldn't open file " + filename);
|
||||
// }
|
||||
//
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testStandardPedFile() {
|
||||
// PlinkRod rod = new PlinkRod("test");
|
||||
// try {
|
||||
// rod.initialize( new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_test.ped") );
|
||||
// } catch ( FileNotFoundException e ) {
|
||||
// throw new StingException("test file for testStandardPedFile() does not exist",e);
|
||||
// }
|
||||
//
|
||||
// // test that the sample names are correct
|
||||
//
|
||||
// List<String> rodNames = rod.getVariantSampleNames();
|
||||
// List<String> expectedNames = Arrays.asList("NA12887","NAMELY","COWBA");
|
||||
//
|
||||
// Assert.assertEquals("That there are as many samples in the rod as in the expected list",expectedNames.size(),rodNames.size());
|
||||
//
|
||||
// boolean namesCorrect = true;
|
||||
// for ( int i = 0; i < expectedNames.size(); i++ ) {
|
||||
// namesCorrect = namesCorrect && ( rodNames.get(i).equals(expectedNames.get(i)) );
|
||||
// }
|
||||
//
|
||||
// Assert.assertTrue("That the names are correct and in the proper order",namesCorrect);
|
||||
//
|
||||
// // test that rod can be iterated over
|
||||
//
|
||||
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
// ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
|
||||
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||
// do {
|
||||
// genotypesInRod.add(rod.getGenotypes());
|
||||
// sampleNamesInRod.add(rod.getVariantSampleNames());
|
||||
// lociInRod.add(rod.getLocation());
|
||||
// } while ( rod.parseLine(null,null) );
|
||||
//
|
||||
// Assert.assertEquals("That there are three SNPs in the ROD",3,genotypesInRod.size());
|
||||
//
|
||||
// ArrayList<Genotype> snp1 = genotypesInRod.get(0);
|
||||
// ArrayList<Genotype> snp3 = genotypesInRod.get(2);
|
||||
//
|
||||
// Assert.assertEquals("That there are three Genotypes in SNP 1",3,snp1.size());
|
||||
// Assert.assertEquals("That there are three samples in SNP 2", 3, sampleNamesInRod.get(1).size());
|
||||
// Assert.assertEquals("That there are three Genotypes in SNP 3",3,snp3.size());
|
||||
//
|
||||
//
|
||||
// List<Allele> snp1_individual3_alleles = snp1.get(2).getAlleles();
|
||||
// List<Allele> snp3_individual2_alleles = snp3.get(1).getAlleles();
|
||||
//
|
||||
// String alleleStr1 = snp1_individual3_alleles.get(0).getBases()+snp1_individual3_alleles.get(1).getBases();
|
||||
// String alleleStr2 = snp3_individual2_alleles.get(0).getBases()+snp3_individual2_alleles.get(1).getBases();
|
||||
//
|
||||
// Assert.assertEquals("That the third genotype of snp 1 is correctly no-call","00",alleleStr1);
|
||||
// Assert.assertEquals("That the second genotype of snp 3 is correctly G G","GG",alleleStr2);
|
||||
//
|
||||
// boolean name2isSame = true;
|
||||
//
|
||||
// for ( ArrayList<String> names : sampleNamesInRod ) {
|
||||
// name2isSame = name2isSame && names.get(1).equals("NAMELY");
|
||||
// }
|
||||
//
|
||||
// Assert.assertTrue("That the second name of all the genotypes is the same and is correct",name2isSame);
|
||||
//
|
||||
// // test that the loci are correctly parsed and in order
|
||||
//
|
||||
// List<String> expectedLoci = Arrays.asList("1:123456","2:13274","3:11111");
|
||||
// boolean lociCorrect = true;
|
||||
// for ( int i = 0; i < 3; i ++ ) {
|
||||
// lociCorrect = lociCorrect && lociInRod.get(i).toString().equals(expectedLoci.get(i));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testStandardPedFileWithIndels() {
|
||||
// PlinkRod rod = new PlinkRod("test");
|
||||
// try {
|
||||
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/standard_plink_with_indels.ped") );
|
||||
// } catch ( FileNotFoundException e) {
|
||||
// throw new StingException("Test file for testStandardPedFileWithIndels() could not be found", e);
|
||||
// }
|
||||
//
|
||||
// // Iterate through the rod
|
||||
//
|
||||
// List<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
// ArrayList<ArrayList<String>> sampleNamesInRod = new ArrayList<ArrayList<String>>();
|
||||
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||
// ArrayList<Boolean> snpSites = new ArrayList<Boolean>();
|
||||
// do {
|
||||
// genotypesInRod.add(rod.getGenotypes());
|
||||
// sampleNamesInRod.add(rod.getVariantSampleNames());
|
||||
// lociInRod.add(rod.getLocation());
|
||||
// snpSites.add(rod.variantIsSNP());
|
||||
// } while ( rod.parseLine(null,null) );
|
||||
//
|
||||
// boolean snpOrder = true;
|
||||
// List<Boolean> expectedOrder = Arrays.asList(true,false,true,false);
|
||||
// for ( int i = 0; i < 4; i ++ ) {
|
||||
// snpOrder = snpOrder && ( expectedOrder.get(i) == snpSites.get(i) );
|
||||
// }
|
||||
//
|
||||
// Assert.assertTrue("That the variant type order is as expected", snpOrder);
|
||||
// Assert.assertTrue("That the second genotype of second variant is not a point mutation", ! genotypesInRod.get(1).get(1).isPointGenotype() );
|
||||
// Assert.assertTrue("That the second genotype of fourth variant is not a point mutation", ! genotypesInRod.get(3).get(1).isPointGenotype() );
|
||||
// Assert.assertTrue("That the second genotype of fourth variant is homozygous", genotypesInRod.get(3).get(1).isHom());
|
||||
// Assert.assertTrue("That the fourth genotype of fourth variant is heterozygous",genotypesInRod.get(3).get(3).isHet());
|
||||
// Assert.assertEquals("That the reference deletion genotype has the correct string", "ATTTAT",genotypesInRod.get(3).get(2).getAlleles().get(0).getBases());
|
||||
// Assert.assertEquals("That the insertion bases are correct","CTC",genotypesInRod.get(1).get(2).getAlleles().get(0).getBases());
|
||||
// Assert.assertEquals("That the snp bases are correct","GC",genotypesInRod.get(2).get(2).getAlleles().get(0).getBases()+genotypesInRod.get(2).get(2).getAlleles().get(1).getBases());
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testBinaryPedFileNoIndels() {
|
||||
// PlinkRod rod = new PlinkRod("binaryTest1");
|
||||
// try {
|
||||
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_noindel_test.bed"));
|
||||
// } catch (FileNotFoundException e) {
|
||||
// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
||||
// }
|
||||
//
|
||||
// // iterate through the ROD and get stuff
|
||||
// ArrayList<GenomeLoc> lociInRod = new ArrayList<GenomeLoc>();
|
||||
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
// ArrayList<ArrayList<String>> samplesInRod = new ArrayList<ArrayList<String>>();
|
||||
//
|
||||
// do {
|
||||
// lociInRod.add(rod.getLocation());
|
||||
// genotypesInRod.add(rod.getGenotypes());
|
||||
// samplesInRod.add(rod.getVariantSampleNames());
|
||||
// } while ( rod.parseLine(null,null) );
|
||||
//
|
||||
// List<String> expecLoc = Arrays.asList("1:123456","1:14327877","2:22074511","3:134787","3:178678","4:829645","4:5234132","12:1268713");
|
||||
//
|
||||
// for ( int i = 0; i < expecLoc.size(); i ++ ) {
|
||||
// Assert.assertEquals("That locus "+(i+1)+" in the rod is correct", expecLoc.get(i), lociInRod.get(i).toString());
|
||||
// }
|
||||
//
|
||||
// List<String> expecAlleles = Arrays.asList("AA","AA","AA","GG","GG","GG","AA","TA","TT","CC","CC","GC","TC","CC","TT",
|
||||
// "GG","GG","AG","TT","CC","CT","TG","GG","GG");
|
||||
// List<Boolean> expecHet = Arrays.asList(false,false,false,false,false,false,false,true,false,false,false,true,true,false,
|
||||
// false,false,false,true,false,false,true,true,false,false);
|
||||
// List<String> expecName = Arrays.asList("NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
||||
// "NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000","NA12878","NA12890","NA07000",
|
||||
// "NA12878","NA12890","NA07000");
|
||||
// int snpNo = 1;
|
||||
// int indiv = 1;
|
||||
// int alleleOffset = 0;
|
||||
// for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
||||
// for ( Genotype gen : snp ) {
|
||||
// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
||||
// Assert.assertEquals("That the allele of person "+indiv+" for snp "+snpNo+" is correct "+
|
||||
// "(allele offset "+alleleOffset+")", expecAlleles.get(alleleOffset),alStr);
|
||||
// Assert.assertEquals("That the genotype of person "+indiv+" for snp "+snpNo+" is properly set", expecHet.get(alleleOffset),gen.isHet());
|
||||
// Assert.assertEquals("That the name of person "+indiv+" for snp "+snpNo+" is correct", expecName.get(alleleOffset),samplesInRod.get(snpNo-1).get(indiv-1));
|
||||
// indiv++;
|
||||
// alleleOffset++;
|
||||
// }
|
||||
// indiv = 1;
|
||||
// snpNo++;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// @Test
|
||||
// public void testIndelBinary() {
|
||||
// PlinkRod rod = new PlinkRod("binaryTest2");
|
||||
// try {
|
||||
// rod.initialize(new File("/humgen/gsa-hpprojects/GATK/data/Validation_Data/test/plink_rod_test/binary_indel_test.bed"));
|
||||
// } catch (FileNotFoundException e) {
|
||||
// throw new StingException("Test file for testBinaryPedFileNoIndels() could not be found",e);
|
||||
// }
|
||||
//
|
||||
// ArrayList<ArrayList<Genotype>> genotypesInRod = new ArrayList<ArrayList<Genotype>>();
|
||||
// do {
|
||||
// genotypesInRod.add(rod.getGenotypes());
|
||||
// } while ( rod.parseLine(null,null) );
|
||||
//
|
||||
// List<String> expecAlleles = Arrays.asList("ACCA","","ACCAACCA","GGGG","GG","","AA","TA","00","","CCTCCT","CCT",
|
||||
// "TC","CC","TT","GG","GG","AG","","CTTGCTTG","CTTG","TG","GG","GG");
|
||||
// List<Boolean> expecDeletion = Arrays.asList(false,false,false,false,false,false,false,false,false,true,false,true,
|
||||
// false,false,false,false,false,false,true,false,true,false,false,false);
|
||||
// List<Boolean> expecInsertion = Arrays.asList(true,false,true,true,true,false,false,false,false,false,false,false,
|
||||
// false,false,false,false,false,false,false,false,false,false,false,false);
|
||||
//
|
||||
// int al = 0;
|
||||
// for ( ArrayList<Genotype> snp : genotypesInRod ) {
|
||||
// for ( Genotype gen : snp ) {
|
||||
// String alStr = gen.getAlleles().get(0).getBases()+gen.getAlleles().get(1).getBases();
|
||||
// Allele firstAl = gen.getAlleles().get(0);
|
||||
// Allele secondAl = gen.getAlleles().get(1);
|
||||
// boolean isInsertion = ( firstAl.getType() == Allele.AlleleType.INSERTION || secondAl.getType() == Allele.AlleleType.INSERTION );
|
||||
// boolean isDeletion = ( firstAl.getType() == Allele.AlleleType.DELETION || secondAl.getType() == Allele.AlleleType.DELETION );
|
||||
// Assert.assertEquals("That the indel file has the correct alleles for genotype "+al,expecAlleles.get(al), alStr);
|
||||
// Assert.assertEquals("That the insertion property of genotype "+al+" is correct",expecInsertion.get(al),isInsertion);
|
||||
// Assert.assertEquals("That the deletion property of genotype "+al+" is correct", expecDeletion.get(al), isDeletion);
|
||||
// al++;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
|
|
|||
Loading…
Reference in New Issue