V3 of VariantContext. Support for Genotypes and NO_CALL alleles. QUAL fields fully implemented. Can parse VCF records and dbSNP. More complete validation. Detailed testing routines for VariantContext and Allele.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2718 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-01-28 04:10:16 +00:00
parent 4642a1ae59
commit f6bca7873c
9 changed files with 637 additions and 244 deletions

View File

@ -64,29 +64,66 @@ import java.util.Arrays;
* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion
*/
public class Allele {
private static final byte[] NULL_ALLELE_BASES = new byte[0];
private static final byte[] EMPTY_ALLELE_BASES = new byte[0];
// private static final byte[] NULL_ALLELE_BASES = new byte[0];
// private static final byte[] NO_CALL_ALLELE_BASES = ".".getBytes();
private boolean isRef = false;
private boolean isNull = false;
private boolean isNoCall = false;
private byte[] bases = null;
public final static Allele NO_CALL = new Allele(".");
public Allele(byte[] bases, boolean isRef) {
if ( bases == null )
throw new IllegalArgumentException("Constructor: the Allele base string cannot be null; use new Allele() or new Allele(\"\") to create a Null allele");
// standardize our representation of null allele and bases
if ( (bases.length == 1 && bases[0] == '-') || bases.length == 0)
bases = NULL_ALLELE_BASES;
if ( wouldBeNullAllele(bases) ) {
bases = EMPTY_ALLELE_BASES;
isNull = true;
}
if ( wouldBeNoCallAllele(bases) ) {
bases = EMPTY_ALLELE_BASES;
isNoCall = true;
if ( isRef ) throw new IllegalArgumentException("Cannot tag a NoCall allele as the reference allele");
}
else
bases = new String(bases).toUpperCase().getBytes(); // todo -- slow performance
this.isRef = isRef;
this.bases = bases;
if ( ! acceptableAlleleBases(bases) )
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
}
public final static boolean wouldBeNullAllele(byte[] bases) {
return (bases.length == 1 && bases[0] == '-') || bases.length == 0;
}
public final static boolean wouldBeNoCallAllele(byte[] bases) {
return bases.length == 1 && bases[0] == '.';
}
public final static boolean acceptableAlleleBases(String bases) {
return acceptableAlleleBases(bases.getBytes());
}
public final static boolean acceptableAlleleBases(byte[] bases) {
if ( (bases.length == 1 && bases[0] == '-') || bases.length == 0)
return true;
for ( byte b : bases ) {
if ( ! BaseUtils.isRegularBase(b) ) {
throw new IllegalArgumentException("Unexpected base in allele bases " + new String(bases));
return false;
}
}
return true;
}
/** null allele creation method */
@ -107,14 +144,17 @@ public class Allele {
// accessor routines
//
//
public boolean isNullAllele() { return length() == 0; }
public boolean isNonNullAllele() { return ! isNullAllele(); }
public boolean isNull() { return isNull; }
public boolean isNonNull() { return ! isNull(); }
public boolean isNoCall() { return isNoCall; }
public boolean isCalled() { return ! isNoCall(); }
public boolean isReference() { return isRef; }
public boolean isNonReference() { return ! isReference(); }
public String toString() {
return isNullAllele() ? "-" : new String(getBases()) + ( isReference() ? "*" : "");
return (isNull() ? "-" : ( isNoCall() ? "." : new String(getBases()))) + (isReference() ? "*" : "");
}
/**
@ -131,7 +171,7 @@ public class Allele {
* @return true if these alleles are equal
*/
public boolean equals(Allele other) {
return isRef == other.isRef && this.basesMatch(other.getBases());
return isRef == other.isRef && isNull == other.isNull && isNoCall == other.isNoCall && this.basesMatch(other.getBases());
}
// todo -- notice case insensitivity

View File

@ -7,23 +7,58 @@ import java.util.*;
/**
* @author ebanks
* @author depristo
* <p/>
* Class VariantContext
* Class AttributedObject
* <p/>
* This class represents a context that unifies one or more variants
* Common functions in VariantContext
*/
public class AttributedObject {
public static final double NO_NEG_LOG_10PERROR = 0.0;
private double negLog10PError = NO_NEG_LOG_10PERROR;
private Map<Object, Object> attributes = new HashMap<Object, Object>();
public AttributedObject() {
;
public AttributedObject() { }
public AttributedObject(double negLog10PError) {
setNegLog10PError(negLog10PError);
}
public AttributedObject(Map<Object, Object> attributes) {
public AttributedObject(Map<? extends Object, ? extends Object> attributes) {
setAttributes(attributes);
}
public AttributedObject(Map<? extends Object, ? extends Object> attributes, double negLog10PError) {
this(attributes);
setNegLog10PError(negLog10PError);
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with log error rates
//
// ---------------------------------------------------------------------------------------------------------
public boolean hasNegLog10PError() {
return getNegLog10PError() == NO_NEG_LOG_10PERROR;
}
/**
* @return the -1 * log10-based error estimate
*/
public double getNegLog10PError() { return negLog10PError; }
public void setNegLog10PError(double negLog10PError) {
if ( negLog10PError < 0 && negLog10PError != NO_NEG_LOG_10PERROR ) throw new IllegalArgumentException("BUG: negLog10PError cannot be < than 0 : " + negLog10PError);
if ( Double.isInfinite(negLog10PError) ) throw new IllegalArgumentException("BUG: negLog10PError should not be Infinity");
if ( Double.isNaN(negLog10PError) ) throw new IllegalArgumentException("BUG: negLog10PError should not be NaN");
this.negLog10PError = negLog10PError;
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with attributes
@ -42,9 +77,9 @@ public class AttributedObject {
// todo -- define common attributes as enum
public void setAttributes(Map<? extends Object, Object> map) {
public void setAttributes(Map<? extends Object, ? extends Object> map) {
this.attributes.clear();
putAttributes(attributes);
putAttributes(map);
}
public void putAttribute(Object key, Object value) {
@ -62,9 +97,11 @@ public class AttributedObject {
this.attributes.remove(key);
}
public void putAttributes(Map<? extends Object, Object> map) {
for ( Map.Entry<Object, Object> elt : attributes.entrySet() ) {
putAttribute(elt.getKey(), elt.getValue());
public void putAttributes(Map<? extends Object, ? extends Object> map) {
if ( map != null ) {
for ( Map.Entry<? extends Object, ? extends Object> elt : map.entrySet() ) {
putAttribute(elt.getKey(), elt.getValue());
}
}
}

View File

@ -10,106 +10,153 @@ import java.util.*;
* This class emcompasses all the basic information about a genotype
*/
public class Genotype extends AttributedObject {
private List<Allele> alleles;
private List<Allele> alleles = new ArrayList<Allele>();
private String sampleName = null;
private double negLog10PError;
public Genotype(VariantContext vc, List<String> alleles, String sampleName, double negLog10PError) {
this(resolveAlleles(vc, alleles), sampleName, negLog10PError);
}
private String sample;
public Genotype(List<Allele> alleles, String sample, double negLog10PError) {
this.alleles = new ArrayList<Allele>(alleles);
this.sample = sample;
this.negLog10PError = negLog10PError;
public Genotype(List<Allele> alleles, String sampleName, double negLog10PError) {
setAlleles(alleles);
setSampleName(sampleName);
setNegLog10PError(negLog10PError);
}
/**
* @return the alleles for this genotype
*/
public List<Allele> getAlleles() { return alleles; }
public List<Allele> getAlleles() {
return alleles;
}
public List<Allele> getAlleles(Allele allele) {
List<Allele> al = new ArrayList<Allele>();
for ( Allele a : alleles )
if ( a.equals(allele) )
al.add(a);
return al;
}
/**
* @return the ploidy of this genotype
*/
public int getPloidy() { return alleles.size(); }
public enum Type {
NO_CALL,
HOM_REF,
HET,
HOM_VAR
}
public Type getType() {
Allele firstAllele = alleles.get(0);
if ( firstAllele.isNoCall() ) {
return Type.NO_CALL;
}
for (Allele a : alleles) {
if ( ! firstAllele.equals(a) )
return Type.HET;
}
return firstAllele.isReference() ? Type.HOM_REF : Type.HOM_VAR;
}
/**
* @return true if all observed alleles are the same (regardless of whether they are ref or alt)
*/
public boolean isHom() {
// do not assume ploidy == 2
if ( alleles.size() == 0 )
return true;
Allele first = alleles.get(0);
for ( int i = 1; i < alleles.size(); i++ ) {
if ( !first.equals(alleles.get(i)) )
return false;
}
return true;
}
public boolean isHom() { return getType() == Type.HOM_REF || getType() == Type.HOM_VAR; }
/**
* @return true if we're het (observed alleles differ)
*/
public boolean isHet() { return !isHom(); }
public boolean isHet() { return getType() == Type.HET; }
/**
* @return true if this genotype is not actually a genotype but a "no call" (e.g. './.' in VCF)
*/
public boolean isNoCall() {
// TODO -- implement me
return false;
}
public boolean isNoCall() { return getType() == Type.NO_CALL; }
/**
* @return true if all alleles for this genotype are SNPs or reference
*/
// public boolean isPointGenotype() {
//// for ( Allele allele : alleles ) {
//// if ( allele.isVariant() && !allele.isSNP() )
//// return false;
//// }
// return true;
// /**
// * @return true if this is a variant genotype, false if it's reference
// */
// public boolean isVariant() {
// for ( Allele allele : alleles ) {
// if ( allele.isNonReference() )
// return true;
// }
// return false;
// }
/**
* @return true if this is a variant genotype, false if it's reference
*/
public boolean isVariant() {
for ( Allele allele : alleles ) {
if ( allele.isNonReference() )
return true;
}
return false;
}
/**
* @return the -1 * log10-based error estimate
*/
public double getNegLog10PError() { return negLog10PError; }
/**
* @return the sample name
*/
public String getSample() { return sample; }
public String getSampleName() {
return sampleName;
}
/**
* Sets the sample name
*
* @param sample the sample name
* @param sampleName the sample name
*/
public void setSample(String sample) {
this.sample = sample;
public void setSampleName(String sampleName) {
if ( sampleName == null ) throw new IllegalArgumentException("Sample name cannot be null " + this);
this.sampleName = sampleName;
}
/**
* @param ref the reference base
*
* @return this genotype as a variation
* @param alleles
*/
// TODO -- implement me
// public Variation toVariation(char ref);
public void setAlleles(List<Allele> alleles) {
this.alleles.clear();
// todo -- add validation checking here
if ( alleles == null ) throw new IllegalArgumentException("BUG: alleles cannot be null in setAlleles");
if ( alleles.size() == 0) throw new IllegalArgumentException("BUG: alleles cannot be of size 0 in setAlleles");
int nNoCalls = 0;
for ( Allele allele : alleles ) { nNoCalls += allele.isNoCall() ? 1 : 0; }
if ( nNoCalls > 0 && nNoCalls != alleles.size() )
throw new IllegalArgumentException("BUG: alleles include some No Calls and some Calls, an illegal state " + this);
for ( Allele allele : alleles ) {
addAllele(allele);
}
}
public void addAllele(Allele allele) {
if ( allele == null ) throw new IllegalArgumentException("BUG: Cannot add a null allele to a genotype");
this.alleles.add(allele);
}
private static List<Allele> resolveAlleles(VariantContext vc, List<String> alleleStrings) {
List<Allele> alleles = new ArrayList<Allele>();
for ( String alleleString : alleleStrings ) {
Allele allele = vc.getAllele(alleleString);
if ( allele == null ) {
if ( Allele.wouldBeNoCallAllele(alleleString.getBytes()) ) {
allele = new Allele(alleleString);
} else {
throw new IllegalArgumentException("Allele " + alleleString + " not present in the list of alleles in VariantContext " + vc);
}
}
alleles.add(allele);
}
return alleles;
}
public String toString() {
return String.format("[GT: %s %s %s Q%.2f %s]", getSampleName(), getAlleles(), getType(), getNegLog10PError(), getAttributes());
}
}

View File

@ -2,10 +2,7 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RODRecordList;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.*;
@ -15,20 +12,35 @@ import org.broadinstitute.sting.utils.*;
public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
GenomeLoc cur = context.getLocation();
if ( ref == null )
return 0;
else {
RODRecordList<ReferenceOrderedDatum> dbsnpList = tracker.getTrackData("dbsnp", null);
if (dbsnpList == null)
return 0;
else {
if (dbsnpList != null) {
// do dbSNP conversion
int n = 0;
for (ReferenceOrderedDatum d : dbsnpList) {
rodDbSNP dbsnpRecord = (rodDbSNP)d;
VariantContext vc = VariantContextAdaptors.dbsnp2VariantContext(dbsnpRecord);
if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) {
VariantContext vc = VariantContextAdaptors.dbsnp2VariantContext(dbsnpRecord);
if ( vc != null ) {
n++;
System.out.printf("%s%n", vc);
}
}
}
return n;
}
RODRecordList<ReferenceOrderedDatum> vcfList = tracker.getTrackData("vcf", null);
if (vcfList != null) {
// do vcf conversion
int n = 0;
for (ReferenceOrderedDatum d : vcfList) {
RodVCF vcfRecord = (RodVCF)d;
VariantContext vc = VariantContextAdaptors.vcf2VariantContext(vcfRecord);
if ( vc != null ) {
n++;
System.out.printf("%s%n", vc);
@ -37,6 +49,8 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
return n;
}
return 0;
}
}

View File

@ -2,10 +2,8 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.gatk.refdata.*;
import java.util.*;
import org.apache.commons.jexl.*;
/**
@ -22,26 +20,9 @@ public class VariantContext extends AttributedObject {
private Map<String, Genotype> genotypes = new HashMap<String, Genotype>();
Type type = null;
Type type = Type.UNDETERMINED;
// todo -- add QUAL and FILTER
//private double negLog10PError = 0.0; // todo - fixme
// public VariantContext(VariationRod rod) {
//
// // TODO -- VariationRod should eventually require classes to implement toVariationContext()
// // TODO -- (instead of using a temporary adapter class)
//
// loc = rod.getLocation();
// reference = new Allele(Allele.AlleleType.REFERENCE, rod.getReference());
//
// // TODO -- populate the alleles and genotypes through an adapter
// alleles = new HashSet<Allele>();
// genotypes = new HashSet<Genotype>();
//
// attributes = new HashMap<Object, Object>();
// }
// todo -- add FILTER
// ---------------------------------------------------------------------------------------------------------
//
@ -166,7 +147,10 @@ public class VariantContext extends AttributedObject {
NO_VARIATION,
SNP,
INDEL,
MIXED
MIXED,
// special types
UNDETERMINED // do not use this value, it is reserved for the VariantContext itself
}
/**
@ -175,7 +159,7 @@ public class VariantContext extends AttributedObject {
* @return the AlleleType of this allele
**/
public Type getType() {
if ( type == null )
if ( type == Type.UNDETERMINED )
determineType();
return type;
@ -202,9 +186,19 @@ public class VariantContext extends AttributedObject {
*/
public boolean isIndel() { return getType() == Type.INDEL; }
// todo -- implement, looking at reference allele
//public boolean isInsertion() { return getType() == Type.INDEL; }
//public boolean isDeletion() { return getType() == Type.INDEL; }
/**
* @return true if the alleles indicate a simple insertion (i.e., the reference allele is Null)
*/
public boolean isInsertion() {
return getType() == Type.INDEL && getReference().isNull();
}
/**
* @return true if the alleles indicate a simple deletion (i.e., a single alt allele that is Null)
*/
public boolean isDeletion() {
return getType() == Type.INDEL && ! isInsertion();
}
/**
* convenience method for indels
@ -248,7 +242,6 @@ public class VariantContext extends AttributedObject {
return null;
}
/**
* @return true if the context is strictly bi-allelic
*/
@ -260,6 +253,48 @@ public class VariantContext extends AttributedObject {
return alleles.size();
}
public Allele getAllele(String allele) {
return getAllele(allele.getBytes());
}
public Allele getAllele(byte[] allele) {
for ( Allele a : getAlleles() ) {
if ( a.basesMatch(allele) ) {
return a;
}
}
return null; // couldn't find anything
}
public boolean hasAllele(Allele allele) {
for ( Allele a : getAlleles() ) {
if ( a.equals(allele) )
return true;
}
return false;
}
// public int getMaxAlleleLength() {
// int max = 0;
//
// for ( Allele a : getAlleles() ) {
// max = a.length() > max ? a.length() : max;
// }
//
// return max;
// }
//
// public int getMinAlleleLength() {
// int min = Integer.MAX_VALUE;
//
// for ( Allele a : getAlleles() ) {
// min = a.length() < min ? a.length() : min;
// }
//
// return min;
// }
/**
* Gets the alleles. This method should return all of the alleles present at the location,
@ -287,15 +322,15 @@ public class VariantContext extends AttributedObject {
return altAlleles;
}
public Allele getAlternateAllele(int count) {
public Allele getAlternateAllele(int i) {
int n = 0;
for ( Allele allele : alleles ) {
if ( allele.isNonReference() && n++ == count )
if ( allele.isNonReference() && n++ == i )
return allele;
}
throw new IllegalArgumentException("Requested " + count + " alternative allele but there are only " + n + " alternative alleles " + this);
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
}
@ -310,6 +345,8 @@ public class VariantContext extends AttributedObject {
}
public void addAllele(Allele allele, boolean allowDuplicates) {
type = Type.UNDETERMINED;
for ( Allele a : alleles ) {
if ( a.basesMatch(allele) && ! allowDuplicates )
throw new IllegalArgumentException("Duplicate allele added to VariantContext" + this);
@ -349,8 +386,13 @@ public class VariantContext extends AttributedObject {
* @return
*/
public int getChromosomeCount() {
// todo -- return the number of ! no_call alleles
return 0;
int n = 0;
for ( Genotype g : getGenotypes().values() ) {
n += g.isNoCall() ? 0 : g.getPloidy();
}
return n;
}
/**
@ -360,8 +402,13 @@ public class VariantContext extends AttributedObject {
* @return
*/
public int getChromosomeCount(Allele a) {
// todo -- walk through genotypes and count genotypes with allele
return 0;
int n = 0;
for ( Genotype g : getGenotypes().values() ) {
n += g.getAlleles(a).size();
}
return n;
}
/**
@ -400,7 +447,7 @@ public class VariantContext extends AttributedObject {
this.genotypes.clear();
for ( Genotype g : genotypes ) {
addGenotype(g.getSample(), g);
addGenotype(g.getSampleName(), g);
}
}
@ -412,11 +459,21 @@ public class VariantContext extends AttributedObject {
}
}
public void addGenotype(Genotype genotype) {
addGenotype(genotype.getSample(), genotype, false);
public void addGenotypes(Map<String, Genotype> genotypes) {
for ( Map.Entry<String, Genotype> g : genotypes.entrySet() )
addGenotype(g.getKey(), g.getValue());
}
public void addGenotypes(Collection<Genotype> genotypes) {
for ( Genotype g : genotypes )
addGenotype(g);
}
public void addGenotype(Genotype genotype) {
addGenotype(genotype.getSampleName(), genotype, false);
}
public void addGenotype(String sampleName, Genotype genotype) {
addGenotype(sampleName, genotype, false);
}
@ -425,29 +482,23 @@ public class VariantContext extends AttributedObject {
if ( hasGenotype(sampleName) && ! allowOverwrites )
throw new StingException("Attempting to overwrite sample->genotype binding: " + sampleName + " this=" + this);
if ( ! sampleName.equals(genotype.getSample()) )
if ( ! sampleName.equals(genotype.getSampleName()) )
throw new StingException("Sample name doesn't equal genotype.getSample(): " + sampleName + " genotype=" + genotype);
this.genotypes.put(sampleName, genotype);
}
public void removeGenotype(String sampleName) {
if ( ! this.genotypes.containsKey(sampleName) )
throw new IllegalArgumentException("Sample name isn't contained in genotypes " + sampleName + " genotypes =" + genotypes);
this.genotypes.remove(sampleName);
}
public void removeGenotype(Genotype genotype) {
removeGenotype(genotype.getSample());
removeGenotype(genotype.getSampleName());
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with attributes
//
// ---------------------------------------------------------------------------------------------------------
// todo -- define common attributes as enum
// ---------------------------------------------------------------------------------------------------------
//
// validation
@ -464,27 +515,9 @@ public class VariantContext extends AttributedObject {
}
public boolean validate(boolean throwException) {
// todo -- add extensive testing here
// todo -- check that exactly one allele is tagged as reference
// todo -- check that there's only one null allele
try {
// check alleles
boolean alreadySeenRef = false, alreadySeenNull = false;
for ( Allele allele : alleles ) {
if ( allele.isReference() ) {
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
alreadySeenRef = true;
}
if ( allele.isNullAllele() ) {
if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
alreadySeenNull = true;
}
}
if ( ! alreadySeenRef )
throw new IllegalArgumentException("No reference allele found in VariantContext");
validateAlleles();
validateGenotypes();
} catch ( IllegalArgumentException e ) {
if ( throwException )
throw e;
@ -495,6 +528,58 @@ public class VariantContext extends AttributedObject {
return true;
}
private void validateAlleles() {
// check alleles
boolean alreadySeenRef = false, alreadySeenNull = false;
for ( Allele allele : alleles ) {
// make sure there's only one reference allele
if ( allele.isReference() ) {
if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this);
alreadySeenRef = true;
}
if ( allele.isNoCall() ) {
throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this);
}
// make sure there's only one null allele
if ( allele.isNull() ) {
if ( alreadySeenNull ) throw new IllegalArgumentException("BUG: Received two null alleles in VariantContext " + alleles + " this=" + this);
alreadySeenNull = true;
}
}
// make sure there's one reference allele
if ( ! alreadySeenRef )
throw new IllegalArgumentException("No reference allele found in VariantContext");
if ( getType() == Type.INDEL ) {
// if ( getReference().length() != (getLocation().size()-1) ) {
if ( (getReference().isNull() && getLocation().size() != 1 ) ||
(getReference().isNonNull() && getReference().length() != getLocation().size()) ) {
throw new IllegalStateException("BUG: GenomeLoc " + getLocation() + " has a size == " + getLocation().size() + " but the variation reference allele has length " + getReference().length() + " this = " + this);
}
}
}
private void validateGenotypes() {
if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null");
for ( Map.Entry<String, Genotype> elt : this.genotypes.entrySet() ) {
String name = elt.getKey();
Genotype g = elt.getValue();
if ( ! name.equals(g.getSampleName()) ) throw new IllegalStateException("Bound sample name " + name + " does not equal the name of the genotype " + g.getSampleName());
for ( Allele gAllele : g.getAlleles() ) {
if ( ! hasAllele(gAllele) && gAllele.isCalled() )
throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles);
}
}
}
// ---------------------------------------------------------------------------------------------------------
//
// utility routines
@ -502,7 +587,7 @@ public class VariantContext extends AttributedObject {
// ---------------------------------------------------------------------------------------------------------
private void determineType() {
if ( type == null ) {
if ( type == Type.UNDETERMINED ) {
if ( alleles.size() == 0 ) {
throw new StingException("Unexpected requested type of VariantContext with no alleles!" + this);
} else if ( alleles.size() == 1 ) {
@ -540,53 +625,30 @@ public class VariantContext extends AttributedObject {
Allele a2 = it.next();
return a1.length() != a2.length();
}
public String toString() {
return String.format("[VC @ %s of type=%s alleles=%s attr=%s GT=%s",
getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes());
getLocation(), this.getType(), this.getAlleles(), this.getAttributes(), this.getGenotypes().values());
}
/**
* @return true if the context represents point alleles only (i.e. no indels or structural variants)
*/
// public boolean isPointAllele() {
// for ( Allele allele : alleles ) {
// if ( allele.isVariant() && !allele.isSNP() )
// return false;
// }
// return true;
// }
//
// /**
// * @return set of all subclasses within this context
// */
// public Set<Object> getSubclasses() {
// Set<Object> subclasses = new HashSet<Object>();
// for ( Genotype g : genotypes )
// subclasses.addAll(g.getAttributes().keySet());
// return subclasses;
// }
// todo -- move to utils
/**
* @param allele the allele to be queried
*
* @return the frequency of the given allele in this context
*/
public double getAlleleFrequency(Allele allele) {
int alleleCount = 0;
int totalCount = 0;
for ( Genotype g : getGenotypes().values() ) {
for ( Allele a : g.getAlleles() ) {
totalCount++;
if ( allele.equals(a) )
alleleCount++;
}
}
return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
}
// public double getAlleleFrequency(Allele allele) {
// int alleleCount = 0;
// int totalCount = 0;
//
// for ( Genotype g : getGenotypes().values() ) {
// for ( Allele a : g.getAlleles() ) {
// totalCount++;
// if ( allele.equals(a) )
// alleleCount++;
// }
// }
//
// return totalCount == 0 ? 0.0 : (double)alleleCount / (double)totalCount;
// }
}

View File

@ -1,24 +1,86 @@
package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.*;
public class VariantContextAdaptors {
public static VariantContext dbsnp2VariantContext(rodDbSNP dbsnp) {
VariantContext vc = new VariantContext(dbsnp.getLocation());
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
VariantContext vc = new VariantContext(dbsnp.getLocation());
// add the reference allele
Allele refAllele = new Allele(dbsnp.getReference(), true);
vc.addAllele(refAllele);
// add the reference allele
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) {
System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
// add all of the alt alleles
for ( String alt : dbsnp.getAlternateAlleleList() )
vc.addAllele(new Allele(alt, false));
Allele refAllele = new Allele(dbsnp.getReference(), true);
vc.addAllele(refAllele);
return vc;
// add all of the alt alleles
for ( String alt : dbsnp.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
vc.addAllele(new Allele(alt, false));
}
vc.validate();
return vc;
} else
return null; // can't handle anything else
}
public static VariantContext vcf2VariantContext(RodVCF vcf) {
if ( vcf.isSNP() || vcf.isIndel() ) {
VariantContext vc = new VariantContext(vcf.getLocation());
// add the reference allele
if ( ! Allele.acceptableAlleleBases(vcf.getReference()) ) {
System.out.printf("Excluding vcf record %s%n", vcf);
return null;
}
Allele refAllele = new Allele(vcf.getReference(), true);
vc.addAllele(refAllele);
vc.setNegLog10PError(vcf.getNegLog10PError());
vc.setAttributes(vcf.getInfoValues());
vc.putAttribute("ID", vcf.getID());
vc.putAttribute("FILTER", vcf.getFilterString());
// add all of the alt alleles
for ( String alt : vcf.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
System.out.printf("Excluding vcf record %s%n", vcf);
return null;
}
vc.addAllele(new Allele(alt, false));
}
for ( VCFGenotypeRecord vcfG : vcf.getVCFGenotypeRecords() ) {
List<String> alleleStrings = new ArrayList<String>();
for ( VCFGenotypeEncoding s : vcfG.getAlleles() )
alleleStrings.add(s.getBases());
double pError = vcfG.getNegLog10PError() == VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY ? AttributedObject.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError();
Genotype g = new Genotype(vc, alleleStrings, vcfG.getSampleName(), pError);
for ( Map.Entry<String, String> e : vcfG.getFields().entrySet() ) {
if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) )
g.putAttribute(e.getKey(), e.getValue());
}
vc.addGenotype(g);
}
vc.validate();
return vc;
} else
return null; // can't handle anything else
}
}

View File

@ -35,9 +35,9 @@ public class VariantContextUtils {
Set<Genotype> Gs = new HashSet<Genotype>(left.getGenotypes().values());
for ( Genotype g : other.getGenotypes().values() ) {
if ( samples.contains(g.getSample()) ) {
if ( samples.contains(g.getSampleName()) ) {
if ( uniquifySamples )
g.setSample(g.getSample() + UNIQUIFIED_SUFFIX);
g.setSampleName(g.getSampleName() + UNIQUIFIED_SUFFIX);
else
throw new IllegalStateException("The same sample name exists in both contexts when attempting to merge");
}

View File

@ -27,7 +27,7 @@ import org.junit.Test;
* Basic unit test for RecalData
*/
public class AlleleTest extends BaseTest {
Allele ARef, del, delRef, A, T, ATIns, ATCIns;
Allele ARef, del, delRef, A, T, ATIns, ATCIns, NoCall;
@Before
public void before() {
@ -40,6 +40,8 @@ public class AlleleTest extends BaseTest {
ATIns = new Allele("AT");
ATCIns = new Allele("ATC");
NoCall = new Allele(".");
}
@Test
@ -50,8 +52,8 @@ public class AlleleTest extends BaseTest {
Assert.assertFalse(A.isReference());
Assert.assertTrue(A.basesMatch("A"));
Assert.assertEquals(A.length(), 1);
Assert.assertTrue(A.isNonNullAllele());
Assert.assertFalse(A.isNullAllele());
Assert.assertTrue(A.isNonNull());
Assert.assertFalse(A.isNull());
Assert.assertTrue(ARef.isReference());
Assert.assertFalse(ARef.isNonReference());
@ -64,6 +66,19 @@ public class AlleleTest extends BaseTest {
Assert.assertFalse(T.basesMatch("A"));
}
@Test
public void testCreatingNoCallAlleles() {
logger.warn("testCreatingNoCallAlleles");
Assert.assertTrue(NoCall.isNonReference());
Assert.assertFalse(NoCall.isReference());
Assert.assertFalse(NoCall.basesMatch("."));
Assert.assertEquals(NoCall.length(), 0);
Assert.assertTrue(NoCall.isNonNull());
Assert.assertFalse(NoCall.isNull());
}
@Test
public void testCreatingIndelAlleles() {
logger.warn("testCreatingIndelAlleles");
@ -80,8 +95,8 @@ public class AlleleTest extends BaseTest {
Assert.assertFalse(del.basesMatch("-"));
Assert.assertTrue(del.basesMatch(""));
Assert.assertEquals(del.length(), 0);
Assert.assertFalse(del.isNonNullAllele());
Assert.assertTrue(del.isNullAllele());
Assert.assertFalse(del.isNonNull());
Assert.assertTrue(del.isNull());
}

View File

@ -13,9 +13,7 @@ import org.junit.Before;
import org.junit.Test;
import org.junit.BeforeClass;
import java.util.Map;
import java.util.Arrays;
import java.util.Set;
import java.util.List;
import java.io.FileNotFoundException;
import java.io.File;
@ -42,13 +40,13 @@ public class VariantContextTest extends BaseTest {
GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 11);
// - / ATC [ref] from 20-23
GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 23);
GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);
// - [ref] / ATC from 20-20
GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20);
// - / A / T / ATC [ref] from 20-23
GenomeLoc mixedLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 23);
GenomeLoc mixedLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);
@Before
public void before() {
@ -64,8 +62,6 @@ public class VariantContextTest extends BaseTest {
ATCref = new Allele("ATC", true);
}
// todo -- create reference context
@Test
public void testCreatingSNPVariantContext() {
logger.warn("testCreatingSNPVariantContext");
@ -78,8 +74,8 @@ public class VariantContextTest extends BaseTest {
Assert.assertEquals(vc.getType(), VariantContext.Type.SNP);
Assert.assertTrue(vc.isSNP());
Assert.assertFalse(vc.isIndel());
//Assert.assertFalse(vc.isInsertion());
//Assert.assertFalse(vc.isDeletion());
Assert.assertFalse(vc.isInsertion());
Assert.assertFalse(vc.isDeletion());
Assert.assertFalse(vc.isMixed());
Assert.assertTrue(vc.isBiallelic());
Assert.assertEquals(vc.getNAlleles(), 2);
@ -106,8 +102,8 @@ public class VariantContextTest extends BaseTest {
Assert.assertEquals(vc.getType(), VariantContext.Type.NO_VARIATION);
Assert.assertFalse(vc.isSNP());
Assert.assertFalse(vc.isIndel());
//Assert.assertFalse(vc.isInsertion());
//Assert.assertFalse(vc.isDeletion());
Assert.assertFalse(vc.isInsertion());
Assert.assertFalse(vc.isDeletion());
Assert.assertFalse(vc.isMixed());
Assert.assertFalse(vc.isBiallelic());
Assert.assertEquals(vc.getNAlleles(), 1);
@ -133,8 +129,8 @@ public class VariantContextTest extends BaseTest {
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
Assert.assertFalse(vc.isSNP());
Assert.assertTrue(vc.isIndel());
//Assert.assertFalse(vc.isInsertion());
//Assert.assertFalse(vc.isDeletion());
Assert.assertFalse(vc.isInsertion());
Assert.assertTrue(vc.isDeletion());
Assert.assertFalse(vc.isMixed());
Assert.assertTrue(vc.isBiallelic());
Assert.assertEquals(vc.getNAlleles(), 2);
@ -161,8 +157,8 @@ public class VariantContextTest extends BaseTest {
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
Assert.assertFalse(vc.isSNP());
Assert.assertTrue(vc.isIndel());
//Assert.assertFalse(vc.isInsertion());
//Assert.assertFalse(vc.isDeletion());
Assert.assertTrue(vc.isInsertion());
Assert.assertFalse(vc.isDeletion());
Assert.assertFalse(vc.isMixed());
Assert.assertTrue(vc.isBiallelic());
Assert.assertEquals(vc.getNAlleles(), 2);
@ -206,8 +202,140 @@ public class VariantContextTest extends BaseTest {
logger.warn("testBadConstructorArgsDuplicateAlleles2");
new VariantContext(insLoc, Arrays.asList(Aref, A));
}
@Test
public void testAccessingSimpleSNPGenotypes() {
logger.warn("testAccessingSimpleSNPGenotypes");
List<Allele> alleles = Arrays.asList(Aref, T);
VariantContext vc = new VariantContext(snpLoc, alleles);
logger.warn("vc = " + vc);
Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "AA", 10);
Genotype g2 = new Genotype(Arrays.asList(Aref, T), "AT", 10);
Genotype g3 = new Genotype(Arrays.asList(T, T), "TT", 10);
vc.addGenotypes(Arrays.asList(g1, g2, g3));
Assert.assertTrue(vc.hasGenotypes());
Assert.assertFalse(vc.isMonomorphic());
Assert.assertTrue(vc.isPolymorphic());
Assert.assertEquals(vc.getSampleNames().size(), 3);
Assert.assertEquals(vc.getGenotypes().size(), 3);
Assert.assertEquals(vc.getGenotypes().get("AA"), g1);
Assert.assertEquals(vc.getGenotype("AA"), g1);
Assert.assertEquals(vc.getGenotypes().get("AT"), g2);
Assert.assertEquals(vc.getGenotype("AT"), g2);
Assert.assertEquals(vc.getGenotypes().get("TT"), g3);
Assert.assertEquals(vc.getGenotype("TT"), g3);
Assert.assertTrue(vc.hasGenotype("AA"));
Assert.assertTrue(vc.hasGenotype("AT"));
Assert.assertTrue(vc.hasGenotype("TT"));
Assert.assertFalse(vc.hasGenotype("foo"));
Assert.assertFalse(vc.hasGenotype("TTT"));
Assert.assertFalse(vc.hasGenotype("at"));
Assert.assertFalse(vc.hasGenotype("tt"));
Assert.assertEquals(vc.getChromosomeCount(), 6);
Assert.assertEquals(vc.getChromosomeCount(Aref), 3);
Assert.assertEquals(vc.getChromosomeCount(T), 3);
}
@Test
public void testAccessingCompleteGenotypes() {
logger.warn("testAccessingCompleteGenotypes");
List<Allele> alleles = Arrays.asList(Aref, T, del);
VariantContext vc = new VariantContext(delLoc, alleles);
logger.warn("vc = " + vc);
Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "AA", 10);
Genotype g2 = new Genotype(Arrays.asList(Aref, T), "AT", 10);
Genotype g3 = new Genotype(Arrays.asList(T, T), "TT", 10);
Genotype g4 = new Genotype(Arrays.asList(T, del), "Td", 10);
Genotype g5 = new Genotype(Arrays.asList(del, del), "dd", 10);
Genotype g6 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "..", 10);
vc.addGenotypes(Arrays.asList(g1, g2, g3, g4, g5, g6));
Assert.assertTrue(vc.hasGenotypes());
Assert.assertFalse(vc.isMonomorphic());
Assert.assertTrue(vc.isPolymorphic());
Assert.assertEquals(vc.getGenotypes().size(), 6);
Assert.assertEquals(10, vc.getChromosomeCount());
Assert.assertEquals(3, vc.getChromosomeCount(Aref));
Assert.assertEquals(4, vc.getChromosomeCount(T));
Assert.assertEquals(3, vc.getChromosomeCount(del));
Assert.assertEquals(2, vc.getChromosomeCount(Allele.NO_CALL));
}
@Test
public void testAccessingRefGenotypes() {
logger.warn("testAccessingRefGenotypes");
List<Allele> alleles1 = Arrays.asList(Aref, T);
List<Allele> alleles2 = Arrays.asList(Aref);
List<Allele> alleles3 = Arrays.asList(Aref, T, del);
for ( List<Allele> alleles : Arrays.asList(alleles1, alleles2, alleles3)) {
VariantContext vc = new VariantContext(snpLoc, alleles);
logger.warn("vc = " + vc);
Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "AA1", 10);
Genotype g2 = new Genotype(Arrays.asList(Aref, Aref), "AA2", 10);
Genotype g3 = new Genotype(Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), "..", 10);
vc.addGenotypes(Arrays.asList(g1, g2, g3));
Assert.assertTrue(vc.hasGenotypes());
Assert.assertTrue(vc.isMonomorphic());
Assert.assertFalse(vc.isPolymorphic());
Assert.assertEquals(vc.getGenotypes().size(), 3);
Assert.assertEquals(4, vc.getChromosomeCount());
Assert.assertEquals(4, vc.getChromosomeCount(Aref));
Assert.assertEquals(0, vc.getChromosomeCount(T));
Assert.assertEquals(2, vc.getChromosomeCount(Allele.NO_CALL));
}
}
@Test
public void testManipulatingAlleles() {
logger.warn("testManipulatingAlleles");
// todo -- add tests that call add/set/remove
}
@Test
public void testManipulatingGenotypes() {
logger.warn("testManipulatingGenotypes");
// todo -- add tests that call add/set/remove
}
}
// genotype functions
// public boolean hasGenotypes() { return genotypes.size() > 0; }
// public Map<String, Genotype> getGenotypes() { return genotypes; }
// public Set<String> getSampleNames() {
// public int getChromosomeCount() {
// public int getChromosomeCount(Allele a) {
// public boolean isMonomorphic() {
// public boolean isPolymorphic() {
// public Genotype getGenotype(String sample) {
// public boolean hasGenotype(String sample) {
// public void setGenotypes(Genotype genotype) {
// public void setGenotypes(Collection<Genotype> genotypes) {
// public void setGenotypes(Map<String, Genotype> genotypes) {
// public void addGenotype(Genotype genotype) {
// public void addGenotype(String sampleName, Genotype genotype) {
// public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
// public void removeGenotype(String sampleName) {
// public void removeGenotype(Genotype genotype) {
// all functions
// public Type getType() {
// public boolean isSNP() { return getType() == Type.SNP; }
// public boolean isVariant() { return getType() != Type.NO_VARIATION; }
@ -225,17 +353,5 @@ public class VariantContextTest extends BaseTest {
// public void setAlleles(Set<Allele> alleles) {
// public void addAllele(Allele allele) {
// public void addAllele(Allele allele, boolean allowDuplicates) {
// public boolean hasGenotypes() { return genotypes.size() > 0; }
// public Map<String, Genotype> getGenotypes() { return genotypes; }
// public Set<String> getSampleNames() {
// public Genotype getGenotype(String sample) {
// public boolean hasGenotype(String sample) {
// public void setGenotypes(Genotype genotype) {
// public void setGenotypes(Collection<Genotype> genotypes) {
// public void setGenotypes(Map<String, Genotype> genotypes) {
// public void addGenotype(Genotype genotype) {
// public void addGenotype(String sampleName, Genotype genotype) {
// public void addGenotype(String sampleName, Genotype genotype, boolean allowOverwrites) {
// public void removeGenotype(String sampleName) {
// public void removeGenotype(Genotype genotype) {
// public boolean validate() {