Documentation. Plus nicer structure to adaptors. Intermediate checkin before move into core

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2783 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-02-05 13:33:27 +00:00
parent e53432d54d
commit 69132c81aa
8 changed files with 229 additions and 103 deletions

View File

@ -206,7 +206,6 @@ public class Allele {
* @return the segregating bases
*/
public byte[] getBases() { return bases; }
// todo -- can we make this immutable?
/**
* @param other the other allele

View File

@ -18,7 +18,6 @@ public class MutableVariantContext extends VariantContext {
//
// ---------------------------------------------------------------------------------------------------------
// todo -- add remaining context constructors
/**
 * Create a new MutableVariantContext. Every argument is handed, unchanged, to the VariantContext
 * constructor that takes the same parameter list; no additional work is done here.
 *
 * @param name           name of the context
 * @param loc            genomic location of the context
 * @param alleles        alleles of the context
 * @param genotypes      genotypes of the context
 * @param negLog10PError negative log10 error value of the context
 * @param filters        filters applied to the context
 * @param attributes     attributes of the context
 */
public MutableVariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
super(name, loc, alleles, genotypes, negLog10PError, filters, attributes);
}

View File

@ -15,40 +15,41 @@ public class TestVariantContextWalker extends RodWalker<Integer, Integer> {
if ( ref == null )
return 0;
else {
RODRecordList<ReferenceOrderedDatum> dbsnpList = tracker.getTrackData("dbsnp", null);
if (dbsnpList != null) {
// do dbSNP conversion
int n = 0;
for (ReferenceOrderedDatum d : dbsnpList) {
rodDbSNP dbsnpRecord = (rodDbSNP)d;
if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) {
VariantContext vc = VariantContextAdaptors.convertToVariantContext("dbsnp", dbsnpRecord);
if ( vc != null ) {
n++;
System.out.printf("%s%n", vc);
}
}
}
return n;
}
RODRecordList<ReferenceOrderedDatum> vcfList = tracker.getTrackData("vcf", null);
if (vcfList != null) {
// do vcf conversion
int n = 0;
for (ReferenceOrderedDatum d : vcfList) {
RodVCF vcfRecord = (RodVCF)d;
VariantContext vc = VariantContextAdaptors.convertToVariantContext("vcf", vcfRecord);
if ( vc != null ) {
n++;
System.out.printf("%s%n", vc);
}
}
return n;
}
// todo -- this should just invoke the new RefMetaDataConverter, and print out all of the info
// RODRecordList<ReferenceOrderedDatum> dbsnpList = tracker.getTrackData("dbsnp", null);
//
// if (dbsnpList != null) {
// // do dbSNP conversion
// int n = 0;
// for (ReferenceOrderedDatum d : dbsnpList) {
// rodDbSNP dbsnpRecord = (rodDbSNP)d;
// if ( dbsnpRecord.getLocation().getStart() == context.getLocation().getStart() ) {
// VariantContext vc = VariantContextAdaptors.convertToVariantContext("dbsnp", dbsnpRecord);
// if ( vc != null ) {
// n++;
// System.out.printf("%s%n", vc);
// }
// }
// }
//
// return n;
// }
//
// RODRecordList<ReferenceOrderedDatum> vcfList = tracker.getTrackData("vcf", null);
// if (vcfList != null) {
// // do vcf conversion
// int n = 0;
// for (ReferenceOrderedDatum d : vcfList) {
// RodVCF vcfRecord = (RodVCF)d;
// VariantContext vc = VariantContextAdaptors.convertToVariantContext("vcf", vcfRecord);
// if ( vc != null ) {
// n++;
// System.out.printf("%s%n", vc);
// }
// }
//
// return n;
// }
return 0;
}

View File

@ -34,6 +34,9 @@ import java.util.*;
* It's also easy to create subcontexts based on selected genotypes.
*
* == Working with Variant Contexts ==
* By default, VariantContexts are immutable. In order to access (in the rare circumstances where you need them)
* setter routines, you need to create MutableVariantContexts and MutableGenotypes.
*
* === Some example data ===
*
* Allele A, Aref, T, Tref;
@ -71,19 +74,19 @@ import java.util.*;
* Here's an example of a A/T polymorphism with the A being reference:
*
* <pre>
* VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref, T));
* VariantContext vc = new VariantContext(name, snpLoc, Arrays.asList(Aref, T));
* </pre>
*
* If you want to create a non-variant site, just put in a single reference allele
*
* <pre>
* VariantContext vc = new VariantContext(snpLoc, Arrays.asList(Aref));
* VariantContext vc = new VariantContext(name, snpLoc, Arrays.asList(Aref));
* </pre>
*
* A deletion is just as easy:
*
* <pre>
* VariantContext vc = new VariantContext(delLoc, Arrays.asList(ATCref, del));
* VariantContext vc = new VariantContext(name, delLoc, Arrays.asList(ATCref, del));
* </pre>
*
* The only 2 things that distinguish an insertion from a deletion are the reference allele
@ -92,7 +95,7 @@ import java.util.*;
* a 1-bp GenomeLoc (at say 20).
*
* <pre>
* VariantContext vc = new VariantContext(insLoc, Arrays.asList(delRef, ATC));
* VariantContext vc = new VariantContext("name", insLoc, Arrays.asList(delRef, ATC));
* </pre>
*
* ==== Converting rods and other data structures to VCs ====
@ -100,7 +103,7 @@ import java.util.*;
* You can convert many common types into VariantContexts using the general function:
*
* <pre>
* VariantContextAdaptors.convertToVariantContext(myObject)
* VariantContextAdaptors.convertToVariantContext(name, myObject)
* </pre>
*
* dbSNP and VCFs, for example, can be passed in as myObject and a VariantContext corresponding to that
@ -112,12 +115,10 @@ import java.util.*;
*
* <pre>
* List<Allele> alleles = Arrays.asList(Aref, T);
* VariantContext vc = new VariantContext(snpLoc, alleles);
*
* Genotype g1 = new Genotype(Arrays.asList(Aref, Aref), "g1", 10);
* Genotype g2 = new Genotype(Arrays.asList(Aref, T), "g2", 10);
* Genotype g3 = new Genotype(Arrays.asList(T, T), "g3", 10);
* vc.addGenotypes(Arrays.asList(g1, g2, g3));
* VariantContext vc = new VariantContext(snpLoc, alleles, Arrays.asList(g1, g2, g3));
* </pre>
*
* At this point we have 3 genotypes in our context, g1-g3.
@ -200,19 +201,50 @@ public class VariantContext {
validate();
}
/**
 * Create a new VariantContext from a collection of genotypes. The genotypes are first placed into a
 * fresh HashMap via genotypeCollectionToMap(), and the result is passed on to the map-based constructor.
 *
 * @param name           name of this context (e.g. the name of the track it was derived from)
 * @param loc            genomic location of the context
 * @param alleles        alleles segregating at this site
 * @param genotypes      genotypes to include in the context
 * @param negLog10PError negative log10 error value for the context; the shorter constructors pass
 *                       InferredGeneticContext.NO_NEG_LOG_10PERROR when no value is available
 * @param filters        filters applied to this context; the shorter constructors pass null
 * @param attributes     attributes of this context; the shorter constructors pass null
 */
public VariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
this(name, loc, alleles, genotypeCollectionToMap(new HashMap<String, Genotype>(), genotypes), negLog10PError, filters, attributes);
}
/**
 * Create a new variant context without genotypes and no Perror, no filters, and no attributes.
 * Delegates to the full constructor with NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR,
 * and null for both filters and attributes.
 *
 * @param name    name of this context (e.g. the name of the track it was derived from)
 * @param loc     genomic location of the context
 * @param alleles alleles segregating at this site
 */
public VariantContext(String name, GenomeLoc loc, Collection<Allele> alleles) {
this(name, loc, alleles, NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
}
/**
 * Create a new variant context with genotypes but no Perror, no filters, and no attributes.
 * Delegates to the full constructor with InferredGeneticContext.NO_NEG_LOG_10PERROR and null for
 * both filters and attributes.
 *
 * @param name      name of this context (e.g. the name of the track it was derived from)
 * @param loc       genomic location of the context
 * @param alleles   alleles segregating at this site
 * @param genotypes genotypes to include in the context
 */
public VariantContext(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) {
this(name, loc, alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
}
// todo -- add clone method
/**
 * Copy constructor. Builds the new context from the name, location, alleles, genotypes,
 * negLog10PError, filters and attributes returned by other's getters.
 *
 * NOTE(review): whether the allele / genotype / filter / attribute collections end up copied or
 * shared with other depends on the getters and on the constructor delegated to -- confirm before
 * relying on the copy being independent.
 *
 * @param other the VariantContext to copy
 */
public VariantContext(VariantContext other) {
this(other.getName(), other.getLocation(), other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.getFilters(), other.getAttributes());
}
// ---------------------------------------------------------------------------------------------------------
//

View File

@ -2,60 +2,116 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RodVCF;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding;
import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord;
import java.util.*;
/**
* A terrible but temporary approach to converting objects to VariantContexts. If you want to add a converter,
* you need to create an adaptor object here and register a converter from your class to this object. When tribble arrives,
* we'll use a better approach.
*
* @author depristo@broadinstitute.org
*/
public class VariantContextAdaptors {
public static boolean canBeConvertedToVariantContext(String name, Object variantContainingObject) {
return convertToVariantContext(name, variantContainingObject) != null;
// --------------------------------------------------------------------------------------------------------------
//
// Generic support routines. Do not modify
//
// --------------------------------------------------------------------------------------------------------------
private static Map<Class, VCAdaptor> adaptors = new HashMap<Class, VCAdaptor>();
static {
adaptors.put(rodDbSNP.class, new RodDBSnpAdaptor());
adaptors.put(RodVCF.class, new RodVCFAdaptor());
adaptors.put(VCFRecord.class, new VCFRecordAdaptor());
}
public static VariantContext convertToVariantContext(String name, Object variantContainingObject) {
if ( variantContainingObject instanceof rodDbSNP )
return dbsnpToVariantContext(name, (rodDbSNP)variantContainingObject);
else if ( variantContainingObject instanceof RodVCF )
return vcfToVariantContext(name, ((RodVCF)variantContainingObject).getRecord());
else if ( variantContainingObject instanceof VCFRecord )
return vcfToVariantContext(name, (VCFRecord)variantContainingObject);
else
/**
 * Tells whether an adaptor is registered for the exact runtime class of the given object.
 * The lookup is keyed on getClass(), so subclasses of a registered class are not matched.
 * A true result only means an adaptor exists; the adaptor itself may still return null
 * for records it cannot represent.
 *
 * @param variantContainingObject the object to test
 * @return true if an adaptor is registered for the object's class
 */
public static boolean canBeConvertedToVariantContext(Object variantContainingObject) {
    final Class<?> objectType = variantContainingObject.getClass();
    return adaptors.containsKey(objectType);
}
/**
 * Generic superclass of all converters. One instance per supported input class is stored in the
 * adaptors map, keyed by that class.
 */
private static abstract class VCAdaptor {
/**
 * Convert input into a VariantContext.
 *
 * @param name  name to give the resulting VariantContext
 * @param input the object to convert; implementations cast it to the class they were registered for
 * @return the converted VariantContext, or null when the implementation cannot represent the input
 */
abstract VariantContext convert(String name, Object input);
}
/**
 * Convert an arbitrary object to a VariantContext using the adaptor registered for its exact
 * runtime class.
 *
 * @param name                    name to give the resulting VariantContext
 * @param variantContainingObject the object to convert; may be null
 * @return the converted VariantContext, or null if the object is null, no adaptor is registered
 *         for its class, or the adaptor itself cannot represent the object
 */
public static VariantContext toVariantContext(String name, Object variantContainingObject) {
    // a null object cannot be converted; report that the same way as any other unconvertible input
    if ( variantContainingObject == null )
        return null;

    // single lookup: the registry never stores null values, so a null result means "not registered"
    final VCAdaptor adaptor = adaptors.get(variantContainingObject.getClass());
    if ( adaptor == null )
        return null; // deliberately not an exception: callers test the result against null

    return adaptor.convert(name, variantContainingObject);
}
private static VariantContext dbsnpToVariantContext(String name, rodDbSNP dbsnp) {
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
// add the reference allele
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
// --------------------------------------------------------------------------------------------------------------
//
// From here below you can add adaptor classes for new rods (or other types) to convert to VCF
//
// --------------------------------------------------------------------------------------------------------------
List<Allele> alleles = new ArrayList<Allele>();
Allele refAllele = new Allele(dbsnp.getReference(), true);
alleles.add(refAllele);
// add all of the alt alleles
for ( String alt : dbsnp.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
// --------------------------------------------------------------------------------------------------------------
//
// dbSNP to VariantContext
//
// --------------------------------------------------------------------------------------------------------------
private static class RodDBSnpAdaptor extends VCAdaptor {
VariantContext convert(String name, Object input) {
rodDbSNP dbsnp = (rodDbSNP)input;
if ( dbsnp.isSNP() || dbsnp.isIndel() || dbsnp.varType.contains("mixed") ) {
// add the reference allele
if ( ! Allele.acceptableAlleleBases(dbsnp.getReference()) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
alleles.add(new Allele(alt, false));
}
VariantContext vc = new VariantContext(name, dbsnp.getLocation(), alleles);
vc.validate();
return vc;
} else
return null; // can't handle anything else
List<Allele> alleles = new ArrayList<Allele>();
Allele refAllele = new Allele(dbsnp.getReference(), true);
alleles.add(refAllele);
// add all of the alt alleles
for ( String alt : dbsnp.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
}
alleles.add(new Allele(alt, false));
}
VariantContext vc = new VariantContext(name, dbsnp.getLocation(), alleles);
vc.validate();
return vc;
} else
return null; // can't handle anything else
}
}
// --------------------------------------------------------------------------------------------------------------
//
// VCF to VariantContext
//
// --------------------------------------------------------------------------------------------------------------
/**
 * Adaptor for RodVCF objects: unwraps the record held by the rod and hands it to
 * vcfToVariantContext().
 */
private static class RodVCFAdaptor extends VCAdaptor {
    VariantContext convert(String name, Object input) {
        final RodVCF rod = (RodVCF) input;
        return vcfToVariantContext(name, rod.getRecord());
    }
}
/**
 * Adaptor for bare VCFRecord objects: casts the input and hands it to vcfToVariantContext().
 */
private static class VCFRecordAdaptor extends VCAdaptor {
    VariantContext convert(String name, Object input) {
        final VCFRecord record = (VCFRecord) input;
        return vcfToVariantContext(name, record);
    }
}
private static VariantContext vcfToVariantContext(String name, VCFRecord vcf) {
if ( vcf.isSNP() || vcf.isIndel() ) {
// add the reference allele
@ -74,6 +130,7 @@ public class VariantContextAdaptors {
alleles.add(refAllele);
for ( String alt : vcf.getAlternateAlleleList() ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
// todo -- cleanup
System.out.printf("Excluding vcf record %s%n", vcf);
return null;
}

View File

@ -2,25 +2,40 @@ package org.broadinstitute.sting.oneoffprojects.variantcontext;
import java.util.*;
import org.apache.commons.jexl.*;
import org.broadinstitute.sting.oneoffprojects.variantcontext.varianteval2.VariantEvaluator;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
public class VariantContextUtils {
/** */
public static class MatchExp {
/**
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
*/
public static class JexlVCMatchExp {
public String name;
public String expStr;
public Expression exp;
public MatchExp(String name, String str, Expression exp) {
/**
* Create a new matcher expression with name and JEXL expression exp
* @param name
* @param exp
*/
public JexlVCMatchExp(String name, Expression exp) {
this.name = name;
this.exp = exp;
}
}
public static List<MatchExp> initializeMatchExps(String[] names, String[] exps) {
/**
* Method for creating JexlVCMatchExp from input walker arguments names and exps. These two arrays contain
* the name associated with each JEXL expression. initializeMatchExps will parse each expression and return
* a list of JexlVCMatchExp, in order, that correspond to the names and exps. These are suitable input to
* match() below.
*
* @param names
* @param exps
* @return
*/
public static List<JexlVCMatchExp> initializeMatchExps(String[] names, String[] exps) {
if ( names == null || exps == null )
throw new StingException("BUG: neither names nor exps can be null: names " + names + " exps=" + exps );
@ -33,8 +48,17 @@ public class VariantContextUtils {
return VariantContextUtils.initializeMatchExps(map);
}
public static List<MatchExp> initializeMatchExps(Map<String, String> names_and_exps) {
List<MatchExp> exps = new ArrayList<MatchExp>();
/**
* Method for creating JexlVCMatchExp from input walker arguments mapping from names to exps. This map contains
* the name associated with each JEXL expression. initializeMatchExps will parse each expression and return
* a list of JexlVCMatchExp, in order, that correspond to the names and exps. These are suitable input to
* match() below.
*
* @param names_and_exps
* @return
*/
public static List<JexlVCMatchExp> initializeMatchExps(Map<String, String> names_and_exps) {
List<JexlVCMatchExp> exps = new ArrayList<JexlVCMatchExp>();
for ( Map.Entry<String, String> elt : names_and_exps.entrySet() ) {
String name = elt.getKey();
@ -42,8 +66,8 @@ public class VariantContextUtils {
if ( name == null || expStr == null ) throw new IllegalArgumentException("Cannot create null expressions : " + name + " " + expStr);
try {
Expression filterExpression = ExpressionFactory.createExpression(expStr);
exps.add(new MatchExp(name, expStr, filterExpression));
Expression exp = ExpressionFactory.createExpression(expStr);
exps.add(new JexlVCMatchExp(name, exp));
} catch (Exception e) {
throw new StingException("Invalid expression used (" + expStr + "). Please see the JEXL docs for correct syntax.");
}
@ -52,11 +76,27 @@ public class VariantContextUtils {
return exps;
}
public static boolean match(VariantContext vc, MatchExp exp) {
/**
 * Evaluates a single match expression against vc. This is a convenience wrapper that runs the
 * collection version of match() on a one-element list and looks up the result for exp.
 *
 * @param vc  the variant context to test
 * @param exp the expression to evaluate
 * @return true if exp matches vc
 */
public static boolean match(VariantContext vc, JexlVCMatchExp exp) {
    final Map<JexlVCMatchExp, Boolean> results = match(vc, Arrays.asList(exp));
    return results.get(exp);
}
public static Map<MatchExp, Boolean> match(VariantContext vc, Collection<MatchExp> exps) {
/**
* Matches each JexlVCMatchExp exp against the data contained in vc, and returns a map from these
* expressions to true (if they matched) or false (if they didn't). This is the best way to apply JEXL
* expressions to VariantContext records. Use initializeMatchExps() to create the list of JexlVCMatchExp
* expressions.
*
* @param vc
* @param exps
* @return
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Collection<JexlVCMatchExp> exps) {
// todo -- actually, we should implement a JEXL context interface to VariantContext,
// todo -- which just looks up the values assigned statically here. Much better approach
@ -93,8 +133,8 @@ public class VariantContextUtils {
jContext.setVars(infoMap);
try {
Map<MatchExp, Boolean> resultMap = new HashMap<MatchExp, Boolean>();
for ( MatchExp e : exps ) {
Map<JexlVCMatchExp, Boolean> resultMap = new HashMap<JexlVCMatchExp, Boolean>();
for ( JexlVCMatchExp e : exps ) {
resultMap.put(e, (Boolean)e.exp.evaluate(jContext));
}
return resultMap;
@ -110,9 +150,7 @@ public class VariantContextUtils {
}
}
private static final String UNIQUIFIED_SUFFIX = ".unique";
// private static final String UNIQUIFIED_SUFFIX = ".unique";
/**
* @param other another variant context

View File

@ -86,9 +86,9 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
private class EvaluationContext extends HashMap<String, Set<VariantEvaluator>> {
// useful for typing
public String trackName, contextName;
VariantContextUtils.MatchExp selectExp;
VariantContextUtils.JexlVCMatchExp selectExp;
public EvaluationContext(String trackName, String contextName, VariantContextUtils.MatchExp selectExp) {
public EvaluationContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) {
this.trackName = trackName;
this.contextName = contextName;
this.selectExp = selectExp;
@ -117,11 +117,11 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
public void initialize() {
determineAllEvalations();
List<VariantContextUtils.MatchExp> selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS);
List<VariantContextUtils.JexlVCMatchExp> selectExps = VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS);
for ( ReferenceOrderedDataSource d : this.getToolkit().getRodDataSources() ) {
if ( d.getName().startsWith("eval") ) {
for ( VariantContextUtils.MatchExp e : selectExps ) {
for ( VariantContextUtils.JexlVCMatchExp e : selectExps ) {
addNewContext(d.getName(), d.getName() + "." + e.name, e);
}
addNewContext(d.getName(), d.getName() + ".all", null);
@ -171,7 +171,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
return evals;
}
private void addNewContext(String trackName, String contextName, VariantContextUtils.MatchExp selectExp) {
private void addNewContext(String trackName, String contextName, VariantContextUtils.JexlVCMatchExp selectExp) {
EvaluationContext group = new EvaluationContext(trackName, contextName, selectExp);
for ( String filteredName : Arrays.asList(RAW_SET_NAME, RETAINED_SET_NAME, FILTERED_SET_NAME) ) {
@ -288,7 +288,7 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
if ( rodList != null ) {
for ( ReferenceOrderedDatum rec : rodList.getRecords() ) {
if ( rec.getLocation().getStart() == context.getLocation().getStart() ) {
VariantContext vc = VariantContextAdaptors.convertToVariantContext(name, rec);
VariantContext vc = VariantContextAdaptors.toVariantContext(name, rec);
if ( vc != null ) {
return vc;
}

View File

@ -47,7 +47,7 @@ public class SNPDensity extends RefWalker<Pair<VariantContext, GenomeLoc>, SNPDe
if (vcfList != null) {
for (ReferenceOrderedDatum d : vcfList) {
RodVCF vcfRecord = (RodVCF)d;
vc = VariantContextAdaptors.convertToVariantContext("eval", vcfRecord);
vc = VariantContextAdaptors.toVariantContext("eval", vcfRecord);
break;
}
}