Initial test of write and read from MongoDB
This commit is contained in:
parent
d93a413f2e
commit
bb8a6e9b0a
|
|
@ -0,0 +1,109 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: thibault
|
||||
* Date: 3/30/12
|
||||
* Time: 4:47 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
import com.mongodb.*;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
/**
|
||||
* Inserts all of the RODs in the input data set. Data is inserted using VariantContext.toMongoDB().
|
||||
*/
|
||||
public class InsertRODsWalker extends RodWalker<Integer, Integer> {
|
||||
@Input(fullName="input", shortName = "input", doc="The input ROD which should be inserted into the DB.", required=true)
|
||||
public RodBinding<Feature> input;
|
||||
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
private final static String MONGO_HOST = "gsa4.broadinstitute.org";
|
||||
private final static Integer MONGO_PORT = 43054;
|
||||
private final static String MONGO_DB_NAME = "bjorn";
|
||||
private final static String MONGO_VC_COLLECTION = "vcs";
|
||||
|
||||
protected Mongo mongo;
|
||||
protected DBCollection mongoCollection;
|
||||
|
||||
@Override
|
||||
public void initialize()
|
||||
{
|
||||
try {
|
||||
mongo = new Mongo(MONGO_HOST, MONGO_PORT);
|
||||
DB mongoDb = mongo.getDB(MONGO_DB_NAME);
|
||||
mongoCollection = mongoDb.getCollection(MONGO_VC_COLLECTION);
|
||||
|
||||
// set up indices
|
||||
mongoCollection.ensureIndex("location");
|
||||
mongoCollection.ensureIndex("sample");
|
||||
mongoCollection.ensureIndex("contig");
|
||||
mongoCollection.ensureIndex("start");
|
||||
mongoCollection.ensureIndex("stop");
|
||||
|
||||
// set up primary key
|
||||
mongoCollection.ensureIndex(new BasicDBObject("location", 1).append("sample", 1), new BasicDBObject("unique", 1));
|
||||
|
||||
}
|
||||
catch (MongoException e) {}
|
||||
catch (java.net.UnknownHostException e) {}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the number of loci processed to zero.
|
||||
*
|
||||
* @return 0
|
||||
*/
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
/**
|
||||
*
|
||||
* @param tracker the meta-data tracker
|
||||
* @param ref the reference base
|
||||
* @param context the context for the given locus
|
||||
* @return 1 if the locus was successfully processed, 0 if otherwise
|
||||
*/
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
|
||||
if ( feature instanceof VariantContext ) {
|
||||
VariantContext vc = (VariantContext) feature;
|
||||
for (BasicDBObject vcForMongo : vc.toMongoDB()) {
|
||||
mongoCollection.insert(vcForMongo);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment the number of rods processed.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @return the new number of rods processed.
|
||||
*/
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum + value;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
mongo.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import com.mongodb.*;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
|
|
@ -33,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MendelianViolation;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
|
|
@ -181,7 +183,8 @@ import java.util.*;
|
|||
*
|
||||
*/
|
||||
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
@ArgumentCollection
|
||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
/**
|
||||
* A site is considered discordant if there exists some sample in the variant track that has a non-reference genotype
|
||||
|
|
@ -344,6 +347,14 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
|
||||
private Set<String> IDsToKeep = null;
|
||||
|
||||
private final static String MONGO_HOST = "gsa4.broadinstitute.org";
|
||||
private final static Integer MONGO_PORT = 43054;
|
||||
private final static String MONGO_DB_NAME = "bjorn";
|
||||
private final static String MONGO_VC_COLLECTION = "vcs";
|
||||
|
||||
protected Mongo mongo;
|
||||
protected DBCollection mongoCollection;
|
||||
|
||||
/**
|
||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
*/
|
||||
|
|
@ -443,6 +454,15 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
throw new UserException.CouldNotReadInputFile(rsIDFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
mongo = new Mongo(MONGO_HOST, MONGO_PORT);
|
||||
DB mongoDb = mongo.getDB(MONGO_DB_NAME);
|
||||
mongoCollection = mongoDb.getCollection(MONGO_VC_COLLECTION);
|
||||
}
|
||||
catch (MongoException e) {}
|
||||
catch (java.net.UnknownHostException e) {}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -458,7 +478,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
|
||||
//Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
|
||||
Collection<VariantContext> vcs = getMongoVariants(context.getLocation());
|
||||
|
||||
if ( vcs == null || vcs.size() == 0) {
|
||||
return 0;
|
||||
|
|
@ -531,6 +552,92 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
return 1;
|
||||
}
|
||||
|
||||
private Collection<VariantContext> getMongoVariants(GenomeLoc location) {
|
||||
String contig = location.getContig();
|
||||
long start = location.getStart();
|
||||
long stop = location.getStop();
|
||||
|
||||
ArrayList<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
|
||||
BasicDBObject query = new BasicDBObject();
|
||||
query.put("contig", contig);
|
||||
query.put("start", start);
|
||||
query.put("stop", stop);
|
||||
|
||||
DBCursor cursor = mongoCollection.find(query);
|
||||
while(cursor.hasNext()) {
|
||||
DBObject result = cursor.next();
|
||||
|
||||
String source = (String)result.get("source");
|
||||
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>();
|
||||
BasicDBObject allelesInDb = (BasicDBObject)result.get("alleles");
|
||||
for (Object alleleInDb : allelesInDb.values()) {
|
||||
String rawAllele = (String)alleleInDb;
|
||||
boolean isRef = rawAllele.contains("*");
|
||||
String allele = rawAllele.replace("*", "");
|
||||
alleles.add(Allele.create(allele, isRef));
|
||||
}
|
||||
|
||||
VariantContextBuilder builder = new VariantContextBuilder(source, contig, start, stop, alleles);
|
||||
|
||||
String id = (String)result.get("id");
|
||||
String sample = (String)result.get("sample");
|
||||
Double error = (Double)result.get("error");
|
||||
|
||||
Map<String, Object> attributes = new TreeMap<String, Object>();
|
||||
BasicDBList attrsInDb = (BasicDBList)result.get("attributes");
|
||||
for (Object attrInDb : attrsInDb) {
|
||||
BasicDBObject attrKVP = (BasicDBObject)attrInDb;
|
||||
String key = (String)attrKVP.get("key");
|
||||
Object value = attrKVP.get("value");
|
||||
attributes.put(key, value);
|
||||
}
|
||||
|
||||
Set<String> filters = new HashSet<String>();
|
||||
BasicDBObject filtersInDb = (BasicDBObject)result.get("filters");
|
||||
if (filtersInDb != null) {
|
||||
for (Object filterInDb : filtersInDb.values()) {
|
||||
filters.add((String)filterInDb);
|
||||
}
|
||||
}
|
||||
|
||||
BasicDBObject genotypeInDb = (BasicDBObject)result.get("genotype");
|
||||
Double genotypeError = (Double)genotypeInDb.get("error");
|
||||
|
||||
ArrayList<Allele> genotypeAlleles = new ArrayList<Allele>();
|
||||
BasicDBObject genotypeAllelesInDb = (BasicDBObject)genotypeInDb.get("alleles");
|
||||
for (Object alleleInDb : genotypeAllelesInDb.values()) {
|
||||
String rawAllele = (String)alleleInDb;
|
||||
boolean isRef = rawAllele.contains("*");
|
||||
String allele = rawAllele.replace("*", "");
|
||||
genotypeAlleles.add(Allele.create(allele, isRef));
|
||||
}
|
||||
|
||||
Map<String, Object> genotypeAttributes = new TreeMap<String, Object>();
|
||||
BasicDBList genotypeAttrsInDb = (BasicDBList)genotypeInDb.get("attributes");
|
||||
for (Object attrInDb : genotypeAttrsInDb) {
|
||||
BasicDBObject attrKVP = (BasicDBObject)attrInDb;
|
||||
String key = (String)attrKVP.get("key");
|
||||
Object value = attrKVP.get("value");
|
||||
genotypeAttributes.put(key, value);
|
||||
}
|
||||
|
||||
Genotype genotype = new Genotype(sample, genotypeAlleles, genotypeError);
|
||||
genotype = Genotype.modifyAttributes(genotype, genotypeAttributes);
|
||||
|
||||
builder.id(id);
|
||||
builder.log10PError(error);
|
||||
builder.genotypes(genotype);
|
||||
builder.attributes(attributes);
|
||||
builder.filters(filters);
|
||||
|
||||
vcs.add(builder.make());
|
||||
}
|
||||
|
||||
return vcs;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if vc has a variant call for (at least one of) the samples.
|
||||
* @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for discordances to (e.g. HapMap)
|
||||
|
|
|
|||
|
|
@ -1,8 +1,11 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import com.mongodb.BasicDBList;
|
||||
import com.mongodb.BasicDBObject;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
|
@ -1218,6 +1221,86 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
this.getGenotypes());
|
||||
}
|
||||
|
||||
public List<BasicDBObject> toMongoDB() {
|
||||
List<BasicDBObject> vcDocs = new ArrayList<BasicDBObject>();
|
||||
for (Genotype genotype : this.getGenotypes()) {
|
||||
BasicDBObject vcDoc = new BasicDBObject();
|
||||
vcDoc.put("location", contig + ":" + (start - stop == 0 ? start : start + "-" + stop));
|
||||
vcDoc.put("contig", contig);
|
||||
vcDoc.put("start", start);
|
||||
vcDoc.put("stop", stop);
|
||||
vcDoc.put("id", this.getID());
|
||||
vcDoc.put("error", this.getLog10PError());
|
||||
vcDoc.put("sample", genotype.getSampleName());
|
||||
vcDoc.put("source", this.getSource());
|
||||
vcDoc.put("type", this.getType().toString());
|
||||
|
||||
Integer alleleIndex = 0;
|
||||
BasicDBObject allelesDoc = new BasicDBObject();
|
||||
for (Allele allele : this.getAlleles())
|
||||
{
|
||||
String index = alleleIndex.toString();
|
||||
allelesDoc.put(index, allele.toString());
|
||||
alleleIndex++;
|
||||
}
|
||||
vcDoc.put("alleles", allelesDoc);
|
||||
|
||||
List<BasicDBObject> attributesDocs = new ArrayList<BasicDBObject>();
|
||||
for (Map.Entry<String, Object> attribute : this.getAttributes().entrySet() )
|
||||
{
|
||||
String key = attribute.getKey();
|
||||
Object value = attribute.getValue();
|
||||
BasicDBObject attributesDoc = new BasicDBObject();
|
||||
attributesDoc.put("key", key);
|
||||
attributesDoc.put("value", value);
|
||||
attributesDocs.add(attributesDoc);
|
||||
}
|
||||
vcDoc.put("attributes", attributesDocs);
|
||||
|
||||
BasicDBObject genotypesDoc = new BasicDBObject();
|
||||
Integer genotypeAlleleIndex = 0;
|
||||
BasicDBObject genotypeAllelesDoc = new BasicDBObject();
|
||||
for (Allele allele : genotype.getAlleles())
|
||||
{
|
||||
String index = genotypeAlleleIndex.toString();
|
||||
genotypeAllelesDoc.put(index, allele.toString());
|
||||
genotypeAlleleIndex++;
|
||||
}
|
||||
genotypesDoc.put("alleles", genotypeAllelesDoc);
|
||||
|
||||
List<BasicDBObject> genotypesAttributesDocs = new ArrayList<BasicDBObject>();
|
||||
for (Map.Entry<String, Object> attribute : genotype.getAttributes().entrySet() )
|
||||
{
|
||||
String key = attribute.getKey();
|
||||
Object value = attribute.getValue();
|
||||
BasicDBObject genotypesAttributesDoc = new BasicDBObject();
|
||||
genotypesAttributesDoc.put("key", key);
|
||||
genotypesAttributesDoc.put("value", value);
|
||||
genotypesAttributesDocs.add(genotypesAttributesDoc);
|
||||
}
|
||||
genotypesDoc.put("attributes", genotypesAttributesDocs);
|
||||
genotypesDoc.put("error", genotype.getLog10PError());
|
||||
|
||||
vcDoc.put("genotype", genotypesDoc);
|
||||
|
||||
Integer filterIndex = 0;
|
||||
BasicDBObject filtersDoc = new BasicDBObject();
|
||||
for (String filter : this.getFilters())
|
||||
{
|
||||
String index = filterIndex.toString();
|
||||
filtersDoc.put(index, filter.toString());
|
||||
filterIndex++;
|
||||
}
|
||||
if (filterIndex > 0) {
|
||||
vcDoc.put("filters", filtersDoc);
|
||||
}
|
||||
|
||||
vcDocs.add(vcDoc);
|
||||
}
|
||||
|
||||
return vcDocs;
|
||||
}
|
||||
|
||||
// protected basic manipulation routines
|
||||
private static List<Allele> makeAlleles(Collection<Allele> alleles) {
|
||||
final List<Allele> alleleList = new ArrayList<Allele>(alleles.size());
|
||||
|
|
|
|||
Loading…
Reference in New Issue