Move all MongoDB files into private/java/src/org/broadinstitute/sting/mongodb

This commit is contained in:
Joel Thibault 2012-05-01 18:20:18 -04:00
parent bdf6d1f109
commit 4d732fa586
4 changed files with 2 additions and 473 deletions

View File

@ -1,121 +0,0 @@
package org.broadinstitute.sting.gatk.walkers;
/**
* Created with IntelliJ IDEA.
* User: thibault
* Date: 3/30/12
* Time: 4:47 PM
* To change this template use File | Settings | File Templates.
*/
import com.mongodb.BasicDBObject;
import com.mongodb.DBCollection;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.db.MongoDB;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.io.PrintStream;
import java.util.Collection;
import java.util.List;
/**
 * Walker that stores every variant record bound to {@code input} in MongoDB.
 *
 * Each VariantContext seen during traversal is converted with VariantContext.toMongoDB() into one
 * document for the attributes collection and one document per sample for the samples collection.
 */
public class InsertRODsWalker extends RodWalker<Integer, Integer> {

    @Input(fullName="input", shortName = "input", doc="The input ROD which should be inserted into the DB.", required=true)
    public RodBinding<Feature> input;

    @Output
    PrintStream out;

    /** File name (directory part removed) of the bound ROD; stored with every inserted document. */
    private String RODFileName;

    /**
     * Derives the ROD file name from the binding's source and makes sure both collections carry
     * their single-field indices and their unique compound ("primary key") indices.
     */
    @Override
    public void initialize() {
        final DBCollection attributesCollection = MongoDB.getAttributesCollection();
        final DBCollection samplesCollection = MongoDB.getSamplesCollection();

        // keep only what follows the last path separator
        final String source = input.getSource();
        RODFileName = source.substring(source.lastIndexOf(File.separator) + 1);

        // set up indices
        final String[] attributeIndexFields = {"location", "sourceROD", "contig", "start", "stop"};
        for (final String field : attributeIndexFields) {
            attributesCollection.ensureIndex(field);
        }
        final String[] sampleIndexFields = {"location", "sample", "sourceROD", "contig", "start", "stop"};
        for (final String field : sampleIndexFields) {
            samplesCollection.ensureIndex(field);
        }

        // set up primary keys
        final BasicDBObject attributesKey = new BasicDBObject("location", 1)
                .append("sourceROD", 1)
                .append("alleles", 1);
        attributesCollection.ensureIndex(attributesKey, new BasicDBObject("unique", 1));

        final BasicDBObject samplesKey = new BasicDBObject("location", 1)
                .append("sourceROD", 1)
                .append("alleles", 1)
                .append("sample", 1);
        samplesCollection.ensureIndex(samplesKey, new BasicDBObject("unique", 1));
    }

    /**
     * Starts the count of processed loci at zero.
     *
     * @return 0
     */
    public Integer reduceInit() {
        return 0;
    }

    /**
     * Inserts every VariantContext found at the current locus into the two Mongo collections.
     * Features that are not VariantContexts are ignored.
     *
     * @param tracker the meta-data tracker
     * @param ref     the reference base
     * @param context the context for the given locus
     * @return 0 when there is no tracker, 1 otherwise
     */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( tracker == null ) {
            return 0;
        }

        final DBCollection attributesCollection = MongoDB.getAttributesCollection();
        final DBCollection samplesCollection = MongoDB.getSamplesCollection();

        for ( final Feature candidate : tracker.getValues(Feature.class, context.getLocation()) ) {
            if ( !(candidate instanceof VariantContext) ) {
                continue;
            }

            final Pair<BasicDBObject,List<BasicDBObject>> documents =
                    ((VariantContext) candidate).toMongoDB(RODFileName);

            // site-level document first, then one document per sample
            attributesCollection.insert(documents.first);
            for ( final BasicDBObject sampleDocument : documents.second ) {
                samplesCollection.insert(sampleDocument);
            }
        }

        return 1;
    }

    /**
     * Adds the result of one map call to the running total.
     *
     * @param value result of the map.
     * @param sum   accumulator for the reduce.
     * @return the updated total.
     */
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }

    /**
     * Closes the shared MongoDB connection once the traversal has finished.
     *
     * @param result the final reduce value (unused)
     */
    public void onTraversalDone(Integer result) {
        MongoDB.close();
    }
}

View File

@ -24,10 +24,6 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
@ -40,12 +36,9 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MendelianViolation;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.db.MongoDB;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -191,8 +184,7 @@ import java.util.*;
*
*/
public class SelectVariants extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
/**
* A site is considered discordant if there exists some sample in the variant track that has a non-reference genotype
@ -365,8 +357,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
private Set<String> IDsToKeep = null;
private final static boolean mongoOn = false;
/**
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
*/
@ -488,7 +478,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
if ( tracker == null )
return 0;
Collection<VariantContext> vcs = mongoOn ? getMongoVariants(ref, context.getLocation()) : tracker.getValues(variantCollection.variants, context.getLocation());
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
if ( vcs == null || vcs.size() == 0) {
return 0;
@ -568,193 +558,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
return 1;
}
/**
 * Rebuilds the variant contexts stored in MongoDB for the given location.
 *
 * Both collections are queried by contig and start only. Every document of the attributes
 * collection seeds a VariantContextBuilder keyed by (sourceROD, alleles). Every document of the
 * samples collection then produces one VariantContext that carries that single sample's genotype.
 * Records for which no sample document exists are emitted once, without genotypes, so that
 * sites-only data is not silently dropped. The collected contexts are finally handed to
 * combineMongoVariants().
 *
 * @param ref      reference context; its first base is used as the reference base for indels
 * @param location locus whose contig and start select the stored records
 * @return the (possibly merged) variant contexts found at the location
 * @throws IllegalStateException if a sample document has no matching attributes document
 */
private Collection<VariantContext> getMongoVariants(ReferenceContext ref, GenomeLoc location) {
    String contig = location.getContig();
    long start = location.getStart();

    ArrayList<VariantContext> vcs = new ArrayList<VariantContext>();

    BasicDBObject query = new BasicDBObject();
    query.put("contig", contig);
    query.put("start", start);
    // can't know stop location for deletions from reference

    DBCursor attributesCursor = MongoDB.getAttributesCollection().find(query);
    DBCursor samplesCursor = MongoDB.getSamplesCollection().find(query);

    // insertion-ordered so that genotype-less records are emitted in a deterministic order
    Map<Pair<String,List<Allele>>,VariantContextBuilder> attributesFromDB =
            new LinkedHashMap<Pair<String,List<Allele>>,VariantContextBuilder>();

    while (attributesCursor.hasNext()) {
        DBObject oneResult = attributesCursor.next();

        String sourceROD = (String)oneResult.get("sourceROD");
        List<Allele> alleles = mongoAllelesToList((BasicDBObject)oneResult.get("alleles"));

        // primary key to uniquely identify variant
        Pair<String, List<Allele>> sourceRodAllelePair = new Pair<String, List<Allele>>(sourceROD, alleles);

        Map<String, Object> attributes = mongoKeyValuesToMap((BasicDBList)oneResult.get("attributes"));

        // the "filters" field is optional in the stored document
        Set<String> filters = new HashSet<String>();
        BasicDBObject filtersInDb = (BasicDBObject)oneResult.get("filters");
        if (filtersInDb != null) {
            for (Object filterInDb : filtersInDb.values()) {
                filters.add((String)filterInDb);
            }
        }

        String source = (String)oneResult.get("source");
        String id = (String)oneResult.get("id");
        Double error = (Double)oneResult.get("error");
        Long stop = (Long)oneResult.get("stop");

        VariantContextBuilder builder = new VariantContextBuilder(source, contig, start, stop, sourceRodAllelePair.getSecond());

        builder.id(id);
        builder.log10PError(error);
        builder.attributes(attributes);
        builder.filters(filters);

        // NOTE(review): the previous code first set the base at index (start - window start - 1)
        // when that index was non-negative, but the call below always overwrote it, so only this
        // assignment ever took effect. Both were marked TODO by the author; confirm which base
        // is actually intended.
        builder.referenceBaseForIndel(ref.getBases()[0]); // TODO: correct?

        attributesFromDB.put(sourceRodAllelePair, builder);
    }

    Set<Pair<String, List<Allele>>> keysWithSamples = new HashSet<Pair<String, List<Allele>>>();

    while (samplesCursor.hasNext()) {
        DBObject oneResult = samplesCursor.next();

        String sourceROD = (String)oneResult.get("sourceROD");
        List<Allele> alleles = mongoAllelesToList((BasicDBObject)oneResult.get("alleles"));

        // primary key to uniquely identify variant
        Pair<String, List<Allele>> sourceRodAllelePair = new Pair<String, List<Allele>>(sourceROD, alleles);
        VariantContextBuilder builder = attributesFromDB.get(sourceRodAllelePair);
        if (builder == null) {
            // fail with context instead of a bare NullPointerException further down
            throw new IllegalStateException("Sample record without matching attributes record at "
                    + contig + ":" + start + " for sourceROD " + sourceROD);
        }
        keysWithSamples.add(sourceRodAllelePair);

        String sample = (String)oneResult.get("sample");

        BasicDBObject genotypeInDb = (BasicDBObject)oneResult.get("genotype");
        Double genotypeError = (Double)genotypeInDb.get("error");
        List<Allele> genotypeAlleles = mongoAllelesToList((BasicDBObject)genotypeInDb.get("alleles"));
        Map<String, Object> genotypeAttributes = mongoKeyValuesToMap((BasicDBList)genotypeInDb.get("attributes"));

        // the builder is shared by all samples of a record: each pass replaces its genotypes and
        // snapshots a single-sample context; the snapshots are merged by combineMongoVariants()
        Genotype genotype = new Genotype(sample, genotypeAlleles, genotypeError);
        builder.genotypes(Genotype.modifyAttributes(genotype, genotypeAttributes));
        vcs.add(builder.make());
    }

    // records that have no sample documents would otherwise never reach the result
    for (Map.Entry<Pair<String, List<Allele>>, VariantContextBuilder> entry : attributesFromDB.entrySet()) {
        if (!keysWithSamples.contains(entry.getKey())) {
            vcs.add(entry.getValue().make());
        }
    }

    return combineMongoVariants(vcs);
}

/** Converts a stored alleles document into alleles; a '*' in the stored string marks the reference allele. */
private static List<Allele> mongoAllelesToList(BasicDBObject allelesInDb) {
    List<Allele> alleles = new ArrayList<Allele>();
    for (Object alleleInDb : allelesInDb.values()) {
        String rawAllele = (String)alleleInDb;
        boolean isRef = rawAllele.contains("*");
        alleles.add(Allele.create(rawAllele.replace("*", ""), isRef));
    }
    return alleles;
}

/** Converts a stored list of {"key": ..., "value": ...} documents into a sorted attribute map. */
private static Map<String, Object> mongoKeyValuesToMap(BasicDBList keyValuePairsInDb) {
    Map<String, Object> keyValues = new TreeMap<String, Object>();
    for (Object attrInDb : keyValuePairsInDb) {
        BasicDBObject attrKVP = (BasicDBObject)attrInDb;
        keyValues.put((String)attrKVP.get("key"), attrKVP.get("value"));
    }
    return keyValues;
}
/**
 * Merges the variant contexts rebuilt from MongoDB into combined records and recomputes their
 * chromosome counts. A collection with fewer than two elements is returned untouched.
 *
 * @param vcs the variant contexts to merge
 * @return {@code vcs} itself when it holds fewer than two elements, otherwise the merged contexts
 */
// Copied from CombineVariants
private Collection<VariantContext> combineMongoVariants(Collection<VariantContext> vcs) {
    if (vcs.size() < 2) {
        return vcs;
    }

    //defaults from CombineVariants
    final VariantContextUtils.MultipleAllelesMergeType alleleMergeMode = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE;
    final VariantContextUtils.FilteredRecordMergeType filteredMergeMode = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
    final VariantContextUtils.GenotypeMergeType genotypeMergeMode = VariantContextUtils.GenotypeMergeType.PRIORITIZE;
    final List<String> rodPriority = new ArrayList<String>();
    rodPriority.add("input");
    final boolean logComplexMerges = false;
    final String setKey = "set";
    final boolean treatFilteredAsUncalled = false;
    final boolean mergeInfoWithMaxAC = false;

    final List<VariantContext> merged = new ArrayList<VariantContext>();

    if (alleleMergeMode == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
        // one merge across all records regardless of their type
        merged.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs,
                rodPriority, filteredMergeMode, genotypeMergeMode, true, logComplexMerges,
                setKey, treatFilteredAsUncalled, mergeInfoWithMaxAC));
    }
    else if (alleleMergeMode != VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) {
        logger.warn("Ignoring all records at site");
    }
    else {
        final Map<VariantContext.Type, List<VariantContext>> grouped = VariantContextUtils.separateVariantContextsByType(vcs);

        // TODO -- clean this up in a refactoring
        // merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type)
        final boolean hasOtherTypes = grouped.size() > 1;
        if ( grouped.containsKey(VariantContext.Type.NO_VARIATION) && hasOtherTypes ) {
            final List<VariantContext> referenceRecords = grouped.remove(VariantContext.Type.NO_VARIATION);
            for ( final VariantContext.Type candidate : VariantContext.Type.values() ) {
                if ( !grouped.containsKey(candidate) ) {
                    continue;
                }
                grouped.get(candidate).addAll(referenceRecords);
                break;
            }
        }

        // iterate over the types so that it's deterministic
        for (final VariantContext.Type candidate : VariantContext.Type.values()) {
            if (!grouped.containsKey(candidate)) {
                continue;
            }
            merged.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), grouped.get(candidate),
                    rodPriority, filteredMergeMode, genotypeMergeMode, true, logComplexMerges,
                    setKey, treatFilteredAsUncalled, mergeInfoWithMaxAC));
        }
    }

    final List<VariantContext> result = new ArrayList<VariantContext>();
    for ( final VariantContext candidate : merged ) {
        // only operate at the start of events
        if ( candidate != null ) {
            final VariantContextBuilder rebuilt = new VariantContextBuilder(candidate);
            // re-compute chromosome counts
            VariantContextUtils.calculateChromosomeCounts(rebuilt, false);
            result.add(rebuilt.make());
        }
    }

    return result;
}
private boolean hasPLs(final VariantContext vc) {
for ( Genotype g : vc.getGenotypes() ) {
if ( g.hasLikelihoods() )
@ -857,10 +660,6 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
}
public void onTraversalDone(Integer result) {
if (mongoOn) {
MongoDB.close();
}
logger.info(result + " records processed.");
if (SELECT_RANDOM_NUMBER) {

View File

@ -1,52 +0,0 @@
package org.broadinstitute.sting.utils.db;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.Mongo;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.net.UnknownHostException;
/**
 * Handles Mongo DB connections.
 *
 * A single shared instance is created when this class is initialized; it builds one Mongo client
 * for the hard-coded host, port and database below and looks up the two collections used by the
 * walkers. All access goes through the static methods.
 *
 * User: thibault
 * Date: 4/26/12
 * Time: 3:01 PM
 */
final public class MongoDB {
    private final static String MONGO_HOST = "couchdb.broadinstitute.org";
    private final static int MONGO_PORT = 43054;
    private final static String MONGO_DB_NAME = "bjorn";
    private final static String MONGO_ATTRIBUTES_COLLECTION = "attributes";
    private final static String MONGO_SAMPLES_COLLECTION = "samples";

    // assigned exactly once by the private constructor; never exposed for modification
    private final Mongo mongo;
    private final DBCollection mongoAttributes;
    private final DBCollection mongoSamples;

    // must stay below the constants above: the constructor reads them during static initialization
    final private static MongoDB INSTANCE = new MongoDB();

    /**
     * @return the shared handle to the "attributes" collection
     */
    public static DBCollection getAttributesCollection() {
        return INSTANCE.mongoAttributes;
    }

    /**
     * @return the shared handle to the "samples" collection
     */
    public static DBCollection getSamplesCollection() {
        return INSTANCE.mongoSamples;
    }

    /**
     * Closes the shared Mongo client. Nothing in this class reopens it afterwards.
     */
    public static void close() {
        INSTANCE.mongo.close();
    }

    /**
     * Creates the client and resolves both collections.
     *
     * @throws StingException if the configured host cannot be resolved; the original
     *                        UnknownHostException is kept as the cause
     */
    private MongoDB() {
        try {
            mongo = new Mongo(MONGO_HOST, MONGO_PORT);
            DB mongoDb = mongo.getDB(MONGO_DB_NAME);
            mongoAttributes = mongoDb.getCollection(MONGO_ATTRIBUTES_COLLECTION);
            mongoSamples = mongoDb.getCollection(MONGO_SAMPLES_COLLECTION);
        } catch (UnknownHostException e) {
            throw new StingException(e.getMessage(), e);
        }
    }
}

View File

@ -1,11 +1,9 @@
package org.broadinstitute.sting.utils.variantcontext;
import com.mongodb.BasicDBObject;
import org.broad.tribble.Feature;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
@ -1220,101 +1218,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
this.getGenotypes());
}
/**
 * Generate a Mongo DB attributes collection element and a set of samples collection elements.
 *
 * The site-level fields (location, contig, start, stop, id, error, source, sourceROD, type,
 * alleles and, when present, filters) are shared by all generated documents. The attributes
 * document adds the context's attributes as a list of key/value documents; each sample document
 * adds the sample name and a nested genotype document.
 *
 * @param sourceROD name stored in the "sourceROD" field of every generated document
 * @return a pair whose first element is the attributes document and whose second element holds
 *         one document per genotype
 */
public Pair<BasicDBObject,List<BasicDBObject>> toMongoDB(String sourceROD) {
    // fields common to both attributes and samples collections
    final BasicDBObject common = new BasicDBObject();
    final String span = (start == stop) ? String.valueOf(start) : start + "-" + stop;
    common.put("location", contig + ":" + span);
    common.put("contig", contig);
    common.put("start", start);
    common.put("stop", stop);
    common.put("id", this.getID());
    common.put("error", this.getLog10PError());
    common.put("source", this.getSource());
    common.put("sourceROD", sourceROD);
    common.put("type", this.getType().toString());

    // alleles are stored under their position: "0", "1", ...
    final BasicDBObject siteAlleles = new BasicDBObject();
    int allelePosition = 0;
    for (final Allele siteAllele : this.getAlleles()) {
        siteAlleles.put(Integer.toString(allelePosition), siteAllele.toString());
        allelePosition++;
    }
    common.put("alleles", siteAlleles);

    // filters use the same positional layout, but the field is omitted when there are none
    final BasicDBObject siteFilters = new BasicDBObject();
    int filterPosition = 0;
    for (final String siteFilter : this.getFilters()) {
        siteFilters.put(Integer.toString(filterPosition), siteFilter.toString());
        filterPosition++;
    }
    if (filterPosition > 0) {
        common.put("filters", siteFilters);
    }

    // attributes collection
    final BasicDBObject attributesDocument = new BasicDBObject(common);
    final List<BasicDBObject> siteKeyValues = new ArrayList<BasicDBObject>();
    for (final Map.Entry<String, Object> entry : this.getAttributes().entrySet()) {
        final BasicDBObject keyValue = new BasicDBObject();
        keyValue.put("key", entry.getKey());
        keyValue.put("value", entry.getValue());
        siteKeyValues.add(keyValue);
    }
    attributesDocument.put("attributes", siteKeyValues);

    // samples collection
    final List<BasicDBObject> sampleDocuments = new ArrayList<BasicDBObject>();
    for (final Genotype call : this.getGenotypes()) {
        final BasicDBObject sampleDocument = new BasicDBObject(common);
        sampleDocument.put("sample", call.getSampleName());

        final BasicDBObject genotypeDocument = new BasicDBObject();

        final BasicDBObject calledAlleles = new BasicDBObject();
        int calledPosition = 0;
        for (final Allele calledAllele : call.getAlleles()) {
            calledAlleles.put(Integer.toString(calledPosition), calledAllele.toString());
            calledPosition++;
        }
        genotypeDocument.put("alleles", calledAlleles);

        final List<BasicDBObject> genotypeKeyValues = new ArrayList<BasicDBObject>();
        for (final Map.Entry<String, Object> entry : call.getAttributes().entrySet()) {
            final BasicDBObject keyValue = new BasicDBObject();
            keyValue.put("key", entry.getKey());
            keyValue.put("value", entry.getValue());
            genotypeKeyValues.add(keyValue);
        }
        genotypeDocument.put("attributes", genotypeKeyValues);
        genotypeDocument.put("error", call.getLog10PError());

        sampleDocument.put("genotype", genotypeDocument);
        sampleDocuments.add(sampleDocument);
    }

    return new Pair<BasicDBObject,List<BasicDBObject>>(attributesDocument, sampleDocuments);
}
// protected basic manipulation routines
private static List<Allele> makeAlleles(Collection<Allele> alleles) {
final List<Allele> alleleList = new ArrayList<Allele>(alleles.size());