diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/InsertRODsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/InsertRODsWalker.java deleted file mode 100644 index b24feeed4..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/InsertRODsWalker.java +++ /dev/null @@ -1,121 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers; - -/** - * Created with IntelliJ IDEA. - * User: thibault - * Date: 3/30/12 - * Time: 4:47 PM - * To change this template use File | Settings | File Templates. - */ - -import com.mongodb.BasicDBObject; -import com.mongodb.DBCollection; -import org.broad.tribble.Feature; -import org.broadinstitute.sting.commandline.Input; -import org.broadinstitute.sting.commandline.Output; -import org.broadinstitute.sting.commandline.RodBinding; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.db.MongoDB; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.io.File; -import java.io.PrintStream; -import java.util.Collection; -import java.util.List; - -/** - * Inserts all of the RODs in the input data set. Data is inserted using VariantContext.toMongoDB(). - */ -public class InsertRODsWalker extends RodWalker { - @Input(fullName="input", shortName = "input", doc="The input ROD which should be inserted into the DB.", required=true) - public RodBinding input; - - @Output - PrintStream out; - - - private String RODFileName; - - @Override - public void initialize() { - DBCollection mongoAttributes = MongoDB.getAttributesCollection(); - DBCollection mongoSamples = MongoDB.getSamplesCollection(); - - RODFileName = input.getSource(); - int lastSep = RODFileName.lastIndexOf(File.separator); - RODFileName = RODFileName.substring(lastSep + 1); - - // set up indices - - mongoAttributes.ensureIndex("location"); - mongoAttributes.ensureIndex("sourceROD"); - mongoAttributes.ensureIndex("contig"); - mongoAttributes.ensureIndex("start"); - mongoAttributes.ensureIndex("stop"); - - mongoSamples.ensureIndex("location"); - mongoSamples.ensureIndex("sample"); - mongoSamples.ensureIndex("sourceROD"); - mongoSamples.ensureIndex("contig"); - mongoSamples.ensureIndex("start"); - mongoSamples.ensureIndex("stop"); - - // set up primary keys - mongoAttributes.ensureIndex(new BasicDBObject("location", 1).append("sourceROD", 1).append("alleles", 1), new BasicDBObject("unique", 1)); - mongoSamples.ensureIndex(new BasicDBObject("location", 1).append("sourceROD", 1).append("alleles", 1).append("sample", 1), new BasicDBObject("unique", 1)); - } - - /** - * Initialize the number of loci processed to zero. - * - * @return 0 - */ - public Integer reduceInit() { return 0; } - - /** - * - * @param tracker the meta-data tracker - * @param ref the reference base - * @param context the context for the given locus - * @return 1 if the locus was successfully processed, 0 if otherwise - */ - public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( tracker == null ) - return 0; - - DBCollection mongoAttributes = MongoDB.getAttributesCollection(); - DBCollection mongoSamples = MongoDB.getSamplesCollection(); - - for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) { - if ( feature instanceof VariantContext ) { - VariantContext vc = (VariantContext) feature; - - Pair> mongoCollections = vc.toMongoDB(RODFileName); - mongoAttributes.insert(mongoCollections.first); - for (BasicDBObject sampleForMongo : mongoCollections.second) { - mongoSamples.insert(sampleForMongo); - } - } - } - - return 1; - } - - /** - * Increment the number of rods processed. - * - * @param value result of the map. - * @param sum accumulator for the reduce. - * @return the new number of rods processed. - */ - public Integer reduce(Integer value, Integer sum) { - return sum + value; - } - - public void onTraversalDone(Integer result) { - MongoDB.close(); - } -} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 546c7b5bc..184dfc78b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -24,10 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; -import com.mongodb.BasicDBList; -import com.mongodb.BasicDBObject; -import com.mongodb.DBCursor; -import com.mongodb.DBObject; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; @@ -40,12 +36,9 @@ import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; -import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.db.MongoDB; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.*; @@ -191,8 +184,7 @@ import java.util.*; * */ public class SelectVariants extends RodWalker implements TreeReducible { - @ArgumentCollection - protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); /** * A site is considered discordant if there exists some sample in the variant track that has a non-reference genotype @@ -365,8 +357,6 @@ public class SelectVariants extends RodWalker implements TreeR private Set IDsToKeep = null; - private final static boolean mongoOn = false; - /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher */ @@ -488,7 +478,7 @@ public class SelectVariants extends RodWalker implements TreeR if ( tracker == null ) return 0; - Collection vcs = mongoOn ? getMongoVariants(ref, context.getLocation()) : tracker.getValues(variantCollection.variants, context.getLocation()); + Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation()); if ( vcs == null || vcs.size() == 0) { return 0; @@ -568,193 +558,6 @@ public class SelectVariants extends RodWalker implements TreeR return 1; } - private Collection getMongoVariants(ReferenceContext ref, GenomeLoc location) { - String contig = location.getContig(); - long start = location.getStart(); - - ArrayList vcs = new ArrayList(); - - BasicDBObject query = new BasicDBObject(); - query.put("contig", contig); - query.put("start", start); - // can't know stop location for deletions from reference - - DBCursor attributesCursor = MongoDB.getAttributesCollection().find(query); - DBCursor samplesCursor = MongoDB.getSamplesCollection().find(query); - - Map>,VariantContextBuilder> attributesFromDB = new HashMap>,VariantContextBuilder>(); - - while(attributesCursor.hasNext()) { - DBObject oneResult = attributesCursor.next(); - - String sourceROD = (String)oneResult.get("sourceROD"); - - ArrayList alleles = new ArrayList(); - BasicDBObject allelesInDb = (BasicDBObject)oneResult.get("alleles"); - for (Object alleleInDb : allelesInDb.values()) { - String rawAllele = (String)alleleInDb; - boolean isRef = rawAllele.contains("*"); - String allele = rawAllele.replace("*", ""); - alleles.add(Allele.create(allele, isRef)); - } - - // primary key to uniquely identify variant - Pair> sourceRodAllelePair = new Pair>(sourceROD, alleles); - - Map attributes = new TreeMap(); - BasicDBList attrsInDb = (BasicDBList)oneResult.get("attributes"); - for (Object attrInDb : attrsInDb) { - BasicDBObject attrKVP = (BasicDBObject)attrInDb; - String key = (String)attrKVP.get("key"); - Object value = attrKVP.get("value"); - attributes.put(key, value); - } - - Set filters = new HashSet(); - BasicDBObject filtersInDb = (BasicDBObject)oneResult.get("filters"); - if (filtersInDb != null) { - for (Object filterInDb : filtersInDb.values()) { - filters.add((String)filterInDb); - } - } - - String source = (String)oneResult.get("source"); - String id = (String)oneResult.get("id"); - Double error = (Double)oneResult.get("error"); - Long stop = (Long)oneResult.get("stop"); - - VariantContextBuilder builder = new VariantContextBuilder(source, contig, start, stop, sourceRodAllelePair.getSecond()); - - builder.id(id); - builder.log10PError(error); - builder.attributes(attributes); - builder.filters(filters); - - long index = start - ref.getWindow().getStart() - 1; - if ( index >= 0 ) { - // we were given enough reference context to create the VariantContext - builder.referenceBaseForIndel(ref.getBases()[(int)index]); // TODO: needed? - } - - builder.referenceBaseForIndel(ref.getBases()[0]); // TODO: correct? - - attributesFromDB.put(sourceRodAllelePair, builder); - } - - while(samplesCursor.hasNext()) { - DBObject oneResult = samplesCursor.next(); - - String sourceROD = (String)oneResult.get("sourceROD"); - - ArrayList alleles = new ArrayList(); - BasicDBObject allelesInDb = (BasicDBObject)oneResult.get("alleles"); - for (Object alleleInDb : allelesInDb.values()) { - String rawAllele = (String)alleleInDb; - boolean isRef = rawAllele.contains("*"); - String allele = rawAllele.replace("*", ""); - alleles.add(Allele.create(allele, isRef)); - } - - // primary key to uniquely identify variant - Pair> sourceRodAllelePair = new Pair>(sourceROD, alleles); - VariantContextBuilder builder = attributesFromDB.get(sourceRodAllelePair); - - String sample = (String)oneResult.get("sample"); - - BasicDBObject genotypeInDb = (BasicDBObject)oneResult.get("genotype"); - Double genotypeError = (Double)genotypeInDb.get("error"); - - ArrayList genotypeAlleles = new ArrayList(); - BasicDBObject genotypeAllelesInDb = (BasicDBObject)genotypeInDb.get("alleles"); - for (Object alleleInDb : genotypeAllelesInDb.values()) { - String rawAllele = (String)alleleInDb; - boolean isRef = rawAllele.contains("*"); - String allele = rawAllele.replace("*", ""); - genotypeAlleles.add(Allele.create(allele, isRef)); - } - - Map genotypeAttributes = new TreeMap(); - BasicDBList genotypeAttrsInDb = (BasicDBList)genotypeInDb.get("attributes"); - for (Object attrInDb : genotypeAttrsInDb) { - BasicDBObject attrKVP = (BasicDBObject)attrInDb; - String key = (String)attrKVP.get("key"); - Object value = attrKVP.get("value"); - genotypeAttributes.put(key, value); - } - - Genotype genotype = new Genotype(sample, genotypeAlleles, genotypeError); - builder.genotypes(Genotype.modifyAttributes(genotype, genotypeAttributes)); - vcs.add(builder.make()); - } - - return combineMongoVariants(vcs); - } - - // Copied from CombineVariants - private Collection combineMongoVariants(Collection vcs) { - if (vcs.size() < 2) - return vcs; - - List mergedVCs = new ArrayList(); - - //defaults from CombineVariants - VariantContextUtils.MultipleAllelesMergeType multipleAllelesMergeType = VariantContextUtils.MultipleAllelesMergeType.BY_TYPE; - List priority = new ArrayList(); - priority.add("input"); - VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED; - VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE; - boolean printComplexMerges = false; - String SET_KEY = "set"; - boolean filteredAreUncalled = false; - boolean MERGE_INFO_WITH_MAX_AC = false; - - if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.BY_TYPE) { - Map> VCsByType = VariantContextUtils.separateVariantContextsByType(vcs); - - // TODO -- clean this up in a refactoring - // merge NO_VARIATION into another type of variant (based on the ordering in VariantContext.Type) - if ( VCsByType.containsKey(VariantContext.Type.NO_VARIATION) && VCsByType.size() > 1 ) { - final List refs = VCsByType.remove(VariantContext.Type.NO_VARIATION); - for ( VariantContext.Type type : VariantContext.Type.values() ) { - if ( VCsByType.containsKey(type) ) { - VCsByType.get(type).addAll(refs); - break; - } - } - } - - // iterate over the types so that it's deterministic - for (VariantContext.Type type : VariantContext.Type.values()) { - if (VCsByType.containsKey(type)) - mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type), - priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, - SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); - } - } - else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) { - mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs, - priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges, - SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC)); - } - else { - logger.warn("Ignoring all records at site"); - } - - List recomputedVCs = new ArrayList(); - for ( VariantContext mergedVC : mergedVCs ) { - // only operate at the start of events - if ( mergedVC == null ) - continue; - - final VariantContextBuilder builder = new VariantContextBuilder(mergedVC); - // re-compute chromosome counts - VariantContextUtils.calculateChromosomeCounts(builder, false); - recomputedVCs.add(builder.make()); - } - - return recomputedVCs; - } - private boolean hasPLs(final VariantContext vc) { for ( Genotype g : vc.getGenotypes() ) { if ( g.hasLikelihoods() ) @@ -857,10 +660,6 @@ public class SelectVariants extends RodWalker implements TreeR } public void onTraversalDone(Integer result) { - if (mongoOn) { - MongoDB.close(); - } - logger.info(result + " records processed."); if (SELECT_RANDOM_NUMBER) { diff --git a/public/java/src/org/broadinstitute/sting/utils/db/MongoDB.java b/public/java/src/org/broadinstitute/sting/utils/db/MongoDB.java deleted file mode 100644 index ec5f0823f..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/db/MongoDB.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.broadinstitute.sting.utils.db; - -import com.mongodb.DB; -import com.mongodb.DBCollection; -import com.mongodb.Mongo; -import org.broadinstitute.sting.utils.exceptions.StingException; - -import java.net.UnknownHostException; - -/** - * Created with IntelliJ IDEA. - * User: thibault - * Date: 4/26/12 - * Time: 3:01 PM - * Handles Mongo DB connections - */ -final public class MongoDB { - private final static String MONGO_HOST = "couchdb.broadinstitute.org"; - private final static Integer MONGO_PORT = 43054; - private final static String MONGO_DB_NAME = "bjorn"; - private final static String MONGO_ATTRIBUTES_COLLECTION = "attributes"; - private final static String MONGO_SAMPLES_COLLECTION = "samples"; - - protected Mongo mongo; - protected DBCollection mongoAttributes; - protected DBCollection mongoSamples; - - final private static MongoDB INSTANCE = new MongoDB(); - - public static DBCollection getAttributesCollection() { - return INSTANCE.mongoAttributes; - } - - public static DBCollection getSamplesCollection() { - return INSTANCE.mongoSamples; - } - - public static void close() { - INSTANCE.mongo.close(); - } - - private MongoDB() { - try { - mongo = new Mongo(MONGO_HOST, MONGO_PORT); - DB mongoDb = mongo.getDB(MONGO_DB_NAME); - mongoAttributes = mongoDb.getCollection(MONGO_ATTRIBUTES_COLLECTION); - mongoSamples = mongoDb.getCollection(MONGO_SAMPLES_COLLECTION); - } catch (UnknownHostException e) { - throw new StingException(e.getMessage(), e); - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index f3e9e840d..3faad46e2 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1,11 +1,9 @@ package org.broadinstitute.sting.utils.variantcontext; -import com.mongodb.BasicDBObject; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.*; @@ -1220,101 +1218,6 @@ public class VariantContext implements Feature { // to enable tribble integratio this.getGenotypes()); } - /** - * Generate a Mongo DB attributes collection element and a set of samples collection elements - * @param sourceROD - * @return - */ - public Pair> toMongoDB(String sourceROD) { - // fields common to both attributes and samples collections - BasicDBObject siteDoc = new BasicDBObject(); - - siteDoc.put("location", contig + ":" + (start - stop == 0 ? start : start + "-" + stop)); - siteDoc.put("contig", contig); - siteDoc.put("start", start); - siteDoc.put("stop", stop); - siteDoc.put("id", this.getID()); - siteDoc.put("error", this.getLog10PError()); - siteDoc.put("source", this.getSource()); - siteDoc.put("sourceROD", sourceROD); - siteDoc.put("type", this.getType().toString()); - - Integer alleleIndex = 0; - BasicDBObject allelesDoc = new BasicDBObject(); - for (Allele allele : this.getAlleles()) - { - String index = alleleIndex.toString(); - allelesDoc.put(index, allele.toString()); - alleleIndex++; - } - siteDoc.put("alleles", allelesDoc); - - Integer filterIndex = 0; - BasicDBObject filtersDoc = new BasicDBObject(); - for (String filter : this.getFilters()) - { - String index = filterIndex.toString(); - filtersDoc.put(index, filter.toString()); - filterIndex++; - } - if (filterIndex > 0) { - siteDoc.put("filters", filtersDoc); - } - - // attributes collection - - BasicDBObject attributesDoc = new BasicDBObject(siteDoc); - List attributeKVPs = new ArrayList(); - for (Map.Entry attribute : this.getAttributes().entrySet() ) - { - String key = attribute.getKey(); - Object value = attribute.getValue(); - BasicDBObject attributeKVP = new BasicDBObject(); - attributeKVP.put("key", key); - attributeKVP.put("value", value); - attributeKVPs.add(attributeKVP); - } - attributesDoc.put("attributes", attributeKVPs); - - // samples collection - - List samplesDocs = new ArrayList(); - for (Genotype genotype : this.getGenotypes()) { - BasicDBObject sampleDoc = new BasicDBObject(siteDoc); - sampleDoc.put("sample", genotype.getSampleName()); - - BasicDBObject genotypesDoc = new BasicDBObject(); - Integer genotypeAlleleIndex = 0; - BasicDBObject genotypeAllelesDoc = new BasicDBObject(); - for (Allele allele : genotype.getAlleles()) - { - String index = genotypeAlleleIndex.toString(); - genotypeAllelesDoc.put(index, allele.toString()); - genotypeAlleleIndex++; - } - genotypesDoc.put("alleles", genotypeAllelesDoc); - - List genotypesAttributesDocs = new ArrayList(); - for (Map.Entry attribute : genotype.getAttributes().entrySet() ) - { - String key = attribute.getKey(); - Object value = attribute.getValue(); - BasicDBObject genotypesAttributesDoc = new BasicDBObject(); - genotypesAttributesDoc.put("key", key); - genotypesAttributesDoc.put("value", value); - genotypesAttributesDocs.add(genotypesAttributesDoc); - } - genotypesDoc.put("attributes", genotypesAttributesDocs); - genotypesDoc.put("error", genotype.getLog10PError()); - - sampleDoc.put("genotype", genotypesDoc); - - samplesDocs.add(sampleDoc); - } - - return new Pair>(attributesDoc, samplesDocs); - } - // protected basic manipulation routines private static List makeAlleles(Collection alleles) { final List alleleList = new ArrayList(alleles.size());