diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 23ec0fa70..7a1381b62 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.codecs.vcf.*; +import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.*; @@ -565,15 +566,23 @@ public class SelectVariants extends RodWalker implements TreeR // can't know stop location for deletions from reference DBCursor cursor = mongoCollection.find(query); - Map results = new HashMap(); - Map> genotypes = new HashMap>(); + Map>,DBObject> results = new HashMap>,DBObject>(); + Map>,List> genotypes = new HashMap>,List>(); + while(cursor.hasNext()) { DBObject oneResult = cursor.next(); - String type = (String)oneResult.get("type"); - results.put(type, oneResult); - String sample = (String)oneResult.get("sample"); + String sourceROD = (String)oneResult.get("sourceROD"); + + ArrayList alleles = new ArrayList(); + BasicDBObject allelesInDb = (BasicDBObject)oneResult.get("alleles"); + for (Object alleleInDb : allelesInDb.values()) { + String rawAllele = (String)alleleInDb; + boolean isRef = rawAllele.contains("*"); + String allele = rawAllele.replace("*", ""); + alleles.add(Allele.create(allele, isRef)); + } BasicDBObject genotypeInDb = (BasicDBObject)oneResult.get("genotype"); Double genotypeError = (Double)genotypeInDb.get("error"); @@ -598,24 +607,20 @@ public class SelectVariants extends RodWalker implements TreeR Genotype genotype = new Genotype(sample, genotypeAlleles, genotypeError); - if (!genotypes.containsKey(type)) - genotypes.put(type, new ArrayList()); + // primary key to uniquely identify variant + Pair> sourceRodAllelePair = new Pair>(sourceROD, alleles); - Collection genotypesByType = genotypes.get(type); - genotypesByType.add(Genotype.modifyAttributes(genotype, genotypeAttributes)); + if (!genotypes.containsKey(sourceRodAllelePair)) + genotypes.put(sourceRodAllelePair, new ArrayList()); + + Collection genotypesBySourceROD = genotypes.get(sourceRodAllelePair); + genotypesBySourceROD.add(Genotype.modifyAttributes(genotype, genotypeAttributes)); + + results.put(sourceRodAllelePair, oneResult); } - for (String type : results.keySet()) { - DBObject result = results.get(type); - - ArrayList alleles = new ArrayList(); - BasicDBObject allelesInDb = (BasicDBObject)result.get("alleles"); - for (Object alleleInDb : allelesInDb.values()) { - String rawAllele = (String)alleleInDb; - boolean isRef = rawAllele.contains("*"); - String allele = rawAllele.replace("*", ""); - alleles.add(Allele.create(allele, isRef)); - } + for (Pair> sourceRodAllelePair : results.keySet()) { + DBObject result = results.get(sourceRodAllelePair); Map attributes = new TreeMap(); BasicDBList attrsInDb = (BasicDBList)result.get("attributes"); @@ -638,11 +643,11 @@ public class SelectVariants extends RodWalker implements TreeR String id = (String)result.get("id"); Double error = (Double)result.get("error"); - VariantContextBuilder builder = new VariantContextBuilder(source, contig, start, stop, alleles); + VariantContextBuilder builder = new VariantContextBuilder(source, contig, start, stop, sourceRodAllelePair.getSecond()); builder.id(id); builder.log10PError(error); - builder.genotypes(genotypes.get(type)); + builder.genotypes(genotypes.get(sourceRodAllelePair)); builder.attributes(attributes); builder.filters(filters);