From 09c3b15af3dea830815e955edd5f26a25d78548a Mon Sep 17 00:00:00 2001
From: weisburd <weisburd@348d0f76-0448-11de-a6fe-93d51630548a>
Date: Wed, 2 Jun 2010 16:28:06 +0000
Subject: [PATCH] Implemented joins

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3476 348d0f76-0448-11de-a6fe-93d51630548a
---
 .../annotator/VariantAnnotatorEngine.java     | 437 ++++++++++++++----
 1 file changed, 357 insertions(+), 80 deletions(-)
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
index 962088598..b398173fe 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
@@ -25,6 +25,18 @@
 
 package org.broadinstitute.sting.gatk.walkers.annotator;
 
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+
 import org.broad.tribble.dbsnp.DbSNPFeature;
 import org.broad.tribble.vcf.VCFHeaderLine;
 import org.broad.tribble.vcf.VCFInfoHeaderLine;
@@ -42,10 +54,9 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationType
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
 import org.broadinstitute.sting.playground.gatk.walkers.annotator.GenomicAnnotation;
-import org.broadinstitute.sting.utils.classloader.PackageUtils;
+import org.broadinstitute.sting.playground.gatk.walkers.annotator.JoinTable;
 import org.broadinstitute.sting.utils.StingException;
-
-import java.util.*;
+import org.broadinstitute.sting.utils.classloader.PackageUtils;
 
 
 public class VariantAnnotatorEngine {
@@ -60,12 +71,20 @@ public class VariantAnnotatorEngine {
     // how about hapmap3?
     private boolean annotateHapmap3 = false;
 
-    // command-line option used for GenomicAnnotation.
+    // command-line option from GenomicAnnotator.
     private Map<String, Set<String>> requestedColumnsMap;
 
-    // command-line option used for GenomicAnnotation.
+    // command-line option from GenomicAnnotator.
     private boolean oneToMany;
 
+    // command-line option from GenomicAnnotator.
+    private List<JoinTable> joinTables;
+
+    // used by GenomicAnnotator. Maps binding name to number of output VCF records
+    // annotated with records from the input table with this binding name. Only used for
+    // printing out stats at the end.
+    private Map<String, Integer> inputTableHitCounter = new HashMap<String, Integer>();
+
 
     // use this constructor if you want all possible annotations
     public VariantAnnotatorEngine(GenomeAnalysisEngine engine) {
@@ -178,7 +197,7 @@ public class VariantAnnotatorEngine {
 
     public Collection<VariantContext> annotateContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
 
-        Map<String, Object> infoAnnotations = new HashMap<String, Object>(vc.getAttributes());
+        Map<String, Object> infoAnnotations = new LinkedHashMap<String, Object>(vc.getAttributes());
 
         // annotate dbsnp occurrence
         if ( annotateDbsnp ) {
@@ -201,9 +220,8 @@ public class VariantAnnotatorEngine {
 
         //Process the info field
         List<Map<String, Object>> infoAnnotationOutputsList = new LinkedList<Map<String, Object>>(); //each element in infoAnnotationOutputs corresponds to a single line in the output VCF file
-        infoAnnotationOutputsList.add(new HashMap<String, Object>(vc.getAttributes())); //keep the existing info-field annotations. After this infoAnnotationOutputsList.size() == 1, which means the output VCF file gains 1 line.
-        // put the DB membership info in
-        infoAnnotationOutputsList.get(0).putAll(infoAnnotations);
+        infoAnnotationOutputsList.add(new LinkedHashMap<String, Object>(vc.getAttributes())); //keep the existing info-field annotations. After this infoAnnotationOutputsList.size() == 1, which means the output VCF file has 1 additional line.
+        infoAnnotationOutputsList.get(0).putAll(infoAnnotations);  // put the DB membership info in
 
         //go through all the requested info annotationTypes
         for ( InfoFieldAnnotation annotationType : requestedInfoAnnotations )
@@ -215,24 +233,7 @@ public class VariantAnnotatorEngine {
 
             if(annotationType instanceof GenomicAnnotation)
             {
-                //go through the annotations returned by GenericAnnotation for each -B input file.
-                for( Map.Entry<String, Object> annotationsFromInputFile : annotationsFromCurrentType.entrySet() )
-                {
-                    final String inputFileBindingName = annotationsFromInputFile.getKey();
-                    final List<Map<String, String>> matchingRecords = (List<Map<String, String>>) annotationsFromInputFile.getValue();
-
-                    if( matchingRecords.size() > 1 && oneToMany)
-                    {
-                        //More than one record matched in this file. After this, infoAnnotationOutputsList.size() will be infoAnnotationOutputsList.size()*matchingRecords.size().
-                        infoAnnotationOutputsList = explodeInfoAnnotationOutputsList( infoAnnotationOutputsList, matchingRecords, inputFileBindingName);
-                    }
-                    else
-                    {
-                        //This doesn't change infoAnnotationOutputsList.size(). If more than one record matched, their annotations will
-                        //all be added to the same output line, with keys disambiguated by appending _i .
-                        addToExistingAnnotationOutputs( infoAnnotationOutputsList, matchingRecords, inputFileBindingName);
-                    }
-                }
+                infoAnnotationOutputsList = processGenomicAnnotation( infoAnnotationOutputsList, annotationsFromCurrentType );
             }
             else
             {
@@ -276,11 +277,146 @@ public class VariantAnnotatorEngine {
         return returnValue;
     }
 
+    /**
+     * Finish processing data from GenomicAnnotation.
+     *
+     * @param infoAnnotationOutputsList
+     * @param annotationsForCurrentLocusFromAllAnnotatorInputTables
+     * @return
+     */
+    private List<Map<String, Object>> processGenomicAnnotation( List<Map<String, Object>> infoAnnotationOutputsList, Map<String, Object> annotationsForCurrentLocusFromAllAnnotatorInputTables)
+    {
+
+        //process the map returned by GenomicAnnotation. This completes processing of the -B args.
+        for( Map.Entry<String, Object> annotationsFromOneInputTable : annotationsForCurrentLocusFromAllAnnotatorInputTables.entrySet() )
+        {
+            final String inputTableBindingName = annotationsFromOneInputTable.getKey();
+            final List<Map<String, String>> matchingRecords = (List<Map<String, String>>) annotationsFromOneInputTable.getValue();
+
+            if( matchingRecords.size() > 1 && oneToMany)
+            {
+                //More than one record matched in this file. After this, infoAnnotationOutputsList.size() will be infoAnnotationOutputsList.size()*matchingRecords.size().
+                infoAnnotationOutputsList = explodeInfoAnnotationOutputsList( infoAnnotationOutputsList, matchingRecords, inputTableBindingName );
+            }
+            else
+            {
+                //This doesn't change infoAnnotationOutputsList.size(). If more than one record matched, their annotations will
+                //all be added to the same output line, with keys disambiguated by appending _i .
+                addToExistingAnnotationOutputs( infoAnnotationOutputsList, matchingRecords, inputTableBindingName );
+            }
+        }
+
+        //process -J args
+        if(joinTables != null)
+        {
+            //for each joinTable, join it with the data in the info-field of each output line.
+            for(JoinTable joinTable : joinTables)
+            {
+                //for each info field, join it to the current join table
+                final List<Map<String, Object>> previousInfoAnnotationOutputsList = new LinkedList<Map<String, Object>>(infoAnnotationOutputsList);  //create a shallow copy because infoAnnotationOutputsList will change during the iteration.
+                for(Map<String, Object> outputRecordInfoField : previousInfoAnnotationOutputsList)
+                {
+                    infoAnnotationOutputsList = performJoin( infoAnnotationOutputsList, outputRecordInfoField, joinTable );
+                }
+            }
+        }
+
+        //apply -S args last to select the columns requested by the user
+        if(requestedColumnsMap != null) {
+            infoAnnotationOutputsList = applySelectArg(infoAnnotationOutputsList);
+        }
+
+        return infoAnnotationOutputsList;
+    }
 
     /**
-     * Implements non-explode mode, where the output lines have a one-to-one relationship
+     * Performs a join between the an info field record represented by outputRecordInfoField and the infoAnnotationOutputsList.
+     *
+     * @param infoAnnotationOutputsList
+     * @param outputRecordInfoField
+     * @param joinTable
+     * @return
+     */
+    private List<Map<String, Object>> performJoin( List<Map<String, Object>> infoAnnotationOutputsList,  Map<String, Object> outputRecordInfoField, JoinTable joinTable)
+    {
+        //System.err.println("Looking at: " + joinTable.getLocalBindingName()+ "- join to " + joinTable.getExternalBindingName() + "." + joinTable.getExternalColumnName() );
+        //for the current joinTable, for each output line, find the externalJoinColumnValue and see if it matches the joinColumnValue of any record(s) in this joinTable.
+        final String externalBindingName = joinTable.getExternalBindingName();
+        final String externalColumnName = joinTable.getExternalColumnName();
+        final String fullyQualifiedExternalColumnName = GenomicAnnotation.generateInfoFieldKey(externalBindingName, externalColumnName);
+
+        //find the externalJoinColumnValue in the current info field, and then look up any joinTable records that have this value for the localJoinColumnValue
+        List<ArrayList<String>> matchingJoinTableRecords = null; //record(s) in the join table whose joinColumnValue(s) matches the joinColumnValue inside the current outputRecordInfoField. Since the join keys don't have to be unique, there may be more than one record in the join table thtat matches.
+        final Object numInfoFieldKeysToCheckObj = outputRecordInfoField.get(GenomicAnnotation.generateInfoFieldKey(externalBindingName, GenomicAnnotation.NUM_MATCHES_SPECIAL_INFO_FIELD));
+        if(numInfoFieldKeysToCheckObj == null) {
+            //only 1 record in the externalBindingName -B AnnotatoInfoTable overlapped the current position
+            Object externalColumnValue = outputRecordInfoField.get(fullyQualifiedExternalColumnName);
+            if(externalColumnValue != null) {
+                matchingJoinTableRecords = joinTable.get(externalColumnValue.toString());
+                //System.err.println("Found matching record(s) in join table for record: " + outputRecordInfoField + " where " + fullyQualifiedExternalColumnName  + "==" + externalColumnValue +  ":  " + matchingJoinTableRecords);
+            }
+        } else {
+            //multiple records in the externalBindingName -B AnnotatoInfoTable overlapped the current position
+            final int numInfoFieldKeysToCheck = Integer.parseInt(numInfoFieldKeysToCheckObj.toString());
+            for(int i = 0; i < numInfoFieldKeysToCheck; i++) {
+                final Object externalColumnValue = outputRecordInfoField.get(fullyQualifiedExternalColumnName + "_" + i);
+                if(externalColumnValue != null) {
+                    matchingJoinTableRecords = joinTable.get(externalColumnValue.toString());
+                    if(matchingJoinTableRecords != null) {
+                        //System.err.println("Found matching record(s) in join table for record: " + outputRecordInfoField + " where " + fullyQualifiedExternalColumnName  + "==" + externalColumnValue +  ":  " + matchingJoinTableRecords);
+                        break;
+                    }
+                }
+            }
+        }
+
+        //if match(s) for the externalJoinColumnValue in the current outputRecordInfoField have been found in the join table, perform the join.
+        if(matchingJoinTableRecords != null)
+        {
+            final String joinTableBindingName = joinTable.getLocalBindingName();
+
+            //convert the List<ArrayList<String>> to List<Map<String, String>> by hashing the values from the ArrayList<String> by their column names.
+            final List<Map<String, String>> matchingJoinTableRecordsConverted = new LinkedList<Map<String,String>>();
+            for(ArrayList<String> columnValues : matchingJoinTableRecords) {
+                final List<String> columnNames = joinTable.getColumnNames();
+
+                final Map<String, String> matchingRecord = new LinkedHashMap<String, String>();
+                for(int i = 0; i < columnNames.size(); i++) {
+                    matchingRecord.put(columnNames.get(i), columnValues.get(i));
+                }
+
+                matchingJoinTableRecordsConverted.add(GenomicAnnotation.convertRecordToAnnotations(joinTableBindingName, matchingRecord));
+            }
+
+
+
+            // do the join between the outputRecordInfoField and the matchingJoinTableRecords, then add the results to to infoAnnotationOutputsList
+            List<Map<String, Object>> tempList = new LinkedList<Map<String, Object>>();
+            tempList.add(outputRecordInfoField);
+            if( matchingJoinTableRecordsConverted.size() > 1 && oneToMany)
+            {
+                //More than one record in the joinTable matched the current info field. After this, infoAnnotationOutputsList.size() will be infoAnnotationOutputsList.size()*matchingRecords.size().
+                tempList = explodeInfoAnnotationOutputsList( tempList, matchingJoinTableRecordsConverted, joinTableBindingName );
+            }
+            else
+            {
+                //This doesn't change infoAnnotationOutputsList.size(). If more than one record matched, their annotations will
+                //all be added to the same output line, with keys disambiguated by appending _i .
+                addToExistingAnnotationOutputs( tempList, matchingJoinTableRecordsConverted, joinTableBindingName );
+            }
+
+            infoAnnotationOutputsList.remove(outputRecordInfoField); //remove the old info field
+            infoAnnotationOutputsList.addAll(tempList); //add the new info field(s) that have been joined with the matchingJoinTableRecords
+        }
+        return infoAnnotationOutputsList;
+    }
+
+
+    /**
+     * Implements not-oneToMany mode, where the output lines have a one-to-one relationship
      * with the input variants, and all multiple-match records are collapsed into the single info field.
-     * The collapsing is done by appending an _i to each key name (where 'i' is a record counter).
+     * The collapsing is done by appending an _i to each key name (where 'i' is a record counter), as well
+     * as a special bindingName.numMatchingRecords=n key-value pair which specifies the upper limit of the counter.
      *
      * @param infoAnnotationOutputsList
      * @param matchingRecords
@@ -293,38 +429,54 @@ public class VariantAnnotatorEngine {
         //For each matching record, just add its annotations to all existing output lines.
         final boolean renameKeys = matchingRecords.size() > 1;
         for(int i = 0; i < matchingRecords.size(); i++) {
-            Map<String,String> annotationsForRecord = matchingRecords.get(i);
-            annotationsForRecord = selectColumnsFromRecord(bindingName, annotationsForRecord); //use only those columns that the user specifically requested.
+            Map<String,String> currentRecord = matchingRecords.get(i);
 
             if(renameKeys) {
-                //Rename keys to avoid naming conflicts (eg. if you have multiple dbsnp matches,
-                // dbSNP.avHet=value1 from record 1 and dbSNP.avHet=value2 from record 2 will become dbSNP.avHet_1=value1 and dbSNP.avHet_2=value2 )
-                Map<String,String> annotationsForRecordWithRenamedKeys = new HashMap<String, String>();
-                for(Map.Entry<String, String> annotation : annotationsForRecord.entrySet()) {
-                    annotationsForRecordWithRenamedKeys.put(annotation.getKey() + "_" + (i + 1), annotation.getValue());
+                //Rename keys to avoid naming conflicts. After this all keys from the i'th matching record will have _i appended to them.
+                // (This solves the following problem: if you have multiple dbsnp matches - such as dbSNP.avHet=value1 from record 1 and
+                //  dbSNP.avHet=value2 from record 2, the keys will be renamed to dbSNP.avHet_1=value1 and dbSNP.avHet_2=value2 )
+                Map<String,String> currentRecordWithRenamedKeys = new LinkedHashMap<String, String>();
+                for(final Map.Entry<String, String> annotation : currentRecord.entrySet()) {
+                    currentRecordWithRenamedKeys.put(annotation.getKey() + "_" + (i + 1), annotation.getValue());
                 }
-
-                annotationsForRecord = annotationsForRecordWithRenamedKeys;
+                currentRecordWithRenamedKeys.put(GenomicAnnotation.generateInfoFieldKey(bindingName, GenomicAnnotation.NUM_MATCHES_SPECIAL_INFO_FIELD),
+                        Integer.toString(matchingRecords.size())); //add the special field that specifies how many matchingRecords there were.
+                currentRecord = currentRecordWithRenamedKeys;
             }
 
             //Add the annotations from this record to each output line.
-            for(Map<String, Object> infoAnnotationOutput : infoAnnotationOutputsList) {
-                infoAnnotationOutput.putAll(annotationsForRecord);
+            for(Map<String, Object> outputRecordInfoField : infoAnnotationOutputsList) {
+                outputRecordInfoField.putAll(currentRecord);
             }
         }
+
+        incrementStatsCounter(bindingName, infoAnnotationOutputsList.size());
+    }
+
+    /**
+     * Records statistics that will be printed when GenomicAnnotator finishes.
+     *
+     * @param bindingName The table from which annotations were gotten
+     * @param numNewRecords The number of new output VCF records created with annotations from this table
+     */
+    private void incrementStatsCounter( final String bindingName, int numNewRecords) {
+        //record some stats - there were infoAnnotationOutputsList.size() output VCF records annotated with data from the 'bindingName' input table.
+        Integer counter = inputTableHitCounter.get(bindingName);
+        if( counter == null ) {
+            inputTableHitCounter.put(bindingName, numNewRecords); //init the counter
+        } else {
+            inputTableHitCounter.put(bindingName, counter + numNewRecords); //increment the counter
+        }
     }
 
     /**
-     * Implements "explode" mode. Takes the current list of
-     * infoAnnotationOutputs (each element of will end up in a different line
-     * of the output VCF file), and generates/returns a new list of infoAnnotationOutputs
-     * which contain one copy of the current infoAnnotationOutputs for each record
-     * in matching records. The returned list will have size:
-     *
+     * Implements oneToMany mode. Takes the current infoAnnotationOutputsList
+     * (where each element represents a line in the output VCF file), and
+     * generates a new infoAnnotationOutputsList which contains one copy of the current
+     * infoAnnotationOutputs for each record matchingRecords.
+     * The returned list will have size:
      * infoAnnotationOutputsList.size() * matchingRecords.size()
      *
-     * See class-level comments for more details.
-     *
      * @param infoAnnotationOutputsList
      * @param matchingRecords
      * @param bindingName
@@ -342,58 +494,117 @@ public class VariantAnnotatorEngine {
         //For each matching record, generate a new output line
         for(int i = 0; i < matchingRecords.size(); i++) {
             Map<String,String> annotationsForRecord = matchingRecords.get(i);
-            annotationsForRecord = selectColumnsFromRecord(bindingName, annotationsForRecord); //use only those columns that the user specifically requested.
 
             //Add the annotations from this record to each output line.
-            for(Map<String, Object> infoAnnotationOutput : infoAnnotationOutputsList) {
-                Map<String, Object> infoAnnotationOutputCopy = new HashMap<String, Object>(infoAnnotationOutput); //create a new copy of this line.
-                infoAnnotationOutputCopy.putAll(annotationsForRecord); //Adds the column-value pairs from this record to this line.
+            for(Map<String, Object> outputRecordInfoField : infoAnnotationOutputsList) {
+                Map<String, Object> outputRecordInfoFieldCopy = new LinkedHashMap<String, Object>(outputRecordInfoField); //create a new copy of this line.
+                outputRecordInfoFieldCopy.putAll(annotationsForRecord); //Adds the column-value pairs from this record to this line.
 
-                newInfoAnnotationOutputsList.add(infoAnnotationOutputCopy); //Add the line to the new list of lines.
+                newInfoAnnotationOutputsList.add(outputRecordInfoFieldCopy); //Add the line to the new list of lines.
             }
         }
 
+        recordStats(bindingName, newInfoAnnotationOutputsList.size(), infoAnnotationOutputsList, matchingRecords.size());
+
         return newInfoAnnotationOutputsList;
     }
 
 
-
     /**
-     * Takes a list of key-value pairs and returns a new Map containing only the columns which were requested by the user
-     * via the -s arg. If there was no -s arg that referenced the given bindingName, all annotationsForRecord returned untouched.
-     *
-     * @param bindingName The binding name for a particular ROD input file.
-     * @param annotationsForRecord The list of column_name -> value pairs for a particular record from the given input file.
-     *
-     * @return Map - see above.
+     * Records statistics for the explodeInfoAnnotationOutputsList(..) calculation.
+     * @param bindingName The table from which annotations were gotten
+     * @param numNewVCFRecordsAnnotatedWithBindingNameData The number of new output VCF records created with annotations from this table
+     * @param infoAnnotationOutputsList
+     * @parma matchingRecordsSize
      */
-    private Map<String, String> selectColumnsFromRecord( String bindingName, Map<String, String> annotationsForRecord) {
-        if(requestedColumnsMap == null || !requestedColumnsMap.containsKey(bindingName)) {
-            return annotationsForRecord;
-        }
+    private void recordStats( final String bindingName, int numNewVCFRecordsAnnotatedWithBindingNameData, final List<Map<String, Object>> infoAnnotationOutputsList, int matchingRecordsSize ) {
 
-        Set<String> requestedColumns = requestedColumnsMap.get(bindingName);
-        Map<String, String> subsettedAnnotations = new HashMap<String, String>();
-        for(Map.Entry<String, String> e : annotationsForRecord.entrySet() ) {
-            if(requestedColumns.contains(e.getKey())) {
-                subsettedAnnotations.put(e.getKey(), e.getValue());
+        //update stats for the 'bindingName' table
+        incrementStatsCounter(bindingName, numNewVCFRecordsAnnotatedWithBindingNameData); //All records in newInfoAnnotationOutputsList were annotated with data from bindingName.
+
+        //update stats for all other tables besides 'bindingName'
+        for(String otherBindingName : inputTableHitCounter.keySet()) {
+            if(otherBindingName.equals(bindingName)) {
+                continue;
+            }
+
+            //count how many records in the initial infoAnnotationOutputsList were annotated with data from otherBindingName
+            int numAnnotatedWithOtherBindingNameData = 0;
+            for(Map<String, Object> outputRecordInfoField : infoAnnotationOutputsList) {
+                for(String outputRecordInfoFieldKey : outputRecordInfoField.keySet()) {
+                    if(outputRecordInfoFieldKey.contains(otherBindingName)) {
+                        //this record has some annotations from the otherBindingName table
+                        numAnnotatedWithOtherBindingNameData++;
+                        break;
+                    }
+                }
+            }
+
+            if(numAnnotatedWithOtherBindingNameData > 0) {
+                //numAnnotatedWithOtherBindingNameData * (matchingRecordsSize - 1) is how many additional output VCF records were created with annotations from otherBindingName
+                incrementStatsCounter(otherBindingName, numAnnotatedWithOtherBindingNameData * (matchingRecordsSize - 1));
             }
         }
-
-        if(subsettedAnnotations.isEmpty()) {
-            throw new StingException("Invalid -s argument for the '" + bindingName + "' input file. " +
-                    "It caused all columns in the file to be rejected. Please check to make sure the -s column " +
-                    "names match the column names in the '" + bindingName + "' file's HEADER line.");
-        }
-
-        return subsettedAnnotations;
     }
 
 
 
+    /**
+     * Applies the -S arg to the results
+     *
+     * @param infoAnnotationOutputsList
+     * @return The new newInfoAnnotationOutputList with -S arg applied.
+     */
+    private List<Map<String, Object>> applySelectArg( final List<Map<String, Object>> infoAnnotationOutputsList )
+    {
+        final List<Map<String, Object>> newInfoAnnotationOutputList = new LinkedList<Map<String, Object>>();
+        for(final Map<String, Object> outputRecordInfoField : infoAnnotationOutputsList) {
+            final Map<String, Object> newOutputRecordInfoField = new LinkedHashMap<String, Object>();
+            for(final Entry<String, Object>  keyValue : outputRecordInfoField.entrySet()) {
+                if(!isKeyFilteredOutBySelectArg(keyValue.getKey())) {
+                    newOutputRecordInfoField.put(keyValue.getKey(), keyValue.getValue());
+                }
+            }
+            newInfoAnnotationOutputList.add(newOutputRecordInfoField);
+        }
+
+        return newInfoAnnotationOutputList;
+    }
+
+
+    /**
+     * Determines whether to exclude the given column from the annotations.
+     * @param key The fully qualified columnName
+     * @return Whether the -S arg specifies that this column should be included in the annotations.
+     *
+     * TODO this function can be optimized through memoization
+     */
+    private boolean isKeyFilteredOutBySelectArg(String key)
+    {
+        for(final String bindingName : requestedColumnsMap.keySet()) {
+
+            if(key.contains(bindingName)) {
+                final Set<String> selectArgsWithThisBindingName = requestedColumnsMap.get(bindingName);
+                for(final String selectArgWithThisBindingName : selectArgsWithThisBindingName) {
+                    if(key.contains(selectArgWithThisBindingName)) {
+                        return false; //this key matches one of the -s args, so the user explicitly requested this key
+                    }
+                }
+                if(!selectArgsWithThisBindingName.isEmpty()) {
+                    return true; //the -S arg contains some keys with this binding name, but doesn't include this key
+                }
+            }
+        }
+
+        return false; //the -S arg doesn't have anything with the same binding name as this key, so the user implicitly requested this key
+    }
+
+
+
+
     /**
      * Determines how the engine will handle the case where multiple records in a ROD file
-     * overlap a particular single locus. If explode is set to true, the output will be
+     * overlap a particular single locus. If oneToMany is set to true, the output will be
      * one-to-many, so that each locus in the input VCF file could result in multiple
      * entries in the output VCF file. Otherwise, the output will be one-to-one, and
      * all multiple-match records will be collapsed into the single info field.
@@ -422,6 +633,72 @@ public class VariantAnnotatorEngine {
 
         //System.err.println("COLUMNS:  "+Arrays.asList(columns).toString());
 
-        this.requestedColumnsMap = GenomicAnnotation.parseColumnsArg(columns);
+        this.requestedColumnsMap = parseColumnsArg(columns);
     }
+
+
+    /**
+     * Passes in a pointer to the JoinTables.
+     *
+     * @param joinTables The list of JoinTables. There should be one JoinTable object for each -J arg.
+     */
+    public void setJoinTables(List<JoinTable> joinTables) {
+        this.joinTables = joinTables;
+    }
+
+
+    /**
+     * Parses the columns arg and returns a Map of columns hashed by their binding name.
+     * For example:
+     *   The command line:
+     *      -s dbSnp.valid,dbsnp.avHet -s refGene.txStart,refGene.txEnd
+     *
+     *   will be passed to this method as:
+     *       ["dbSnp.valid,dbsnp.avHet", "refGene.txStart,refGene.txEnd"]
+     *
+     *   resulting in a return value of:
+     *      {
+     *       "dbSnp" -> "dbSnp.valid" ,
+     *       "dbSnp" -> "dbsnp.avHet" ,
+     *       "refGene" -> "refGene.txStart",
+     *       "refGene" -> "refGene.txEnd"
+     *      }
+     *
+     * @param columnsArg The -s command line arg value.
+     *
+     * @return Map representing a parsed version of this arg - see above.
+     */
+    private static Map<String, Set<String>> parseColumnsArg(String[] columnsArg) {
+        Map<String, Set<String>> result = new HashMap<String, Set<String>>();
+
+        for(String s : columnsArg) {
+            for(String columnSpecifier : s.split(",") ) {
+                String[] rodNameColumnName = columnSpecifier.split("\\.");
+                if(rodNameColumnName.length != 2) {
+                    throw new IllegalArgumentException("The following column specifier in the -s arg is invalid: [" + columnSpecifier + "]. It must be of the form 'bindingName.columnName'.");
+                }
+                String rodName = rodNameColumnName[0];
+                //String columnName = rodNameColumnName[1];
+
+                Set<String> requestedColumns = result.get(rodName);
+                if(requestedColumns == null) {
+                    requestedColumns = new HashSet<String>();
+                    result.put(rodName, requestedColumns);
+                }
+                requestedColumns.add(columnSpecifier);
+            }
+        }
+
+        return result;
+    }
+
+
+    /**
+     * Returns a map containing stats on how many output vcf records were annotated from each database
+     */
+    public Map<String, Integer> getInputTableHitCounter() {
+        return Collections.unmodifiableMap(inputTableHitCounter);
+    }
+
+
 }