Cap the default size of join tables; this can be modified with the --maxJoinTableSize argument. Also, misc cleanup of the comments.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4125 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-26 05:21:26 +00:00
parent 79cd716671
commit e06b2c90ef
4 changed files with 27 additions and 14 deletions

View File

@ -120,9 +120,8 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
//Otherwise, the HAPLOTYPE_REFERENCE_COLUMN is only considered to be matching the variant's reference if the string values of the two
//are exactly equal (case-insensitive).
//The HAPLOTYPE_REFERENCE_COLUMN is matches the variant's reference allele based on a case-insensitive string comparison.
//The HAPLOTYPE_ALTERNATE_COLUMN is can optionally list more than allele separated by one of these chars: ,\/:|
//The matches if any of the
//The HAPLOTYPE_REFERENCE_COLUMN matches the variant's reference allele based on a case-insensitive string comparison.
//The HAPLOTYPE_ALTERNATE_COLUMN can optionally list more than one allele, separated by one of these chars: ,\/:|
String hapAltValue = annotationsForRecord.get( generateInfoFieldKey(name, HAPLOTYPE_ALTERNATE_COLUMN) );
if(hapAltValue != null)
{

View File

@ -76,6 +76,9 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> implements Tre
@Argument(fullName="oneToMany", shortName="m", doc="If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false)
protected Boolean ONE_TO_MANY = false;
@Argument(fullName="maxJoinTableSize", shortName="maxJoin", doc="The maximum allowed size (i.e. number of rows) for a table provided with the -J argument", required=false)
protected Integer MAX_JOIN_TABLE_SIZE = 500000;
private VariantAnnotatorEngine engine;
/**
@ -159,7 +162,7 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> implements Tre
}
//read in the file contents into a JoinTable object
final JoinTable joinTable = new JoinTable();
final JoinTable joinTable = new JoinTable(MAX_JOIN_TABLE_SIZE);
joinTable.parseFromFile(filename, localBindingName, localColumnName, externalBindingName, externalColumnName);
joinTables.add(joinTable);

View File

@ -63,15 +63,21 @@ public class JoinTable
//and the entry value is an ArrayList representing the entire join table record.
private HashMap<String, ArrayList<String>> joinColumnValueToRecords = new HashMap<String, ArrayList<String>>();
private int maxSize;
private boolean parsedFromFile = false;
/**
 * Creates an empty join table with a cap on the number of records it may hold.
 *
 * @param maxSize the maximum number of records allowed in this table
 */
public JoinTable(final int maxSize) {
    this.maxSize = maxSize;
}
/**
* Parses the table from the given file using the JoinTableParser.
*
* @param filename The file containing the table.
* @param localBindingName The binding name within the given file to join on.
* @param localColumnName The column name within the given file to join on.
* @param externalBindingName The bindingName of another file (previously specified with either -B or -J).
* @param externalColumnName The columnName in this other file to join on.
* @param externalBindingName The binding name of another file (previously specified with either -B or -J).
* @param externalColumnName The column name in this other file to join on.
*/
public void parseFromFile(String filename, String localBindingName, String localColumnName, String externalBindingName, String externalColumnName) {
if(parsedFromFile) {
@ -135,7 +141,7 @@ public class JoinTable
/**
* If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2,
* this returns bindingName1.
* @return
* @return local binding name
*/
public String getLocalBindingName() {
return localBindingName;
@ -159,7 +165,7 @@ public class JoinTable
/**
* If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2,
* this returns columnName2.
* @return
* @return external column name
*/
public String getExternalColumnName() {
return externalColumnName;
@ -173,7 +179,7 @@ public class JoinTable
/**
* If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2,
* this returns bindingName2.
* @return
* @return external binding name
*/
public String getExternalBindingName() {
return externalBindingName;
@ -187,7 +193,7 @@ public class JoinTable
/**
* Whether any join table records have the given value in the join column.
* @param joinColumnValue value
* @return
* @return true if at least one join table record has the given value in the join column
*/
public boolean containsJoinColumnValue(String joinColumnValue) {
return joinColumnValueToRecords.containsKey(joinColumnValue);
@ -206,10 +212,13 @@ public class JoinTable
* Adds the given record to the map.
* @param joinColumnValue value
* @param record row
* @param filename the source file name
*/
protected void put(String joinColumnValue, ArrayList<String> record, String filename) {
    // Join-column values must be unique within a table; reject a repeated key.
    if ( joinColumnValueToRecords.containsKey(joinColumnValue) )
        throw new IllegalStateException("The file " + filename + " contains non-unique entries for the requested column, which isn't allowed.");

    // Enforce the cap BEFORE inserting, so the table never holds more than maxSize
    // records even if a caller catches the exception. The exception is raised for
    // exactly the same inputs as checking size() > maxSize after the insert.
    if ( joinColumnValueToRecords.size() >= maxSize )
        throw new IllegalStateException("The file " + filename + " contains more than the maximum number (" + maxSize + ") of allowed rows (see the --maxJoinTableSize argument).");

    joinColumnValueToRecords.put(joinColumnValue, record);
}
}

View File

@ -56,7 +56,8 @@ public class JoinTableParser
/**
* Reads the header and returns it.
* @param br source
* @return
* @return column names
* @throws IOException on read
*/
public List<String> readHeader(BufferedReader br) throws IOException
{
@ -81,7 +82,8 @@ public class JoinTableParser
/**
* Parses the line into an ArrayList containing the values for each column.
*
* @param line
* @param line to parse
* @return tokens
*/
public ArrayList<String> parseLine(String line) {
@ -99,14 +101,14 @@ public class JoinTableParser
* Returns the header.
* @param br The file to read.
* @return ArrayList containing column names from the header.
* @throws IOException
* @throws IOException on reading
*/
public static ArrayList<String> parseHeader(final BufferedReader br) throws IOException
{
ArrayList<String> header = null;
//find the 1st line that's non-empty and not a comment
String line = null;
String line;
while( (line = br.readLine()) != null ) {
line = line.trim();
if ( line.isEmpty() || line.startsWith("#") ) {