Cap the default size of join tables; this can be modified with the --maxJoinTableSize argument. Also, misc cleanup of the comments.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4125 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
79cd716671
commit
e06b2c90ef
|
|
@ -120,9 +120,8 @@ public class GenomicAnnotation implements InfoFieldAnnotation {
|
|||
//Otherwise, the HAPLOTYPE_REFERENCE_COLUMN is only considered to be matching the variant's reference if the string values of the two
|
||||
//are exactly equal (case-insensitive).
|
||||
|
||||
//The HAPLOTYPE_REFERENCE_COLUMN is matches the variant's reference allele based on a case-insensitive string comparison.
|
||||
//The HAPLOTYPE_ALTERNATE_COLUMN is can optionally list more than allele separated by one of these chars: ,\/:|
|
||||
//The matches if any of the
|
||||
//The HAPLOTYPE_REFERENCE_COLUMN matches the variant's reference allele based on a case-insensitive string comparison.
|
||||
//The HAPLOTYPE_ALTERNATE_COLUMN can optionally list more than one allele, separated by one of these chars: ,\/:|
|
||||
String hapAltValue = annotationsForRecord.get( generateInfoFieldKey(name, HAPLOTYPE_ALTERNATE_COLUMN) );
|
||||
if(hapAltValue != null)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -76,6 +76,9 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> implements Tre
|
|||
@Argument(fullName="oneToMany", shortName="m", doc="If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false)
|
||||
protected Boolean ONE_TO_MANY = false;
|
||||
|
||||
@Argument(fullName="maxJoinTableSize", shortName="maxJoin", doc="The maximum allowed size (i.e. number of rows) for a table provided with the -J argument", required=false)
|
||||
protected Integer MAX_JOIN_TABLE_SIZE = 500000;
|
||||
|
||||
private VariantAnnotatorEngine engine;
|
||||
|
||||
/**
|
||||
|
|
@ -159,7 +162,7 @@ public class GenomicAnnotator extends RodWalker<Integer, Integer> implements Tre
|
|||
}
|
||||
|
||||
//read in the file contents into a JoinTable object
|
||||
final JoinTable joinTable = new JoinTable();
|
||||
final JoinTable joinTable = new JoinTable(MAX_JOIN_TABLE_SIZE);
|
||||
joinTable.parseFromFile(filename, localBindingName, localColumnName, externalBindingName, externalColumnName);
|
||||
joinTables.add(joinTable);
|
||||
|
||||
|
|
|
|||
|
|
@ -63,15 +63,21 @@ public class JoinTable
|
|||
//and the entry value is an ArrayList representing the entire join table record.
|
||||
private HashMap<String, ArrayList<String>> joinColumnValueToRecords = new HashMap<String, ArrayList<String>>();
|
||||
|
||||
private int maxSize;
|
||||
private boolean parsedFromFile = false;
|
||||
|
||||
public JoinTable(int maxSize) {
|
||||
this.maxSize = maxSize;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the table from the given file using the JoinTableParser.
|
||||
*
|
||||
* @param filename The file containing the table.
|
||||
* @param localBindingName The binding name within the given file to join on.
|
||||
* @param localColumnName The column name within the given file to join on.
|
||||
* @param externalBindingName The bindingName of another file (previously specified with either -B or -J).
|
||||
* @param externalColumnName The columnName in this other file to join on.
|
||||
* @param externalBindingName The binding name of another file (previously specified with either -B or -J).
|
||||
* @param externalColumnName The column name in this other file to join on.
|
||||
*/
|
||||
public void parseFromFile(String filename, String localBindingName, String localColumnName, String externalBindingName, String externalColumnName) {
|
||||
if(parsedFromFile) {
|
||||
|
|
@ -135,7 +141,7 @@ public class JoinTable
|
|||
/**
|
||||
* If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2,
|
||||
* this returns bindingName1.
|
||||
* @return
|
||||
* @return local binding name
|
||||
*/
|
||||
public String getLocalBindingName() {
|
||||
return localBindingName;
|
||||
|
|
@ -159,7 +165,7 @@ public class JoinTable
|
|||
/**
|
||||
* If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2,
|
||||
* this returns columnName2.
|
||||
* @return
|
||||
* @return external column name
|
||||
*/
|
||||
public String getExternalColumnName() {
|
||||
return externalColumnName;
|
||||
|
|
@ -173,7 +179,7 @@ public class JoinTable
|
|||
/**
|
||||
* If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2,
|
||||
* this returns bindingName2.
|
||||
* @return
|
||||
* @return external binding name
|
||||
*/
|
||||
public String getExternalBindingName() {
|
||||
return externalBindingName;
|
||||
|
|
@ -187,7 +193,7 @@ public class JoinTable
|
|||
/**
|
||||
* Whether any join table records have the given value in the join column.
|
||||
* @param joinColumnValue value
|
||||
* @return
|
||||
* @return true if any join table record has the given value in the join column
|
||||
*/
|
||||
public boolean containsJoinColumnValue(String joinColumnValue) {
|
||||
return joinColumnValueToRecords.containsKey(joinColumnValue);
|
||||
|
|
@ -206,10 +212,13 @@ public class JoinTable
|
|||
* Adds the given record to the map.
|
||||
* @param joinColumnValue value
|
||||
* @param record row
|
||||
* @param filename the source file name
|
||||
*/
|
||||
protected void put(String joinColumnValue, ArrayList<String> record, String filename) {
|
||||
if ( joinColumnValueToRecords.containsKey(joinColumnValue) )
|
||||
throw new IllegalStateException("The file " + filename + " contains non-unique entries for the requested column, which isn't allowed.");
|
||||
joinColumnValueToRecords.put(joinColumnValue, record);
|
||||
if ( joinColumnValueToRecords.size() > maxSize )
|
||||
throw new IllegalStateException("The file " + filename + " contains more than the maximum number (" + maxSize + ") of allowed rows (see the --maxJoinTableSize argument).");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,7 +56,8 @@ public class JoinTableParser
|
|||
/**
|
||||
* Reads the header and returns it.
|
||||
* @param br source
|
||||
* @return
|
||||
* @return column names
|
||||
* @throws IOException on read
|
||||
*/
|
||||
public List<String> readHeader(BufferedReader br) throws IOException
|
||||
{
|
||||
|
|
@ -81,7 +82,8 @@ public class JoinTableParser
|
|||
/**
|
||||
* Parses the line into an ArrayList containing the values for each column.
|
||||
*
|
||||
* @param line
|
||||
* @param line to parse
|
||||
* @return tokens
|
||||
*/
|
||||
public ArrayList<String> parseLine(String line) {
|
||||
|
||||
|
|
@ -99,14 +101,14 @@ public class JoinTableParser
|
|||
* Returns the header.
|
||||
* @param br The file to read.
|
||||
* @return ArrayList containing column names from the header.
|
||||
* @throws IOException
|
||||
* @throws IOException on reading
|
||||
*/
|
||||
public static ArrayList<String> parseHeader(final BufferedReader br) throws IOException
|
||||
{
|
||||
ArrayList<String> header = null;
|
||||
|
||||
//find the 1st line that's non-empty and not a comment
|
||||
String line = null;
|
||||
String line;
|
||||
while( (line = br.readLine()) != null ) {
|
||||
line = line.trim();
|
||||
if ( line.isEmpty() || line.startsWith("#") ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue