From e06b2c90ef03b002e5062360bd89cad115bc8356 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 26 Aug 2010 05:21:26 +0000 Subject: [PATCH] Cap the default size of join tables; this can be modified with the --maxJoinTableSize argument. Also, misc cleanup of the comments. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4125 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/annotator/GenomicAnnotation.java | 5 ++--- .../walkers/annotator/GenomicAnnotator.java | 5 ++++- .../gatk/walkers/annotator/JoinTable.java | 21 +++++++++++++------ .../walkers/annotator/JoinTableParser.java | 10 +++++---- 4 files changed, 27 insertions(+), 14 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java index c0f605d4c..93ce8a119 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotation.java @@ -120,9 +120,8 @@ public class GenomicAnnotation implements InfoFieldAnnotation { //Otherwise, the HAPLOTYPE_REFERENCE_COLUMN is only considered to be matching the variant's reference if the string values of the two //are exactly equal (case-insensitive). - //The HAPLOTYPE_REFERENCE_COLUMN is matches the variant's reference allele based on a case-insensitive string comparison. - //The HAPLOTYPE_ALTERNATE_COLUMN is can optionally list more than allele separated by one of these chars: ,\/:| - //The matches if any of the + //The HAPLOTYPE_REFERENCE_COLUMN matches the variant's reference allele based on a case-insensitive string comparison. 
+ //The HAPLOTYPE_ALTERNATE_COLUMN can optionally list more than one allele separated by one of these chars: ,\/:| String hapAltValue = annotationsForRecord.get( generateInfoFieldKey(name, HAPLOTYPE_ALTERNATE_COLUMN) ); if(hapAltValue != null) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java index b5e6d7005..0d6d8e139 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java @@ -76,6 +76,9 @@ public class GenomicAnnotator extends RodWalker implements Tre @Argument(fullName="oneToMany", shortName="m", doc="If more than one record from the same file matches a particular locus (for example, multiple dbSNP records with the same position), create multiple entries in the ouptut VCF file - one for each match. If a particular tabular file has J matches, and another tabular file has K matches for a given locus, then J*K output VCF records will be generated - one for each pair of K, J. If this flag is not provided, the multiple records are still generated, but they are stored in the INFO field of a single output VCF record, with their annotation keys differentiated by appending '_i' with i varying from 1 to K*J. ", required=false) protected Boolean ONE_TO_MANY = false; + @Argument(fullName="maxJoinTableSize", shortName="maxJoin", doc="The maximum allowed size (i.e. 
number of rows) for a table provided with the -J argument", required=false) + protected Integer MAX_JOIN_TABLE_SIZE = 500000; + private VariantAnnotatorEngine engine; /** @@ -159,7 +162,7 @@ public class GenomicAnnotator extends RodWalker implements Tre } //read in the file contents into a JoinTable object - final JoinTable joinTable = new JoinTable(); + final JoinTable joinTable = new JoinTable(MAX_JOIN_TABLE_SIZE); joinTable.parseFromFile(filename, localBindingName, localColumnName, externalBindingName, externalColumnName); joinTables.add(joinTable); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/JoinTable.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/JoinTable.java index ca841439d..d9e55c03e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/JoinTable.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/JoinTable.java @@ -63,15 +63,21 @@ public class JoinTable //and the entry value is an ArrayList representing the entire join table record. private HashMap> joinColumnValueToRecords = new HashMap>(); + private int maxSize; private boolean parsedFromFile = false; + public JoinTable(int maxSize) { + this.maxSize = maxSize; + } + /** * Parses the table from the given file using the JoinTableParser. * * @param filename The file containing the table. + * @param localBindingName The binding name within the given file to join on. * @param localColumnName The column name within the given file to join on. - * @param externalBindingName The bindingName of another file (previously specified with either -B or -J). - * @param externalColumnName The columnName in this other file to join on. + * @param externalBindingName The binding name of another file (previously specified with either -B or -J). + * @param externalColumnName The column name in this other file to join on. 
*/ public void parseFromFile(String filename, String localBindingName, String localColumnName, String externalBindingName, String externalColumnName) { if(parsedFromFile) { @@ -135,7 +141,7 @@ public class JoinTable /** * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, * this returns bindingName1. - * @return + * @return local binding name */ public String getLocalBindingName() { return localBindingName; @@ -159,7 +165,7 @@ public class JoinTable /** * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, * this returns columnName2. - * @return + * @return external column name */ public String getExternalColumnName() { return externalColumnName; @@ -173,7 +179,7 @@ public class JoinTable /** * If the -J arg was: -J bindingName1,/path/to/file,bindingName1.columnName=bindingName2.columnName2, * this returns bindingName2. - * @return + * @return external binding name */ public String getExternalBindingName() { return externalBindingName; @@ -187,7 +193,7 @@ public class JoinTable /** * Whether any join table records have the given value in the join column. * @param joinColumnValue value - * @return + * @return true if the given name value exists in the file */ public boolean containsJoinColumnValue(String joinColumnValue) { return joinColumnValueToRecords.containsKey(joinColumnValue); @@ -206,10 +212,13 @@ public class JoinTable * Adds the given record to the map. 
* @param joinColumnValue value * @param record row + * @param filename the source file name */ protected void put(String joinColumnValue, ArrayList record, String filename) { if ( joinColumnValueToRecords.containsKey(joinColumnValue) ) throw new IllegalStateException("The file " + filename + " contains non-unique entries for the requested column, which isn't allowed."); joinColumnValueToRecords.put(joinColumnValue, record); + if ( joinColumnValueToRecords.size() > maxSize ) + throw new IllegalStateException("The file " + filename + " contains more than the maximum number (" + maxSize + ") of allowed rows (see the --maxJoinTableSize argument)."); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/JoinTableParser.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/JoinTableParser.java index 06f77262b..d4128f5b9 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/JoinTableParser.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/JoinTableParser.java @@ -56,7 +56,8 @@ public class JoinTableParser /** * Returns the header and returns it. * @param br source - * @return + * @return column names + * @throws IOException on read */ public List readHeader(BufferedReader br) throws IOException { @@ -81,7 +82,8 @@ public class JoinTableParser /** * Parses the line into an ArrayList containing the values for each column. * - * @param line + * @param line to parse + * @return tokens */ public ArrayList parseLine(String line) { @@ -99,14 +101,14 @@ public class JoinTableParser * Returns the header. * @param br The file to read. * @return ArrayList containing column names from the header. 
- * @throws IOException + * @throws IOException on reading */ public static ArrayList parseHeader(final BufferedReader br) throws IOException { ArrayList header = null; //find the 1st line that's non-empty and not a comment - String line = null; + String line; while( (line = br.readLine()) != null ) { line = line.trim(); if ( line.isEmpty() || line.startsWith("#") ) {