diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 0048bfac1..3f41e9bbb 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -186,7 +186,7 @@ public class GenomeAnalysisEngine { } // parse out the rod bindings - ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods); + ReferenceOrderedData.parseBindings(argCollection.RODBindings, rods); validateSuppliedReferenceOrderedDataAgainstWalker( my_walker, rods ); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index f15b429b3..3d3082c83 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -2,13 +2,11 @@ package org.broadinstitute.sting.gatk.refdata; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MalformedGenomeLocException; +import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.xReadLines; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileWriter; -import java.io.IOException; +import java.io.*; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; @@ -16,7 +14,7 @@ import java.util.*; /** * Class for representing arbitrary reference ordered data sets - * + *

* User: mdepristo * Date: Feb 27, 2009 * Time: 10:47:14 AM @@ -26,14 +24,16 @@ public class ReferenceOrderedData implements private String name; private File file = null; private String fieldDelimiter; - - /** - * Header object returned from the datum - */ + + /** Header object returned from the datum */ private Object header = null; - + private Class type = null; // runtime type information for object construction + /** our log, which we want to capture anything from this class */ + private static Logger logger = Logger.getLogger(ReferenceOrderedData.class); + + // ---------------------------------------------------------------------- // // Static ROD type management @@ -42,6 +42,7 @@ public class ReferenceOrderedData implements public static class RODBinding { public final String name; public final Class type; + public RODBinding(final String name, final Class type) { this.name = name; this.type = type; @@ -49,9 +50,10 @@ public class ReferenceOrderedData implements } public static HashMap Types = new HashMap(); + public static void addModule(final String name, final Class rodType) { final String boundName = name.toLowerCase(); - if ( Types.containsKey(boundName) ) { + if (Types.containsKey(boundName)) { throw new RuntimeException(String.format("GATK BUG: adding ROD module %s that is already bound", boundName)); } System.out.printf("* Adding rod class %s%n", name); @@ -85,30 +87,36 @@ public class ReferenceOrderedData implements * name, of type, ready to read from the file. This function does check for the strings to be well formed * and such. 
* - * @param logger * @param bindings * @param rods */ - public static void parseBindings(Logger logger, ArrayList bindings, List > rods) - { + public static void parseBindings(ArrayList bindings, List> rods) { + // pre-process out any files that were passed in as rod binding command line options + for (int x = 0; x < bindings.size(); x++) { + if (new File(bindings.get(x)).exists()) { + extractRodsFromFile(bindings, bindings.get(x)); + bindings.remove(x); + x--; + } + } // Loop over triplets - for( String bindingSets: bindings ) { + for (String bindingSets : bindings) { String[] bindingTokens = bindingSets.split(","); - if( bindingTokens.length % 3 != 0 ) + if (bindingTokens.length % 3 != 0) Utils.scareUser(String.format("Invalid ROD specification: requires triplets of ,, but got %s", Utils.join(",", bindings))); - for ( int bindingSet = 0; bindingSet < bindingTokens.length; bindingSet += 3 ) { + for (int bindingSet = 0; bindingSet < bindingTokens.length; bindingSet += 3) { logger.info("Processing ROD bindings: " + bindings.size() + " -> " + Utils.join(" : ", bindingTokens)); final String name = bindingTokens[bindingSet]; final String typeName = bindingTokens[bindingSet + 1]; final String fileName = bindingTokens[bindingSet + 2]; - ReferenceOrderedData rod = parse1Binding(logger, name, typeName, fileName); + ReferenceOrderedData rod = parse1Binding(name, typeName, fileName); // check that we're not generating duplicate bindings - for ( ReferenceOrderedData rod2 : rods ) - if ( rod2.getName().equals(rod.getName()) ) + for (ReferenceOrderedData rod2 : rods) + if (rod2.getName().equals(rod.getName())) - Utils.scareUser(String.format("Found duplicate rod bindings", rod.getName())); + Utils.scareUser(String.format("Found duplicate rod bindings: %s", rod.getName())); rods.add(rod); @@ -116,20 +124,49 @@ public class ReferenceOrderedData implements } } + /** + * given an existing file, open it, append all the valid triplet lines to an existing list, and close it + * + * @param rodTripletList the list of existing triplets + * @param filename the file to attempt to 
extract ROD triplets from + */ + protected static void extractRodsFromFile(List rodTripletList, String filename) { + BufferedReader str; + try { + str = new BufferedReader(new FileReader(new File(filename))); + } catch (FileNotFoundException e) { + throw new StingException("Unable to load the ROD input file " + filename,e); + } + String line = "NO LINES READ IN"; + try { + while ((line = str.readLine()) != null) { + if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim()); + else logger.warn("the following file line didn't parsing into a triplet -> " + line); + } + } catch (IOException e) { + throw new StingException("Failed reading the input rod file " + filename + " last line read was " + line,e); + } finally { + // always release the file handle, even when reading fails part-way through + try { + str.close(); + } catch (IOException e) { + logger.warn("Unable to close the ROD input file " + filename, e); + } + } + } + /** * Helpful function that parses a single triplet of and returns the corresponding ROD with * , of type that reads its input from . - * - * @param logger + * * @param trackName * @param typeName * @param fileName * @return */ - private static ReferenceOrderedData parse1Binding( Logger logger, final String trackName, final String typeName, final String fileName ) - { + private static ReferenceOrderedData parse1Binding(final String trackName, final String typeName, final String fileName) { // Gracefully fail if we don't have the type - if ( ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null ) + if (ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null) Utils.scareUser(String.format("Unknown ROD type: %s", typeName)); // Lookup the type @@ -160,34 +190,36 @@ public class ReferenceOrderedData implements * Special equals override to see if this ROD is compatible with the given * name and type. 'Compatible' means that this ROD has the name that's passed * in and its data can fit into the container specified by type. + * * @param name Name to check. * @param type Type to check. + * * @return True if these parameters imply this rod. False otherwise. 
*/ - public boolean matches( String name, Class type ) { + public boolean matches(String name, Class type) { return this.name.equals(name) && type.isAssignableFrom(this.type); } public RODIterator iterator() { - Iterator it; + Iterator it; try { - Method m = type.getDeclaredMethod("createIterator", String.class,java.io.File.class); + Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class); it = (Iterator) m.invoke(null, name, file); - } catch ( java.lang.NoSuchMethodException e ) { + } catch (java.lang.NoSuchMethodException e) { it = new SimpleRODIterator(); - } catch ( java.lang.NullPointerException e ) { + } catch (java.lang.NullPointerException e) { throw new RuntimeException(e); - } catch ( java.lang.SecurityException e ) { + } catch (java.lang.SecurityException e) { throw new RuntimeException(e); - } catch ( java.lang.IllegalAccessException e ) { - throw new RuntimeException(e); - } catch ( java.lang.IllegalArgumentException e ) { - throw new RuntimeException(e); - } catch ( java.lang.reflect.InvocationTargetException e ) { - throw new RuntimeException(e); - } + } catch (java.lang.IllegalAccessException e) { + throw new RuntimeException(e); + } catch (java.lang.IllegalArgumentException e) { + throw new RuntimeException(e); + } catch (java.lang.reflect.InvocationTargetException e) { + throw new RuntimeException(e); + } return new RODIterator(it); - } + } // ---------------------------------------------------------------------- // @@ -195,12 +227,12 @@ public class ReferenceOrderedData implements // // ---------------------------------------------------------------------- public void testMe() { - for ( ReferenceOrderedDatum rec : this ) { + for (ReferenceOrderedDatum rec : this) { System.out.println(rec.toString()); - rodGFF gff = (rodGFF)rec; + rodGFF gff = (rodGFF) rec; String[] keys = {"LENGTH", "ALT", "FOBARBAR"}; - for ( String key : keys) { + for (String key : keys) { System.out.printf(" -> %s is (%s)%n", key, 
gff.containsAttribute(key) ? gff.getAttribute(key) : "none"); } } @@ -214,7 +246,7 @@ public class ReferenceOrderedData implements // ---------------------------------------------------------------------- public ArrayList readAll() { ArrayList elts = new ArrayList(); - for ( ReferenceOrderedDatum rec : this ) { + for (ReferenceOrderedDatum rec : this) { elts.add(rec); } elts.trimToSize(); @@ -228,7 +260,7 @@ public class ReferenceOrderedData implements public static void write(ArrayList data, File output) throws IOException { final FileWriter out = new FileWriter(output); - for ( ReferenceOrderedDatum rec : data ) { + for (ReferenceOrderedDatum rec : data) { out.write(rec.repl() + "\n"); } @@ -237,12 +269,12 @@ public class ReferenceOrderedData implements public boolean validateFile() throws Exception { ReferenceOrderedDatum last = null; - for ( ReferenceOrderedDatum rec : this ) { - if ( last != null && last.compareTo(rec) == 1 ) { - // It's out of order - throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString()); - } - last = rec; + for (ReferenceOrderedDatum rec : this) { + if (last != null && last.compareTo(rec) == 1) { + // It's out of order + throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString()); + } + last = rec; } return true; } @@ -262,7 +294,7 @@ public class ReferenceOrderedData implements public SimpleRODIterator() { try { parser = new xReadLines(file); - } catch ( FileNotFoundException e ) { + } catch (FileNotFoundException e) { Utils.scareUser("Couldn't open file: " + file); } } @@ -281,7 +313,7 @@ public class ReferenceOrderedData implements final String line = parser.next(); //System.out.printf("Line is '%s'%n", line); String parts[] = line.split(fieldDelimiter); - + try { n = parseLine(parts); // Two failure conditions: @@ -291,12 +323,12 @@ public class ReferenceOrderedData implements // TODO: Clean this up so that all errors are handled in one spot. 
success = (n != null); } - catch( MalformedGenomeLocException ex ) { - if( firstFailure ) { + catch (MalformedGenomeLocException ex) { + if (firstFailure) { Utils.warnUser("Failed to parse contig on line '" + line + "'. The reason given was: " + ex.getMessage() + " Skipping ahead to the next recognized GenomeLoc. "); firstFailure = false; } - if( !parser.hasNext() ) + if (!parser.hasNext()) Utils.warnUser("Unable to find more valid reference-ordered data. Giving up."); } @@ -304,7 +336,7 @@ public class ReferenceOrderedData implements return n; } - + public void remove() { throw new UnsupportedOperationException(); } @@ -315,26 +347,31 @@ public class ReferenceOrderedData implements // Parsing // // ---------------------------------------------------------------------- - private Constructor parsing_constructor; - private ROD newROD( final String name, final Class type ) { + private Constructor parsing_constructor; + + private ROD newROD(final String name, final Class type) { try { - return (ROD)parsing_constructor.newInstance(name); - } catch ( java.lang.InstantiationException e ) { + return (ROD) parsing_constructor.newInstance(name); + } catch (java.lang.InstantiationException e) { throw new RuntimeException(e); - } catch ( java.lang.IllegalAccessException e ) { + } catch (java.lang.IllegalAccessException e) { throw new RuntimeException(e); - } catch ( InvocationTargetException e ) { + } catch (InvocationTargetException e) { throw new RuntimeException(e); } } private Object initializeROD(final String name, final File file, final Class type) { - try { parsing_constructor = type.getConstructor(String.class); } - catch (java.lang.NoSuchMethodException e) { throw new RuntimeException(e); } + try { + parsing_constructor = type.getConstructor(String.class); + } + catch (java.lang.NoSuchMethodException e) { + throw new RuntimeException(e); + } ROD rod = newROD(name, type); try { return rod.initialize(file); - } catch ( FileNotFoundException e ) { + } catch 
(FileNotFoundException e) { throw new RuntimeException(e); } } @@ -343,7 +380,7 @@ public class ReferenceOrderedData implements //System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts)); ROD obj = newROD(name, type); try { - if ( ! obj.parseLine(header, parts) ) + if (!obj.parseLine(header, parts)) obj = null; } catch (IOException e) { throw new RuntimeException("Badly formed ROD: " + e); diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataTest.java new file mode 100644 index 000000000..810de7be1 --- /dev/null +++ b/java/test/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedDataTest.java @@ -0,0 +1,47 @@ +package org.broadinstitute.sting.gatk.refdata; + +import org.broadinstitute.sting.BaseTest; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.List; + + +/** + * + * @author aaron + * + * Class ReferenceOrderedDataTest + * + * some functionality to test parts of the reference ordered data system that I've added. 
This is by NO MEANS + * a complete test suite, but additions would be extremely welcome + */ +public class ReferenceOrderedDataTest extends BaseTest { + @Test + public void extractRodsFromFileTest() { + String file = "/humgen/gsa-scr1/GATK_Data/Validation_Data/testRODFileImpl.csv"; + List lst = new ArrayList(); + ReferenceOrderedData.extractRodsFromFile(lst,file); + Assert.assertEquals(6,lst.size()); + int index = 0; + for (String entry: lst) { + String first = entry.subSequence(0,entry.indexOf(",")).toString(); + Assert.assertTrue(first.equals("rod" + String.valueOf(++index))); + } + } + @Test + public void extractRodsFromMultiFileTest() { + String file = "/humgen/gsa-scr1/GATK_Data/Validation_Data/testRODFileImpl.csv"; + String file2 = "/humgen/gsa-scr1/GATK_Data/Validation_Data/testRODFileImpl2.csv"; + List lst = new ArrayList(); + ReferenceOrderedData.extractRodsFromFile(lst,file); + ReferenceOrderedData.extractRodsFromFile(lst,file2); + Assert.assertEquals(12,lst.size()); + int index = 0; + for (String entry: lst) { + String first = entry.subSequence(0,entry.indexOf(",")).toString(); + Assert.assertTrue(first.equals("rod" + String.valueOf(++index))); + } + } +}