added the ability to pass in a csv file of ROD triplets (one triplet per line) to the -B option

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1412 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-08-11 22:10:20 +00:00
parent e4acd14675
commit d101c20b30
3 changed files with 151 additions and 67 deletions

View File

@ -186,7 +186,7 @@ public class GenomeAnalysisEngine {
} }
// parse out the rod bindings // parse out the rod bindings
ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods); ReferenceOrderedData.parseBindings(argCollection.RODBindings, rods);
validateSuppliedReferenceOrderedDataAgainstWalker( my_walker, rods ); validateSuppliedReferenceOrderedDataAgainstWalker( my_walker, rods );

View File

@ -2,13 +2,11 @@ package org.broadinstitute.sting.gatk.refdata;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MalformedGenomeLocException; import org.broadinstitute.sting.utils.MalformedGenomeLocException;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.xReadLines; import org.broadinstitute.sting.utils.xReadLines;
import java.io.File; import java.io.*;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method; import java.lang.reflect.Method;
@ -16,7 +14,7 @@ import java.util.*;
/** /**
* Class for representing arbitrary reference ordered data sets * Class for representing arbitrary reference ordered data sets
* * <p/>
* User: mdepristo * User: mdepristo
* Date: Feb 27, 2009 * Date: Feb 27, 2009
* Time: 10:47:14 AM * Time: 10:47:14 AM
@ -27,13 +25,15 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
private File file = null; private File file = null;
private String fieldDelimiter; private String fieldDelimiter;
/** /** Header object returned from the datum */
* Header object returned from the datum
*/
private Object header = null; private Object header = null;
private Class<ROD> type = null; // runtime type information for object construction private Class<ROD> type = null; // runtime type information for object construction
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
// ---------------------------------------------------------------------- // ----------------------------------------------------------------------
// //
// Static ROD type management // Static ROD type management
@ -42,6 +42,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
public static class RODBinding { public static class RODBinding {
public final String name; public final String name;
public final Class<? extends ReferenceOrderedDatum> type; public final Class<? extends ReferenceOrderedDatum> type;
public RODBinding(final String name, final Class<? extends ReferenceOrderedDatum> type) { public RODBinding(final String name, final Class<? extends ReferenceOrderedDatum> type) {
this.name = name; this.name = name;
this.type = type; this.type = type;
@ -49,6 +50,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
} }
public static HashMap<String, RODBinding> Types = new HashMap<String, RODBinding>(); public static HashMap<String, RODBinding> Types = new HashMap<String, RODBinding>();
public static void addModule(final String name, final Class<? extends ReferenceOrderedDatum> rodType) { public static void addModule(final String name, final Class<? extends ReferenceOrderedDatum> rodType) {
final String boundName = name.toLowerCase(); final String boundName = name.toLowerCase();
if (Types.containsKey(boundName)) { if (Types.containsKey(boundName)) {
@ -85,12 +87,18 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
* name, of type, ready to read from the file. This function does check for the strings to be well formed * name, of type, ready to read from the file. This function does check for the strings to be well formed
* and such. * and such.
* *
* @param logger
* @param bindings * @param bindings
* @param rods * @param rods
*/ */
public static void parseBindings(Logger logger, ArrayList<String> bindings, List<ReferenceOrderedData<? extends ReferenceOrderedDatum> > rods) public static void parseBindings(ArrayList<String> bindings, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
{ // pre-process out any files that were passed in as rod binding command line options
for (int x = 0; x < bindings.size(); x++) {
if (new File(bindings.get(x)).exists()) {
extractRodsFromFile(bindings, bindings.get(x));
bindings.remove(x);
x--;
}
}
// Loop over triplets // Loop over triplets
for (String bindingSets : bindings) { for (String bindingSets : bindings) {
String[] bindingTokens = bindingSets.split(","); String[] bindingTokens = bindingSets.split(",");
@ -104,7 +112,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
final String typeName = bindingTokens[bindingSet + 1]; final String typeName = bindingTokens[bindingSet + 1];
final String fileName = bindingTokens[bindingSet + 2]; final String fileName = bindingTokens[bindingSet + 2];
ReferenceOrderedData<?> rod = parse1Binding(logger, name, typeName, fileName); ReferenceOrderedData<?> rod = parse1Binding(name, typeName, fileName);
// check that we're not generating duplicate bindings // check that we're not generating duplicate bindings
for (ReferenceOrderedData rod2 : rods) for (ReferenceOrderedData rod2 : rods)
@ -116,18 +124,40 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
} }
} }
/**
 * Given an existing file, open it and append all the valid triplet lines to an existing list.
 * <p/>
 * A line is considered valid when it matches the regular expression ".+,.+,.+"; valid lines
 * are trimmed before being appended.  Every other line is skipped and reported through the
 * class logger.  The underlying reader is always closed before this method returns, whether
 * reading succeeds or fails.
 *
 * @param rodTripletList the list of existing triplets; valid lines are appended to it
 * @param filename       the file to attempt to extract ROD triplets from
 * @throws StingException if the file cannot be opened, or if an I/O error occurs while reading it
 */
protected static void extractRodsFromFile(List<String> rodTripletList, String filename) {
    BufferedReader str;
    try {
        str = new BufferedReader(new FileReader(new File(filename)));
    } catch (FileNotFoundException e) {
        throw new StingException("Unable to load the ROD input file " + filename,e);
    }
    // placeholder reported in the error message if the very first read fails
    String line = "NO LINES READ IN";
    try {
        while ((line = str.readLine()) != null) {
            if (line.matches(".+,.+,.+")) {
                rodTripletList.add(line.trim());
            } else {
                logger.warn("the following file line didn't parsing into a triplet -> " + line);
            }
        }
    } catch (IOException e) {
        throw new StingException("Failed reading the input rod file " + filename + " last line read was " + line,e);
    } finally {
        // release the file handle on every path; a failed close must not mask a read failure
        try {
            str.close();
        } catch (IOException e) {
            logger.warn("Unable to close the ROD input file " + filename);
        }
    }
}
/** /**
* Helpful function that parses a single triplet of <name> <type> <file> and returns the corresponding ROD with * Helpful function that parses a single triplet of <name> <type> <file> and returns the corresponding ROD with
* <name>, of type <type> that reads its input from <file>. * <name>, of type <type> that reads its input from <file>.
* *
* @param logger
* @param trackName * @param trackName
* @param typeName * @param typeName
* @param fileName * @param fileName
* @return * @return
*/ */
private static ReferenceOrderedData<?> parse1Binding( Logger logger, final String trackName, final String typeName, final String fileName ) private static ReferenceOrderedData<?> parse1Binding(final String trackName, final String typeName, final String fileName) {
{
// Gracefully fail if we don't have the type // Gracefully fail if we don't have the type
if (ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null) if (ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null)
Utils.scareUser(String.format("Unknown ROD type: %s", typeName)); Utils.scareUser(String.format("Unknown ROD type: %s", typeName));
@ -160,8 +190,10 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
* Special equals override to see if this ROD is compatible with the given * Special equals override to see if this ROD is compatible with the given
* name and type. 'Compatible' means that this ROD has the name that's passed * name and type. 'Compatible' means that this ROD has the name that's passed
* in and its data can fit into the container specified by type. * in and its data can fit into the container specified by type.
*
* @param name Name to check. * @param name Name to check.
* @param type Type to check. * @param type Type to check.
*
* @return True if these parameters imply this rod. False otherwise. * @return True if these parameters imply this rod. False otherwise.
*/ */
public boolean matches(String name, Class<? extends ReferenceOrderedDatum> type) { public boolean matches(String name, Class<? extends ReferenceOrderedDatum> type) {
@ -316,6 +348,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
// //
// ---------------------------------------------------------------------- // ----------------------------------------------------------------------
private Constructor<ROD> parsing_constructor; private Constructor<ROD> parsing_constructor;
private ROD newROD(final String name, final Class<ROD> type) { private ROD newROD(final String name, final Class<ROD> type) {
try { try {
return (ROD) parsing_constructor.newInstance(name); return (ROD) parsing_constructor.newInstance(name);
@ -329,8 +362,12 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
} }
private Object initializeROD(final String name, final File file, final Class<ROD> type) { private Object initializeROD(final String name, final File file, final Class<ROD> type) {
try { parsing_constructor = type.getConstructor(String.class); } try {
catch (java.lang.NoSuchMethodException e) { throw new RuntimeException(e); } parsing_constructor = type.getConstructor(String.class);
}
catch (java.lang.NoSuchMethodException e) {
throw new RuntimeException(e);
}
ROD rod = newROD(name, type); ROD rod = newROD(name, type);
try { try {
return rod.initialize(file); return rod.initialize(file);

View File

@ -0,0 +1,47 @@
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.BaseTest;
import org.junit.Assert;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Class ReferenceOrderedDataTest
 * <p/>
 * Some functionality to test parts of the reference ordered data system, currently the loading
 * of ROD triplets from files.  This is by NO MEANS a complete test suite, but additions would be
 * extremely welcome.
 *
 * @author aaron
 */
public class ReferenceOrderedDataTest extends BaseTest {

    /** first fixture file; the tests expect it to yield six triplets */
    private static final String ROD_FILE = "/humgen/gsa-scr1/GATK_Data/Validation_Data/testRODFileImpl.csv";

    /** second fixture file; the tests expect it to yield six more triplets */
    private static final String SECOND_ROD_FILE = "/humgen/gsa-scr1/GATK_Data/Validation_Data/testRODFileImpl2.csv";

    /** Loading a single file should produce six triplets named rod1 through rod6, in order. */
    @Test
    public void extractRodsFromFileTest() {
        List<String> lst = new ArrayList<String>();
        ReferenceOrderedData.extractRodsFromFile(lst, ROD_FILE);
        Assert.assertEquals(6, lst.size());
        assertSequentialRodNames(lst);
    }

    /** Loading two files into the same list should append, giving rod1 through rod12, in order. */
    @Test
    public void extractRodsFromMultiFileTest() {
        List<String> lst = new ArrayList<String>();
        ReferenceOrderedData.extractRodsFromFile(lst, ROD_FILE);
        ReferenceOrderedData.extractRodsFromFile(lst, SECOND_ROD_FILE);
        Assert.assertEquals(12, lst.size());
        assertSequentialRodNames(lst);
    }

    /**
     * Checks that the first comma-separated field of the i-th triplet (1-based) is "rod" + i.
     *
     * @param triplets the triplet lines to verify, in the order they were loaded
     */
    private static void assertSequentialRodNames(List<String> triplets) {
        int index = 0;
        for (String entry : triplets) {
            ++index;
            int comma = entry.indexOf(",");
            // fail with a readable message rather than an index exception from substring
            Assert.assertTrue("triplet " + index + " contains no comma: " + entry, comma >= 0);
            Assert.assertEquals("rod" + index, entry.substring(0, comma));
        }
    }
}