Added the ability to pass in a CSV file of ROD triplets (one triplet per line) to the -B option
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1412 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e4acd14675
commit
d101c20b30
|
|
@ -186,7 +186,7 @@ public class GenomeAnalysisEngine {
|
|||
}
|
||||
|
||||
// parse out the rod bindings
|
||||
ReferenceOrderedData.parseBindings(logger, argCollection.RODBindings, rods);
|
||||
ReferenceOrderedData.parseBindings(argCollection.RODBindings, rods);
|
||||
|
||||
validateSuppliedReferenceOrderedDataAgainstWalker( my_walker, rods );
|
||||
|
||||
|
|
|
|||
|
|
@ -2,13 +2,11 @@ package org.broadinstitute.sting.gatk.refdata;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MalformedGenomeLocException;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.*;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
|
|
@ -16,7 +14,7 @@ import java.util.*;
|
|||
|
||||
/**
|
||||
* Class for representing arbitrary reference ordered data sets
|
||||
*
|
||||
* <p/>
|
||||
* User: mdepristo
|
||||
* Date: Feb 27, 2009
|
||||
* Time: 10:47:14 AM
|
||||
|
|
@ -26,14 +24,16 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
private String name;
|
||||
private File file = null;
|
||||
private String fieldDelimiter;
|
||||
|
||||
/**
|
||||
* Header object returned from the datum
|
||||
*/
|
||||
|
||||
/** Header object returned from the datum */
|
||||
private Object header = null;
|
||||
|
||||
|
||||
private Class<ROD> type = null; // runtime type information for object construction
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Static ROD type management
|
||||
|
|
@ -42,6 +42,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
public static class RODBinding {
|
||||
public final String name;
|
||||
public final Class<? extends ReferenceOrderedDatum> type;
|
||||
|
||||
public RODBinding(final String name, final Class<? extends ReferenceOrderedDatum> type) {
|
||||
this.name = name;
|
||||
this.type = type;
|
||||
|
|
@ -49,9 +50,10 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
}
|
||||
|
||||
public static HashMap<String, RODBinding> Types = new HashMap<String, RODBinding>();
|
||||
|
||||
public static void addModule(final String name, final Class<? extends ReferenceOrderedDatum> rodType) {
|
||||
final String boundName = name.toLowerCase();
|
||||
if ( Types.containsKey(boundName) ) {
|
||||
if (Types.containsKey(boundName)) {
|
||||
throw new RuntimeException(String.format("GATK BUG: adding ROD module %s that is already bound", boundName));
|
||||
}
|
||||
System.out.printf("* Adding rod class %s%n", name);
|
||||
|
|
@ -85,30 +87,36 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
* name, of type, ready to read from the file. This function does check for the strings to be well formed
|
||||
* and such.
|
||||
*
|
||||
* @param logger
|
||||
* @param bindings
|
||||
* @param rods
|
||||
*/
|
||||
public static void parseBindings(Logger logger, ArrayList<String> bindings, List<ReferenceOrderedData<? extends ReferenceOrderedDatum> > rods)
|
||||
{
|
||||
public static void parseBindings(ArrayList<String> bindings, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
|
||||
// pre-process out any files that were passed in as rod binding command line options
|
||||
for (int x = 0; x < bindings.size(); x++) {
|
||||
if (new File(bindings.get(x)).exists()) {
|
||||
extractRodsFromFile(bindings, bindings.get(x));
|
||||
bindings.remove(x);
|
||||
x--;
|
||||
}
|
||||
}
|
||||
// Loop over triplets
|
||||
for( String bindingSets: bindings ) {
|
||||
for (String bindingSets : bindings) {
|
||||
String[] bindingTokens = bindingSets.split(",");
|
||||
if( bindingTokens.length % 3 != 0 )
|
||||
if (bindingTokens.length % 3 != 0)
|
||||
Utils.scareUser(String.format("Invalid ROD specification: requires triplets of <name>,<type>,<file> but got %s", Utils.join(",", bindings)));
|
||||
|
||||
for ( int bindingSet = 0; bindingSet < bindingTokens.length; bindingSet += 3 ) {
|
||||
for (int bindingSet = 0; bindingSet < bindingTokens.length; bindingSet += 3) {
|
||||
logger.info("Processing ROD bindings: " + bindings.size() + " -> " + Utils.join(" : ", bindingTokens));
|
||||
|
||||
final String name = bindingTokens[bindingSet];
|
||||
final String typeName = bindingTokens[bindingSet + 1];
|
||||
final String fileName = bindingTokens[bindingSet + 2];
|
||||
|
||||
ReferenceOrderedData<?> rod = parse1Binding(logger, name, typeName, fileName);
|
||||
ReferenceOrderedData<?> rod = parse1Binding(name, typeName, fileName);
|
||||
|
||||
// check that we're not generating duplicate bindings
|
||||
for ( ReferenceOrderedData rod2 : rods )
|
||||
if ( rod2.getName().equals(rod.getName()) )
|
||||
for (ReferenceOrderedData rod2 : rods)
|
||||
if (rod2.getName().equals(rod.getName()))
|
||||
Utils.scareUser(String.format("Found duplicate rod bindings", rod.getName()));
|
||||
|
||||
rods.add(rod);
|
||||
|
|
@ -116,20 +124,42 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* given a existing file, open it and append all the valid triplet lines to an existing list
|
||||
*
|
||||
* @param rodTripletList the list of existing triplets
|
||||
* @param filename the file to attempt to extract ROD triplets from
|
||||
*/
|
||||
protected static void extractRodsFromFile(List<String> rodTripletList, String filename) {
|
||||
BufferedReader str;
|
||||
try {
|
||||
str = new BufferedReader(new FileReader(new File(filename)));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to load the ROD input file " + filename,e);
|
||||
}
|
||||
String line = "NO LINES READ IN";
|
||||
try {
|
||||
while ((line = str.readLine()) != null) {
|
||||
if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim());
|
||||
else logger.warn("the following file line didn't parsing into a triplet -> " + line);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new StingException("Failed reading the input rod file " + filename + " last line read was " + line,e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helpful function that parses a single triplet of <name> <type> <file> and returns the corresponding ROD with
|
||||
* <name>, of type <type> that reads its input from <file>.
|
||||
*
|
||||
* @param logger
|
||||
*
|
||||
* @param trackName
|
||||
* @param typeName
|
||||
* @param fileName
|
||||
* @return
|
||||
*/
|
||||
private static ReferenceOrderedData<?> parse1Binding( Logger logger, final String trackName, final String typeName, final String fileName )
|
||||
{
|
||||
private static ReferenceOrderedData<?> parse1Binding(final String trackName, final String typeName, final String fileName) {
|
||||
// Gracefully fail if we don't have the type
|
||||
if ( ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null )
|
||||
if (ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null)
|
||||
Utils.scareUser(String.format("Unknown ROD type: %s", typeName));
|
||||
|
||||
// Lookup the type
|
||||
|
|
@ -160,34 +190,36 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
* Special equals override to see if this ROD is compatible with the given
|
||||
* name and type. 'Compatible' means that this ROD has the name that's passed
|
||||
* in and its data can fit into the container specified by type.
|
||||
*
|
||||
* @param name Name to check.
|
||||
* @param type Type to check.
|
||||
*
|
||||
* @return True if these parameters imply this rod. False otherwise.
|
||||
*/
|
||||
public boolean matches( String name, Class<? extends ReferenceOrderedDatum> type ) {
|
||||
public boolean matches(String name, Class<? extends ReferenceOrderedDatum> type) {
|
||||
return this.name.equals(name) && type.isAssignableFrom(this.type);
|
||||
}
|
||||
|
||||
public RODIterator<ROD> iterator() {
|
||||
Iterator<ROD> it;
|
||||
Iterator<ROD> it;
|
||||
try {
|
||||
Method m = type.getDeclaredMethod("createIterator", String.class,java.io.File.class);
|
||||
Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class);
|
||||
it = (Iterator<ROD>) m.invoke(null, name, file);
|
||||
} catch ( java.lang.NoSuchMethodException e ) {
|
||||
} catch (java.lang.NoSuchMethodException e) {
|
||||
it = new SimpleRODIterator();
|
||||
} catch ( java.lang.NullPointerException e ) {
|
||||
} catch (java.lang.NullPointerException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( java.lang.SecurityException e ) {
|
||||
} catch (java.lang.SecurityException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( java.lang.IllegalAccessException e ) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( java.lang.IllegalArgumentException e ) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( java.lang.reflect.InvocationTargetException e ) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
} catch (java.lang.IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.IllegalArgumentException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.reflect.InvocationTargetException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return new RODIterator<ROD>(it);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
|
|
@ -195,12 +227,12 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public void testMe() {
|
||||
for ( ReferenceOrderedDatum rec : this ) {
|
||||
for (ReferenceOrderedDatum rec : this) {
|
||||
System.out.println(rec.toString());
|
||||
|
||||
rodGFF gff = (rodGFF)rec;
|
||||
rodGFF gff = (rodGFF) rec;
|
||||
String[] keys = {"LENGTH", "ALT", "FOBARBAR"};
|
||||
for ( String key : keys) {
|
||||
for (String key : keys) {
|
||||
System.out.printf(" -> %s is (%s)%n", key, gff.containsAttribute(key) ? gff.getAttribute(key) : "none");
|
||||
}
|
||||
}
|
||||
|
|
@ -214,7 +246,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
// ----------------------------------------------------------------------
|
||||
public ArrayList<ReferenceOrderedDatum> readAll() {
|
||||
ArrayList<ReferenceOrderedDatum> elts = new ArrayList<ReferenceOrderedDatum>();
|
||||
for ( ReferenceOrderedDatum rec : this ) {
|
||||
for (ReferenceOrderedDatum rec : this) {
|
||||
elts.add(rec);
|
||||
}
|
||||
elts.trimToSize();
|
||||
|
|
@ -228,7 +260,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
public static void write(ArrayList<ReferenceOrderedDatum> data, File output) throws IOException {
|
||||
final FileWriter out = new FileWriter(output);
|
||||
|
||||
for ( ReferenceOrderedDatum rec : data ) {
|
||||
for (ReferenceOrderedDatum rec : data) {
|
||||
out.write(rec.repl() + "\n");
|
||||
}
|
||||
|
||||
|
|
@ -237,12 +269,12 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
|
||||
public boolean validateFile() throws Exception {
|
||||
ReferenceOrderedDatum last = null;
|
||||
for ( ReferenceOrderedDatum rec : this ) {
|
||||
if ( last != null && last.compareTo(rec) == 1 ) {
|
||||
// It's out of order
|
||||
throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString());
|
||||
}
|
||||
last = rec;
|
||||
for (ReferenceOrderedDatum rec : this) {
|
||||
if (last != null && last.compareTo(rec) == 1) {
|
||||
// It's out of order
|
||||
throw new Exception("Out of order elements at \n" + last.toString() + "\n" + rec.toString());
|
||||
}
|
||||
last = rec;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
@ -262,7 +294,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
public SimpleRODIterator() {
|
||||
try {
|
||||
parser = new xReadLines(file);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
} catch (FileNotFoundException e) {
|
||||
Utils.scareUser("Couldn't open file: " + file);
|
||||
}
|
||||
}
|
||||
|
|
@ -281,7 +313,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
final String line = parser.next();
|
||||
//System.out.printf("Line is '%s'%n", line);
|
||||
String parts[] = line.split(fieldDelimiter);
|
||||
|
||||
|
||||
try {
|
||||
n = parseLine(parts);
|
||||
// Two failure conditions:
|
||||
|
|
@ -291,12 +323,12 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
// TODO: Clean this up so that all errors are handled in one spot.
|
||||
success = (n != null);
|
||||
}
|
||||
catch( MalformedGenomeLocException ex ) {
|
||||
if( firstFailure ) {
|
||||
catch (MalformedGenomeLocException ex) {
|
||||
if (firstFailure) {
|
||||
Utils.warnUser("Failed to parse contig on line '" + line + "'. The reason given was: " + ex.getMessage() + " Skipping ahead to the next recognized GenomeLoc. ");
|
||||
firstFailure = false;
|
||||
}
|
||||
if( !parser.hasNext() )
|
||||
if (!parser.hasNext())
|
||||
Utils.warnUser("Unable to find more valid reference-ordered data. Giving up.");
|
||||
}
|
||||
|
||||
|
|
@ -304,7 +336,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
|
@ -315,26 +347,31 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
// Parsing
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
private Constructor<ROD> parsing_constructor;
|
||||
private ROD newROD( final String name, final Class<ROD> type ) {
|
||||
private Constructor<ROD> parsing_constructor;
|
||||
|
||||
private ROD newROD(final String name, final Class<ROD> type) {
|
||||
try {
|
||||
return (ROD)parsing_constructor.newInstance(name);
|
||||
} catch ( java.lang.InstantiationException e ) {
|
||||
return (ROD) parsing_constructor.newInstance(name);
|
||||
} catch (java.lang.InstantiationException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( java.lang.IllegalAccessException e ) {
|
||||
} catch (java.lang.IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch ( InvocationTargetException e ) {
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private Object initializeROD(final String name, final File file, final Class<ROD> type) {
|
||||
try { parsing_constructor = type.getConstructor(String.class); }
|
||||
catch (java.lang.NoSuchMethodException e) { throw new RuntimeException(e); }
|
||||
try {
|
||||
parsing_constructor = type.getConstructor(String.class);
|
||||
}
|
||||
catch (java.lang.NoSuchMethodException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
ROD rod = newROD(name, type);
|
||||
try {
|
||||
return rod.initialize(file);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
|
@ -343,7 +380,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
//System.out.printf("Parsing GFFLine %s%n", Utils.join(" ", parts));
|
||||
ROD obj = newROD(name, type);
|
||||
try {
|
||||
if ( ! obj.parseLine(header, parts) )
|
||||
if (!obj.parseLine(header, parts))
|
||||
obj = null;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Badly formed ROD: " + e);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class ReferenceOrderedDataTest
|
||||
*
|
||||
* some functionality to test parts of the reference ordered data system that I've added. This is by NO MEANS
|
||||
* a complete test suite, but additions would be extremely welcome
|
||||
*/
|
||||
public class ReferenceOrderedDataTest extends BaseTest {
|
||||
@Test
|
||||
public void extractRodsFromFileTest() {
|
||||
String file = "/humgen/gsa-scr1/GATK_Data/Validation_Data/testRODFileImpl.csv";
|
||||
List<String> lst = new ArrayList<String>();
|
||||
ReferenceOrderedData.extractRodsFromFile(lst,file);
|
||||
Assert.assertEquals(6,lst.size());
|
||||
int index = 0;
|
||||
for (String entry: lst) {
|
||||
String first = entry.subSequence(0,entry.indexOf(",")).toString();
|
||||
Assert.assertTrue(first.equals("rod" + String.valueOf(++index)));
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void extractRodsFromMultiFileTest() {
|
||||
String file = "/humgen/gsa-scr1/GATK_Data/Validation_Data/testRODFileImpl.csv";
|
||||
String file2 = "/humgen/gsa-scr1/GATK_Data/Validation_Data/testRODFileImpl2.csv";
|
||||
List<String> lst = new ArrayList<String>();
|
||||
ReferenceOrderedData.extractRodsFromFile(lst,file);
|
||||
ReferenceOrderedData.extractRodsFromFile(lst,file2);
|
||||
Assert.assertEquals(12,lst.size());
|
||||
int index = 0;
|
||||
for (String entry: lst) {
|
||||
String first = entry.subSequence(0,entry.indexOf(",")).toString();
|
||||
Assert.assertTrue(first.equals("rod" + String.valueOf(++index)));
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue