New ROD accessing system, including a generalized interface for binding ROD on the command line that doesn't require you to chance GenomeAnalysisTK.java
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@355 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f5cc2d8b0b
commit
17b3d5b554
|
|
@ -105,7 +105,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
*/
|
||||
private static Logger logger = Logger.getLogger(GenomeAnalysisTK.class);
|
||||
|
||||
public static ArrayList<String> ROD_BINDINGS = null;
|
||||
public static ArrayList<String> ROD_BINDINGS = new ArrayList<String>();
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -143,9 +143,11 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
m_parser.addOptionalArg("daughter", "KID", "Daughter's genotype (SAM pileup)", "DAUGHTER_GENOTYPE_FILE");
|
||||
|
||||
// --rodBind <name> <type> <file>
|
||||
//m_parser.addOptionalArg("rods", "B", "Bind rod with <name> and <type> to <file>", "ROD_BINDINGS");
|
||||
|
||||
Option rodBinder = OptionBuilder.withArgName("rodBind")
|
||||
.hasArgs()
|
||||
.withDescription( "Bind rod with <name> and <type> to <file>" )
|
||||
.withDescription( "" )
|
||||
.create("B");
|
||||
m_parser.addOptionalArg(rodBinder, "ROD_BINDINGS");
|
||||
}
|
||||
|
|
@ -187,48 +189,38 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
start(Instance, argv);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that binds RODs using the old-style command line parser to the new style list for
|
||||
* a uniform processing.
|
||||
*
|
||||
* @param name
|
||||
* @param type
|
||||
* @param file
|
||||
*/
|
||||
private static void bindConvenienceRods(final String name, final String type, final String file )
|
||||
{
|
||||
ROD_BINDINGS.add(name);
|
||||
ROD_BINDINGS.add(type);
|
||||
ROD_BINDINGS.add(file);
|
||||
}
|
||||
|
||||
protected int execute() {
|
||||
final boolean TEST_ROD = false;
|
||||
List<ReferenceOrderedData<? extends ReferenceOrderedDatum> > rods = new ArrayList<ReferenceOrderedData<? extends ReferenceOrderedDatum> >();
|
||||
|
||||
if ( ROD_BINDINGS != null ) {
|
||||
System.out.printf("ROD BINDINGS are %s%n", Utils.join(":", ROD_BINDINGS));
|
||||
}
|
||||
|
||||
|
||||
if ( TEST_ROD ) {
|
||||
ReferenceOrderedData<rodGFF> gff = new ReferenceOrderedData<rodGFF>("test", new File("trunk/data/gFFTest.gff"), rodGFF.class );
|
||||
gff.testMe();
|
||||
|
||||
//ReferenceOrderedData dbsnp = new ReferenceOrderedData(new File("trunk/data/dbSNP_head.txt"), rodDbSNP.class );
|
||||
ReferenceOrderedData<rodDbSNP> dbsnp = new ReferenceOrderedData<rodDbSNP>("dbSNP", new File("/Volumes/Users/mdepristo/broad/ATK/exampleSAMs/dbSNP_chr20.txt"), rodDbSNP.class );
|
||||
//dbsnp.testMe();
|
||||
rods.add(dbsnp); // { gff, dbsnp };
|
||||
} else {
|
||||
if ( DBSNP_FILE != null ) {
|
||||
ReferenceOrderedData<rodDbSNP> dbsnp = new ReferenceOrderedData<rodDbSNP>("dbSNP", new File(DBSNP_FILE), rodDbSNP.class );
|
||||
//dbsnp.testMe();
|
||||
rods.add(dbsnp); // { gff, dbsnp };
|
||||
}
|
||||
if ( HAPMAP_FILE != null ) {
|
||||
ReferenceOrderedData<HapMapAlleleFrequenciesROD> hapmap = new ReferenceOrderedData<HapMapAlleleFrequenciesROD>("hapmap", new File(HAPMAP_FILE), HapMapAlleleFrequenciesROD.class );
|
||||
//dbsnp.testMe();
|
||||
rods.add(hapmap); // { gff, dbsnp };
|
||||
}
|
||||
if ( HAPMAP_CHIP_FILE != null ) {
|
||||
ReferenceOrderedData<rodGFF> hapmapChip = new ReferenceOrderedData<rodGFF>("hapmap-chip", new File(HAPMAP_CHIP_FILE), rodGFF.class );
|
||||
rods.add(hapmapChip);
|
||||
}
|
||||
//TODO: remove when walkers can ask for tracks
|
||||
if ( MOTHER_GENOTYPE_FILE != null )
|
||||
rods.add( new ReferenceOrderedData<rodSAMPileup>("mother", new File(MOTHER_GENOTYPE_FILE), rodSAMPileup.class ) );
|
||||
if ( FATHER_GENOTYPE_FILE != null )
|
||||
rods.add( new ReferenceOrderedData<rodSAMPileup>("father", new File(FATHER_GENOTYPE_FILE), rodSAMPileup.class ) );
|
||||
if ( DAUGHTER_GENOTYPE_FILE != null )
|
||||
rods.add( new ReferenceOrderedData<rodSAMPileup>("daughter", new File(DAUGHTER_GENOTYPE_FILE), rodSAMPileup.class ) );
|
||||
|
||||
}
|
||||
//
|
||||
// please don't use these in the future, use the new syntax
|
||||
//
|
||||
if ( DBSNP_FILE != null ) bindConvenienceRods("dbSNP", "dbsnp", DBSNP_FILE);
|
||||
if ( HAPMAP_FILE != null ) bindConvenienceRods("hapmap", "HapMapAlleleFrequencies", HAPMAP_FILE);
|
||||
if ( HAPMAP_CHIP_FILE != null ) bindConvenienceRods("hapmap-chip", "GFF", HAPMAP_CHIP_FILE);
|
||||
//TODO: remove when walkers can ask for tracks
|
||||
if ( MOTHER_GENOTYPE_FILE != null ) bindConvenienceRods("mother", "SAMPileup", MOTHER_GENOTYPE_FILE);
|
||||
if ( FATHER_GENOTYPE_FILE != null ) bindConvenienceRods("father", "SAMPileup", FATHER_GENOTYPE_FILE);
|
||||
if ( DAUGHTER_GENOTYPE_FILE != null ) bindConvenienceRods("daughter", "SAMPileup", DAUGHTER_GENOTYPE_FILE);
|
||||
|
||||
ReferenceOrderedData.parseBindings(logger, ROD_BINDINGS, rods);
|
||||
|
||||
initializeOutputStreams();
|
||||
|
||||
Walker<?,?> my_walker = null;
|
||||
|
|
@ -281,7 +273,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
|
|||
// Prepare the sort ordering w.r.t. the sequence dictionary
|
||||
if (REF_FILE_ARG != null) {
|
||||
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG);
|
||||
GenomeLoc.setupRefContigOrdering(refFile);
|
||||
//GenomeLoc.setupRefContigOrdering(refFile);
|
||||
}
|
||||
|
||||
// Determine the validation stringency. Default to ValidationStringency.STRICT.
|
||||
|
|
|
|||
|
|
@ -7,11 +7,20 @@ import java.util.List;
|
|||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* This class represents the Reference Metadata available at a particular site in the genome. It can be
|
||||
* used to conveniently lookup the RODs at this site, as well just getting a list of all of the RODs
|
||||
*
|
||||
* The standard interaction model is:
|
||||
*
|
||||
* Traversal system arrives at a site, which has a bunch of rods covering it
|
||||
* Traversal calls tracker.bind(name, rod) for each rod in rods
|
||||
* Traversal passes tracker to the walker
|
||||
* walker calls lookup(name, default) to obtain the rod values at this site, or default if none was
|
||||
* bound at this site.
|
||||
*
|
||||
* User: mdepristo
|
||||
* Date: Apr 3, 2009
|
||||
* Time: 3:05:23 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class RefMetaDataTracker {
|
||||
final HashMap<String, ReferenceOrderedDatum> map = new HashMap<String, ReferenceOrderedDatum>();
|
||||
|
|
@ -26,29 +35,65 @@ public class RefMetaDataTracker {
|
|||
*/
|
||||
public ReferenceOrderedDatum lookup(final String name, ReferenceOrderedDatum defaultValue) {
|
||||
//logger.debug(String.format("Lookup %s%n", name));
|
||||
if ( map.containsKey(name) )
|
||||
return map.get(name);
|
||||
final String luName = canonicalName(name);
|
||||
if ( map.containsKey(luName) )
|
||||
return map.get(luName);
|
||||
else
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see this.lookup
|
||||
* @param name
|
||||
* @param defaultValue
|
||||
* @return
|
||||
*/
|
||||
public Object lookup(final String name, Object defaultValue) {
|
||||
if ( map.containsKey(name) )
|
||||
return map.get(name);
|
||||
final String luName = canonicalName(name);
|
||||
if ( map.containsKey(luName) )
|
||||
return map.get(luName);
|
||||
else
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
public Object hasROD(final String name) {
|
||||
return map.containsKey(name);
|
||||
/**
|
||||
* Returns the canonical name of the rod name
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
private final String canonicalName(final String name)
|
||||
{
|
||||
return name.toLowerCase();
|
||||
}
|
||||
|
||||
/**
|
||||
* Is there a binding at this site to a ROD with name?
|
||||
*
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
public Object hasROD(final String name) {
|
||||
return map.containsKey(canonicalName(name));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all of the RODs at the current site
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Collection<ReferenceOrderedDatum> getAllRods() {
|
||||
return map.values();
|
||||
}
|
||||
|
||||
/**
|
||||
* Binds the reference ordered datum ROD to name at this site. Should be used only but the traversal
|
||||
* system to provide access to RODs in a structured way to the walkers.
|
||||
*
|
||||
* @param name
|
||||
* @param rod
|
||||
*/
|
||||
public void bind(final String name, ReferenceOrderedDatum rod) {
|
||||
//logger.debug(String.format("Binding %s to %s%n", name, rod));
|
||||
map.put(name, rod);
|
||||
map.put(canonicalName(name), rod);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,9 +4,7 @@ import java.io.File;
|
|||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Iterator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.*;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
||||
|
|
@ -14,6 +12,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
|||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.xReadLines;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
/**
|
||||
* Class for representing arbitrary reference ordered data sets
|
||||
|
|
@ -28,6 +27,98 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
|
|||
private File file = null;
|
||||
private Class<ROD> type = null; // runtime type information for object construction
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Static ROD type management
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public static class RODBinding {
|
||||
public final String name;
|
||||
public final Class<? extends ReferenceOrderedDatum> type;
|
||||
public RODBinding(final String name, final Class<? extends ReferenceOrderedDatum> type) {
|
||||
this.name = name;
|
||||
this.type = type;
|
||||
}
|
||||
}
|
||||
|
||||
public static HashMap<String, RODBinding> Types = new HashMap<String, RODBinding>();
|
||||
public static void addModule(final String name, final Class<? extends ReferenceOrderedDatum> rodType) {
|
||||
System.out.printf("* Adding rod class %s%n", name);
|
||||
Types.put(name.toLowerCase(), new RODBinding(name, rodType));
|
||||
}
|
||||
|
||||
static {
|
||||
// All known ROD types
|
||||
addModule("GFF", rodGFF.class);
|
||||
addModule("dbSNP", rodDbSNP.class);
|
||||
addModule("HapMapAlleleFrequencies", HapMapAlleleFrequenciesROD.class);
|
||||
addModule("SAMPileup", rodSAMPileup.class);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parse the ROD bindings. These are of the form of a single list of strings, each triplet of the
|
||||
* form <name> <type> <file>. After this function, the List of RODs contains new RODs bound to each of
|
||||
* name, of type, ready to read from the file. This function does check for the strings to be well formed
|
||||
* and such.
|
||||
*
|
||||
* @param logger
|
||||
* @param bindings
|
||||
* @param rods
|
||||
*/
|
||||
public static void parseBindings(Logger logger, ArrayList<String> bindings, List<ReferenceOrderedData<? extends ReferenceOrderedDatum> > rods)
|
||||
{
|
||||
logger.info("Processing ROD bindings: " + bindings.size() + " -> " + Utils.join(" : ", bindings));
|
||||
if ( bindings.size() % 3 != 0 )
|
||||
Utils.scareUser(String.format("Invalid ROD specification: requires triplets of <name> <type> <file> but got %s", Utils.join(" ", bindings)));
|
||||
|
||||
// Loop over triplets
|
||||
for ( int i = 0; i < bindings.size(); i += 3) {
|
||||
final String name = bindings.get(i);
|
||||
final String typeName = bindings.get(i+1);
|
||||
final String fileName = bindings.get(i+2);
|
||||
|
||||
ReferenceOrderedData<?> rod = parse1Binding(logger, name, typeName, fileName);
|
||||
|
||||
// check that we're not generating duplicate bindings
|
||||
for ( ReferenceOrderedData rod2 : rods )
|
||||
if ( rod2.getName().equals(rod.getName()) )
|
||||
Utils.scareUser(String.format("Found duplicate rod bindings", rod.getName()));
|
||||
|
||||
rods.add(rod);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helpful function that parses a single triplet of <name> <type> <file> and returns the corresponding ROD with
|
||||
* <name>, of type <type> that reads its input from <file>.
|
||||
*
|
||||
* @param logger
|
||||
* @param trackName
|
||||
* @param typeName
|
||||
* @param fileName
|
||||
* @return
|
||||
*/
|
||||
private static ReferenceOrderedData<?> parse1Binding( Logger logger, final String trackName, final String typeName, final String fileName )
|
||||
{
|
||||
// Gracefully fail if we don't have the type
|
||||
if ( ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null )
|
||||
Utils.scareUser(String.format("Unknown ROD type: %s", typeName));
|
||||
|
||||
// Lookup the type
|
||||
Class rodClass = ReferenceOrderedData.Types.get(typeName.toLowerCase()).type;
|
||||
|
||||
// Create the ROD
|
||||
ReferenceOrderedData<?> rod = new ReferenceOrderedData<ReferenceOrderedDatum>(trackName.toLowerCase(), new File(fileName), rodClass );
|
||||
logger.info(String.format("Created binding from %s to %s of type %s", trackName.toLowerCase(), fileName, rodClass));
|
||||
return rod;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public ReferenceOrderedData(final String name, File file, Class<ROD> type ) {
|
||||
this.file = file;
|
||||
this.type = type;
|
||||
|
|
|
|||
|
|
@ -24,18 +24,6 @@ public class PrepareROD extends CommandLineProgram {
|
|||
@Option(shortName="RODNAME", doc="Name of the data") public String ROD_NAME = null;
|
||||
@Option(shortName="RODTYPE", doc="Referenced Ordered Data type") public String ROD_TYPE = null;
|
||||
|
||||
public static HashMap<String, Class<? extends ReferenceOrderedDatum>> Types = new HashMap<String,Class<? extends ReferenceOrderedDatum>>();
|
||||
public static void addModule(final String name, final Class<? extends ReferenceOrderedDatum> rodType) {
|
||||
System.out.printf("* Adding rod class %s%n", name);
|
||||
Types.put(name.toLowerCase(), rodType);
|
||||
}
|
||||
|
||||
static {
|
||||
addModule("GFF", rodGFF.class);
|
||||
addModule("dbSNP", rodDbSNP.class);
|
||||
addModule("HapMapAlleleFrequencies", HapMapAlleleFrequenciesROD.class);
|
||||
}
|
||||
|
||||
/** Required main method implementation. */
|
||||
public static void main(String[] argv) {
|
||||
System.exit(new PrepareROD().instanceMain(argv));
|
||||
|
|
@ -47,7 +35,7 @@ public class PrepareROD extends CommandLineProgram {
|
|||
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG);
|
||||
GenomeLoc.setupRefContigOrdering(refFile);
|
||||
|
||||
Class<? extends ReferenceOrderedDatum> rodClass = Types.get(ROD_TYPE.toLowerCase());
|
||||
Class<? extends ReferenceOrderedDatum> rodClass = ReferenceOrderedData.Types.get(ROD_TYPE.toLowerCase()).type;
|
||||
|
||||
ReferenceOrderedData<? extends ReferenceOrderedDatum> rod = new ReferenceOrderedData("ROD", new File(ROD_FILE), rodClass );
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -99,9 +99,9 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
return false;
|
||||
} else {
|
||||
contigInfo = seqDict;
|
||||
logger.info(String.format("Prepared reference sequence contig dictionary%n order ->"));
|
||||
logger.debug(String.format("Prepared reference sequence contig dictionary"));
|
||||
for (SAMSequenceRecord contig : seqDict.getSequences() ) {
|
||||
logger.info(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
|
||||
logger.debug(String.format(" %s (%d bp)", contig.getSequenceName(), contig.getSequenceLength()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -225,7 +225,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
Collections.sort(locs);
|
||||
//logger.info(String.format("Going to process %d locations", locs.length));
|
||||
locs = mergeOverlappingLocations(locs);
|
||||
logger.info(" Locations are:\n" + Utils.join("\n", Functions.map(Operators.toString, locs)));
|
||||
logger.info("Locations are:" + Utils.join(", ", Functions.map(Operators.toString, locs)));
|
||||
return locs;
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
|
|
@ -235,7 +235,7 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
|
|||
}
|
||||
|
||||
public static ArrayList<GenomeLoc> mergeOverlappingLocations(final ArrayList<GenomeLoc> raw) {
|
||||
logger.info(" Raw locations are:\n" + Utils.join("\n", Functions.map(Operators.toString, raw)));
|
||||
logger.debug(" Raw locations are:\n" + Utils.join("\n", Functions.map(Operators.toString, raw)));
|
||||
if ( raw.size() <= 1 )
|
||||
return raw;
|
||||
else {
|
||||
|
|
|
|||
Loading…
Reference in New Issue