Two fixes together:

1) Some improvements to the VCF4 parsing, including disabling validation. 2) Reimplemented RefSeq in the new Tribble-style rod system. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3630 348d0f76-0448-11de-a6fe-93d51630548a
2010-06-24 22:17:03 +00:00 · 2010-06-24 22:17:03 +00:00 · 682f9b46c6
parent 72e669538e
commit 682f9b46c6
11 changed files with 506 additions and 333 deletions
--- a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java
@ -0,0 +1,80 @@
+package org.broadinstitute.sting.gatk.refdata.features.refseq;
+
+import org.broad.tribble.Feature;
+import org.broad.tribble.FeatureCodec;
+import org.broad.tribble.util.LineReader;
+import org.broadinstitute.sting.gatk.refdata.BasicReferenceOrderedDatum;
+import org.broadinstitute.sting.gatk.refdata.Transcript;
+import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
+import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.StingException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * the ref seq codec
+ */
+public class RefSeqCodec implements FeatureCodec {
+
+    @Override
+    public Feature decodeLoc(String line) {
+        String fields[] = line.split("\t");
+        String contig_name = fields[2];
+        return new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
+    }
+
+    /** Fills this object from a text line in RefSeq (UCSC) text dump file */
+    @Override
+    public Feature decode(String line) {
+        String fields[] = line.split("\t");
+
+        String contig_name = fields[2];
+        RefSeqFeature feature = new RefSeqFeature(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
+
+        feature.setTranscript_id(fields[1]);
+        if ( fields[3].length()==1 && fields[3].charAt(0)=='+') feature.setStrand(1);
+        else if ( fields[3].length()==1 && fields[3].charAt(0)=='-') feature.setStrand(-1);
+        else throw new StingException("Expected strand symbol (+/-), found: "+fields[3]);
+
+
+        feature.setTranscript_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
+        feature.setTranscript_coding_interval(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])));
+        feature.setGene_name(fields[12]);
+        String[] exon_starts = fields[9].split(",");
+        String[] exon_stops = fields[10].split(",");
+        String[] eframes = fields[15].split(",");
+
+        assert exon_starts.length == exon_stops.length : "Data format error: numbers of exon start and stop positions differ";
+        assert exon_starts.length == eframes.length : "Data format error: numbers of exons and exon frameshifts differ";
+
+        ArrayList<GenomeLoc> exons = new ArrayList<GenomeLoc>(exon_starts.length);
+        ArrayList<Integer> exon_frames = new ArrayList<Integer>(eframes.length);
+
+        for ( int i = 0 ; i < exon_starts.length  ; i++ ) {
+            exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
+            exon_frames.add(Integer.decode(eframes[i]));
+        }
+
+        feature.setExons(exons);
+        feature.setExon_frames(exon_frames);
+        return feature;
+    }
+
+    @Override
+    public int readHeader(LineReader reader) {
+        return 0;  //To change body of implemented methods use File | Settings | File Templates.
+    }
+
+    @Override
+    public Class getFeatureType() {
+        return RefSeqCodec.class;
+    }
+
+    @Override
+    public Object getHeader(Class clazz) throws ClassCastException {
+        return null;  // we don't have a header
+    }
+}
--- a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqFeature.java
@ -0,0 +1,285 @@
+package org.broadinstitute.sting.gatk.refdata.features.refseq;
+
+import org.broad.tribble.Feature;
+import org.broadinstitute.sting.gatk.refdata.Transcript;
+import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
+import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.StingException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * the ref seq feature
+ */
+public class RefSeqFeature implements Transcript, Feature {
+
+    private String transcript_id;
+    private int strand;
+    private GenomeLoc transcript_interval;
+    private GenomeLoc transcript_coding_interval;
+    private List<GenomeLoc> exons;
+    private String gene_name;
+    private List<Integer> exon_frames;
+    private String name;
+
+    // store the contig, start, and stop for this record
+    private final String contig;
+    private final int start;
+    private final int stop;
+
+    public RefSeqFeature(String contig, int start, int stop) {
+        this.contig = contig;
+        this.start = start;
+        this.stop = stop;
+    }
+
+    /** Returns id of the transcript (RefSeq NM_* id) */
+    public String getTranscriptId() { return transcript_id; }
+
+    /** Returns coding strand of the transcript, 1 or -1 for positive or negative strand, respectively */
+    public int getStrand() { return strand; }
+
+    /** Returns transcript's full genomic interval (includes all exons with UTRs) */
+    public GenomeLoc getLocation() {
+        if (transcript_interval == null)
+            transcript_interval = GenomeLocParser.parseGenomeLoc(contig,start,stop);
+        return transcript_interval;
+    }
+
+    /** Returns genomic interval of the coding sequence (does not include UTRs, but still includes introns, since it's a single interval on the DNA) */
+    public GenomeLoc getCodingLocation() { return transcript_coding_interval; }
+
+    /** Name of the gene this transcript corresponds to (NOT gene id such as Entrez etc) */
+    public String getGeneName() { return gene_name; }
+
+    /** Number of exons in this transcript */
+    public int getNumExons() { return exons.size(); }
+
+    /** Genomic location of the n-th exon; throws an exception if n is out of bounds */
+    public GenomeLoc getExonLocation(int n) {
+        if ( n >= exons.size() || n < 0 ) throw new StingException("Index out-of-bounds. Transcript has " + exons.size() +" exons; requested: "+n);
+        return exons.get(n);
+    }
+
+    /** Returns the list of all exons in this transcript, as genomic intervals */
+    public List<GenomeLoc> getExons() { return exons; }
+
+    /** Returns all exons falling ::entirely:: inside an interval **/
+    public List<GenomeLoc> getExonsInInterval( GenomeLoc interval ) {
+        List<GenomeLoc> relevantExons = new ArrayList<GenomeLoc>(exons.size());
+        for ( GenomeLoc exon : getExons() ) {
+            if ( interval.containsP(exon) ) {
+                relevantExons.add(exon);
+            }
+        }
+
+        return relevantExons;
+    }
+
+    /** convenience method; returns the numbers of the exons in the interval **/
+    public List<Integer> getExonNumbersInInterval( GenomeLoc interval ) {
+        List<Integer> numbers = new ArrayList<Integer>();
+        int iNo = 0;
+        for ( GenomeLoc exon : getExons() ) {
+            if ( interval.containsP(exon) ) {
+                numbers.add(iNo);
+            }
+            iNo++;
+        }
+
+        return numbers;
+    }
+
+    public String getTranscriptUniqueGeneName() {
+        return String.format("%s(%s)",getGeneName(),getTranscriptId());
+    }
+
+    public String getOverlapString(GenomeLoc position) {
+        boolean is_exon = false;
+        StringBuilder overlapString = new StringBuilder();
+        int exonNo = 1;
+
+        for ( GenomeLoc exon : exons ) {
+            if ( exon.containsP(position) ) {
+                overlapString.append(String.format("exon_%d",exonNo));
+                is_exon = true;
+                break;
+            }
+            exonNo ++;
+        }
+
+        if ( ! is_exon ) {
+            if ( overlapsCodingP(position) ) {
+                overlapString.append("Intron");
+            } else {
+                overlapString.append("UTR");
+            }
+        }
+
+        return overlapString.toString();
+    }
+
+    /** Returns true if the specified interval 'that' overlaps with the full genomic interval of this transcript */
+    public boolean overlapsP (GenomeLoc that) {
+        return getLocation().overlapsP(that);
+    }
+
+    /** Returns true if the specified interval 'that' overlaps with the coding genomic interval of this transcript.
+     * NOTE: since "coding interval" is still a single genomic interval, it will not contain UTRs of the outermost exons,
+     * but it will still contain introns and/or exons internal to this genomic locus that are not spliced into this transcript.
+     * @see #overlapsExonP
+     */
+    public boolean overlapsCodingP (GenomeLoc that) {
+        return transcript_coding_interval.overlapsP(that);
+    }
+
+    /** Returns true if the specified interval 'that' overlaps with any of the exons actually spliced into this transcript */
+    public boolean overlapsExonP (GenomeLoc that) {
+        for ( GenomeLoc e : exons ) {
+            if ( e.overlapsP(that) ) return true;
+        }
+        return false;
+    }
+    public String toString() {
+            StringBuilder b = new StringBuilder("000\t"); // first field is unused but required in th ecurrent format; just set to something
+            b.append(transcript_id);   // #1
+            b.append('\t');
+            b.append(getLocation().getContig()); // #2
+            b.append('\t');
+            b.append( (strand==1?'+':'-') ); // #3
+            b.append('\t');
+            b.append( (getLocation().getStart() - 1) ); // #4
+            b.append('\t');
+            b.append( getLocation().getStop());  // #5
+            b.append('\t');
+            b.append( (transcript_coding_interval.getStart() - 1) ); // #6
+            b.append('\t');
+            b.append( transcript_coding_interval.getStop());  // #7
+            b.append('\t');
+            b.append(exons.size()); // #8
+            b.append('\t');
+            for ( GenomeLoc loc : exons ) { b.append( (loc.getStart()-1) ); b.append(','); } // #9
+            b.append('\t');
+            for ( GenomeLoc loc : exons ) { b.append( loc.getStop() ); b.append(','); } // #10
+            b.append("\t0\t"); // # 11 - unused?
+            b.append(gene_name); // # 12
+            b.append("\tcmpl\tcmpl\t"); // #13, #14 - unused?
+            for ( Integer f : exon_frames ) { b.append( f ); b.append(','); } // #15
+
+
+            return b.toString();
+        }
+
+        /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
+         * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
+         * this position is fully whithin an exon of <i>any</i> of those transcripts. Passing null is safe (will return false).
+         * NOTE: position can be still within a UTR, see #isCoding
+         * @return true if it's an exon
+         */
+        public static boolean isExon(RODRecordList l) {
+
+            if ( l == null ) return false;
+
+            GenomeLoc loc = l.getLocation();
+
+            for ( GATKFeature t : l ) {
+                if ( ((RefSeqFeature)t.getUnderlyingObject()).overlapsExonP(loc) ) return true;
+            }
+            return false;
+
+        }
+
+        /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
+         * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
+         * this position is fully whithin a coding region of <i>any</i> of those transcripts.
+         * Passing null is safe (will return false).
+         * NOTE: "coding" interval is defined as a single genomic interval, so it
+         * does not include the UTRs of the outermost exons, but it includes introns between exons spliced into a
+         * transcript, or internal exons that are not spliced into a given transcript. To check that a position is
+         * indeed within an exon but not in UTR, use #isCodingExon().
+         * @return
+         */
+        public static boolean isCoding(RODRecordList l) {
+
+            if ( l == null ) return false;
+
+            GenomeLoc loc = l.getLocation();
+
+            for ( GATKFeature t : l ) {
+                if ( ((RefSeqFeature)t.getUnderlyingObject()).overlapsCodingP(loc) ) return true;
+            }
+            return false;
+
+        }
+
+        /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
+         * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
+         * this position is fully whithin a coding exon portion (i.e. true coding sequence) of <i>any</i> of those transcripts.
+         * Passing null is safe (will return false). In other words, this method returns true if the list contains a transcript,
+         * for which the current position is within an exon <i>and</i> within a coding interval simultaneously.
+         * @return
+         */
+        public static boolean isCodingExon(RODRecordList l) {
+
+            if ( l == null ) return false;
+
+            GenomeLoc loc = l.getLocation();
+
+            for ( GATKFeature t : l ) {
+                if ( ((RefSeqFeature)t.getUnderlyingObject()).overlapsCodingP(loc) && ((RefSeqFeature)t.getUnderlyingObject()).overlapsExonP(loc) ) return true;
+            }
+            return false;
+
+        }
+
+
+    public void setTranscript_id(String transcript_id) {
+        this.transcript_id = transcript_id;
+    }
+
+    public void setStrand(int strand) {
+        this.strand = strand;
+    }
+
+    public void setTranscript_interval(GenomeLoc transcript_interval) {
+        this.transcript_interval = transcript_interval;
+    }
+
+    public void setTranscript_coding_interval(GenomeLoc transcript_coding_interval) {
+        this.transcript_coding_interval = transcript_coding_interval;
+    }
+
+    public void setExons(List<GenomeLoc> exons) {
+        this.exons = exons;
+    }
+
+    public void setGene_name(String gene_name) {
+        this.gene_name = gene_name;
+    }
+
+    public void setExon_frames(List<Integer> exon_frames) {
+        this.exon_frames = exon_frames;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    @Override
+    public String getChr() {
+        return contig;
+    }
+
+    @Override
+    public int getStart() {
+        return start;
+    }
+
+    @Override
+    public int getEnd() {
+        return stop;
+    }
+}
--- a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java
@ -35,7 +35,7 @@ public class VCF4Codec implements FeatureCodec {
    private static Map<String, List<Allele>> alleleMap = new HashMap<String, List<Allele>>(3);

    // cache the genotyope values
-    private static String[] CachedGTValues = new String[100];
+    private static String[] GTValueArray = new String[100];

    // for performance testing purposes
    public static boolean validate = true;
@ -56,7 +56,7 @@ public class VCF4Codec implements FeatureCodec {
    ArrayList<String> filterFields = new ArrayList<String>();

    // do we want to validate the info, format, and filter fields
-    private final boolean validateFromHeader = true;
+    private final boolean validateFromHeader = false;

    // we store a name to give to each of the variant contexts we emit
    private String name = "Unkown";
@ -377,49 +377,72 @@ public class VCF4Codec implements FeatureCodec {

        // do we have genotyping data
        if (parts.length > 8) {
-            String GT = parts[8];
            int genotypesStart = 9;
-            // parse genotypes
-            int nGTKeys = ParsingUtils.split(GT, genotypeKeyArray, ':');
-            genotypes = new HashMap<String, Genotype>(Math.max(parts.length - genotypesStart, 1));
-            Iterator<String> iter = header.getGenotypeSamples().iterator();
-
-            alleleMap.clear();
-            for (int genotypeOffset = genotypesStart; genotypeOffset < parts.length; genotypeOffset++) {
-                String sample = parts[genotypeOffset];
-                String[] GTValues = CachedGTValues;
-                ParsingUtils.split(sample, GTValues, ':');
-                List<Allele> genotypeAlleles = parseGenotypeAlleles(GTValues[0], locAndAlleles.second, alleleMap);
-                double GTQual = VariantContext.NO_NEG_LOG_10PERROR;
-                Set<String> genotypeFilters = null;
-
-                // todo -- the parsing of attributes could be made lazy for performance
-                Map<String, String> gtAttributes = null;
-                if (nGTKeys > 1) {
-                    gtAttributes = new HashMap<String, String>(nGTKeys - 1);
-                    for (int i = 1; i < nGTKeys; i++) {
-                        if (genotypeKeyArray[i].equals("GQ")) {
-                            GTQual = parseQual(GTValues[i]);
-                        }
-                        if (genotypeKeyArray[i].equals("FL")) { // deal with genotype filters here
-                            genotypeFilters.addAll(parseFilters(GTValues[i]));
-                        } else {
-                            gtAttributes.put(genotypeKeyArray[i], GTValues[i]);
-                        }
-                    }
-                    // validate the format fields
-                    validateFields(gtAttributes.keySet(), new ArrayList(formatFields.keySet()));
-                }
-
-                boolean phased = genotypeKeyArray[0].charAt(1) == '|';
-
-                Genotype g = new Genotype(iter.next(), genotypeAlleles, GTQual, genotypeFilters, gtAttributes, phased);
-                genotypes.put(g.getSampleName(), g);
-            }
+            genotypes = createGenotypeMap(parts, locAndAlleles, genotypesStart);
        }
        return new VariantContext(name, locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes);
    }

+    /**
+     * create a genotype map
+     * @param parts the string parts
+     * @param locAndAlleles the locations and the list of alleles
+     * @param genotypesStart the position in the parts array that the genotype strings start
+     * @return a mapping of sample name to genotype object
+     */
+    private Map<String, Genotype> createGenotypeMap(String[] parts, Pair<GenomeLoc, List<Allele>> locAndAlleles, int genotypesStart) {
+        Map<String, Genotype> genotypes = new LinkedHashMap<String, Genotype>(Math.max(parts.length - genotypesStart, 1));
+
+        // get the format keys
+        int nGTKeys = ParsingUtils.split(parts[8], genotypeKeyArray, ':');
+
+        // cycle through the sample names
+        Iterator<String> sampleNameIterator = header.getGenotypeSamples().iterator();
+
+        // clear out our allele mapping
+        alleleMap.clear();
+
+        // cycle through the genotype strings
+        for (int genotypeOffset = genotypesStart; genotypeOffset < parts.length; genotypeOffset++) {
+            int GTValueSplitSize = ParsingUtils.split(parts[genotypeOffset], GTValueArray, ':');
+            List<Allele> genotypeAlleles = parseGenotypeAlleles(GTValueArray[0], locAndAlleles.second, alleleMap);
+            double GTQual = VariantContext.NO_NEG_LOG_10PERROR;
+            Set<String> genotypeFilters = null;
+            String sampleName = sampleNameIterator.next();
+
+
+            // todo -- the parsing of attributes could be made lazy for performance
+            Map<String, String> gtAttributes = null;
+
+            // check to see if the value list is longer than the key list, which is a problem
+            if (nGTKeys < GTValueSplitSize)
+                throw new StingException("Too few keys for compared to the value string " + sampleName + ", keys = " + parts[8] + " values = " + parts[genotypeOffset]);
+
+            if (nGTKeys > 1) {
+                gtAttributes = new HashMap<String, String>(nGTKeys - 1);
+                for (int i = 1; i < nGTKeys; i++) {
+                    if (i >= GTValueSplitSize)
+                        gtAttributes.put(genotypeKeyArray[i],".");
+                    else if (genotypeKeyArray[i].equals("GQ"))
+                        GTQual = parseQual(GTValueArray[i]);
+                    else if (genotypeKeyArray[i].equals("FL")) // deal with genotype filters here
+                        genotypeFilters.addAll(parseFilters(GTValueArray[i]));
+                    else
+                        gtAttributes.put(genotypeKeyArray[i], GTValueArray[i]);
+
+                }
+                // validate the format fields
+                validateFields(gtAttributes.keySet(), new ArrayList(formatFields.keySet()));
+            }
+
+            boolean phased = genotypeKeyArray[0].charAt(1) == '|';
+
+            Genotype g = new Genotype(sampleName, genotypeAlleles, GTQual, genotypeFilters, gtAttributes, phased);
+            genotypes.put(g.getSampleName(), g);
+        }
+        return genotypes;
+    }
+
    /**
     * clip the alleles, based on the reference
     * @param unclippedAlleles the list of alleles
--- a/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java
@ -1,252 +0,0 @@
-package org.broadinstitute.sting.gatk.refdata;
-
-import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
-import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.StingException;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Created by IntelliJ IDEA.
- * User: asivache
- * Date: Sep 22, 2009
- * Time: 3:19:41 PM
- * To change this template use File | Settings | File Templates.
- */
-public class rodRefSeq extends BasicReferenceOrderedDatum implements Transcript {
-
-    private String transcript_id;
-    private int strand;
-    private GenomeLoc transcript_interval;
-    private GenomeLoc transcript_coding_interval;
-    private List<GenomeLoc> exons;
-    private String gene_name;
-    private List<Integer> exon_frames;
-
-
-    public rodRefSeq(String name) {
-       super(name);
-    }
-
-    /** Returns id of the transcript (RefSeq NM_* id) */
-    public String getTranscriptId() { return transcript_id; }
-    /** Returns coding strand of the transcript, 1 or -1 for positive or negative strand, respectively */
-    public int getStrand() { return strand; }
-    /** Returns transcript's full genomic interval (includes all exons with UTRs) */
-    public GenomeLoc getLocation() { return transcript_interval; }
-    /** Returns genomic interval of the coding sequence (does not include UTRs, but still includes introns, since it's a single interval on the DNA) */
-    public GenomeLoc getCodingLocation() { return transcript_coding_interval; }
-    /** Name of the gene this transcript corresponds to (NOT gene id such as Entrez etc) */
-    public String getGeneName() { return gene_name; }
-    /** Number of exons in this transcript */
-    public int getNumExons() { return exons.size(); }
-    /** Genomic location of the n-th exon; throws an exception if n is out of bounds */
-    public GenomeLoc getExonLocation(int n) {
-        if ( n >= exons.size() || n < 0 ) throw new StingException("Index out-of-bounds. Transcript has " + exons.size() +" exons; requested: "+n);
-        return exons.get(n);
-    }
-    /** Returns the list of all exons in this transcript, as genomic intervals */
-    public List<GenomeLoc> getExons() { return exons; }
-
-    /** Returns all exons falling ::entirely:: inside an interval **/
-    public List<GenomeLoc> getExonsInInterval( GenomeLoc interval ) {
-        List<GenomeLoc> relevantExons = new ArrayList<GenomeLoc>(exons.size());
-        for ( GenomeLoc exon : getExons() ) {
-            if ( interval.containsP(exon) ) {
-                relevantExons.add(exon);
-            }
-        }
-
-        return relevantExons;
-    }
-
-    /** convenience method; returns the numbers of the exons in the interval **/
-    public List<Integer> getExonNumbersInInterval( GenomeLoc interval ) {
-        List<Integer> numbers = new ArrayList<Integer>();
-        int iNo = 0;
-        for ( GenomeLoc exon : getExons() ) {
-            if ( interval.containsP(exon) ) {
-                numbers.add(iNo);
-            }
-            iNo++;
-        }
-
-        return numbers;
-    }
-
-    public String getTranscriptUniqueGeneName() {
-        return String.format("%s(%s)",getGeneName(),getTranscriptId());
-    }
-
-    public String getOverlapString(GenomeLoc position) {
-        boolean is_exon = false;
-        StringBuilder overlapString = new StringBuilder();
-        int exonNo = 1;
-
-        for ( GenomeLoc exon : exons ) {
-            if ( exon.containsP(position) ) {
-                overlapString.append(String.format("exon_%d",exonNo));
-                is_exon = true;
-                break;
-            }
-            exonNo ++;
-        }
-
-        if ( ! is_exon ) {
-            if ( overlapsCodingP(position) ) {
-                overlapString.append("Intron");
-            } else {
-                overlapString.append("UTR");
-            }
-        }
-
-        return overlapString.toString();
-    }
-
-    /** Returns true if the specified interval 'that' overlaps with the full genomic interval of this transcript */
-    public boolean overlapsP (GenomeLoc that) {
-        return transcript_interval.overlapsP(that);
-    }
-
-    /** Returns true if the specified interval 'that' overlaps with the coding genomic interval of this transcript.
-     * NOTE: since "coding interval" is still a single genomic interval, it will not contain UTRs of the outermost exons,
-     * but it will still contain introns and/or exons internal to this genomic locus that are not spliced into this transcript.
-     * @see #overlapsExonP
-     */
-    public boolean overlapsCodingP (GenomeLoc that) {
-        return transcript_coding_interval.overlapsP(that);
-    }
-
-    /** Returns true if the specified interval 'that' overlaps with any of the exons actually spliced into this transcript */
-    public boolean overlapsExonP (GenomeLoc that) {
-        for ( GenomeLoc e : exons ) {
-            if ( e.overlapsP(that) ) return true;
-        }
-        return false;
-    }
-
-    /** Fills this object from a text line in RefSeq (UCSC) text dump file */
-    @Override
-    public boolean parseLine(final Object header, String[] fields) {
-        transcript_id = fields[1];
-        if ( fields[3].length()==1 && fields[3].charAt(0)=='+') strand = 1;
-        else if ( fields[3].length()==1 && fields[3].charAt(0)=='-') strand = -1;
-        else throw new StingException("Expected strand symbol (+/-), found: "+fields[3]);
-
-        String contig_name = fields[2];
-        transcript_interval = GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]));
-        transcript_coding_interval = GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7]));
-        gene_name = fields[12];
-        String[] exon_starts = fields[9].split(",");
-        String[] exon_stops = fields[10].split(",");
-        String[] eframes = fields[15].split(",");
-
-        assert exon_starts.length == exon_stops.length : "Data format error: numbers of exon start and stop positions differ";
-        assert exon_starts.length == eframes.length : "Data format error: numbers of exons and exon frameshifts differ";
-
-        exons = new ArrayList<GenomeLoc>(exon_starts.length);
-        exon_frames = new ArrayList<Integer>(eframes.length);
-
-        for ( int i = 0 ; i < exon_starts.length  ; i++ ) {
-            exons.add(GenomeLocParser.parseGenomeLoc(contig_name, Integer.parseInt(exon_starts[i])+1, Integer.parseInt(exon_stops[i]) ) );
-            exon_frames.add(Integer.decode(eframes[i]));
-        }
-        return true;
-    }
-
-    public String toString() {
-        StringBuilder b = new StringBuilder("000\t"); // first field is unused but required in th ecurrent format; just set to something
-        b.append(transcript_id);   // #1
-        b.append('\t');
-        b.append(transcript_interval.getContig()); // #2
-        b.append('\t');
-        b.append( (strand==1?'+':'-') ); // #3
-        b.append('\t');
-        b.append( (transcript_interval.getStart() - 1) ); // #4
-        b.append('\t');
-        b.append( transcript_interval.getStop());  // #5
-        b.append('\t');
-        b.append( (transcript_coding_interval.getStart() - 1) ); // #6
-        b.append('\t');
-        b.append( transcript_coding_interval.getStop());  // #7
-        b.append('\t');
-        b.append(exons.size()); // #8
-        b.append('\t');
-        for ( GenomeLoc loc : exons ) { b.append( (loc.getStart()-1) ); b.append(','); } // #9
-        b.append('\t');
-        for ( GenomeLoc loc : exons ) { b.append( loc.getStop() ); b.append(','); } // #10
-        b.append("\t0\t"); // # 11 - unused?
-        b.append(gene_name); // # 12
-        b.append("\tcmpl\tcmpl\t"); // #13, #14 - unused?
-        for ( Integer f : exon_frames ) { b.append( f ); b.append(','); } // #15
-
-
-        return b.toString();
-    }
-
-    /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
-     * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
-     * this position is fully whithin an exon of <i>any</i> of those transcripts. Passing null is safe (will return false).
-     * NOTE: position can be still within a UTR, see #isCoding
-     * @return true if it's an exon
-     */
-    public static boolean isExon(RODRecordList l) {
-
-        if ( l == null ) return false;
-
-        GenomeLoc loc = l.getLocation();
-
-        for ( GATKFeature t : l ) {
-            if ( ((rodRefSeq)t.getUnderlyingObject()).overlapsExonP(loc) ) return true;
-        }
-        return false;
-
-    }
-
-    /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
-     * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
-     * this position is fully whithin a coding region of <i>any</i> of those transcripts.
-     * Passing null is safe (will return false).
-     * NOTE: "coding" interval is defined as a single genomic interval, so it
-	 * does not include the UTRs of the outermost exons, but it includes introns between exons spliced into a
-	 * transcript, or internal exons that are not spliced into a given transcript. To check that a position is
-	 * indeed within an exon but not in UTR, use #isCodingExon().
-     * @return
-     */
-    public static boolean isCoding(RODRecordList l) {
-
-        if ( l == null ) return false;
-
-        GenomeLoc loc = l.getLocation();
-
-        for ( GATKFeature t : l ) {
-            if ( ((rodRefSeq)t.getUnderlyingObject()).overlapsCodingP(loc) ) return true;
-        }
-        return false;
-
-    }
-
-    /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to
-     * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether
-     * this position is fully whithin a coding exon portion (i.e. true coding sequence) of <i>any</i> of those transcripts.
-     * Passing null is safe (will return false). In other words, this method returns true if the list contains a transcript,
-     * for which the current position is within an exon <i>and</i> within a coding interval simultaneously.
-     * @return
-     */
-    public static boolean isCodingExon(RODRecordList l) {
-
-        if ( l == null ) return false;
-
-        GenomeLoc loc = l.getLocation();
-
-        for ( GATKFeature t : l ) {
-            if ( ((rodRefSeq)t.getUnderlyingObject()).overlapsCodingP(loc) && ((rodRefSeq)t.getUnderlyingObject()).overlapsExonP(loc) ) return true;
-        }
-        return false;
-
-    }
-
-}
--- a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java
@ -54,7 +54,6 @@ public class RODTrackBuilder implements RMDTrackBuilder {
    static {
        // All known ROD types
        Types.put("GELI", rodGELI.class);
-        Types.put("RefSeq", rodRefSeq.class);
        Types.put("Table", TabularROD.class);
        Types.put("HapMap", HapMapROD.class);
        Types.put("Intervals", IntervalRod.class);
--- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelGenotyperV2Walker.java
@ -29,11 +29,18 @@ import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
+import org.broad.tribble.FeatureReader;
+import org.broad.tribble.dbsnp.DbSNPCodec;
 import org.broadinstitute.sting.gatk.filters.Platform454Filter;
 import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
 import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
 import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
 import org.broadinstitute.sting.gatk.refdata.*;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
+import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
+import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
 import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
 import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
 import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
@ -46,6 +53,7 @@ import org.broadinstitute.sting.utils.collections.CircularArray;
 import org.broadinstitute.sting.utils.collections.PrimitivePair;
 import org.broadinstitute.sting.commandline.Argument;

+import java.io.File;
 import java.io.IOException;
 import java.util.*;

@ -136,11 +144,15 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
 		normal_context = new WindowContext(0,WINDOW_SIZE);

 		if ( RefseqFileName != null ) {
-			ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
-					new java.io.File(RefseqFileName), rodRefSeq.class);
+			TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
+            FeatureReader refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;

-			refseqIterator = new SeekableRODIterator(new GATKFeatureIterator(refseq.iterator()));
-			logger.info("Using RefSeq annotations from "+RefseqFileName);
+            try {
+                refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
+            } catch (IOException e) {
+                throw new StingException("Unable to open file " + RefseqFileName, e);
+            }
+            logger.info("Using RefSeq annotations from "+RefseqFileName);
 		}

 		if ( refseqIterator == null ) logger.info("No annotations available");
@ -627,11 +639,11 @@ public class IndelGenotyperV2Walker extends ReadWalker<Integer,Integer> {
        else {
            StringBuilder b = new StringBuilder();

-            if ( rodRefSeq.isExon(ann) ) {
-                if ( rodRefSeq.isCodingExon(ann) ) b.append(annCoding); // both exon and coding = coding exon sequence
+            if ( RefSeqFeature.isExon(ann) ) {
+                if ( RefSeqFeature.isCodingExon(ann) ) b.append(annCoding); // both exon and coding = coding exon sequence
                else b.append(annUTR); // exon but not coding = UTR
            } else {
-                if ( rodRefSeq.isCoding(ann) ) b.append(annIntron); // not in exon, but within the coding region = intron
+                if ( RefSeqFeature.isCoding(ann) ) b.append(annIntron); // not in exon, but within the coding region = intron
                else b.append(annUnknown); // we have no idea what this is. this may actually happen when we have a fully non-coding exon...
            }
            b.append('\t');
--- a/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRodWalker.java
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
+import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
 import org.broadinstitute.sting.utils.collections.Pair;
@ -44,13 +45,26 @@ import java.util.LinkedList;
 * Prints out counts of the number of reference ordered data objects are
 * each locus for debugging RodWalkers.
 */
-public class CountRodWalker extends RodWalker<CountRodWalker.Datum, Pair<ExpandingArrayList<Long>, Long>> {
+public class CountRodWalker extends RodWalker<CountRodWalker.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
    @Argument(fullName = "verbose", shortName = "v", doc="If true, Countrod will print out detailed information about the rods it finds and locations", required = false)
    public boolean verbose = false;

    @Argument(fullName = "showSkipped", shortName = "s", doc="If true, CountRod will print out the skippped locations", required = false)
    public boolean showSkipped = false;

+    @Override
+    public Pair<ExpandingArrayList<Long>, Long> treeReduce(Pair<ExpandingArrayList<Long>, Long> lhs, Pair<ExpandingArrayList<Long>, Long> rhs) {
+        ExpandingArrayList<Long> nt = new ExpandingArrayList<Long>();
+        nt.addAll(lhs.first);
+        int index = 0;
+        for (Long l : rhs.first)
+            if (nt.get(index) == null)
+                nt.add(l);
+            else
+                nt.set(index,nt.get(index) + l);
+        return new Pair<ExpandingArrayList<Long>, Long>(nt, lhs.second + rhs.second);
+    }
+
    public class Datum {
        public long nRodsAtThisLocation = 0;
        public long nSkippedBases =0;
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/DesignFileGeneratorWalker.java
@ -1,21 +1,15 @@
 package org.broadinstitute.sting.oneoffprojects.walkers;

 import org.broad.tribble.bed.BEDFeature;
-import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.*;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
 import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
-import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
-import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
-import org.broadinstitute.sting.gatk.walkers.RefWalker;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.genotype.vcf.*;

 import java.util.*;

@ -31,7 +25,7 @@ import java.util.*;
 public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {

    private HashMap<GenomeLoc,IntervalInfoBuilder> intervalBuffer = new HashMap<GenomeLoc,IntervalInfoBuilder>();
-    private HashSet<rodRefSeq> refseqBuffer = new HashSet<rodRefSeq>();
+    private HashSet<RefSeqFeature> refseqBuffer = new HashSet<RefSeqFeature>();
    private HashMap<String,BEDFeature> currentBedFeatures = new HashMap<String,BEDFeature>();

    public Long map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
@ -59,8 +53,8 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {

        if ( refseqList != null && refseqList.size() > 0 ) {
            for ( Object seq : refseqList ) {
-                if ( ! refseqBuffer.contains( (rodRefSeq) seq) ) {
-                    refseqBuffer.add( (rodRefSeq) seq );
+                if ( ! refseqBuffer.contains( (RefSeqFeature) seq) ) {
+                    refseqBuffer.add( (RefSeqFeature) seq );
                }
            }
        }
@ -84,7 +78,7 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
    private long process() {
        long nUpdate = 0l;
        for ( GenomeLoc interval : intervalBuffer.keySet() ) {
-            for ( rodRefSeq refseq : refseqBuffer ) {
+            for ( RefSeqFeature refseq : refseqBuffer ) {
                if ( interval.overlapsP(refseq.getLocation()) &&
                     ! intervalBuffer.get(interval).geneNames.contains(refseq.getTranscriptUniqueGeneName()) ) {
                    // if the interval overlaps the gene transcript; and the gene is not already represented in the interval
@ -119,14 +113,14 @@ public class DesignFileGeneratorWalker extends RodWalker<Long,Long> {
     * @return diddly
     */
    public void cleanup(ReferenceContext ref) {
-        List<rodRefSeq> toRemove = new ArrayList<rodRefSeq>();
-        for ( rodRefSeq refseq : refseqBuffer ) {
+        List<RefSeqFeature> toRemove = new ArrayList<RefSeqFeature>();
+        for ( RefSeqFeature refseq : refseqBuffer ) {
            if ( refseq.getLocation().isBefore(ref.getLocus()) ) {
                toRemove.add(refseq);
            }
        }

-        for ( rodRefSeq refseq : toRemove ) {
+        for ( RefSeqFeature refseq : toRemove ) {
            refseqBuffer.remove(refseq);
        }

--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/IndelAnnotator.java
@ -1,5 +1,6 @@
 package org.broadinstitute.sting.oneoffprojects.walkers;

+import org.broad.tribble.FeatureReader;
 import org.broad.tribble.vcf.VCFHeader;
 import org.broad.tribble.vcf.VCFHeaderLine;
 import org.broad.tribble.vcf.VCFInfoHeaderLine;
@ -8,6 +9,10 @@ import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.*;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
+import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
 import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
 import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
@ -15,6 +20,8 @@ import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.StingException;
 import org.broadinstitute.sting.utils.genotype.vcf.*;

+import java.io.File;
+import java.io.IOException;
 import java.util.*;

 /**
@ -42,11 +49,11 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{
        else {
            StringBuilder b = new StringBuilder();

-            if ( rodRefSeq.isExon(ann) ) {
-                if ( rodRefSeq.isCodingExon(ann) ) b.append(annCoding); // both exon and coding = coding exon sequence
+            if ( RefSeqFeature.isExon(ann) ) {
+                if ( RefSeqFeature.isCodingExon(ann) ) b.append(annCoding); // both exon and coding = coding exon sequence
                else b.append(annUTR); // exon but not coding = UTR
            } else {
-                if ( rodRefSeq.isCoding(ann) ) b.append(annIntron); // not in exon, but within the coding region = intron
+                if ( RefSeqFeature.isCoding(ann) ) b.append(annIntron); // not in exon, but within the coding region = intron
                else b.append(annUnknown); // we have no idea what this is. this may actually happen when we have a fully non-coding exon...
            }
            b.append('\t');
@ -57,10 +64,15 @@ public class IndelAnnotator extends RodWalker<Integer,Long>{

    public void initialize() {
        if ( RefseqFileName != null ) {
-            ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
-                    new java.io.File(RefseqFileName), rodRefSeq.class);
+            TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
+            FeatureReader refseq = builder.createFeatureReader(RefSeqCodec.class,new File(RefseqFileName)).first;
+
+            try {
+                refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(),"refseq"));
+            } catch (IOException e) {
+                throw new StingException("Unable to open file " + RefseqFileName, e);
+            }

-            refseqIterator = new SeekableRODIterator(new GATKFeatureIterator(refseq.iterator()));
            logger.info("Using RefSeq annotations from "+RefseqFileName);
        }

--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/hybridselection/HybSelPerformanceWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/hybridselection/HybSelPerformanceWalker.java
@ -31,21 +31,22 @@ import net.sf.picard.util.IntervalList;
 import net.sf.picard.util.OverlapDetector;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.util.StringUtil;
+import org.broad.tribble.FeatureReader;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
 import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
-import org.broadinstitute.sting.gatk.refdata.rodRefSeq;
-import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
-import org.broadinstitute.sting.gatk.refdata.utils.GATKFeatureIterator;
-import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
-import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqCodec;
+import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature;
+import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder;
+import org.broadinstitute.sting.gatk.refdata.utils.*;
 import org.broadinstitute.sting.gatk.walkers.By;
 import org.broadinstitute.sting.gatk.walkers.DataSource;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
@ -108,10 +109,15 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
    @Override
    public void initialize() {
        if ( REFSEQ_FILE != null ) {
-            ReferenceOrderedData<rodRefSeq> refseq = new ReferenceOrderedData<rodRefSeq>("refseq",
-                    new java.io.File(REFSEQ_FILE), rodRefSeq.class);
+            TribbleRMDTrackBuilder builder = new TribbleRMDTrackBuilder();
+            FeatureReader refseq = builder.createFeatureReader(RefSeqCodec.class, new File(REFSEQ_FILE)).first;
+
+            try {
+                refseqIterator = new SeekableRODIterator(new FeatureToGATKFeatureIterator(refseq.iterator(), "refseq"));
+            } catch (IOException e) {
+                throw new StingException("Unable to open file " + REFSEQ_FILE, e);
+            }

-            refseqIterator = new SeekableRODIterator(new GATKFeatureIterator(refseq.iterator()));
            logger.info("Using RefSeq annotations from "+REFSEQ_FILE);
        }

@ -282,8 +288,8 @@ public class HybSelPerformanceWalker extends LocusWalker<Integer, HybSelPerforma
        if (annotationList == null) { return "UNKNOWN"; }
        
        for(GATKFeature rec : annotationList) {
-            if ( ((rodRefSeq)rec.getUnderlyingObject()).overlapsExonP(target) ) {
-                return ((rodRefSeq)rec.getUnderlyingObject()).getGeneName();
+            if ( ((RefSeqFeature)rec.getUnderlyingObject()).overlapsExonP(target) ) {
+                return ((RefSeqFeature)rec.getUnderlyingObject()).getGeneName();
            }
        }

--- a/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java
@ -173,9 +173,9 @@ public class VCF4UnitTest extends BaseTest {
    }


-    // test too many info fields 
+    // test too many info fields - NOT a valid test with validation turned off in the VCF4 reader
    String twoManyInfoLine = "20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2;HH\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,.";
-    @Test(expected=StingException.class)
+    //@Test(expected=StingException.class)
    public void testCheckTooManyInfoFields() {
        TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile);
        testSetup.codec.decode(twoManyInfoLine);