diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/RODMetaDataContainer.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RODMetaDataContainer.java new file mode 100644 index 000000000..600e7d60e --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/RODMetaDataContainer.java @@ -0,0 +1,49 @@ +package org.broadinstitute.sting.gatk.datasources.providers; + +import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; +import org.broadinstitute.sting.utils.Pair; + +import java.util.*; + + +/** + * + * @author aaron + * + * Class RODMetaDataContainer + * + * stores both the name and the class for each ROD. This class assumes that: + * + * -Names must be unique + * -Classes are allowed to have dupplicates + * + * This class encapsulates the ref data associations, and provides lookup by name and by + * class type. + * + */ +public class RODMetaDataContainer { + // we only allow non-dupplicate ROD names, a HashMap is fine + private final HashMap nameMap = new HashMap(); + + // we do allow duplicate class entries, so we need to store pairs of data + private final List> classMap = new ArrayList>(); + + public void addEntry(ReferenceOrderedDatum data) { + nameMap.put(data.getName(),data); + classMap.add(new Pair(data.getClass(),data)); + } + + public Collection getSet(String name) { + if (name == null) return nameMap.values(); + Set set = new HashSet(); + if (nameMap.containsKey(name)) set.add(nameMap.get(name)); + return set; + } + // the brute force (n) search ended up being faster than sorting and binary search in all but the most extreme cases (thousands of RODs at a location). + public Collection getSet(Class cls) { + Collection ret = new ArrayList(); + for (Pair pair: classMap) + if (pair.first.equals(cls)) ret.add(pair.second); + return ret; + } +} diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index f99ef3a3c..b8e43ecf9 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -72,7 +72,7 @@ public class ReadBasedReferenceOrderedView implements View { /** stores a window of data, dropping RODs if we've passed the new reads start point. */ class WindowedData { // the queue of possibly in-frame RODs; RODs are dropped removed as soon as they are out of scope - private final TreeMap> mapping = new TreeMap>(); + private final TreeMap mapping = new TreeMap(); // our current location from the last read we processed private GenomeLoc currentLoc; @@ -149,8 +149,8 @@ class WindowedData { RODRecordList list = state.iterator.next(); for (ReferenceOrderedDatum datum : list) { if (!mapping.containsKey(list.getLocation().getStart())) - mapping.put(list.getLocation().getStart(), new HashSet()); - mapping.get(list.getLocation().getStart()).add(datum); + mapping.put(list.getLocation().getStart(), new RODMetaDataContainer()); + mapping.get(list.getLocation().getStart()).addEntry(datum); } } } @@ -178,4 +178,4 @@ class RMDDataState { this.dataSource = dataSource; this.iterator = iterator; } -} \ No newline at end of file +} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java index b2f57cfc6..103abc0a4 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTracker.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -39,7 +40,9 @@ import java.util.*; */ public class ReadMetaDataTracker { private final SAMRecord record; - private final TreeMap> mapping; + + // the buffer of positions and RODs we've stored + private final TreeMap mapping; /** * create a read meta data tracker, given the read and a queue of RODatum positions @@ -47,7 +50,7 @@ public class ReadMetaDataTracker { * @param record the read to create offset from * @param mapping the mapping of reference ordered datum */ - public ReadMetaDataTracker(SAMRecord record, TreeMap> mapping) { + public ReadMetaDataTracker(SAMRecord record, TreeMap mapping) { this.record = record; this.mapping = mapping; } @@ -62,14 +65,22 @@ public class ReadMetaDataTracker { * * @return a mapping from the position in the read to the reference ordered datum */ - private Map> createReadAlignment(SAMRecord record, TreeMap> queue, Class cl, String name) { - Map> ret = new LinkedHashMap>(); + private Map> createReadAlignment(SAMRecord record, TreeMap queue, Class cl, String name) { + if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified"); + Map> ret = new LinkedHashMap>(); GenomeLoc location = GenomeLocParser.createGenomeLoc(record); int length = record.getReadLength(); for (Long loc : queue.keySet()) { - //if (location.containsP(loc)) { - long position = loc - location.getStart(); - if (position >= 0 && position < length) ret.put((int)(position),queue.get(loc)); + Long position = loc - location.getStart(); + if (position >= 0 && position < length) { + Collection set; + if (cl != null) + set = queue.get(loc).getSet(cl); + else + set = queue.get(loc).getSet(name); + if (set != null && set.size() > 0) + ret.put(position,set); + } } return ret; @@ -80,12 +91,16 @@ public class ReadMetaDataTracker { * * @return a mapping from the position in the read to the reference ordered datum */ - private Map> createGenomeLocAlignment(SAMRecord record, TreeMap> mapping, Class cl, String name) { - Map> ret = new LinkedHashMap>(); + private Map> createGenomeLocAlignment(SAMRecord record, TreeMap mapping, Class cl, String name) { + Map> ret = new LinkedHashMap>(); int start = record.getAlignmentStart(); int stop = record.getAlignmentEnd(); for (Long location : mapping.keySet()) { - if (location >= start && location <= stop) ret.put(location,mapping.get(location)); + if (location >= start && location <= stop) + if (cl != null) + ret.put(location,mapping.get(location).getSet(cl)); + else + ret.put(location,mapping.get(location).getSet(name)); } return ret; } @@ -95,7 +110,7 @@ public class ReadMetaDataTracker { * * @return a mapping of read offset to ROD(s) */ - public Map> getPositionMapping() { + public Map> getPositionMapping() { return createReadAlignment(record, mapping, null, null); } @@ -104,7 +119,43 @@ public class ReadMetaDataTracker { * * @return a mapping of genome loc position to ROD(s) */ - public Map> getGenomeLocMapping() { + public Map> getGenomeLocMapping() { return createGenomeLocAlignment(record, mapping, null, null); } + + /** + * get the position mapping, from read offset to ROD + * + * @return a mapping of read offset to ROD(s) + */ + public Map> getPositionMapping(String name) { + return createReadAlignment(record, mapping, null, name); + } + + /** + * get the position mapping, from read offset to ROD + * + * @return a mapping of genome loc position to ROD(s) + */ + public Map> getGenomeLocMapping(String name) { + return createGenomeLocAlignment(record, mapping, null, name); + } + + /** + * get the position mapping, from read offset to ROD + * + * @return a mapping of read offset to ROD(s) + */ + public Map> getPositionMapping(Class cl) { + return createReadAlignment(record, mapping, cl, null); + } + + /** + * get the position mapping, from read offset to ROD + * + * @return a mapping of genome loc position to ROD(s) + */ + public Map> getGenomeLocMapping(Class cl) { + return createGenomeLocAlignment(record, mapping, cl, null); + } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java index 82022b885..346967431 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/ReferenceOrderedData.java @@ -23,7 +23,7 @@ import java.util.*; * Time: 10:47:14 AM * To change this template use File | Settings | File Templates. */ -public class ReferenceOrderedData implements Iterable { // }, RMDTrackBuilder { +public class ReferenceOrderedData implements Iterable { private String name; private File file = null; // private String fieldDelimiter; @@ -36,32 +36,6 @@ public class ReferenceOrderedData implements /** our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(ReferenceOrderedData.class); - /** @return a map of all available tracks we currently have access to create */ - //@Override - public Map getAvailableTrackNamesAndTypes() { - Map ret = new HashMap(); - for (RODBinding binding: Types.values()) - ret.put(binding.name, binding.type); - return ret; - } - - /** - * create a RMDTrack of the specified type - * - * @param targetClass the target class of track - * @param name what to call the track - * @param inputFile the input file - * - * @return an instance of the track - * @throws org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException - * if we don't know of the target class or we couldn't create it - */ - //@Override - public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException { - return new RODRMDTrack(targetClass, name, inputFile, parse1Binding(name,targetClass.getName(),inputFile.getAbsolutePath())); - } - - // ---------------------------------------------------------------------- // // Static ROD type management @@ -191,7 +165,7 @@ public class ReferenceOrderedData implements * @param fileName * @return */ - private static ReferenceOrderedData parse1Binding(final String trackName, final String typeName, final String fileName) { + public static ReferenceOrderedData parse1Binding(final String trackName, final String typeName, final String fileName) { // Gracefully fail if we don't have the type if (ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null) Utils.scareUser(String.format("Unknown ROD type: %s", typeName)); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java index 43f778b19..bd5bf61ba 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/SeekableRODIterator.java @@ -258,8 +258,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator { "the iterator's current contig"); if ( interval.getContigIndex() == curr_contig ) { if ( interval.getStart() < curr_position ) - throw new StingException("Out of order query: query position "+interval.getStart()+" is located before "+ - "the iterator's current position "+curr_position); + throw new StingException("Out of order query: query position "+interval +" is located before "+ + "the iterator's current position "+curr_contig + ":" + curr_position); if ( interval.getStop() < curr_query_end ) throw new StingException("Unsupported querying sequence: current query interval " + interval+" ends before the end of previous query interval ("+curr_query_end+")"); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java new file mode 100644 index 000000000..0be6acf60 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/refdata/tracks/builders/RODTrackBuilder.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.gatk.refdata.tracks.builders; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; +import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException; +import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack; +import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + + +/** + * @author aaron + *

+ * Class RODTrackBuilder + *

+ * the builder for tracks of the current ROD system, a holdover until Tribble supports binary and multi-line formats + */ +public class RODTrackBuilder implements RMDTrackBuilder { + + /** our log, which we want to capture anything from this class */ + private static Logger logger = Logger.getLogger(ReferenceOrderedData.class); + + public static HashMap Types = new HashMap(); + + public static void addModule(final String name, final Class rodType) { + final String boundName = name.toLowerCase(); + if (Types.containsKey(boundName)) { + throw new RuntimeException(String.format("GATK BUG: adding ROD module %s that is already bound", boundName)); + } + logger.info(String.format("* Adding rod class %s", name)); + Types.put(boundName, new ReferenceOrderedData.RODBinding(name, rodType)); + } + + static { + // All known ROD types + addModule("GFF", RodGenotypeChipAsGFF.class); + //addModule("dbSNP", rodDbSNP.class); + addModule("HapMapAlleleFrequencies", HapMapAlleleFrequenciesROD.class); + addModule("SAMPileup", rodSAMPileup.class); + addModule("GELI", rodGELI.class); + addModule("RefSeq", rodRefSeq.class); + addModule("Table", TabularROD.class); + addModule("PooledEM", PooledEMSNPROD.class); + addModule("CleanedOutSNP", CleanedOutSNPROD.class); + addModule("Sequenom", SequenomROD.class); + addModule("SangerSNP", SangerSNPROD.class); + addModule("SimpleIndel", SimpleIndelROD.class); + addModule("PointIndel", PointIndelROD.class); + addModule("HapMapGenotype", HapMapGenotypeROD.class); + addModule("Intervals", IntervalRod.class); + addModule("Variants", RodGeliText.class); + addModule("GLF", RodGLF.class); + addModule("VCF", RodVCF.class); + addModule("PicardDbSNP", rodPicardDbSNP.class); + addModule("HapmapVCF", HapmapVCFROD.class); + addModule("Beagle", BeagleROD.class); + addModule("Plink", PlinkRod.class); + } + + /** + * create a RMDTrack of the specified type + * + * @param targetClass the target class of track + * @param name what to call the track + * @param inputFile the input file + * + * @return an instance of the track + * @throws org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException + * if we don't know of the target class or we couldn't create it + */ + //@Override + public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException { + return new RODRMDTrack(targetClass, name, inputFile, ReferenceOrderedData.parse1Binding(name,targetClass.getName(),inputFile.getAbsolutePath())); + } + +/** @return a map of all available tracks we currently have access to create */ + //@Override + public Map getAvailableTrackNamesAndTypes() { + Map ret = new HashMap(); + for (ReferenceOrderedData.RODBinding binding: Types.values()) + ret.put(binding.name, binding.type); + return ret; + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewTest.java b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewTest.java index ce3eb045e..7f05ccd59 100644 --- a/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewTest.java +++ b/java/test/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedViewTest.java @@ -77,13 +77,13 @@ public class ReadBasedReferenceOrderedViewTest extends BaseTest { } GenomeLoc start = GenomeLocParser.createGenomeLoc(0,0,0); List list = new ArrayList(); - list.add(new RMDDataState(null, new FakePeekingRODIterator(start))); + list.add(new RMDDataState(null, new FakePeekingRODIterator(start,"fakeName"))); ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list)); for (SAMRecord rec : records) { ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec); - Map> map = tracker.getPositionMapping(); - for (Integer i : map.keySet()) { + Map> map = tracker.getPositionMapping(); + for (Long i : map.keySet()) { Assert.assertEquals(1,map.get(i).size()); } Assert.assertEquals(10,map.keySet().size()); @@ -99,8 +99,9 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator { // current location private GenomeLoc location; private ReadMetaDataTrackerTest.FakeRODatum curROD; - - public FakePeekingRODIterator(GenomeLoc startingLoc) { + private final String name; + public FakePeekingRODIterator(GenomeLoc startingLoc, String name) { + this.name = name; this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(),startingLoc.getStart()+1,startingLoc.getStop()+1);; } @@ -130,7 +131,7 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator { @Override public RODRecordList next() { System.err.println("Next -> " + location); - curROD = new ReadMetaDataTrackerTest.FakeRODatum(location); + curROD = new ReadMetaDataTrackerTest.FakeRODatum(location,name); location = GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart()+1,location.getStop()+1); FakeRODRecordList list = new FakeRODRecordList(); list.add(curROD); diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerTest.java index 3ef325905..1d4093777 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/ReadMetaDataTrackerTest.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.refdata; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; @@ -37,10 +38,7 @@ import org.junit.Test; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; -import java.util.HashSet; -import java.util.PriorityQueue; -import java.util.Set; -import java.util.TreeMap; +import java.util.*; /** @@ -56,6 +54,7 @@ public class ReadMetaDataTrackerTest extends BaseTest { private static int readCount = 100; private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH; private static SAMFileHeader header; + private Set nameSet; @BeforeClass public static void beforeClass() { @@ -65,15 +64,32 @@ public class ReadMetaDataTrackerTest extends BaseTest { @Before public void beforeEach() { + nameSet = new TreeSet(); + nameSet.add("default"); + } + + @Test + public void twoRodsAtEachReadBase() { + nameSet.add("default2"); + ReadMetaDataTracker tracker = getRMDT(1, nameSet, true); + + // count the positions + int count = 0; + for (Long x : tracker.getPositionMapping().keySet()) { + count++; + Assert.assertEquals(2, tracker.getPositionMapping().get(x).size()); + } + Assert.assertEquals(10, count); } @Test public void rodAtEachReadBase() { - ReadMetaDataTracker tracker = getRMDT(1); + + ReadMetaDataTracker tracker = getRMDT(1, nameSet, true); // count the positions int count = 0; - for (int x : tracker.getPositionMapping().keySet()) { + for (Long x : tracker.getPositionMapping().keySet()) { count++; Assert.assertEquals(1, tracker.getPositionMapping().get(x).size()); } @@ -81,12 +97,78 @@ public class ReadMetaDataTrackerTest extends BaseTest { } @Test - public void sparceRODsForRead() { - ReadMetaDataTracker tracker = getRMDT(7); + public void filterByName() { + nameSet.add("default2"); + ReadMetaDataTracker tracker = getRMDT(1, nameSet, true); // count the positions int count = 0; - for (int x : tracker.getPositionMapping().keySet()) { + Map> map = tracker.getPositionMapping("default"); + for (Long x : map.keySet()) { + count++; + Assert.assertEquals(1, map.get(x).size()); + } + Assert.assertEquals(10, count); + } + + @Test + public void filterByDupType() { + nameSet.add("default2"); + ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type + // count the positions + int count = 0; + Map> map = tracker.getPositionMapping(FakeRODatum.class); + for (Long x : map.keySet()) { + count++; + Assert.assertEquals(2, map.get(x).size()); + } + Assert.assertEquals(10, count); + } + + // @Test this test can be uncommented to determine the speed impacts of any changes to the RODs for reads system + public void filterByMassiveDupType() { + + for (int y = 0; y < 20; y++) { + nameSet.add("default" + String.valueOf(y)); + long firstTime = System.currentTimeMillis(); + for (int lp = 0; lp < 1000; lp++) { + ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type + // count the positions + int count = 0; + Map> map = tracker.getPositionMapping(FakeRODatum.class); + for (Long x : map.keySet()) { + count++; + Assert.assertEquals(y + 2, map.get(x).size()); + } + Assert.assertEquals(10, count); + } + System.err.println(y + " = " + (System.currentTimeMillis() - firstTime)); + } + } + + + @Test + public void filterByType() { + nameSet.add("default2"); + ReadMetaDataTracker tracker = getRMDT(1, nameSet, true); + + // count the positions + int count = 0; + Map> map = tracker.getPositionMapping(Fake2RODatum.class); + for (long x : map.keySet()) { + count++; + Assert.assertEquals(1, map.get(x).size()); + } + Assert.assertEquals(10, count); + } + + @Test + public void sparceRODsForRead() { + ReadMetaDataTracker tracker = getRMDT(7, nameSet, true); + + // count the positions + int count = 0; + for (Long x : tracker.getPositionMapping().keySet()) { count++; Assert.assertEquals(1, tracker.getPositionMapping().get(x).size()); } @@ -95,7 +177,7 @@ public class ReadMetaDataTrackerTest extends BaseTest { @Test public void rodByGenomeLoc() { - ReadMetaDataTracker tracker = getRMDT(1); + ReadMetaDataTracker tracker = getRMDT(1, nameSet, true); // count the positions int count = 0; @@ -106,32 +188,62 @@ public class ReadMetaDataTrackerTest extends BaseTest { Assert.assertEquals(10, count); } - private ReadMetaDataTracker getRMDT(int incr) { - SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10); - TreeMap> data = new TreeMap>(); - for (int x = 0; x < record.getAlignmentEnd(); x+=incr) { + + /** + * create a ReadMetaDataTracker given: + * + * @param incr the spacing between site locations + * @param names the names of the reference ordered data to create: one will be created at every location for each name + * + * @return a ReadMetaDataTracker + */ + private ReadMetaDataTracker getRMDT(int incr, Set names, boolean alternateTypes) { + SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10); + TreeMap data = new TreeMap(); + for (int x = 0; x < record.getAlignmentEnd(); x += incr) { GenomeLoc loc = GenomeLocParser.createGenomeLoc(record.getReferenceIndex(), record.getAlignmentStart() + x, record.getAlignmentStart() + x); - Set set = new HashSet(); - set.add(new FakeRODatum(loc)); - data.put((long)record.getAlignmentStart() + x,set); + RODMetaDataContainer set = new RODMetaDataContainer(); + + int cnt = 0; + for (String name : names) { + if (alternateTypes) + set.addEntry((cnt % 2 == 0) ? new FakeRODatum(loc, name) : new Fake2RODatum(loc, name)); + else + set.addEntry(new FakeRODatum(loc, name)); + cnt++; + } + data.put((long) record.getAlignmentStart() + x, set); } ReadMetaDataTracker tracker = new ReadMetaDataTracker(record, data); return tracker; } + /** + * for testing, we want a fake rod with a different classname, for the get-by-class-name functions + */ + static public class Fake2RODatum extends FakeRODatum { + + public Fake2RODatum(GenomeLoc location, String name) { + super(location, name); + } + } + + /** for testing only */ static public class FakeRODatum implements ReferenceOrderedDatum { final GenomeLoc location; + final String name; - public FakeRODatum(GenomeLoc location) { + public FakeRODatum(GenomeLoc location, String name) { this.location = location; + this.name = name; } @Override public String getName() { - return "false"; + return name; } @Override diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerTest.java index 9f0f8e831..2e0022da5 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackManagerTest.java @@ -23,13 +23,18 @@ package org.broadinstitute.sting.gatk.refdata.tracks; +import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; +import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; @@ -57,10 +62,7 @@ public class RMDTrackManagerTest extends BaseTest { @Test public void testBuilderQuery() { - - for (RMDTrack t : tracks) { - System.err.println("name = " + t.getName() + " type = " + t.getType().getSimpleName() + " file = " + t.getFile()); int count = 0; Iterator fIter; @@ -75,24 +77,62 @@ public class RMDTrackManagerTest extends BaseTest { } Assert.assertEquals(100, count); } - } @Test public void testBuilderIterator() { for (RMDTrack t : tracks) { - System.err.println("name = " + t.getName() + " type = " + t.getType().getSimpleName() + " file = " + t.getFile()); int count = 0; - Iterator fIter = null; - fIter = t.getIterator(); + Iterator fIter = t.getIterator(); while (fIter.hasNext()) { fIter.next(); count++; } Assert.assertEquals(100, count); } + } + // @Test used only to determine how fast queries are, don't uncomment! (unless you know what you're doing). + public void testSpeedOfRealQuery() { + IndexedFastaSequenceFile file = null; + try { + file = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta")); + } catch (FileNotFoundException e) { + Assert.assertTrue(false); + } + final int intervalSize = 10000000; + GenomeLocParser.setupRefContigOrdering(file.getSequenceDictionary()); + RMDTrackManager manager = new RMDTrackManager(); + // add our db snp data + triplets.clear(); + triplets.add("db"); + triplets.add("DBSNP"); + triplets.add("../../GATK_Data/dbsnp_130_b36.rod"); + Assert.assertEquals(1, manager.getReferenceMetaDataSources(triplets).size()); + RMDTrack t = manager.getReferenceMetaDataSources(triplets).get(0); + // make sure we have a single track + // lets test the first and 20th contigs of the human reference + + for (int loop = 1; loop <= 22; loop++) { + SAMSequenceRecord seqRec = GenomeLocParser.getContigInfo(String.valueOf(loop)); + String name = seqRec.getSequenceName(); + Iterator fIter; + for (int x = 1; x < seqRec.getSequenceLength() - intervalSize; x += intervalSize) { + long firstTime = System.currentTimeMillis(); + long count = 0; + try { + fIter = ((FeatureReaderTrack) t).query("1", x, x + intervalSize); + } catch (IOException e) { + throw new StingException("blah I/O exception"); + } + while (fIter.hasNext()) { + fIter.next(); + count++; + } + System.err.println(name + "," + count + "," + (System.currentTimeMillis() - firstTime)); + } + } } }