adding methods to get RODs by name or type in read traversals, performance improvements to RODs for Reads in general, and some more Tribble infrastructure.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2984 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-03-11 21:13:39 +00:00
parent 18ba9929f9
commit 661a043cef
9 changed files with 393 additions and 78 deletions

View File

@ -0,0 +1,49 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.utils.Pair;
import java.util.*;
/**
*
* @author aaron
*
* Class RODMetaDataContainer
*
* stores both the name and the class for each ROD. This class assumes that:
*
* -Names must be unique
* -Classes are allowed to have dupplicates
*
* This class encapsulates the ref data associations, and provides lookup by name and by
* class type.
*
*/
public class RODMetaDataContainer {
// we only allow non-dupplicate ROD names, a HashMap is fine
private final HashMap<String, ReferenceOrderedDatum> nameMap = new HashMap<String, ReferenceOrderedDatum>();
// we do allow duplicate class entries, so we need to store pairs of data
private final List<Pair<Class, ReferenceOrderedDatum>> classMap = new ArrayList<Pair<Class, ReferenceOrderedDatum>>();
public void addEntry(ReferenceOrderedDatum data) {
nameMap.put(data.getName(),data);
classMap.add(new Pair<Class, ReferenceOrderedDatum>(data.getClass(),data));
}
public Collection<ReferenceOrderedDatum> getSet(String name) {
if (name == null) return nameMap.values();
Set<ReferenceOrderedDatum> set = new HashSet<ReferenceOrderedDatum>();
if (nameMap.containsKey(name)) set.add(nameMap.get(name));
return set;
}
// the brute force (n) search ended up being faster than sorting and binary search in all but the most extreme cases (thousands of RODs at a location).
public Collection<ReferenceOrderedDatum> getSet(Class cls) {
Collection<ReferenceOrderedDatum> ret = new ArrayList<ReferenceOrderedDatum>();
for (Pair<Class, ReferenceOrderedDatum> pair: classMap)
if (pair.first.equals(cls)) ret.add(pair.second);
return ret;
}
}

View File

@ -72,7 +72,7 @@ public class ReadBasedReferenceOrderedView implements View {
/** stores a window of data, dropping RODs if we've passed the new reads start point. */
class WindowedData {
// the queue of possibly in-frame RODs; RODs are dropped removed as soon as they are out of scope
private final TreeMap<Long, Set<ReferenceOrderedDatum>> mapping = new TreeMap<Long, Set<ReferenceOrderedDatum>>();
private final TreeMap<Long, RODMetaDataContainer> mapping = new TreeMap<Long, RODMetaDataContainer>();
// our current location from the last read we processed
private GenomeLoc currentLoc;
@ -149,8 +149,8 @@ class WindowedData {
RODRecordList list = state.iterator.next();
for (ReferenceOrderedDatum datum : list) {
if (!mapping.containsKey(list.getLocation().getStart()))
mapping.put(list.getLocation().getStart(), new HashSet<ReferenceOrderedDatum>());
mapping.get(list.getLocation().getStart()).add(datum);
mapping.put(list.getLocation().getStart(), new RODMetaDataContainer());
mapping.get(list.getLocation().getStart()).addEntry(datum);
}
}
}
@ -178,4 +178,4 @@ class RMDDataState {
this.dataSource = dataSource;
this.iterator = iterator;
}
}
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -39,7 +40,9 @@ import java.util.*;
*/
public class ReadMetaDataTracker {
private final SAMRecord record;
private final TreeMap<Long, Set<ReferenceOrderedDatum>> mapping;
// the buffer of positions and RODs we've stored
private final TreeMap<Long, RODMetaDataContainer> mapping;
/**
* create a read meta data tracker, given the read and a queue of RODatum positions
@ -47,7 +50,7 @@ public class ReadMetaDataTracker {
* @param record the read to create offset from
* @param mapping the mapping of reference ordered datum
*/
public ReadMetaDataTracker(SAMRecord record, TreeMap<Long, Set<ReferenceOrderedDatum>> mapping) {
public ReadMetaDataTracker(SAMRecord record, TreeMap<Long, RODMetaDataContainer> mapping) {
this.record = record;
this.mapping = mapping;
}
@ -62,14 +65,22 @@ public class ReadMetaDataTracker {
*
* @return a mapping from the position in the read to the reference ordered datum
*/
private Map<Integer, Set<ReferenceOrderedDatum>> createReadAlignment(SAMRecord record, TreeMap<Long, Set<ReferenceOrderedDatum>> queue, Class cl, String name) {
Map<Integer, Set<ReferenceOrderedDatum>> ret = new LinkedHashMap<Integer, Set<ReferenceOrderedDatum>>();
private Map<Long, Collection<ReferenceOrderedDatum>> createReadAlignment(SAMRecord record, TreeMap<Long, RODMetaDataContainer> queue, Class cl, String name) {
if (name != null && cl != null) throw new IllegalStateException("Both a class and name cannot be specified");
Map<Long, Collection<ReferenceOrderedDatum>> ret = new LinkedHashMap<Long, Collection<ReferenceOrderedDatum>>();
GenomeLoc location = GenomeLocParser.createGenomeLoc(record);
int length = record.getReadLength();
for (Long loc : queue.keySet()) {
//if (location.containsP(loc)) {
long position = loc - location.getStart();
if (position >= 0 && position < length) ret.put((int)(position),queue.get(loc));
Long position = loc - location.getStart();
if (position >= 0 && position < length) {
Collection<ReferenceOrderedDatum> set;
if (cl != null)
set = queue.get(loc).getSet(cl);
else
set = queue.get(loc).getSet(name);
if (set != null && set.size() > 0)
ret.put(position,set);
}
}
return ret;
@ -80,12 +91,16 @@ public class ReadMetaDataTracker {
*
* @return a mapping from the position in the read to the reference ordered datum
*/
private Map<Long, Set<ReferenceOrderedDatum>> createGenomeLocAlignment(SAMRecord record, TreeMap<Long, Set<ReferenceOrderedDatum>> mapping, Class cl, String name) {
Map<Long, Set<ReferenceOrderedDatum>> ret = new LinkedHashMap<Long, Set<ReferenceOrderedDatum>>();
private Map<Long, Collection<ReferenceOrderedDatum>> createGenomeLocAlignment(SAMRecord record, TreeMap<Long, RODMetaDataContainer> mapping, Class cl, String name) {
Map<Long, Collection<ReferenceOrderedDatum>> ret = new LinkedHashMap<Long, Collection<ReferenceOrderedDatum>>();
int start = record.getAlignmentStart();
int stop = record.getAlignmentEnd();
for (Long location : mapping.keySet()) {
if (location >= start && location <= stop) ret.put(location,mapping.get(location));
if (location >= start && location <= stop)
if (cl != null)
ret.put(location,mapping.get(location).getSet(cl));
else
ret.put(location,mapping.get(location).getSet(name));
}
return ret;
}
@ -95,7 +110,7 @@ public class ReadMetaDataTracker {
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Integer, Set<ReferenceOrderedDatum>> getPositionMapping() {
public Map<Long, Collection<ReferenceOrderedDatum>> getPositionMapping() {
return createReadAlignment(record, mapping, null, null);
}
@ -104,7 +119,43 @@ public class ReadMetaDataTracker {
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Long, Set<ReferenceOrderedDatum>> getGenomeLocMapping() {
public Map<Long, Collection<ReferenceOrderedDatum>> getGenomeLocMapping() {
return createGenomeLocAlignment(record, mapping, null, null);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Long, Collection<ReferenceOrderedDatum>> getPositionMapping(String name) {
return createReadAlignment(record, mapping, null, name);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Long, Collection<ReferenceOrderedDatum>> getGenomeLocMapping(String name) {
return createGenomeLocAlignment(record, mapping, null, name);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of read offset to ROD(s)
*/
public Map<Long, Collection<ReferenceOrderedDatum>> getPositionMapping(Class cl) {
return createReadAlignment(record, mapping, cl, null);
}
/**
* get the position mapping, from read offset to ROD
*
* @return a mapping of genome loc position to ROD(s)
*/
public Map<Long, Collection<ReferenceOrderedDatum>> getGenomeLocMapping(Class cl) {
return createGenomeLocAlignment(record, mapping, cl, null);
}
}

View File

@ -23,7 +23,7 @@ import java.util.*;
* Time: 10:47:14 AM
* To change this template use File | Settings | File Templates.
*/
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<RODRecordList> { // }, RMDTrackBuilder {
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<RODRecordList> {
private String name;
private File file = null;
// private String fieldDelimiter;
@ -36,32 +36,6 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
/** @return a map of all available tracks we currently have access to create */
//@Override
public Map<String, Class> getAvailableTrackNamesAndTypes() {
Map<String, Class> ret = new HashMap<String, Class>();
for (RODBinding binding: Types.values())
ret.put(binding.name, binding.type);
return ret;
}
/**
* create a RMDTrack of the specified type
*
* @param targetClass the target class of track
* @param name what to call the track
* @param inputFile the input file
*
* @return an instance of the track
* @throws org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException
* if we don't know of the target class or we couldn't create it
*/
//@Override
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
return new RODRMDTrack(targetClass, name, inputFile, parse1Binding(name,targetClass.getName(),inputFile.getAbsolutePath()));
}
// ----------------------------------------------------------------------
//
// Static ROD type management
@ -191,7 +165,7 @@ public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements
* @param fileName
* @return
*/
private static ReferenceOrderedData<?> parse1Binding(final String trackName, final String typeName, final String fileName) {
public static ReferenceOrderedData<?> parse1Binding(final String trackName, final String typeName, final String fileName) {
// Gracefully fail if we don't have the type
if (ReferenceOrderedData.Types.get(typeName.toLowerCase()) == null)
Utils.scareUser(String.format("Unknown ROD type: %s", typeName));

View File

@ -258,8 +258,8 @@ public class SeekableRODIterator implements LocationAwareSeekableRODIterator {
"the iterator's current contig");
if ( interval.getContigIndex() == curr_contig ) {
if ( interval.getStart() < curr_position )
throw new StingException("Out of order query: query position "+interval.getStart()+" is located before "+
"the iterator's current position "+curr_position);
throw new StingException("Out of order query: query position "+interval +" is located before "+
"the iterator's current position "+curr_contig + ":" + curr_position);
if ( interval.getStop() < curr_query_end )
throw new StingException("Unsupported querying sequence: current query interval " +
interval+" ends before the end of previous query interval ("+curr_query_end+")");

View File

@ -0,0 +1,88 @@
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
import org.broadinstitute.sting.gatk.refdata.tracks.RODRMDTrack;
import org.broadinstitute.sting.oneoffprojects.refdata.HapmapVCFROD;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
/**
* @author aaron
* <p/>
* Class RODTrackBuilder
* <p/>
* the builder for tracks of the current ROD system, a holdover until Tribble supports binary and multi-line formats
*/
public class RODTrackBuilder implements RMDTrackBuilder {
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
public static HashMap<String, ReferenceOrderedData.RODBinding> Types = new HashMap<String, ReferenceOrderedData.RODBinding>();
public static void addModule(final String name, final Class<? extends ReferenceOrderedDatum> rodType) {
final String boundName = name.toLowerCase();
if (Types.containsKey(boundName)) {
throw new RuntimeException(String.format("GATK BUG: adding ROD module %s that is already bound", boundName));
}
logger.info(String.format("* Adding rod class %s", name));
Types.put(boundName, new ReferenceOrderedData.RODBinding(name, rodType));
}
static {
// All known ROD types
addModule("GFF", RodGenotypeChipAsGFF.class);
//addModule("dbSNP", rodDbSNP.class);
addModule("HapMapAlleleFrequencies", HapMapAlleleFrequenciesROD.class);
addModule("SAMPileup", rodSAMPileup.class);
addModule("GELI", rodGELI.class);
addModule("RefSeq", rodRefSeq.class);
addModule("Table", TabularROD.class);
addModule("PooledEM", PooledEMSNPROD.class);
addModule("CleanedOutSNP", CleanedOutSNPROD.class);
addModule("Sequenom", SequenomROD.class);
addModule("SangerSNP", SangerSNPROD.class);
addModule("SimpleIndel", SimpleIndelROD.class);
addModule("PointIndel", PointIndelROD.class);
addModule("HapMapGenotype", HapMapGenotypeROD.class);
addModule("Intervals", IntervalRod.class);
addModule("Variants", RodGeliText.class);
addModule("GLF", RodGLF.class);
addModule("VCF", RodVCF.class);
addModule("PicardDbSNP", rodPicardDbSNP.class);
addModule("HapmapVCF", HapmapVCFROD.class);
addModule("Beagle", BeagleROD.class);
addModule("Plink", PlinkRod.class);
}
/**
* create a RMDTrack of the specified type
*
* @param targetClass the target class of track
* @param name what to call the track
* @param inputFile the input file
*
* @return an instance of the track
* @throws org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException
* if we don't know of the target class or we couldn't create it
*/
//@Override
public RMDTrack createInstanceOfTrack(Class targetClass, String name, File inputFile) throws RMDTrackCreationException {
return new RODRMDTrack(targetClass, name, inputFile, ReferenceOrderedData.parse1Binding(name,targetClass.getName(),inputFile.getAbsolutePath()));
}
/** @return a map of all available tracks we currently have access to create */
//@Override
public Map<String, Class> getAvailableTrackNamesAndTypes() {
Map<String, Class> ret = new HashMap<String, Class>();
for (ReferenceOrderedData.RODBinding binding: Types.values())
ret.put(binding.name, binding.type);
return ret;
}
}

View File

@ -77,13 +77,13 @@ public class ReadBasedReferenceOrderedViewTest extends BaseTest {
}
GenomeLoc start = GenomeLocParser.createGenomeLoc(0,0,0);
List<RMDDataState> list = new ArrayList<RMDDataState>();
list.add(new RMDDataState(null, new FakePeekingRODIterator(start)));
list.add(new RMDDataState(null, new FakePeekingRODIterator(start,"fakeName")));
ReadBasedReferenceOrderedView view = new ReadBasedReferenceOrderedView(new WindowedData(list));
for (SAMRecord rec : records) {
ReadMetaDataTracker tracker = view.getReferenceOrderedDataForRead(rec);
Map<Integer, Set<ReferenceOrderedDatum>> map = tracker.getPositionMapping();
for (Integer i : map.keySet()) {
Map<Long, Collection<ReferenceOrderedDatum>> map = tracker.getPositionMapping();
for (Long i : map.keySet()) {
Assert.assertEquals(1,map.get(i).size());
}
Assert.assertEquals(10,map.keySet().size());
@ -99,8 +99,9 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
// current location
private GenomeLoc location;
private ReadMetaDataTrackerTest.FakeRODatum curROD;
public FakePeekingRODIterator(GenomeLoc startingLoc) {
private final String name;
public FakePeekingRODIterator(GenomeLoc startingLoc, String name) {
this.name = name;
this.location = GenomeLocParser.createGenomeLoc(startingLoc.getContigIndex(),startingLoc.getStart()+1,startingLoc.getStop()+1);;
}
@ -130,7 +131,7 @@ class FakePeekingRODIterator implements LocationAwareSeekableRODIterator {
@Override
public RODRecordList next() {
System.err.println("Next -> " + location);
curROD = new ReadMetaDataTrackerTest.FakeRODatum(location);
curROD = new ReadMetaDataTrackerTest.FakeRODatum(location,name);
location = GenomeLocParser.createGenomeLoc(location.getContigIndex(),location.getStart()+1,location.getStop()+1);
FakeRODRecordList list = new FakeRODRecordList();
list.add(curROD);

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
@ -37,10 +38,7 @@ import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.TreeMap;
import java.util.*;
/**
@ -56,6 +54,7 @@ public class ReadMetaDataTrackerTest extends BaseTest {
private static int readCount = 100;
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
private static SAMFileHeader header;
private Set<String> nameSet;
@BeforeClass
public static void beforeClass() {
@ -65,15 +64,32 @@ public class ReadMetaDataTrackerTest extends BaseTest {
@Before
public void beforeEach() {
nameSet = new TreeSet<String>();
nameSet.add("default");
}
@Test
public void twoRodsAtEachReadBase() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (Long x : tracker.getPositionMapping().keySet()) {
count++;
Assert.assertEquals(2, tracker.getPositionMapping().get(x).size());
}
Assert.assertEquals(10, count);
}
@Test
public void rodAtEachReadBase() {
ReadMetaDataTracker tracker = getRMDT(1);
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (int x : tracker.getPositionMapping().keySet()) {
for (Long x : tracker.getPositionMapping().keySet()) {
count++;
Assert.assertEquals(1, tracker.getPositionMapping().get(x).size());
}
@ -81,12 +97,78 @@ public class ReadMetaDataTrackerTest extends BaseTest {
}
@Test
public void sparceRODsForRead() {
ReadMetaDataTracker tracker = getRMDT(7);
public void filterByName() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
for (int x : tracker.getPositionMapping().keySet()) {
Map<Long, Collection<ReferenceOrderedDatum>> map = tracker.getPositionMapping("default");
for (Long x : map.keySet()) {
count++;
Assert.assertEquals(1, map.get(x).size());
}
Assert.assertEquals(10, count);
}
@Test
public void filterByDupType() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
// count the positions
int count = 0;
Map<Long, Collection<ReferenceOrderedDatum>> map = tracker.getPositionMapping(FakeRODatum.class);
for (Long x : map.keySet()) {
count++;
Assert.assertEquals(2, map.get(x).size());
}
Assert.assertEquals(10, count);
}
// @Test this test can be uncommented to determine the speed impacts of any changes to the RODs for reads system
public void filterByMassiveDupType() {
for (int y = 0; y < 20; y++) {
nameSet.add("default" + String.valueOf(y));
long firstTime = System.currentTimeMillis();
for (int lp = 0; lp < 1000; lp++) {
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
// count the positions
int count = 0;
Map<Long, Collection<ReferenceOrderedDatum>> map = tracker.getPositionMapping(FakeRODatum.class);
for (Long x : map.keySet()) {
count++;
Assert.assertEquals(y + 2, map.get(x).size());
}
Assert.assertEquals(10, count);
}
System.err.println(y + " = " + (System.currentTimeMillis() - firstTime));
}
}
@Test
public void filterByType() {
nameSet.add("default2");
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
Map<Long, Collection<ReferenceOrderedDatum>> map = tracker.getPositionMapping(Fake2RODatum.class);
for (long x : map.keySet()) {
count++;
Assert.assertEquals(1, map.get(x).size());
}
Assert.assertEquals(10, count);
}
@Test
public void sparceRODsForRead() {
ReadMetaDataTracker tracker = getRMDT(7, nameSet, true);
// count the positions
int count = 0;
for (Long x : tracker.getPositionMapping().keySet()) {
count++;
Assert.assertEquals(1, tracker.getPositionMapping().get(x).size());
}
@ -95,7 +177,7 @@ public class ReadMetaDataTrackerTest extends BaseTest {
@Test
public void rodByGenomeLoc() {
ReadMetaDataTracker tracker = getRMDT(1);
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
// count the positions
int count = 0;
@ -106,32 +188,62 @@ public class ReadMetaDataTrackerTest extends BaseTest {
Assert.assertEquals(10, count);
}
private ReadMetaDataTracker getRMDT(int incr) {
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10);
TreeMap<Long, Set<ReferenceOrderedDatum>> data = new TreeMap<Long, Set<ReferenceOrderedDatum>>();
for (int x = 0; x < record.getAlignmentEnd(); x+=incr) {
/**
* create a ReadMetaDataTracker given:
*
* @param incr the spacing between site locations
* @param names the names of the reference ordered data to create: one will be created at every location for each name
*
* @return a ReadMetaDataTracker
*/
private ReadMetaDataTracker getRMDT(int incr, Set<String> names, boolean alternateTypes) {
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10);
TreeMap<Long, RODMetaDataContainer> data = new TreeMap<Long, RODMetaDataContainer>();
for (int x = 0; x < record.getAlignmentEnd(); x += incr) {
GenomeLoc loc = GenomeLocParser.createGenomeLoc(record.getReferenceIndex(), record.getAlignmentStart() + x, record.getAlignmentStart() + x);
Set<ReferenceOrderedDatum> set = new HashSet<ReferenceOrderedDatum>();
set.add(new FakeRODatum(loc));
data.put((long)record.getAlignmentStart() + x,set);
RODMetaDataContainer set = new RODMetaDataContainer();
int cnt = 0;
for (String name : names) {
if (alternateTypes)
set.addEntry((cnt % 2 == 0) ? new FakeRODatum(loc, name) : new Fake2RODatum(loc, name));
else
set.addEntry(new FakeRODatum(loc, name));
cnt++;
}
data.put((long) record.getAlignmentStart() + x, set);
}
ReadMetaDataTracker tracker = new ReadMetaDataTracker(record, data);
return tracker;
}
/**
* for testing, we want a fake rod with a different classname, for the get-by-class-name functions
*/
static public class Fake2RODatum extends FakeRODatum {
public Fake2RODatum(GenomeLoc location, String name) {
super(location, name);
}
}
/** for testing only */
static public class FakeRODatum implements ReferenceOrderedDatum {
final GenomeLoc location;
final String name;
public FakeRODatum(GenomeLoc location) {
public FakeRODatum(GenomeLoc location, String name) {
this.location = location;
this.name = name;
}
@Override
public String getName() {
return "false";
return name;
}
@Override

View File

@ -23,13 +23,18 @@
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
@ -57,10 +62,7 @@ public class RMDTrackManagerTest extends BaseTest {
@Test
public void testBuilderQuery() {
for (RMDTrack t : tracks) {
System.err.println("name = " + t.getName() + " type = " + t.getType().getSimpleName() + " file = " + t.getFile());
int count = 0;
Iterator<GATKFeature> fIter;
@ -75,24 +77,62 @@ public class RMDTrackManagerTest extends BaseTest {
}
Assert.assertEquals(100, count);
}
}
@Test
public void testBuilderIterator() {
for (RMDTrack t : tracks) {
System.err.println("name = " + t.getName() + " type = " + t.getType().getSimpleName() + " file = " + t.getFile());
int count = 0;
Iterator<GATKFeature> fIter = null;
fIter = t.getIterator();
Iterator<GATKFeature> fIter = t.getIterator();
while (fIter.hasNext()) {
fIter.next();
count++;
}
Assert.assertEquals(100, count);
}
}
// @Test used only to determine how fast queries are, don't uncomment! (unless you know what you're doing).
public void testSpeedOfRealQuery() {
IndexedFastaSequenceFile file = null;
try {
file = new IndexedFastaSequenceFile(new File("/broad/1KG/reference/human_b36_both.fasta"));
} catch (FileNotFoundException e) {
Assert.assertTrue(false);
}
final int intervalSize = 10000000;
GenomeLocParser.setupRefContigOrdering(file.getSequenceDictionary());
RMDTrackManager manager = new RMDTrackManager();
// add our db snp data
triplets.clear();
triplets.add("db");
triplets.add("DBSNP");
triplets.add("../../GATK_Data/dbsnp_130_b36.rod");
Assert.assertEquals(1, manager.getReferenceMetaDataSources(triplets).size());
RMDTrack t = manager.getReferenceMetaDataSources(triplets).get(0);
// make sure we have a single track
// lets test the first and 20th contigs of the human reference
for (int loop = 1; loop <= 22; loop++) {
SAMSequenceRecord seqRec = GenomeLocParser.getContigInfo(String.valueOf(loop));
String name = seqRec.getSequenceName();
Iterator<GATKFeature> fIter;
for (int x = 1; x < seqRec.getSequenceLength() - intervalSize; x += intervalSize) {
long firstTime = System.currentTimeMillis();
long count = 0;
try {
fIter = ((FeatureReaderTrack) t).query("1", x, x + intervalSize);
} catch (IOException e) {
throw new StingException("blah I/O exception");
}
while (fIter.hasNext()) {
fIter.next();
count++;
}
System.err.println(name + "," + count + "," + (System.currentTimeMillis() - firstTime));
}
}
}
}