2010-03-03 23:56:44 +08:00
|
|
|
/*
|
|
|
|
|
* Copyright (c) 2010. The Broad Institute
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person
|
|
|
|
|
* obtaining a copy of this software and associated documentation
|
|
|
|
|
* files (the "Software"), to deal in the Software without
|
|
|
|
|
* restriction, including without limitation the rights to use,
|
|
|
|
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
* copies of the Software, and to permit persons to whom the
|
|
|
|
|
* Software is furnished to do so, subject to the following
|
|
|
|
|
* conditions:
|
|
|
|
|
*
|
|
|
|
|
* The above copyright notice and this permission notice shall be
|
|
|
|
|
* included in all copies or substantial portions of the Software.
|
|
|
|
|
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
|
|
|
* OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
package org.broadinstitute.sting.gatk.refdata;
|
|
|
|
|
|
|
|
|
|
import net.sf.samtools.SAMFileHeader;
|
|
|
|
|
import net.sf.samtools.SAMRecord;
|
2010-11-02 05:31:44 +08:00
|
|
|
import org.testng.Assert;
|
2010-03-03 23:56:44 +08:00
|
|
|
import org.broadinstitute.sting.BaseTest;
|
2010-03-12 05:13:39 +08:00
|
|
|
import org.broadinstitute.sting.gatk.datasources.providers.RODMetaDataContainer;
|
2010-04-01 06:39:56 +08:00
|
|
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
2010-03-03 23:56:44 +08:00
|
|
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
|
|
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
|
|
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
2010-11-02 05:31:44 +08:00
|
|
|
|
|
|
|
|
import org.testng.annotations.BeforeMethod;
|
|
|
|
|
|
|
|
|
|
import org.testng.annotations.BeforeClass;
|
|
|
|
|
import org.testng.annotations.Test;
|
2010-03-03 23:56:44 +08:00
|
|
|
|
2010-03-12 05:13:39 +08:00
|
|
|
import java.util.*;
|
2010-03-03 23:56:44 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Class ReadMetaDataTrackerUnitTest
 * <p/>
 * Tests the ReadMetaDataTracker.
 *
 * @author aaron
 */
|
2010-04-08 14:14:15 +08:00
|
|
|
public class ReadMetaDataTrackerUnitTest extends BaseTest {
|
2010-03-03 23:56:44 +08:00
|
|
|
private static int startingChr = 1;
|
|
|
|
|
private static int endingChr = 2;
|
|
|
|
|
private static int readCount = 100;
|
|
|
|
|
private static int DEFAULT_READ_LENGTH = ArtificialSAMUtils.DEFAULT_READ_LENGTH;
|
|
|
|
|
private static SAMFileHeader header;
|
2010-03-12 05:13:39 +08:00
|
|
|
private Set<String> nameSet;
|
2010-03-03 23:56:44 +08:00
|
|
|
|
|
|
|
|
@BeforeClass
|
2010-11-02 05:31:44 +08:00
|
|
|
public void beforeClass() {
|
2010-03-03 23:56:44 +08:00
|
|
|
header = ArtificialSAMUtils.createArtificialSamHeader((endingChr - startingChr) + 1, startingChr, readCount + DEFAULT_READ_LENGTH);
|
|
|
|
|
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
|
|
|
|
}
|
|
|
|
|
|
2010-11-02 05:31:44 +08:00
|
|
|
@BeforeMethod
|
2010-03-03 23:56:44 +08:00
|
|
|
public void beforeEach() {
|
2010-03-12 05:13:39 +08:00
|
|
|
nameSet = new TreeSet<String>();
|
|
|
|
|
nameSet.add("default");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void twoRodsAtEachReadBase() {
|
|
|
|
|
nameSet.add("default2");
|
|
|
|
|
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
|
|
|
|
|
|
|
|
|
// count the positions
|
|
|
|
|
int count = 0;
|
2010-03-25 00:13:31 +08:00
|
|
|
for (Long x : tracker.getReadOffsetMapping().keySet()) {
|
2010-03-12 05:13:39 +08:00
|
|
|
count++;
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 2);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(count, 10);
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void rodAtEachReadBase() {
|
2010-03-12 05:13:39 +08:00
|
|
|
|
|
|
|
|
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
2010-03-03 23:56:44 +08:00
|
|
|
|
|
|
|
|
// count the positions
|
|
|
|
|
int count = 0;
|
2010-03-25 00:13:31 +08:00
|
|
|
for (Long x : tracker.getReadOffsetMapping().keySet()) {
|
2010-03-03 23:56:44 +08:00
|
|
|
count++;
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(count, 10);
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
2010-03-12 05:13:39 +08:00
|
|
|
@Test
|
|
|
|
|
public void filterByName() {
|
|
|
|
|
nameSet.add("default2");
|
|
|
|
|
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
|
|
|
|
|
|
|
|
|
// count the positions
|
|
|
|
|
int count = 0;
|
2010-04-01 06:39:56 +08:00
|
|
|
Map<Long, Collection<GATKFeature>> map = tracker.getReadOffsetMapping("default");
|
2010-03-12 05:13:39 +08:00
|
|
|
for (Long x : map.keySet()) {
|
|
|
|
|
count++;
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(map.get(x).size(), 1);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(count, 10);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void filterByDupType() {
|
|
|
|
|
nameSet.add("default2");
|
|
|
|
|
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
|
|
|
|
|
// count the positions
|
|
|
|
|
int count = 0;
|
2010-04-01 06:39:56 +08:00
|
|
|
Map<Long, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
|
2010-03-12 05:13:39 +08:00
|
|
|
for (Long x : map.keySet()) {
|
|
|
|
|
count++;
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(map.get(x).size(), 2);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(count, 10);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// @Test this test can be uncommented to determine the speed impacts of any changes to the RODs for reads system
|
2010-03-25 00:13:31 +08:00
|
|
|
|
2010-03-12 05:13:39 +08:00
|
|
|
public void filterByMassiveDupType() {
|
|
|
|
|
|
|
|
|
|
for (int y = 0; y < 20; y++) {
|
|
|
|
|
nameSet.add("default" + String.valueOf(y));
|
|
|
|
|
long firstTime = System.currentTimeMillis();
|
|
|
|
|
for (int lp = 0; lp < 1000; lp++) {
|
|
|
|
|
ReadMetaDataTracker tracker = getRMDT(1, nameSet, false); // create both RODs of the same type
|
|
|
|
|
// count the positions
|
|
|
|
|
int count = 0;
|
2010-04-01 06:39:56 +08:00
|
|
|
Map<Long, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(FakeRODatum.class);
|
2010-03-12 05:13:39 +08:00
|
|
|
for (Long x : map.keySet()) {
|
|
|
|
|
count++;
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(map.get(x).size(), y + 2);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(count, 10);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
|
|
|
|
System.err.println(y + " = " + (System.currentTimeMillis() - firstTime));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void filterByType() {
|
|
|
|
|
nameSet.add("default2");
|
|
|
|
|
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
|
|
|
|
|
|
|
|
|
// count the positions
|
|
|
|
|
int count = 0;
|
2010-04-01 06:39:56 +08:00
|
|
|
Map<Long, Collection<GATKFeature>> map = tracker.getReadOffsetMapping(Fake2RODatum.class);
|
2010-03-12 05:13:39 +08:00
|
|
|
for (long x : map.keySet()) {
|
|
|
|
|
count++;
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(map.get(x).size(), 1);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(count, 10);
|
2010-03-12 05:13:39 +08:00
|
|
|
}
|
|
|
|
|
|
2010-03-03 23:56:44 +08:00
|
|
|
@Test
|
|
|
|
|
public void sparceRODsForRead() {
|
2010-03-12 05:13:39 +08:00
|
|
|
ReadMetaDataTracker tracker = getRMDT(7, nameSet, true);
|
2010-03-03 23:56:44 +08:00
|
|
|
|
|
|
|
|
// count the positions
|
|
|
|
|
int count = 0;
|
2010-03-25 00:13:31 +08:00
|
|
|
for (Long x : tracker.getReadOffsetMapping().keySet()) {
|
2010-03-03 23:56:44 +08:00
|
|
|
count++;
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(tracker.getReadOffsetMapping().get(x).size(), 1);
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(count, 2);
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Test
|
|
|
|
|
public void rodByGenomeLoc() {
|
2010-03-12 05:13:39 +08:00
|
|
|
ReadMetaDataTracker tracker = getRMDT(1, nameSet, true);
|
2010-03-03 23:56:44 +08:00
|
|
|
|
|
|
|
|
// count the positions
|
|
|
|
|
int count = 0;
|
2010-03-25 00:13:31 +08:00
|
|
|
for (Long x : tracker.getContigOffsetMapping().keySet()) {
|
2010-03-03 23:56:44 +08:00
|
|
|
count++;
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(tracker.getContigOffsetMapping().get(x).size(), 1);
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
Assert.assertEquals(count, 10);
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
2010-03-12 05:13:39 +08:00
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* create a ReadMetaDataTracker given:
|
|
|
|
|
*
|
|
|
|
|
* @param incr the spacing between site locations
|
|
|
|
|
* @param names the names of the reference ordered data to create: one will be created at every location for each name
|
|
|
|
|
*
|
|
|
|
|
* @return a ReadMetaDataTracker
|
|
|
|
|
*/
|
|
|
|
|
private ReadMetaDataTracker getRMDT(int incr, Set<String> names, boolean alternateTypes) {
|
|
|
|
|
SAMRecord record = ArtificialSAMUtils.createArtificialRead(header, "name", 0, 1, 10);
|
|
|
|
|
TreeMap<Long, RODMetaDataContainer> data = new TreeMap<Long, RODMetaDataContainer>();
|
|
|
|
|
for (int x = 0; x < record.getAlignmentEnd(); x += incr) {
|
2010-03-03 23:56:44 +08:00
|
|
|
GenomeLoc loc = GenomeLocParser.createGenomeLoc(record.getReferenceIndex(), record.getAlignmentStart() + x, record.getAlignmentStart() + x);
|
2010-03-12 05:13:39 +08:00
|
|
|
RODMetaDataContainer set = new RODMetaDataContainer();
|
|
|
|
|
|
|
|
|
|
int cnt = 0;
|
|
|
|
|
for (String name : names) {
|
|
|
|
|
if (alternateTypes)
|
|
|
|
|
set.addEntry((cnt % 2 == 0) ? new FakeRODatum(loc, name) : new Fake2RODatum(loc, name));
|
|
|
|
|
else
|
|
|
|
|
set.addEntry(new FakeRODatum(loc, name));
|
|
|
|
|
cnt++;
|
|
|
|
|
}
|
|
|
|
|
data.put((long) record.getAlignmentStart() + x, set);
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
ReadMetaDataTracker tracker = new ReadMetaDataTracker(record, data);
|
|
|
|
|
return tracker;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2010-03-25 00:13:31 +08:00
|
|
|
/** for testing, we want a fake rod with a different classname, for the get-by-class-name functions */
|
2010-03-12 05:13:39 +08:00
|
|
|
static public class Fake2RODatum extends FakeRODatum {
|
|
|
|
|
|
|
|
|
|
public Fake2RODatum(GenomeLoc location, String name) {
|
|
|
|
|
super(location, name);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2010-03-03 23:56:44 +08:00
|
|
|
/** for testing only */
|
2010-04-01 06:39:56 +08:00
|
|
|
static public class FakeRODatum extends GATKFeature {
|
2010-03-03 23:56:44 +08:00
|
|
|
|
|
|
|
|
final GenomeLoc location;
|
2010-03-12 05:13:39 +08:00
|
|
|
final String name;
|
2010-03-03 23:56:44 +08:00
|
|
|
|
2010-03-12 05:13:39 +08:00
|
|
|
public FakeRODatum(GenomeLoc location, String name) {
|
2010-04-01 06:39:56 +08:00
|
|
|
super(name);
|
2010-03-03 23:56:44 +08:00
|
|
|
this.location = location;
|
2010-03-12 05:13:39 +08:00
|
|
|
this.name = name;
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
|
|
|
|
public String getName() {
|
2010-03-12 05:13:39 +08:00
|
|
|
return name;
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
2010-04-01 06:39:56 +08:00
|
|
|
public GenomeLoc getLocation() {
|
|
|
|
|
return this.location;
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
2010-04-01 06:39:56 +08:00
|
|
|
public Object getUnderlyingObject() {
|
|
|
|
|
return null; //To change body of implemented methods use File | Settings | File Templates.
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
2010-04-01 06:39:56 +08:00
|
|
|
public String getChr() {
|
|
|
|
|
return location.getContig();
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
2010-04-01 06:39:56 +08:00
|
|
|
public int getStart() {
|
|
|
|
|
return (int)this.location.getStart();
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@Override
|
2010-04-01 06:39:56 +08:00
|
|
|
public int getEnd() {
|
|
|
|
|
return (int)this.location.getStop();
|
2010-03-03 23:56:44 +08:00
|
|
|
}
|
|
|
|
|
}
|
2010-11-02 05:31:44 +08:00
|
|
|
}
|