fixing a bug that Eric found with RODs for reads, where some records could be omitted. Sorry Eric!

Also putting more tolerance into the timing on the tibble index tests (that check to make sure we're deleting out of date indexes, and not deleting perfectly good indexes).  It seems that some of the farm nodes aren't great with a stopwatch.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3674 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-06-29 21:38:55 +00:00
parent 101c27294d
commit 844cb2ed33
4 changed files with 113 additions and 4 deletions

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
@ -86,6 +87,11 @@ class WindowedData {
// the provider; where we get all our information
private final ShardDataProvider provider;
/**
* our log, which we want to capture anything from this class
*/
private static Logger logger = Logger.getLogger(WindowedData.class);
/**
* create a WindowedData given a shard provider
*
@ -102,10 +108,21 @@ class WindowedData {
* @param rec the current read
*/
private void getStates(ShardDataProvider provider, SAMRecord rec) {
long stop = Integer.MAX_VALUE;
// figure out the appropriate alignment stop
if (provider.hasReference()) {
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
}
// calculate the range of positions we need to look at
GenomeLoc range = GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(),
rec.getAlignmentStart(),
stop);
states = new ArrayList<RMDDataState>();
if (provider != null && provider.getReferenceOrderedData() != null)
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
states.add(new RMDDataState(dataSource, dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart()))));
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
}
/**

View File

@ -0,0 +1,56 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* validate the rods for reads
*/
public class ValidateRODForReads extends ReadWalker<Integer, Integer> {
// a mapping of the position to the count of rods
HashMap<GenomeLoc, Integer> map = new LinkedHashMap<GenomeLoc, Integer>();
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
if (tracker != null) {
Map<Long, Collection<GATKFeature>> mapping = tracker.getContigOffsetMapping();
for (Map.Entry<Long, Collection<GATKFeature>> entry : mapping.entrySet()) {
GenomeLoc location = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),entry.getKey());
if (!map.containsKey(location)) {
map.put(location,0);
}
map.put(location,map.get(location)+1);
}
return mapping.size();
}
return 0;
}
@Override
public Integer reduceInit() {
return 0;
}
@Override
public Integer reduce(Integer value, Integer sum) {
return sum + value;
}
public void onTraversalDone(Integer result) {
out.println("[REDUCE RESULT] Traversal result is: " + result + " ROD entries seen");
for (GenomeLoc location : map.keySet()) {
out.println(location + " -> " + map.get(location));
}
}
}

View File

@ -128,13 +128,13 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
tmpIndex.deleteOnExit();
// sleep, to make sure the timestamps are different
Thread.sleep(1000);
Thread.sleep(2000);
// copy the vcf (tribble) file to the tmp file location
copyFile(tribbleFile,tmpFile);
// sleep again, to make sure the timestamps are different (vcf vrs updated index file)
Thread.sleep(1000);
Thread.sleep(2000);
return tmpFile;
@ -161,7 +161,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
copyFile(tribbleFile,tmpFile);
// sleep again, to make sure the timestamps are different (vcf vrs updated index file)
Thread.sleep(1000);
Thread.sleep(2000);
// create a fake index, before we copy so it's out of date
File tmpIndex = new File(tmpFile.getAbsolutePath() + ".idx");

View File

@ -0,0 +1,36 @@
package org.broadinstitute.sting.oneoffprojects.walkers;
import org.broadinstitute.sting.WalkerTest;
import org.junit.Test;
import java.util.Arrays;
/**
* check that we're getting the expected results from the RODs for reads for a variety of input types
*/
public class ValidateRODForReadsIntegrationTest extends WalkerTest {
private final String vcfFile = validationDataLocation + "rodForReadsVCFCheck.vcf";
private final String dbSNPFile = GATKDataLocation + "dbsnp_129_hg18.rod";
public static String baseTestString1KG() {
return "-T ValidateRODForReads -o %s -R testdata/exampleFASTA.fasta -I testdata/exampleBAM.bam";
}
@Test
public void testSimpleVCFPileup() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString1KG() + " -B vcf,vcf," + vcfFile, 1,
Arrays.asList("f7919e9dc156fb5d3ad0541666864ea5"));
executeTest("testSimpleVCFPileup", spec);
}
@Test
public void testSimpleDbSNPPileup() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString1KG() + " -B dbsnp,dbsnp," + dbSNPFile, 1,
Arrays.asList("c63b8ef9291a450f0519c73ac9cae189"));
executeTest("testSimpleDbSNPPileup", spec);
}
}