fixing a bug that Eric found with RODs for reads, where some records could be omitted. Sorry Eric!
Also putting more tolerance into the timing on the tibble index tests (that check to make sure we're deleting out of date indexes, and not deleting perfectly good indexes). It seems that some of the farm nodes aren't great with a stopwatch. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3674 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
101c27294d
commit
844cb2ed33
|
|
@ -24,6 +24,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
|
|
@ -86,6 +87,11 @@ class WindowedData {
|
|||
// the provider; where we get all our information
|
||||
private final ShardDataProvider provider;
|
||||
|
||||
/**
|
||||
* our log, which we want to capture anything from this class
|
||||
*/
|
||||
private static Logger logger = Logger.getLogger(WindowedData.class);
|
||||
|
||||
/**
|
||||
* create a WindowedData given a shard provider
|
||||
*
|
||||
|
|
@ -102,10 +108,21 @@ class WindowedData {
|
|||
* @param rec the current read
|
||||
*/
|
||||
private void getStates(ShardDataProvider provider, SAMRecord rec) {
|
||||
|
||||
long stop = Integer.MAX_VALUE;
|
||||
// figure out the appropriate alignment stop
|
||||
if (provider.hasReference()) {
|
||||
stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength();
|
||||
}
|
||||
|
||||
// calculate the range of positions we need to look at
|
||||
GenomeLoc range = GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(),
|
||||
rec.getAlignmentStart(),
|
||||
stop);
|
||||
states = new ArrayList<RMDDataState>();
|
||||
if (provider != null && provider.getReferenceOrderedData() != null)
|
||||
for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData())
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart()))));
|
||||
states.add(new RMDDataState(dataSource, dataSource.seek(range)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -0,0 +1,56 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* validate the rods for reads
|
||||
*/
|
||||
public class ValidateRODForReads extends ReadWalker<Integer, Integer> {
|
||||
// a mapping of the position to the count of rods
|
||||
HashMap<GenomeLoc, Integer> map = new LinkedHashMap<GenomeLoc, Integer>();
|
||||
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
|
||||
if (tracker != null) {
|
||||
Map<Long, Collection<GATKFeature>> mapping = tracker.getContigOffsetMapping();
|
||||
for (Map.Entry<Long, Collection<GATKFeature>> entry : mapping.entrySet()) {
|
||||
GenomeLoc location = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),entry.getKey());
|
||||
if (!map.containsKey(location)) {
|
||||
map.put(location,0);
|
||||
}
|
||||
map.put(location,map.get(location)+1);
|
||||
}
|
||||
|
||||
return mapping.size();
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return sum + value;
|
||||
}
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
out.println("[REDUCE RESULT] Traversal result is: " + result + " ROD entries seen");
|
||||
for (GenomeLoc location : map.keySet()) {
|
||||
out.println(location + " -> " + map.get(location));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -128,13 +128,13 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
|
|||
tmpIndex.deleteOnExit();
|
||||
|
||||
// sleep, to make sure the timestamps are different
|
||||
Thread.sleep(1000);
|
||||
Thread.sleep(2000);
|
||||
|
||||
// copy the vcf (tribble) file to the tmp file location
|
||||
copyFile(tribbleFile,tmpFile);
|
||||
|
||||
// sleep again, to make sure the timestamps are different (vcf vrs updated index file)
|
||||
Thread.sleep(1000);
|
||||
Thread.sleep(2000);
|
||||
|
||||
return tmpFile;
|
||||
|
||||
|
|
@ -161,7 +161,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest {
|
|||
copyFile(tribbleFile,tmpFile);
|
||||
|
||||
// sleep again, to make sure the timestamps are different (vcf vrs updated index file)
|
||||
Thread.sleep(1000);
|
||||
Thread.sleep(2000);
|
||||
|
||||
// create a fake index, before we copy so it's out of date
|
||||
File tmpIndex = new File(tmpFile.getAbsolutePath() + ".idx");
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
package org.broadinstitute.sting.oneoffprojects.walkers;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* check that we're getting the expected results from the RODs for reads for a variety of input types
|
||||
*/
|
||||
public class ValidateRODForReadsIntegrationTest extends WalkerTest {
|
||||
|
||||
private final String vcfFile = validationDataLocation + "rodForReadsVCFCheck.vcf";
|
||||
private final String dbSNPFile = GATKDataLocation + "dbsnp_129_hg18.rod";
|
||||
|
||||
public static String baseTestString1KG() {
|
||||
return "-T ValidateRODForReads -o %s -R testdata/exampleFASTA.fasta -I testdata/exampleBAM.bam";
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testSimpleVCFPileup() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString1KG() + " -B vcf,vcf," + vcfFile, 1,
|
||||
Arrays.asList("f7919e9dc156fb5d3ad0541666864ea5"));
|
||||
executeTest("testSimpleVCFPileup", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleDbSNPPileup() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString1KG() + " -B dbsnp,dbsnp," + dbSNPFile, 1,
|
||||
Arrays.asList("c63b8ef9291a450f0519c73ac9cae189"));
|
||||
executeTest("testSimpleDbSNPPileup", spec);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue