From 844cb2ed3319e741bfe13044032a8be98117c4e5 Mon Sep 17 00:00:00 2001 From: aaron Date: Tue, 29 Jun 2010 21:38:55 +0000 Subject: [PATCH] fixing a bug that Eric found with RODs for reads, where some records could be omitted. Sorry Eric! Also putting more tolerance into the timing on the tibble index tests (that check to make sure we're deleting out of date indexes, and not deleting perfectly good indexes). It seems that some of the farm nodes aren't great with a stopwatch. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3674 348d0f76-0448-11de-a6fe-93d51630548a --- .../ReadBasedReferenceOrderedView.java | 19 ++++++- .../walkers/ValidateRODForReads.java | 56 +++++++++++++++++++ .../TribbleRMDTrackBuilderUnitTest.java | 6 +- .../ValidateRODForReadsIntegrationTest.java | 36 ++++++++++++ 4 files changed, 113 insertions(+), 4 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java create mode 100644 java/test/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReadsIntegrationTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index e213677ae..034472676 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import net.sf.samtools.SAMRecord; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; @@ -86,6 +87,11 @@ class WindowedData { // the provider; where we get all our information private final ShardDataProvider provider; + /** + * our log, which we want to capture anything from this class + */ + private static Logger logger = Logger.getLogger(WindowedData.class); + /** * create a WindowedData given a shard provider * @@ -102,10 +108,21 @@ class WindowedData { * @param rec the current read */ private void getStates(ShardDataProvider provider, SAMRecord rec) { + + long stop = Integer.MAX_VALUE; + // figure out the appropriate alignment stop + if (provider.hasReference()) { + stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength(); + } + + // calculate the range of positions we need to look at + GenomeLoc range = GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), + rec.getAlignmentStart(), + stop); states = new ArrayList(); if (provider != null && provider.getReferenceOrderedData() != null) for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) - states.add(new RMDDataState(dataSource, dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart())))); + states.add(new RMDDataState(dataSource, dataSource.seek(range))); } /** diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java new file mode 100644 index 000000000..e01272287 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * validate the rods for reads + */ +public class ValidateRODForReads extends ReadWalker { + // a mapping of the position to the count of rods + HashMap map = new LinkedHashMap(); + + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { + if (tracker != null) { + Map> mapping = tracker.getContigOffsetMapping(); + for (Map.Entry> entry : mapping.entrySet()) { + GenomeLoc location = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),entry.getKey()); + if (!map.containsKey(location)) { + map.put(location,0); + } + map.put(location,map.get(location)+1); + } + + return mapping.size(); + } + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return sum + value; + } + + public void onTraversalDone(Integer result) { + out.println("[REDUCE RESULT] Traversal result is: " + result + " ROD entries seen"); + for (GenomeLoc location : map.keySet()) { + out.println(location + " -> " + map.get(location)); + } + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java index 63c22dd37..9f97fbf71 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java @@ -128,13 +128,13 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest { tmpIndex.deleteOnExit(); // sleep, to make sure the timestamps are different - Thread.sleep(1000); + Thread.sleep(2000); // copy the vcf (tribble) file to the tmp file location copyFile(tribbleFile,tmpFile); // sleep again, to make sure the timestamps are different (vcf vrs updated index file) - Thread.sleep(1000); + Thread.sleep(2000); return tmpFile; @@ -161,7 +161,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest { copyFile(tribbleFile,tmpFile); // sleep again, to make sure the timestamps are different (vcf vrs updated index file) - Thread.sleep(1000); + Thread.sleep(2000); // create a fake index, before we copy so it's out of date File tmpIndex = new File(tmpFile.getAbsolutePath() + ".idx"); diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReadsIntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReadsIntegrationTest.java new file mode 100644 index 000000000..14411d19e --- /dev/null +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReadsIntegrationTest.java @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +/** + * check that we're getting the expected results from the RODs for reads for a variety of input types + */ +public class ValidateRODForReadsIntegrationTest extends WalkerTest { + + private final String vcfFile = validationDataLocation + "rodForReadsVCFCheck.vcf"; + private final String dbSNPFile = GATKDataLocation + "dbsnp_129_hg18.rod"; + + public static String baseTestString1KG() { + return "-T ValidateRODForReads -o %s -R testdata/exampleFASTA.fasta -I testdata/exampleBAM.bam"; + } + + + @Test + public void testSimpleVCFPileup() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString1KG() + " -B vcf,vcf," + vcfFile, 1, + Arrays.asList("f7919e9dc156fb5d3ad0541666864ea5")); + executeTest("testSimpleVCFPileup", spec); + } + + @Test + public void testSimpleDbSNPPileup() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString1KG() + " -B dbsnp,dbsnp," + dbSNPFile, 1, + Arrays.asList("c63b8ef9291a450f0519c73ac9cae189")); + executeTest("testSimpleDbSNPPileup", spec); + } +}