diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java index e213677ae..034472676 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/providers/ReadBasedReferenceOrderedView.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.datasources.providers; import net.sf.samtools.SAMRecord; +import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; @@ -86,6 +87,11 @@ class WindowedData { // the provider; where we get all our information private final ShardDataProvider provider; + /** + * our log, which we want to capture anything from this class + */ + private static Logger logger = Logger.getLogger(WindowedData.class); + /** * create a WindowedData given a shard provider * @@ -102,10 +108,21 @@ class WindowedData { * @param rec the current read */ private void getStates(ShardDataProvider provider, SAMRecord rec) { + + long stop = Integer.MAX_VALUE; + // figure out the appropriate alignment stop + if (provider.hasReference()) { + stop = provider.getReference().getSequenceDictionary().getSequence(rec.getReferenceIndex()).getSequenceLength(); + } + + // calculate the range of positions we need to look at + GenomeLoc range = GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), + rec.getAlignmentStart(), + stop); states = new ArrayList(); if (provider != null && provider.getReferenceOrderedData() != null) for (ReferenceOrderedDataSource dataSource : provider.getReferenceOrderedData()) - states.add(new RMDDataState(dataSource, dataSource.seek(GenomeLocParser.createGenomeLoc(rec.getReferenceIndex(), rec.getAlignmentStart())))); + states.add(new RMDDataState(dataSource, dataSource.seek(range))); } /** diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java new file mode 100644 index 000000000..e01272287 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReads.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; +import org.broadinstitute.sting.gatk.walkers.ReadWalker; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.Collection; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * validate the rods for reads + */ +public class ValidateRODForReads extends ReadWalker { + // a mapping of the position to the count of rods + HashMap map = new LinkedHashMap(); + + @Override + public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) { + if (tracker != null) { + Map> mapping = tracker.getContigOffsetMapping(); + for (Map.Entry> entry : mapping.entrySet()) { + GenomeLoc location = GenomeLocParser.createGenomeLoc(read.getReferenceIndex(),entry.getKey()); + if (!map.containsKey(location)) { + map.put(location,0); + } + map.put(location,map.get(location)+1); + } + + return mapping.size(); + } + return 0; + } + + @Override + public Integer reduceInit() { + return 0; + } + + @Override + public Integer reduce(Integer value, Integer sum) { + return sum + value; + } + + public void onTraversalDone(Integer result) { + out.println("[REDUCE RESULT] Traversal result is: " + result + " ROD entries seen"); + for (GenomeLoc location : map.keySet()) { + out.println(location + " -> " + map.get(location)); + } + } +} diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java index 63c22dd37..9f97fbf71 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java @@ -128,13 +128,13 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest { tmpIndex.deleteOnExit(); // sleep, to make sure the timestamps are different - Thread.sleep(1000); + Thread.sleep(2000); // copy the vcf (tribble) file to the tmp file location copyFile(tribbleFile,tmpFile); // sleep again, to make sure the timestamps are different (vcf vrs updated index file) - Thread.sleep(1000); + Thread.sleep(2000); return tmpFile; @@ -161,7 +161,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest { copyFile(tribbleFile,tmpFile); // sleep again, to make sure the timestamps are different (vcf vrs updated index file) - Thread.sleep(1000); + Thread.sleep(2000); // create a fake index, before we copy so it's out of date File tmpIndex = new File(tmpFile.getAbsolutePath() + ".idx"); diff --git a/java/test/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReadsIntegrationTest.java b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReadsIntegrationTest.java new file mode 100644 index 000000000..14411d19e --- /dev/null +++ b/java/test/org/broadinstitute/sting/oneoffprojects/walkers/ValidateRODForReadsIntegrationTest.java @@ -0,0 +1,36 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +/** + * check that we're getting the expected results from the RODs for reads for a variety of input types + */ +public class ValidateRODForReadsIntegrationTest extends WalkerTest { + + private final String vcfFile = validationDataLocation + "rodForReadsVCFCheck.vcf"; + private final String dbSNPFile = GATKDataLocation + "dbsnp_129_hg18.rod"; + + public static String baseTestString1KG() { + return "-T ValidateRODForReads -o %s -R testdata/exampleFASTA.fasta -I testdata/exampleBAM.bam"; + } + + + @Test + public void testSimpleVCFPileup() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString1KG() + " -B vcf,vcf," + vcfFile, 1, + Arrays.asList("f7919e9dc156fb5d3ad0541666864ea5")); + executeTest("testSimpleVCFPileup", spec); + } + + @Test + public void testSimpleDbSNPPileup() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString1KG() + " -B dbsnp,dbsnp," + dbSNPFile, 1, + Arrays.asList("c63b8ef9291a450f0519c73ac9cae189")); + executeTest("testSimpleDbSNPPileup", spec); + } +}