diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index ba5577730..84b8e39d3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -52,6 +52,7 @@ import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation; import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.samples.SampleDBBuilder; +import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.classloader.PluginManager; @@ -842,6 +843,8 @@ public class GenomeAnalysisEngine { if (argCollection.keepProgramRecords) removeProgramRecords = false; + final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker && TraverseActiveRegions.KEEP_READS_IN_LIBS; + return new SAMDataSource( samReaderIDs, threadAllocation, @@ -856,7 +859,8 @@ public class GenomeAnalysisEngine { readTransformers, includeReadsWithDeletionAtLoci(), argCollection.defaultBaseQualities, - removeProgramRecords); + removeProgramRecords, + keepReadsInLIBS); } /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java index 409b08e5d..1ca0a8a46 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java +++ b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java @@ -61,6 +61,7 @@ public class ReadProperties { private final ValidationExclusion exclusionList; private final Collection supplementalFilters; private final List readTransformers; + private final boolean keepUniqueReadListInLIBS; private final boolean includeReadsWithDeletionAtLoci; private final boolean 
useOriginalBaseQualities; private final byte defaultBaseQualities; @@ -74,6 +75,10 @@ public class ReadProperties { return includeReadsWithDeletionAtLoci; } + public boolean keepUniqueReadListInLIBS() { + return keepUniqueReadListInLIBS; + } + /** * Gets a list of the files acting as sources of reads. * @return A list of files storing reads data. @@ -161,6 +166,8 @@ public class ReadProperties { * will explicitly list reads with deletion over the current reference base; otherwise, only observed * bases will be seen in the pileups, and the deletions will be skipped silently. * @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality. + * @param keepUniqueReadListInLIBS If true, we will tell LocusIteratorByState to track the unique reads it sees + * This is really useful for ActiveRegionTraversals */ public ReadProperties( Collection samFiles, SAMFileHeader header, @@ -172,7 +179,8 @@ public class ReadProperties { Collection supplementalFilters, List readTransformers, boolean includeReadsWithDeletionAtLoci, - byte defaultBaseQualities) { + byte defaultBaseQualities, + final boolean keepUniqueReadListInLIBS) { this.readers = samFiles; this.header = header; this.sortOrder = sortOrder; @@ -184,5 +192,6 @@ public class ReadProperties { this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; this.useOriginalBaseQualities = useOriginalBaseQualities; this.defaultBaseQualities = defaultBaseQualities; + this.keepUniqueReadListInLIBS = keepUniqueReadListInLIBS; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index cb47ffe4c..c9a3b0df0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -158,6 +158,9 @@ public class SAMDataSource { /** * Create a 
new SAM data source given the supplied read metadata. + * + * For testing purposes + * * @param samFiles list of reads files. */ public SAMDataSource(Collection samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, GenomeLocParser genomeLocParser) { @@ -177,6 +180,8 @@ public class SAMDataSource { /** * See complete constructor. Does not enable BAQ by default. + * + * For testing purposes */ public SAMDataSource( Collection samFiles, @@ -203,6 +208,7 @@ public class SAMDataSource { Collections.emptyList(), includeReadsWithDeletionAtLoci, (byte) -1, + false, false); } @@ -219,6 +225,7 @@ public class SAMDataSource { * will explicitly list reads with deletion over the current reference base; otherwise, only observed * bases will be seen in the pileups, and the deletions will be skipped silently. * @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality. + * @param keepReadsInLIBS should we keep a unique list of reads in LIBS? */ public SAMDataSource( Collection samFiles, @@ -234,7 +241,8 @@ public class SAMDataSource { List readTransformers, boolean includeReadsWithDeletionAtLoci, byte defaultBaseQualities, - boolean removeProgramRecords) { + boolean removeProgramRecords, + final boolean keepReadsInLIBS) { this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; @@ -306,7 +314,8 @@ public class SAMDataSource { supplementalFilters, readTransformers, includeReadsWithDeletionAtLoci, - defaultBaseQualities); + defaultBaseQualities, + keepReadsInLIBS); // cache the read group id (original) -> read group id (merged) // and read group id (merged) -> read group id (original) mappings. 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 34fa704c1..2d439544d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -51,6 +51,11 @@ import java.util.*; */ public class TraverseActiveRegions extends TraversalEngine,LocusShardDataProvider> { + // TODO + // TODO -- remove me when ART uses the LIBS traversal + // TODO + public static final boolean KEEP_READS_IN_LIBS = false; + /** * our log, which we want to capture anything from this class */ diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java index 82e22efa7..bb88a1e75 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByState.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.utils.locusiterator; +import com.google.java.contract.Ensures; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; import net.sf.samtools.SAMRecord; @@ -63,7 +64,6 @@ public class LocusIteratorByState extends LocusIterator { private final GenomeLocParser genomeLocParser; private final ArrayList samples; private final ReadStateManager readStates; - private final boolean keepSubmittedReads; private final boolean includeReadsWithDeletionAtLoci; private AlignmentContext nextAlignmentContext; @@ -82,19 +82,20 @@ public class LocusIteratorByState extends LocusIterator { toDownsamplingInfo(readInformation), readInformation.includeReadsWithDeletionAtLoci(), genomeLocParser, - samples); + samples, + readInformation.keepUniqueReadListInLIBS()); } protected LocusIteratorByState(final 
Iterator samIterator, final LIBSDownsamplingInfo downsamplingInfo, final boolean includeReadsWithDeletionAtLoci, final GenomeLocParser genomeLocParser, - final Collection samples) { + final Collection samples, + final boolean maintainUniqueReadsList ) { this.includeReadsWithDeletionAtLoci = includeReadsWithDeletionAtLoci; this.genomeLocParser = genomeLocParser; this.samples = new ArrayList(samples); - this.keepSubmittedReads = false; // TODO -- HOOK UP SYSTEM - this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, keepSubmittedReads); + this.readStates = new ReadStateManager(samIterator, this.samples, downsamplingInfo, maintainUniqueReadsList); // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when // there's no read data. So we need to throw this error only when samIterator.hasNext() is true @@ -237,6 +238,51 @@ public class LocusIteratorByState extends LocusIterator { } } + // ----------------------------------------------------------------------------------------------------------------- + // + // getting the list of reads + // + // ----------------------------------------------------------------------------------------------------------------- + + /** + * Transfer current list of all unique reads that have ever been used in any pileup, clearing old list + * + * This list is guaranteed to only contain unique reads, even across calls to this function. It is + * literally the unique set of reads ever seen. + * + * The list occurs in the same order as they are encountered in the underlying iterator. + * + * Takes the maintained list of submitted reads, and transfers it to the caller of this + * function. The old list is set to a new, cleanly allocated list so the caller officially + * owns the list returned by this call. This is the only way to clear the tracking + * of submitted reads, if enabled.
+ * + * The purpose of this function is to allow users of LIBS to keep track of all of the reads pulled off the + * underlying SAMRecord iterator and that appeared at any point in the list of SAMRecordAlignmentState for + * any reads. This function is intended to allow users to efficiently reconstruct the unique set of reads + * used across all pileups. This is necessary for LIBS to handle because attempting to do + * so from the pileups coming out of LIBS is extremely expensive. + * + * This functionality is only available if LIBS was created with the argument to track the reads + * + * @throws UnsupportedOperationException if called when keepSubmittedReads is false + * + * @return the current list + */ + @Ensures("result != null") + public List transferReadsFromAllPreviousPileups() { + return readStates.transferSubmittedReads(); + } + + /** + * Get the underlying list of tracked reads. For testing only + * @return a non-null list + */ + @Ensures("result != null") + protected List getReadsFromAllPreviousPileups() { + return readStates.getSubmittedReads(); + } + // ----------------------------------------------------------------------------------------------------------------- // // utility functions diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java index 9400b5cf5..b650bf21f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java @@ -206,7 +206,7 @@ class ReadStateManager { * interact with ReadStateManager in some way to make work unit * readsUsedInPileup = transferSubmittedReads) * - * @throws UnsupportedOperationException if called when keepingSubmittedReads is false + * @throws UnsupportedOperationException if called when keepSubmittedReads is false * * @return the current list of submitted reads */ @@
-223,6 +223,14 @@ class ReadStateManager { return prevSubmittedReads; } + /** + * Are we keeping submitted reads, or not? + * @return true if we are keeping them, false otherwise + */ + public boolean isKeepingSubmittedReads() { + return keepSubmittedReads; + } + /** * Obtain a pointer to the list of submitted reads. * @@ -232,11 +240,11 @@ class ReadStateManager { * * For testing purposes only. * - * Will always be empty if we are are not keepingSubmittedReads + * Will always be empty if keepSubmittedReads is false * * @return a non-null list of reads that have been submitted to this ReadStateManager */ - @Ensures({"result != null","keepingSubmittedReads || result.isEmpty()"}) + @Ensures({"result != null","keepSubmittedReads || result.isEmpty()"}) protected List getSubmittedReads() { return submittedReads; } diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java index 9db9f4b8e..82001cf26 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java @@ -335,16 +335,17 @@ public class ArtificialSAMUtils { * @return a collection of stackSize reads all sharing the above properties */ public static List createReadStream( final int nReadsPerLocus, - final int nLoci, - final SAMFileHeader header, - final int alignmentStart, - final int length ) { - final String name = "readName"; + final int nLoci, + final SAMFileHeader header, + final int alignmentStart, + final int length ) { + final String baseName = "read"; List reads = new ArrayList(nReadsPerLocus*nLoci); for ( int locus = 0; locus < nLoci; locus++ ) { for ( int readI = 0; readI < nReadsPerLocus; readI++ ) { for ( final SAMReadGroupRecord rg : header.getReadGroups() ) { - final GATKSAMRecord read = createArtificialRead(header, name, 0, alignmentStart, length); + final String readName =
String.format("%s.%d.%d.%s", baseName, locus, readI, rg.getId()); + final GATKSAMRecord read = createArtificialRead(header, readName, 0, alignmentStart + locus, length); read.setReadGroup(new GATKSAMReadGroupRecord(rg)); reads.add(read); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java index 8109fb61e..461bbe37b 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java @@ -80,7 +80,8 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark { Collections.emptyList(), Collections.emptyList(), false, - (byte)0); + (byte)0, + false); GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary()); // Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out? 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java index 15e86f30e..23720e60d 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java @@ -182,7 +182,8 @@ public class SAMDataSourceUnitTest extends BaseTest { Collections.emptyList(), false, (byte) -1, - removeProgramRecords); + removeProgramRecords, + false); List dontRemoveProgramRecords = data.getHeader().getProgramRecords(); assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false"); @@ -201,7 +202,8 @@ public class SAMDataSourceUnitTest extends BaseTest { Collections.emptyList(), false, (byte) -1, - removeProgramRecords); + removeProgramRecords, + false); List doRemoveProgramRecords = data.getHeader().getProgramRecords(); assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true"); diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java index e02aa7a48..448b3489e 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java @@ -82,10 +82,10 @@ public class LocusIteratorByStateBaseTest extends BaseTest { } protected static ReadProperties createTestReadProperties() { - return createTestReadProperties(null); + return createTestReadProperties(null, false); } - protected static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) { + 
protected static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod, final boolean keepReads ) { return new ReadProperties( Collections.emptyList(), new SAMFileHeader(), @@ -97,8 +97,8 @@ public class LocusIteratorByStateBaseTest extends BaseTest { Collections.emptyList(), Collections.emptyList(), false, - (byte) -1 - ); + (byte) -1, + keepReads); } protected static class FakeCloseableIterator implements CloseableIterator { diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java index 6f407f613..29d7c0d9a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateUnitTest.java @@ -28,6 +28,8 @@ package org.broadinstitute.sting.utils.locusiterator; import net.sf.samtools.*; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.downsampling.DownsampleType; +import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; import org.broadinstitute.sting.utils.NGSPlatform; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -345,11 +347,20 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { public Object[][] makeLIBSKeepSubmittedReads() { final List tests = new LinkedList(); - for ( final int nReadsPerLocus : Arrays.asList(1, 10) ) { - for ( final int nLoci : Arrays.asList(1, 10, 100, 1000) ) { - for ( final int nSamples : Arrays.asList(1, 2, 100) ) { - for ( final boolean keepReads : Arrays.asList(true, false) ) { - tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads}); + for ( final boolean doSampling : Arrays.asList(true, false) ) { + for ( final int 
nReadsPerLocus : Arrays.asList(1, 10) ) { + for ( final int nLoci : Arrays.asList(1, 10, 25) ) { + for ( final int nSamples : Arrays.asList(1, 2, 10) ) { + for ( final boolean keepReads : Arrays.asList(true, false) ) { + for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true, false) ) { +// for ( final int nReadsPerLocus : Arrays.asList(1) ) { +// for ( final int nLoci : Arrays.asList(10) ) { +// for ( final int nSamples : Arrays.asList(1) ) { +// for ( final boolean keepReads : Arrays.asList(true) ) { +// for ( final boolean grabReadsAfterEachCycle : Arrays.asList(true) ) { + tests.add(new Object[]{nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, doSampling}); + } + } } } } @@ -358,27 +369,117 @@ public class LocusIteratorByStateUnitTest extends LocusIteratorByStateBaseTest { return tests.toArray(new Object[][]{}); } - @Test(enabled = false, dataProvider = "LIBSKeepSubmittedReads") - public void testLIBSKeepSubmittedReads(final int nReadsPerLocus, final int nLoci, final int nSamples, final boolean keepReads) { + @Test(enabled = true, dataProvider = "LIBSKeepSubmittedReads") + public void testLIBSKeepSubmittedReads(final int nReadsPerLocus, + final int nLoci, + final int nSamples, + final boolean keepReads, + final boolean grabReadsAfterEachCycle, + final boolean downsample) { + logger.warn(String.format("testLIBSKeepSubmittedReads %d %d %d %b %b %b", nReadsPerLocus, nLoci, nSamples, keepReads, grabReadsAfterEachCycle, downsample)); final int readLength = 10; final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100000); + final List samples = new ArrayList(nSamples); for ( int i = 0; i < nSamples; i++ ) { final GATKSAMReadGroupRecord rg = new GATKSAMReadGroupRecord("rg" + i); - rg.setSample("sample" + i); + final String sample = "sample" + i; + samples.add(sample); + rg.setSample(sample); rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform()); header.addReadGroup(rg); } + final int 
maxCoveragePerSampleAtLocus = nReadsPerLocus * readLength / 2; + final int maxDownsampledCoverage = Math.max(maxCoveragePerSampleAtLocus / 2, 1); + final DownsamplingMethod downsampler = downsample + ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, maxDownsampledCoverage, null, false) + : new DownsamplingMethod(DownsampleType.NONE, null, null, false); final List reads = ArtificialSAMUtils.createReadStream(nReadsPerLocus, nLoci, header, 1, readLength); - li = makeLTBS(reads, createTestReadProperties()); + li = new LocusIteratorByState(new FakeCloseableIterator(reads.iterator()), + createTestReadProperties(downsampler, keepReads), + genomeLocParser, + samples); + final Set seenSoFar = new HashSet(); + final Set keptReads = new HashSet(); int bpVisited = 0; while ( li.hasNext() ) { bpVisited++; + final AlignmentContext alignmentContext = li.next(); + final ReadBackedPileup p = alignmentContext.getBasePileup(); + + if ( downsample ) { + // just not a safe test + //Assert.assertTrue(p.getNumberOfElements() <= maxDownsampledCoverage * nSamples, "Too many reads at locus after downsampling"); + } else { + final int minPileupSize = nReadsPerLocus * nSamples; + Assert.assertTrue(p.getNumberOfElements() >= minPileupSize); + } + + seenSoFar.addAll(p.getReads()); + if ( keepReads && grabReadsAfterEachCycle ) { + final List locusReads = li.transferReadsFromAllPreviousPileups(); + + // the number of reads starting here + int nReadsStartingHere = 0; + for ( final SAMRecord read : p.getReads() ) + if ( read.getAlignmentStart() == alignmentContext.getPosition() ) + nReadsStartingHere++; + + if ( downsample ) + // with downsampling we might have some reads here that were downsampled away + // in the pileup + Assert.assertTrue(locusReads.size() >= nReadsStartingHere); + else + Assert.assertEquals(locusReads.size(), nReadsStartingHere); + keptReads.addAll(locusReads); + + // check that all reads we've seen so far are in our keptReads + for ( final SAMRecord read : seenSoFar ) { + 
Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read); + } + } + + if ( ! keepReads ) + Assert.assertTrue(li.getReadsFromAllPreviousPileups().isEmpty(), "Not keeping reads but the underlying list of reads isn't empty"); } - final int expectedBpToVisit = nLoci + readLength; - Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp"); + if ( keepReads && ! grabReadsAfterEachCycle ) + keptReads.addAll(li.transferReadsFromAllPreviousPileups()); + + if ( ! downsample ) { // downsampling may drop loci + final int expectedBpToVisit = nLoci + readLength - 1; + Assert.assertEquals(bpVisited, expectedBpToVisit, "Didn't visit the expected number of bp"); + } + + if ( keepReads ) { + // check we have the right number of reads + final int totalReads = nLoci * nReadsPerLocus * nSamples; + if ( ! downsample ) { // downsampling may drop reads + Assert.assertEquals(keptReads.size(), totalReads, "LIBS didn't keep the right number of reads during the traversal"); + + // check that the order of reads is the same as in our read list + for ( int i = 0; i < reads.size(); i++ ) { + final SAMRecord inputRead = reads.get(i); + final SAMRecord keptRead = reads.get(i); + Assert.assertSame(keptRead, inputRead, "Input reads and kept reads differ at position " + i); + } + } else { + Assert.assertTrue(keptReads.size() <= totalReads, "LIBS didn't keep the right number of reads during the traversal"); + } + + // check uniqueness + final Set readNames = new HashSet(); + for ( final SAMRecord read : keptReads ) { + Assert.assertFalse(readNames.contains(read.getReadName()), "Found duplicate reads in the kept reads"); + readNames.add(read.getReadName()); + } + + // check that all reads we've seen are in our keptReads + for ( final SAMRecord read : seenSoFar ) { + Assert.assertTrue(keptReads.contains(read), "A read that appeared in a pileup wasn't found in the kept reads: " + read); + } + } } } diff --git 
a/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java index fd43adabc..7b792462c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/ReadStateManagerUnitTest.java @@ -25,25 +25,10 @@ package org.broadinstitute.sting.utils.locusiterator; -import net.sf.samtools.*; -import net.sf.samtools.util.CloseableIterator; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.ReadProperties; -import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; -import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod; -import org.broadinstitute.sting.gatk.filters.ReadFilter; -import org.broadinstitute.sting.gatk.iterators.ReadTransformer; -import org.broadinstitute.sting.utils.GenomeLocParser; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.pileup.PileupElement; -import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.testng.Assert; -import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -79,14 +64,9 @@ public class ReadStateManagerUnitTest extends LocusIteratorByStateBaseTest { public void run() { final List samples = sampleListForSAMWithoutReadGroups(); final Iterator iterator = new LinkedList().iterator(); - ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING); + 
ReadStateManager readStateManager = new ReadStateManager(iterator, samples, LIBSDownsamplingInfo.NO_DOWNSAMPLING, false); ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = readStateManager.new PerSampleReadStateManager(LIBSDownsamplingInfo.NO_DOWNSAMPLING); -// ReadStateManager readStateManager = -// libs.new ReadStateManager(new ArrayList().iterator()); -// ReadStateManager.PerSampleReadStateManager perSampleReadStateManager = -// readStateManager.new PerSampleReadStateManager(); - makeReads(); for ( ArrayList stackRecordStates : recordStatesByAlignmentStart ) {