More RR-related updates and tests.

- ReduceReads by default now sets up-front ReadWalker downsampling to 40x per start position.
   - This is the value I used in my tests with Picard to show that memory issues pretty much disappeared.
   - This should hopefully take care of the memory issues being reported on the forum.
- Added javadocs to SlidingWindow (the main RR class) to follow GATK conventions.
- Added more unit tests to increase coverage of BaseCounts class.
- Added more unit tests to test I/D operators in the SlidingWindow class.
This commit is contained in:
Eric Banks 2013-02-04 12:57:43 -05:00
parent 971ded341b
commit 2d518f3063
4 changed files with 167 additions and 59 deletions

View File

@ -56,13 +56,11 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
import org.broadinstitute.sting.gatk.filters.*; import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.clipping.ReadClipper;
@ -107,6 +105,7 @@ import java.util.*;
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
@PartitionBy(PartitionType.CONTIG) @PartitionBy(PartitionType.CONTIG)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class}) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=40)
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> { public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
@Output @Output

View File

@ -198,8 +198,10 @@ public class SlidingWindow {
* sliding process. * sliding process.
* *
* @param read the read * @param read the read
* @return a list of reads that have been finished by sliding the window. * @return a non-null list of reads (in the CompressionStash) that have been finished by sliding the window.
*/ */
@Requires({"read != null"})
@Ensures("result != null")
public CompressionStash addRead(GATKSAMRecord read) { public CompressionStash addRead(GATKSAMRecord read) {
addToHeader(windowHeader, read); // update the window header counts addToHeader(windowHeader, read); // update the window header counts
readsInWindow.add(read); // add read to sliding reads readsInWindow.add(read); // add read to sliding reads
@ -210,8 +212,8 @@ public class SlidingWindow {
* Returns the next complete (or incomplete if closeLastRegion is true) variant region between 'from' (inclusive) and 'to' (exclusive) * Returns the next complete (or incomplete if closeLastRegion is true) variant region between 'from' (inclusive) and 'to' (exclusive)
* but converted to global coordinates. * but converted to global coordinates.
* *
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates * @param from beginning window header index of the search window (inclusive) in local (to the windowHeader) coordinates
* @param to end window header index of the search window (exclusive); note that this uses local coordinates * @param to end window header index of the search window (exclusive) in local (to the windowHeader) coordinates
* @param variantSite boolean array with true marking variant regions * @param variantSite boolean array with true marking variant regions
* @param closeLastRegion if the last index is variant (so it's an incomplete region), should we close (and return as an interval) the location or ignore it? * @param closeLastRegion if the last index is variant (so it's an incomplete region), should we close (and return as an interval) the location or ignore it?
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region. All coordinates returned are global. * @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region. All coordinates returned are global.
@ -238,8 +240,8 @@ public class SlidingWindow {
/** /**
* Creates a list with all the complete and incomplete variant regions within 'from' (inclusive) and 'to' (exclusive) * Creates a list with all the complete and incomplete variant regions within 'from' (inclusive) and 'to' (exclusive)
* *
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates * @param from beginning window header index of the search window (inclusive) in local (to the windowHeader) coordinates
* @param to end window header index of the search window (exclusive); note that this uses local coordinates * @param to end window header index of the search window (exclusive) in local (to the windowHeader) coordinates
* @param variantSite boolean array with true marking variant regions * @param variantSite boolean array with true marking variant regions
* @return a list with start/stops of variant regions following findNextVariantRegion description in global coordinates * @return a list with start/stops of variant regions following findNextVariantRegion description in global coordinates
*/ */
@ -395,10 +397,14 @@ public class SlidingWindow {
* *
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
* *
* @param header the window header
* @param start the first header index to add to consensus * @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus * @param end the first header index NOT TO add to consensus
* @return a list of consensus reads generated by this call. Empty list if no consensus was generated. * @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
*/ */
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
protected List<GATKSAMRecord> addToSyntheticReads(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) { protected List<GATKSAMRecord> addToSyntheticReads(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
LinkedList<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>(); LinkedList<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
if (start < end) { if (start < end) {
@ -450,7 +456,7 @@ public class SlidingWindow {
* Finalizes one or more synthetic reads. * Finalizes one or more synthetic reads.
* *
* @param type the synthetic reads you want to close * @param type the synthetic reads you want to close
* @return the GATKSAMRecords generated by finalizing the synthetic reads * @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
*/ */
private List<GATKSAMRecord> finalizeAndAdd(ConsensusType type) { private List<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
GATKSAMRecord read = null; GATKSAMRecord read = null;
@ -479,7 +485,7 @@ public class SlidingWindow {
* *
* @param start beginning of the filtered region * @param start beginning of the filtered region
* @param upTo limit to search for another consensus element * @param upTo limit to search for another consensus element
* @return next position with consensus data or empty * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
*/ */
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) { private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
Iterator<HeaderElement> headerElementIterator = header.listIterator(start); Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
@ -501,7 +507,7 @@ public class SlidingWindow {
* *
* @param start beginning of the region * @param start beginning of the region
* @param upTo limit to search for * @param upTo limit to search for
* @return next position with no filtered data * @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position
*/ */
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) { private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
Iterator<HeaderElement> headerElementIterator = header.listIterator(start); Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
@ -523,7 +529,7 @@ public class SlidingWindow {
* *
* @param start beginning of the region * @param start beginning of the region
* @param upTo limit to search for * @param upTo limit to search for
* @return next position with non-empty element * @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position
*/ */
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) { private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start); ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
@ -544,12 +550,16 @@ public class SlidingWindow {
/** /**
* Adds bases to the filtered data synthetic read. * Adds bases to the filtered data synthetic read.
* *
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData * Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
* bases.
* *
* @param header the window header
* @param start the first header index to add to consensus * @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus * @param end the first header index NOT TO add to consensus
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
* @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated.
*/ */
@Requires({"start >= 0 && (end >= start || end == 0)"})
@Ensures("result != null")
private List<GATKSAMRecord> addToFilteredData(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) { private List<GATKSAMRecord> addToFilteredData(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
List<GATKSAMRecord> result = new ArrayList<GATKSAMRecord>(0); List<GATKSAMRecord> result = new ArrayList<GATKSAMRecord>(0);
@ -585,9 +595,12 @@ public class SlidingWindow {
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData * Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
* bases. * bases.
* *
* @param header the window header
* @param start the first header index to add to consensus * @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus * @param end the first header index NOT TO add to consensus
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
*/ */
@Requires({"start >= 0 && (end >= start || end == 0)"})
private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) { private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
if (runningConsensus == null) if (runningConsensus == null)
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand); runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
@ -621,6 +634,16 @@ public class SlidingWindow {
syntheticRead.add(base, count, qual, insQual, delQual, rms); syntheticRead.add(base, count, qual, insQual, delQual, rms);
} }
/**
* Method to compress a variant region and return the associated reduced reads
*
* @param start the first window header index in the variant region (inclusive)
* @param stop the last window header index of the variant region (inclusive)
* @param disallowPolyploidReductionAtThisPosition should we disallow polyploid (het) compression here?
* @return a non-null list of all reads contained in the variant region
*/
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
@Ensures("result != null")
private List<GATKSAMRecord> compressVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) { private List<GATKSAMRecord> compressVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>(); List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
@ -686,9 +709,11 @@ public class SlidingWindow {
* *
* @param start the first window header index in the variant region (inclusive) * @param start the first window header index in the variant region (inclusive)
* @param stop the last window header index of the variant region (inclusive) * @param stop the last window header index of the variant region (inclusive)
* @return all reads contained in the variant region plus any adjacent synthetic reads * @param disallowPolyploidReductionAtThisPosition should we disallow polyploid (het) compression here?
* @return a non-null list of all reads contained in the variant region plus any adjacent synthetic reads
*/ */
@Requires("start <= stop") @Requires({"start >= 0 && (stop >= start || stop == 0)"})
@Ensures("result != null")
protected List<GATKSAMRecord> closeVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) { protected List<GATKSAMRecord> closeVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
List<GATKSAMRecord> allReads = compressVariantRegion(start, stop, disallowPolyploidReductionAtThisPosition); List<GATKSAMRecord> allReads = compressVariantRegion(start, stop, disallowPolyploidReductionAtThisPosition);
@ -733,9 +758,11 @@ public class SlidingWindow {
* *
* It will use the downsampling strategy defined by the SlidingWindow * It will use the downsampling strategy defined by the SlidingWindow
* *
* @param allReads the reads to select from (all reads that cover the window) * @param allReads a non-null list of reads to select from (all reads that cover the window)
* @return a list of reads selected by the downsampler to cover the window to at least the desired coverage * @return a non-null list of reads selected by the downsampler to cover the window to at least the desired coverage
*/ */
@Requires({"allReads != null"})
@Ensures("result != null")
protected List<GATKSAMRecord> downsampleVariantRegion(final List<GATKSAMRecord> allReads) { protected List<GATKSAMRecord> downsampleVariantRegion(final List<GATKSAMRecord> allReads) {
int nReads = allReads.size(); int nReads = allReads.size();
if (nReads == 0) if (nReads == 0)
@ -755,8 +782,9 @@ public class SlidingWindow {
* regions that still exist regardless of being able to fulfill the * regions that still exist regardless of being able to fulfill the
* context size requirement in the end. * context size requirement in the end.
* *
* @return All reads generated * @return A non-null set/list of all reads generated
*/ */
@Ensures("result != null")
public Pair<Set<GATKSAMRecord>, CompressionStash> close() { public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
// mark variant regions // mark variant regions
Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator()); Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
@ -780,7 +808,7 @@ public class SlidingWindow {
/** /**
* generates the SAM record for the running consensus read and resets it (to null) * generates the SAM record for the running consensus read and resets it (to null)
* *
* @return the read contained in the running consensus * @return the read contained in the running consensus or null
*/ */
protected GATKSAMRecord finalizeRunningConsensus() { protected GATKSAMRecord finalizeRunningConsensus() {
GATKSAMRecord finalizedRead = null; GATKSAMRecord finalizedRead = null;
@ -798,7 +826,7 @@ public class SlidingWindow {
/** /**
* generates the SAM record for the filtered data consensus and resets it (to null) * generates the SAM record for the filtered data consensus and resets it (to null)
* *
* @return the read contained in the running consensus * @return the read contained in the running consensus or null
*/ */
protected GATKSAMRecord finalizeFilteredDataConsensus() { protected GATKSAMRecord finalizeFilteredDataConsensus() {
GATKSAMRecord finalizedRead = null; GATKSAMRecord finalizedRead = null;
@ -813,9 +841,19 @@ public class SlidingWindow {
return finalizedRead; return finalizedRead;
} }
/**
* Finalizes a variant region, any adjacent synthetic reads.
private List<GATKSAMRecord> createPolyploidConsensus(int start, int stop, int nHaplotypes, int hetRefPosition) { *
* @param start the first window header index in the variant region (inclusive)
* @param stop the last window header index of the variant region (inclusive)
* @param nHaplotypes the number of haplotypes to use
* @param hetRefPosition reference position (in global coordinates) of the het site
* @return a non-null list of all reads contained in the variant region as a polyploid consensus
*/
// TODO -- Why do we need the nHaplotypes argument? It is not enforced at all... [EB]
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
@Ensures("result != null")
private List<GATKSAMRecord> createPolyploidConsensus(final int start, final int stop, final int nHaplotypes, final int hetRefPosition) {
// we will create two (positive strand, negative strand) headers for each contig // we will create two (positive strand, negative strand) headers for each contig
List<LinkedList<HeaderElement>> headersPosStrand = new ArrayList<LinkedList<HeaderElement>>(); List<LinkedList<HeaderElement>> headersPosStrand = new ArrayList<LinkedList<HeaderElement>>();
List<LinkedList<HeaderElement>> headersNegStrand = new ArrayList<LinkedList<HeaderElement>>(); List<LinkedList<HeaderElement>> headersNegStrand = new ArrayList<LinkedList<HeaderElement>>();
@ -902,7 +940,7 @@ public class SlidingWindow {
* @param read the incoming read to be added to the sliding window * @param read the incoming read to be added to the sliding window
* @param removeRead if we are removing the read from the header or adding * @param removeRead if we are removing the read from the header or adding
*/ */
private void updateHeaderCounts(LinkedList<HeaderElement> header, GATKSAMRecord read, boolean removeRead) { private void updateHeaderCounts(final LinkedList<HeaderElement> header, final GATKSAMRecord read, final boolean removeRead) {
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();
byte[] quals = read.getBaseQualities(); byte[] quals = read.getBaseQualities();
byte[] insQuals = read.getExistingBaseInsertionQualities(); byte[] insQuals = read.getExistingBaseInsertionQualities();

View File

@ -47,9 +47,6 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads; package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
import org.testng.Assert; import org.testng.Assert;
import org.testng.annotations.DataProvider; import org.testng.annotations.DataProvider;
@ -62,12 +59,12 @@ import java.util.List;
* Basic unit test for BaseCounts in reduced reads * Basic unit test for BaseCounts in reduced reads
*/ */
public class BaseCountsUnitTest extends BaseTest { public class BaseCountsUnitTest extends BaseTest {
private class SingleTest { private class BaseCountsTest {
public String bases; public String bases;
public byte mostCountBase; public byte mostCountBase;
public int mostCommonCount; public int mostCommonCount;
private SingleTest(String bases, char mostCountBase, int mostCommonCount) { private BaseCountsTest(String bases, char mostCountBase, int mostCommonCount) {
this.mostCommonCount = mostCommonCount; this.mostCommonCount = mostCommonCount;
this.mostCountBase = (byte)mostCountBase; this.mostCountBase = (byte)mostCountBase;
this.bases = bases; this.bases = bases;
@ -77,30 +74,28 @@ public class BaseCountsUnitTest extends BaseTest {
@DataProvider(name = "data") @DataProvider(name = "data")
public Object[][] createData1() { public Object[][] createData1() {
List<SingleTest> params = new ArrayList<SingleTest>(); List<BaseCountsTest> params = new ArrayList<BaseCountsTest>();
params.add(new SingleTest("A", 'A', 1 )); params.add(new BaseCountsTest("A", 'A', 1 ));
params.add(new SingleTest("AA", 'A', 2 )); params.add(new BaseCountsTest("AA", 'A', 2 ));
params.add(new SingleTest("AC", 'A', 1 )); params.add(new BaseCountsTest("AC", 'A', 1 ));
params.add(new SingleTest("AAC", 'A', 2 )); params.add(new BaseCountsTest("AAC", 'A', 2 ));
params.add(new SingleTest("AAA", 'A', 3 )); params.add(new BaseCountsTest("AAA", 'A', 3 ));
params.add(new SingleTest("AAAN", 'A', 3 )); params.add(new BaseCountsTest("AAAN", 'A', 3 ));
params.add(new SingleTest("AAANNNN", 'N', 4 )); params.add(new BaseCountsTest("AAANNNN", 'N', 4 ));
params.add(new SingleTest("AACTG", 'A', 2 )); params.add(new BaseCountsTest("AACTG", 'A', 2 ));
params.add(new SingleTest("D", 'D', 1 )); params.add(new BaseCountsTest("D", 'D', 1 ));
params.add(new SingleTest("DDAAD", 'D', 3)); params.add(new BaseCountsTest("DDAAD", 'D', 3));
params.add(new SingleTest("", (char)BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0 )); params.add(new BaseCountsTest("", (char)BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0 ));
params.add(new SingleTest("AAIIIAI", 'I', 4 )); params.add(new BaseCountsTest("AAIIIAI", 'I', 4 ));
List<Object[]> params2 = new ArrayList<Object[]>(); List<Object[]> params2 = new ArrayList<Object[]>();
for ( SingleTest x : params ) params2.add(new Object[]{x}); for ( BaseCountsTest x : params ) params2.add(new Object[]{x});
return params2.toArray(new Object[][]{}); return params2.toArray(new Object[][]{});
} }
@Test(dataProvider = "data", enabled = true) @Test(dataProvider = "data", enabled = true)
public void testCounting(SingleTest params) { public void testCounting(BaseCountsTest params) {
BaseCounts counts = new BaseCounts(); BaseCounts counts = new BaseCounts();
for ( byte base : params.bases.getBytes() ) for ( byte base : params.bases.getBytes() )
@ -110,5 +105,44 @@ public class BaseCountsUnitTest extends BaseTest {
Assert.assertEquals(counts.totalCount(), params.bases.length(), name); Assert.assertEquals(counts.totalCount(), params.bases.length(), name);
Assert.assertEquals(counts.countOfBase(counts.baseIndexWithMostCounts()), params.mostCommonCount, name); Assert.assertEquals(counts.countOfBase(counts.baseIndexWithMostCounts()), params.mostCommonCount, name);
Assert.assertEquals((char)counts.baseWithMostCounts(), (char)params.mostCountBase, name); Assert.assertEquals((char)counts.baseWithMostCounts(), (char)params.mostCountBase, name);
// test the static creation
final int[] countsArray = new int[] { counts.countOfBase(BaseIndex.A), counts.countOfBase(BaseIndex.C),
counts.countOfBase(BaseIndex.G), counts.countOfBase(BaseIndex.T)};
final BaseCounts countsFromArray = BaseCounts.createWithCounts(countsArray);
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A));
Assert.assertEquals(counts.countOfBase(BaseIndex.C), countsFromArray.countOfBase(BaseIndex.C));
Assert.assertEquals(counts.countOfBase(BaseIndex.G), countsFromArray.countOfBase(BaseIndex.G));
Assert.assertEquals(counts.countOfBase(BaseIndex.T), countsFromArray.countOfBase(BaseIndex.T));
Assert.assertEquals(ACGTcounts(counts), countsFromArray.totalCount());
// test addition
counts.add(countsFromArray);
Assert.assertEquals(counts.countOfBase(BaseIndex.A), 2 * countsFromArray.countOfBase(BaseIndex.A));
Assert.assertEquals(counts.countOfBase(BaseIndex.C), 2 * countsFromArray.countOfBase(BaseIndex.C));
Assert.assertEquals(counts.countOfBase(BaseIndex.G), 2 * countsFromArray.countOfBase(BaseIndex.G));
Assert.assertEquals(counts.countOfBase(BaseIndex.T), 2 * countsFromArray.countOfBase(BaseIndex.T));
Assert.assertEquals(ACGTcounts(counts), 2 * countsFromArray.totalCount());
// test subtraction
counts.sub(countsFromArray);
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A));
Assert.assertEquals(counts.countOfBase(BaseIndex.C), countsFromArray.countOfBase(BaseIndex.C));
Assert.assertEquals(counts.countOfBase(BaseIndex.G), countsFromArray.countOfBase(BaseIndex.G));
Assert.assertEquals(counts.countOfBase(BaseIndex.T), countsFromArray.countOfBase(BaseIndex.T));
Assert.assertEquals(ACGTcounts(counts), countsFromArray.totalCount());
// test decrementing
if ( counts.countOfBase(BaseIndex.A) > 0 ) {
counts.decr((byte)'A');
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A) - 1);
} }
}
private static int ACGTcounts(final BaseCounts baseCounts) {
return baseCounts.totalCountWithoutIndels() - baseCounts.countOfBase(BaseIndex.N);
}
} }

View File

@ -47,6 +47,9 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads; package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.BaseTest;
@ -258,24 +261,48 @@ public class SlidingWindowUnitTest extends BaseTest {
// then add the permuted reads // then add the permuted reads
for ( final GenomeLoc loc : locs ) for ( final GenomeLoc loc : locs )
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality)); myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
} }
private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality) { private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads) {
this.expectedNumberOfReads = expectedNumberOfReads;
// first, add the basic reads to the collection
myReads.addAll(basicReads);
// then add the permuted reads
for ( final GenomeLoc loc : locs )
myReads.add(createVariantRead(loc, false, false, operator));
}
private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality,
final boolean variantBaseShouldBeLowQuality, final CigarOperator operator) {
final int startPos = loc.getStart() - 50; final int startPos = loc.getStart() - 50;
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead" + startPos, 0, startPos, readLength); final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead" + startPos, 0, startPos, readLength);
final byte[] bases = Utils.dupBytes((byte) 'A', readLength); final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
// create a mismatch // create a mismatch if requested
if ( operator == CigarOperator.M )
bases[50] = 'C'; bases[50] = 'C';
read.setReadBases(bases); read.setReadBases(bases);
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength); final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
if ( variantBaseShouldBeLowQuality ) if ( variantBaseShouldBeLowQuality )
baseQuals[50] = (byte)10; baseQuals[50] = (byte)10;
read.setBaseQualities(baseQuals); read.setBaseQualities(baseQuals);
final byte mappingQual = readShouldBeLowQuality ? (byte)10 : (byte)30; final byte mappingQual = readShouldBeLowQuality ? (byte)10 : (byte)30;
read.setMappingQuality(mappingQual); read.setMappingQuality(mappingQual);
if ( operator != CigarOperator.M ) {
final List<CigarElement> elements = new ArrayList<CigarElement>(3);
elements.add(new CigarElement(operator == CigarOperator.D ? 50 : 51, CigarOperator.M));
elements.add(new CigarElement(1, operator));
elements.add(new CigarElement(operator == CigarOperator.D ? 50 : 48, CigarOperator.M));
read.setCigar(new Cigar(elements));
}
return read; return read;
} }
} }
@ -315,6 +342,16 @@ public class SlidingWindowUnitTest extends BaseTest {
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 3)}); tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 3)});
// test I/D operators
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10)});
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11)});
return tests.toArray(new Object[][]{}); return tests.toArray(new Object[][]{});
} }