More RR-related updates and tests.
- ReduceReads by default now sets up-front ReadWalker downsampling to 40x per start position. - This is the value I used in my tests with Picard to show that memory issues pretty much disappeared. - This should hopefully take care of the memory issues being reported on the forum. - Added javadocs to SlidingWindow (the main RR class) to follow GATK conventions. - Added more unit tests to increase coverage of BaseCounts class. - Added more unit tests to test I/D operators in the SlidingWindow class.
This commit is contained in:
parent
971ded341b
commit
2d518f3063
|
|
@ -56,13 +56,11 @@ import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||||
import org.broadinstitute.sting.gatk.filters.*;
|
import org.broadinstitute.sting.gatk.filters.*;
|
||||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||||
|
|
@ -107,6 +105,7 @@ import java.util.*;
|
||||||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||||
@PartitionBy(PartitionType.CONTIG)
|
@PartitionBy(PartitionType.CONTIG)
|
||||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
|
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
|
||||||
|
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=40)
|
||||||
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
|
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
|
|
|
||||||
|
|
@ -198,8 +198,10 @@ public class SlidingWindow {
|
||||||
* sliding process.
|
* sliding process.
|
||||||
*
|
*
|
||||||
* @param read the read
|
* @param read the read
|
||||||
* @return a list of reads that have been finished by sliding the window.
|
* @return a non-null list of reads (in the CompressionStash) that have been finished by sliding the window.
|
||||||
*/
|
*/
|
||||||
|
@Requires({"read != null"})
|
||||||
|
@Ensures("result != null")
|
||||||
public CompressionStash addRead(GATKSAMRecord read) {
|
public CompressionStash addRead(GATKSAMRecord read) {
|
||||||
addToHeader(windowHeader, read); // update the window header counts
|
addToHeader(windowHeader, read); // update the window header counts
|
||||||
readsInWindow.add(read); // add read to sliding reads
|
readsInWindow.add(read); // add read to sliding reads
|
||||||
|
|
@ -210,8 +212,8 @@ public class SlidingWindow {
|
||||||
* Returns the next complete (or incomplete if closeLastRegion is true) variant region between 'from' (inclusive) and 'to' (exclusive)
|
* Returns the next complete (or incomplete if closeLastRegion is true) variant region between 'from' (inclusive) and 'to' (exclusive)
|
||||||
* but converted to global coordinates.
|
* but converted to global coordinates.
|
||||||
*
|
*
|
||||||
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates
|
* @param from beginning window header index of the search window (inclusive) in local (to the windowHeader) coordinates
|
||||||
* @param to end window header index of the search window (exclusive); note that this uses local coordinates
|
* @param to end window header index of the search window (exclusive) in local (to the windowHeader) coordinates
|
||||||
* @param variantSite boolean array with true marking variant regions
|
* @param variantSite boolean array with true marking variant regions
|
||||||
* @param closeLastRegion if the last index is variant (so it's an incomplete region), should we close (and return as an interval) the location or ignore it?
|
* @param closeLastRegion if the last index is variant (so it's an incomplete region), should we close (and return as an interval) the location or ignore it?
|
||||||
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region. All coordinates returned are global.
|
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region. All coordinates returned are global.
|
||||||
|
|
@ -238,8 +240,8 @@ public class SlidingWindow {
|
||||||
/**
|
/**
|
||||||
* Creates a list with all the complete and incomplete variant regions within 'from' (inclusive) and 'to' (exclusive)
|
* Creates a list with all the complete and incomplete variant regions within 'from' (inclusive) and 'to' (exclusive)
|
||||||
*
|
*
|
||||||
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates
|
* @param from beginning window header index of the search window (inclusive) in local (to the windowHeader) coordinates
|
||||||
* @param to end window header index of the search window (exclusive); note that this uses local coordinates
|
* @param to end window header index of the search window (exclusive) in local (to the windowHeader) coordinates
|
||||||
* @param variantSite boolean array with true marking variant regions
|
* @param variantSite boolean array with true marking variant regions
|
||||||
* @return a list with start/stops of variant regions following findNextVariantRegion description in global coordinates
|
* @return a list with start/stops of variant regions following findNextVariantRegion description in global coordinates
|
||||||
*/
|
*/
|
||||||
|
|
@ -395,10 +397,14 @@ public class SlidingWindow {
|
||||||
*
|
*
|
||||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||||
*
|
*
|
||||||
* @param start the first header index to add to consensus
|
* @param header the window header
|
||||||
* @param end the first header index NOT TO add to consensus
|
* @param start the first header index to add to consensus
|
||||||
* @return a list of consensus reads generated by this call. Empty list if no consensus was generated.
|
* @param end the first header index NOT TO add to consensus
|
||||||
|
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||||
|
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||||
*/
|
*/
|
||||||
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
|
@Ensures("result != null")
|
||||||
protected List<GATKSAMRecord> addToSyntheticReads(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
protected List<GATKSAMRecord> addToSyntheticReads(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||||
LinkedList<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
LinkedList<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||||
if (start < end) {
|
if (start < end) {
|
||||||
|
|
@ -450,7 +456,7 @@ public class SlidingWindow {
|
||||||
* Finalizes one or more synthetic reads.
|
* Finalizes one or more synthetic reads.
|
||||||
*
|
*
|
||||||
* @param type the synthetic reads you want to close
|
* @param type the synthetic reads you want to close
|
||||||
* @return the GATKSAMRecords generated by finalizing the synthetic reads
|
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
|
||||||
*/
|
*/
|
||||||
private List<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
|
private List<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
|
||||||
GATKSAMRecord read = null;
|
GATKSAMRecord read = null;
|
||||||
|
|
@ -479,7 +485,7 @@ public class SlidingWindow {
|
||||||
*
|
*
|
||||||
* @param start beginning of the filtered region
|
* @param start beginning of the filtered region
|
||||||
* @param upTo limit to search for another consensus element
|
* @param upTo limit to search for another consensus element
|
||||||
* @return next position with consensus data or empty
|
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||||
*/
|
*/
|
||||||
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
|
@ -501,7 +507,7 @@ public class SlidingWindow {
|
||||||
*
|
*
|
||||||
* @param start beginning of the region
|
* @param start beginning of the region
|
||||||
* @param upTo limit to search for
|
* @param upTo limit to search for
|
||||||
* @return next position with no filtered data
|
* @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position
|
||||||
*/
|
*/
|
||||||
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
|
@ -523,7 +529,7 @@ public class SlidingWindow {
|
||||||
*
|
*
|
||||||
* @param start beginning of the region
|
* @param start beginning of the region
|
||||||
* @param upTo limit to search for
|
* @param upTo limit to search for
|
||||||
* @return next position with non-empty element
|
* @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position
|
||||||
*/
|
*/
|
||||||
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||||
|
|
@ -544,12 +550,16 @@ public class SlidingWindow {
|
||||||
/**
|
/**
|
||||||
* Adds bases to the filtered data synthetic read.
|
* Adds bases to the filtered data synthetic read.
|
||||||
*
|
*
|
||||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
|
||||||
* bases.
|
|
||||||
*
|
*
|
||||||
* @param start the first header index to add to consensus
|
* @param header the window header
|
||||||
* @param end the first header index NOT TO add to consensus
|
* @param start the first header index to add to consensus
|
||||||
|
* @param end the first header index NOT TO add to consensus
|
||||||
|
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||||
|
* @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated.
|
||||||
*/
|
*/
|
||||||
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
|
@Ensures("result != null")
|
||||||
private List<GATKSAMRecord> addToFilteredData(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
private List<GATKSAMRecord> addToFilteredData(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||||
List<GATKSAMRecord> result = new ArrayList<GATKSAMRecord>(0);
|
List<GATKSAMRecord> result = new ArrayList<GATKSAMRecord>(0);
|
||||||
|
|
||||||
|
|
@ -585,9 +595,12 @@ public class SlidingWindow {
|
||||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||||
* bases.
|
* bases.
|
||||||
*
|
*
|
||||||
|
* @param header the window header
|
||||||
* @param start the first header index to add to consensus
|
* @param start the first header index to add to consensus
|
||||||
* @param end the first header index NOT TO add to consensus
|
* @param end the first header index NOT TO add to consensus
|
||||||
|
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||||
*/
|
*/
|
||||||
|
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||||
private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||||
if (runningConsensus == null)
|
if (runningConsensus == null)
|
||||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
|
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
|
||||||
|
|
@ -621,6 +634,16 @@ public class SlidingWindow {
|
||||||
syntheticRead.add(base, count, qual, insQual, delQual, rms);
|
syntheticRead.add(base, count, qual, insQual, delQual, rms);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method to compress a variant region and return the associated reduced reads
|
||||||
|
*
|
||||||
|
* @param start the first window header index in the variant region (inclusive)
|
||||||
|
* @param stop the last window header index of the variant region (inclusive)
|
||||||
|
* @param disallowPolyploidReductionAtThisPosition should we disallow polyploid (het) compression here?
|
||||||
|
* @return a non-null list of all reads contained in the variant region
|
||||||
|
*/
|
||||||
|
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||||
|
@Ensures("result != null")
|
||||||
private List<GATKSAMRecord> compressVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
private List<GATKSAMRecord> compressVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
||||||
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
|
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
|
||||||
|
|
||||||
|
|
@ -684,11 +707,13 @@ public class SlidingWindow {
|
||||||
/**
|
/**
|
||||||
* Finalizes a variant region, any adjacent synthetic reads.
|
* Finalizes a variant region, any adjacent synthetic reads.
|
||||||
*
|
*
|
||||||
* @param start the first window header index in the variant region (inclusive)
|
* @param start the first window header index in the variant region (inclusive)
|
||||||
* @param stop the last window header index of the variant region (inclusive)
|
* @param stop the last window header index of the variant region (inclusive)
|
||||||
* @return all reads contained in the variant region plus any adjacent synthetic reads
|
* @param disallowPolyploidReductionAtThisPosition should we disallow polyploid (het) compression here?
|
||||||
|
* @return a non-null list of all reads contained in the variant region plus any adjacent synthetic reads
|
||||||
*/
|
*/
|
||||||
@Requires("start <= stop")
|
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||||
|
@Ensures("result != null")
|
||||||
protected List<GATKSAMRecord> closeVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
protected List<GATKSAMRecord> closeVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
||||||
List<GATKSAMRecord> allReads = compressVariantRegion(start, stop, disallowPolyploidReductionAtThisPosition);
|
List<GATKSAMRecord> allReads = compressVariantRegion(start, stop, disallowPolyploidReductionAtThisPosition);
|
||||||
|
|
||||||
|
|
@ -733,9 +758,11 @@ public class SlidingWindow {
|
||||||
*
|
*
|
||||||
* It will use the downsampling strategy defined by the SlidingWindow
|
* It will use the downsampling strategy defined by the SlidingWindow
|
||||||
*
|
*
|
||||||
* @param allReads the reads to select from (all reads that cover the window)
|
* @param allReads a non-null list of reads to select from (all reads that cover the window)
|
||||||
* @return a list of reads selected by the downsampler to cover the window to at least the desired coverage
|
* @return a non-null list of reads selected by the downsampler to cover the window to at least the desired coverage
|
||||||
*/
|
*/
|
||||||
|
@Requires({"allReads != null"})
|
||||||
|
@Ensures("result != null")
|
||||||
protected List<GATKSAMRecord> downsampleVariantRegion(final List<GATKSAMRecord> allReads) {
|
protected List<GATKSAMRecord> downsampleVariantRegion(final List<GATKSAMRecord> allReads) {
|
||||||
int nReads = allReads.size();
|
int nReads = allReads.size();
|
||||||
if (nReads == 0)
|
if (nReads == 0)
|
||||||
|
|
@ -755,8 +782,9 @@ public class SlidingWindow {
|
||||||
* regions that still exist regardless of being able to fulfill the
|
* regions that still exist regardless of being able to fulfill the
|
||||||
* context size requirement in the end.
|
* context size requirement in the end.
|
||||||
*
|
*
|
||||||
* @return All reads generated
|
* @return A non-null set/list of all reads generated
|
||||||
*/
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
|
public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
|
||||||
// mark variant regions
|
// mark variant regions
|
||||||
Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||||
|
|
@ -780,7 +808,7 @@ public class SlidingWindow {
|
||||||
/**
|
/**
|
||||||
* generates the SAM record for the running consensus read and resets it (to null)
|
* generates the SAM record for the running consensus read and resets it (to null)
|
||||||
*
|
*
|
||||||
* @return the read contained in the running consensus
|
* @return the read contained in the running consensus or null
|
||||||
*/
|
*/
|
||||||
protected GATKSAMRecord finalizeRunningConsensus() {
|
protected GATKSAMRecord finalizeRunningConsensus() {
|
||||||
GATKSAMRecord finalizedRead = null;
|
GATKSAMRecord finalizedRead = null;
|
||||||
|
|
@ -798,7 +826,7 @@ public class SlidingWindow {
|
||||||
/**
|
/**
|
||||||
* generates the SAM record for the filtered data consensus and resets it (to null)
|
* generates the SAM record for the filtered data consensus and resets it (to null)
|
||||||
*
|
*
|
||||||
* @return the read contained in the running consensus
|
* @return the read contained in the running consensus or null
|
||||||
*/
|
*/
|
||||||
protected GATKSAMRecord finalizeFilteredDataConsensus() {
|
protected GATKSAMRecord finalizeFilteredDataConsensus() {
|
||||||
GATKSAMRecord finalizedRead = null;
|
GATKSAMRecord finalizedRead = null;
|
||||||
|
|
@ -813,9 +841,19 @@ public class SlidingWindow {
|
||||||
return finalizedRead;
|
return finalizedRead;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finalizes a variant region, any adjacent synthetic reads.
|
||||||
private List<GATKSAMRecord> createPolyploidConsensus(int start, int stop, int nHaplotypes, int hetRefPosition) {
|
*
|
||||||
|
* @param start the first window header index in the variant region (inclusive)
|
||||||
|
* @param stop the last window header index of the variant region (inclusive)
|
||||||
|
* @param nHaplotypes the number of haplotypes to use
|
||||||
|
* @param hetRefPosition reference position (in global coordinates) of the het site
|
||||||
|
* @return a non-null list of all reads contained in the variant region as a polyploid consensus
|
||||||
|
*/
|
||||||
|
// TODO -- Why do we need the nHaplotypes argument? It is not enforced at all... [EB]
|
||||||
|
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||||
|
@Ensures("result != null")
|
||||||
|
private List<GATKSAMRecord> createPolyploidConsensus(final int start, final int stop, final int nHaplotypes, final int hetRefPosition) {
|
||||||
// we will create two (positive strand, negative strand) headers for each contig
|
// we will create two (positive strand, negative strand) headers for each contig
|
||||||
List<LinkedList<HeaderElement>> headersPosStrand = new ArrayList<LinkedList<HeaderElement>>();
|
List<LinkedList<HeaderElement>> headersPosStrand = new ArrayList<LinkedList<HeaderElement>>();
|
||||||
List<LinkedList<HeaderElement>> headersNegStrand = new ArrayList<LinkedList<HeaderElement>>();
|
List<LinkedList<HeaderElement>> headersNegStrand = new ArrayList<LinkedList<HeaderElement>>();
|
||||||
|
|
@ -902,7 +940,7 @@ public class SlidingWindow {
|
||||||
* @param read the incoming read to be added to the sliding window
|
* @param read the incoming read to be added to the sliding window
|
||||||
* @param removeRead if we are removing the read from the header or adding
|
* @param removeRead if we are removing the read from the header or adding
|
||||||
*/
|
*/
|
||||||
private void updateHeaderCounts(LinkedList<HeaderElement> header, GATKSAMRecord read, boolean removeRead) {
|
private void updateHeaderCounts(final LinkedList<HeaderElement> header, final GATKSAMRecord read, final boolean removeRead) {
|
||||||
byte[] bases = read.getReadBases();
|
byte[] bases = read.getReadBases();
|
||||||
byte[] quals = read.getBaseQualities();
|
byte[] quals = read.getBaseQualities();
|
||||||
byte[] insQuals = read.getExistingBaseInsertionQualities();
|
byte[] insQuals = read.getExistingBaseInsertionQualities();
|
||||||
|
|
|
||||||
|
|
@ -47,9 +47,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||||
|
|
||||||
|
|
||||||
// the imports for unit testing.
|
|
||||||
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.testng.Assert;
|
import org.testng.Assert;
|
||||||
import org.testng.annotations.DataProvider;
|
import org.testng.annotations.DataProvider;
|
||||||
|
|
@ -62,12 +59,12 @@ import java.util.List;
|
||||||
* Basic unit test for BaseCounts in reduced reads
|
* Basic unit test for BaseCounts in reduced reads
|
||||||
*/
|
*/
|
||||||
public class BaseCountsUnitTest extends BaseTest {
|
public class BaseCountsUnitTest extends BaseTest {
|
||||||
private class SingleTest {
|
private class BaseCountsTest {
|
||||||
public String bases;
|
public String bases;
|
||||||
public byte mostCountBase;
|
public byte mostCountBase;
|
||||||
public int mostCommonCount;
|
public int mostCommonCount;
|
||||||
|
|
||||||
private SingleTest(String bases, char mostCountBase, int mostCommonCount) {
|
private BaseCountsTest(String bases, char mostCountBase, int mostCommonCount) {
|
||||||
this.mostCommonCount = mostCommonCount;
|
this.mostCommonCount = mostCommonCount;
|
||||||
this.mostCountBase = (byte)mostCountBase;
|
this.mostCountBase = (byte)mostCountBase;
|
||||||
this.bases = bases;
|
this.bases = bases;
|
||||||
|
|
@ -77,30 +74,28 @@ public class BaseCountsUnitTest extends BaseTest {
|
||||||
|
|
||||||
@DataProvider(name = "data")
|
@DataProvider(name = "data")
|
||||||
public Object[][] createData1() {
|
public Object[][] createData1() {
|
||||||
List<SingleTest> params = new ArrayList<SingleTest>();
|
List<BaseCountsTest> params = new ArrayList<BaseCountsTest>();
|
||||||
|
|
||||||
params.add(new SingleTest("A", 'A', 1 ));
|
params.add(new BaseCountsTest("A", 'A', 1 ));
|
||||||
params.add(new SingleTest("AA", 'A', 2 ));
|
params.add(new BaseCountsTest("AA", 'A', 2 ));
|
||||||
params.add(new SingleTest("AC", 'A', 1 ));
|
params.add(new BaseCountsTest("AC", 'A', 1 ));
|
||||||
params.add(new SingleTest("AAC", 'A', 2 ));
|
params.add(new BaseCountsTest("AAC", 'A', 2 ));
|
||||||
params.add(new SingleTest("AAA", 'A', 3 ));
|
params.add(new BaseCountsTest("AAA", 'A', 3 ));
|
||||||
params.add(new SingleTest("AAAN", 'A', 3 ));
|
params.add(new BaseCountsTest("AAAN", 'A', 3 ));
|
||||||
params.add(new SingleTest("AAANNNN", 'N', 4 ));
|
params.add(new BaseCountsTest("AAANNNN", 'N', 4 ));
|
||||||
params.add(new SingleTest("AACTG", 'A', 2 ));
|
params.add(new BaseCountsTest("AACTG", 'A', 2 ));
|
||||||
params.add(new SingleTest("D", 'D', 1 ));
|
params.add(new BaseCountsTest("D", 'D', 1 ));
|
||||||
params.add(new SingleTest("DDAAD", 'D', 3));
|
params.add(new BaseCountsTest("DDAAD", 'D', 3));
|
||||||
params.add(new SingleTest("", (char)BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0 ));
|
params.add(new BaseCountsTest("", (char)BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0 ));
|
||||||
params.add(new SingleTest("AAIIIAI", 'I', 4 ));
|
params.add(new BaseCountsTest("AAIIIAI", 'I', 4 ));
|
||||||
|
|
||||||
List<Object[]> params2 = new ArrayList<Object[]>();
|
List<Object[]> params2 = new ArrayList<Object[]>();
|
||||||
for ( SingleTest x : params ) params2.add(new Object[]{x});
|
for ( BaseCountsTest x : params ) params2.add(new Object[]{x});
|
||||||
return params2.toArray(new Object[][]{});
|
return params2.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Test(dataProvider = "data", enabled = true)
|
@Test(dataProvider = "data", enabled = true)
|
||||||
public void testCounting(SingleTest params) {
|
public void testCounting(BaseCountsTest params) {
|
||||||
BaseCounts counts = new BaseCounts();
|
BaseCounts counts = new BaseCounts();
|
||||||
|
|
||||||
for ( byte base : params.bases.getBytes() )
|
for ( byte base : params.bases.getBytes() )
|
||||||
|
|
@ -110,5 +105,44 @@ public class BaseCountsUnitTest extends BaseTest {
|
||||||
Assert.assertEquals(counts.totalCount(), params.bases.length(), name);
|
Assert.assertEquals(counts.totalCount(), params.bases.length(), name);
|
||||||
Assert.assertEquals(counts.countOfBase(counts.baseIndexWithMostCounts()), params.mostCommonCount, name);
|
Assert.assertEquals(counts.countOfBase(counts.baseIndexWithMostCounts()), params.mostCommonCount, name);
|
||||||
Assert.assertEquals((char)counts.baseWithMostCounts(), (char)params.mostCountBase, name);
|
Assert.assertEquals((char)counts.baseWithMostCounts(), (char)params.mostCountBase, name);
|
||||||
|
|
||||||
|
// test the static creation
|
||||||
|
final int[] countsArray = new int[] { counts.countOfBase(BaseIndex.A), counts.countOfBase(BaseIndex.C),
|
||||||
|
counts.countOfBase(BaseIndex.G), counts.countOfBase(BaseIndex.T)};
|
||||||
|
final BaseCounts countsFromArray = BaseCounts.createWithCounts(countsArray);
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.C), countsFromArray.countOfBase(BaseIndex.C));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.G), countsFromArray.countOfBase(BaseIndex.G));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.T), countsFromArray.countOfBase(BaseIndex.T));
|
||||||
|
Assert.assertEquals(ACGTcounts(counts), countsFromArray.totalCount());
|
||||||
|
|
||||||
|
// test addition
|
||||||
|
counts.add(countsFromArray);
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.A), 2 * countsFromArray.countOfBase(BaseIndex.A));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.C), 2 * countsFromArray.countOfBase(BaseIndex.C));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.G), 2 * countsFromArray.countOfBase(BaseIndex.G));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.T), 2 * countsFromArray.countOfBase(BaseIndex.T));
|
||||||
|
Assert.assertEquals(ACGTcounts(counts), 2 * countsFromArray.totalCount());
|
||||||
|
|
||||||
|
// test subtraction
|
||||||
|
counts.sub(countsFromArray);
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.C), countsFromArray.countOfBase(BaseIndex.C));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.G), countsFromArray.countOfBase(BaseIndex.G));
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.T), countsFromArray.countOfBase(BaseIndex.T));
|
||||||
|
Assert.assertEquals(ACGTcounts(counts), countsFromArray.totalCount());
|
||||||
|
|
||||||
|
// test decrementing
|
||||||
|
if ( counts.countOfBase(BaseIndex.A) > 0 ) {
|
||||||
|
counts.decr((byte)'A');
|
||||||
|
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A) - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int ACGTcounts(final BaseCounts baseCounts) {
|
||||||
|
return baseCounts.totalCountWithoutIndels() - baseCounts.countOfBase(BaseIndex.N);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -47,6 +47,9 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||||
|
|
||||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||||
|
import net.sf.samtools.Cigar;
|
||||||
|
import net.sf.samtools.CigarElement;
|
||||||
|
import net.sf.samtools.CigarOperator;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
|
|
@ -258,24 +261,48 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
|
|
||||||
// then add the permuted reads
|
// then add the permuted reads
|
||||||
for ( final GenomeLoc loc : locs )
|
for ( final GenomeLoc loc : locs )
|
||||||
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality));
|
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
|
||||||
}
|
}
|
||||||
|
|
||||||
private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality) {
|
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads) {
|
||||||
|
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||||
|
|
||||||
|
// first, add the basic reads to the collection
|
||||||
|
myReads.addAll(basicReads);
|
||||||
|
|
||||||
|
// then add the permuted reads
|
||||||
|
for ( final GenomeLoc loc : locs )
|
||||||
|
myReads.add(createVariantRead(loc, false, false, operator));
|
||||||
|
}
|
||||||
|
|
||||||
|
private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality,
|
||||||
|
final boolean variantBaseShouldBeLowQuality, final CigarOperator operator) {
|
||||||
|
|
||||||
final int startPos = loc.getStart() - 50;
|
final int startPos = loc.getStart() - 50;
|
||||||
|
|
||||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead" + startPos, 0, startPos, readLength);
|
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead" + startPos, 0, startPos, readLength);
|
||||||
|
|
||||||
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||||
// create a mismatch
|
// create a mismatch if requested
|
||||||
bases[50] = 'C';
|
if ( operator == CigarOperator.M )
|
||||||
|
bases[50] = 'C';
|
||||||
read.setReadBases(bases);
|
read.setReadBases(bases);
|
||||||
|
|
||||||
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
|
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
|
||||||
if ( variantBaseShouldBeLowQuality )
|
if ( variantBaseShouldBeLowQuality )
|
||||||
baseQuals[50] = (byte)10;
|
baseQuals[50] = (byte)10;
|
||||||
read.setBaseQualities(baseQuals);
|
read.setBaseQualities(baseQuals);
|
||||||
final byte mappingQual = readShouldBeLowQuality ? (byte)10 : (byte)30;
|
final byte mappingQual = readShouldBeLowQuality ? (byte)10 : (byte)30;
|
||||||
read.setMappingQuality(mappingQual);
|
read.setMappingQuality(mappingQual);
|
||||||
|
|
||||||
|
if ( operator != CigarOperator.M ) {
|
||||||
|
final List<CigarElement> elements = new ArrayList<CigarElement>(3);
|
||||||
|
elements.add(new CigarElement(operator == CigarOperator.D ? 50 : 51, CigarOperator.M));
|
||||||
|
elements.add(new CigarElement(1, operator));
|
||||||
|
elements.add(new CigarElement(operator == CigarOperator.D ? 50 : 48, CigarOperator.M));
|
||||||
|
read.setCigar(new Cigar(elements));
|
||||||
|
}
|
||||||
|
|
||||||
return read;
|
return read;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -315,6 +342,16 @@ public class SlidingWindowUnitTest extends BaseTest {
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2)});
|
||||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 3)});
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 3)});
|
||||||
|
|
||||||
|
// test I/D operators
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9)});
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10)});
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10)});
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11)});
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9)});
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10)});
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10)});
|
||||||
|
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11)});
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue