More RR-related updates and tests.
- ReduceReads by default now sets up-front ReadWalker downsampling to 40x per start position. - This is the value I used in my tests with Picard to show that memory issues pretty much disappeared. - This should hopefully take care of the memory issues being reported on the forum. - Added javadocs to SlidingWindow (the main RR class) to follow GATK conventions. - Added more unit tests to increase coverage of BaseCounts class. - Added more unit tests to test I/D operators in the SlidingWindow class.
This commit is contained in:
parent
971ded341b
commit
2d518f3063
|
|
@ -56,13 +56,11 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.filters.*;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
|
||||
import org.broadinstitute.sting.gatk.walkers.PartitionType;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.clipping.ReadClipper;
|
||||
|
|
@ -107,6 +105,7 @@ import java.util.*;
|
|||
@DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
|
||||
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=40)
|
||||
public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
|
||||
|
||||
@Output
|
||||
|
|
|
|||
|
|
@ -198,8 +198,10 @@ public class SlidingWindow {
|
|||
* sliding process.
|
||||
*
|
||||
* @param read the read
|
||||
* @return a list of reads that have been finished by sliding the window.
|
||||
* @return a non-null list of reads (in the CompressionStash) that have been finished by sliding the window.
|
||||
*/
|
||||
@Requires({"read != null"})
|
||||
@Ensures("result != null")
|
||||
public CompressionStash addRead(GATKSAMRecord read) {
|
||||
addToHeader(windowHeader, read); // update the window header counts
|
||||
readsInWindow.add(read); // add read to sliding reads
|
||||
|
|
@ -210,8 +212,8 @@ public class SlidingWindow {
|
|||
* Returns the next complete (or incomplete if closeLastRegion is true) variant region between 'from' (inclusive) and 'to' (exclusive)
|
||||
* but converted to global coordinates.
|
||||
*
|
||||
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates
|
||||
* @param to end window header index of the search window (exclusive); note that this uses local coordinates
|
||||
* @param from beginning window header index of the search window (inclusive) in local (to the windowHeader) coordinates
|
||||
* @param to end window header index of the search window (exclusive) in local (to the windowHeader) coordinates
|
||||
* @param variantSite boolean array with true marking variant regions
|
||||
* @param closeLastRegion if the last index is variant (so it's an incomplete region), should we close (and return as an interval) the location or ignore it?
|
||||
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region. All coordinates returned are global.
|
||||
|
|
@ -238,8 +240,8 @@ public class SlidingWindow {
|
|||
/**
|
||||
* Creates a list with all the complete and incomplete variant regions within 'from' (inclusive) and 'to' (exclusive)
|
||||
*
|
||||
* @param from beginning window header index of the search window (inclusive); note that this uses local coordinates
|
||||
* @param to end window header index of the search window (exclusive); note that this uses local coordinates
|
||||
* @param from beginning window header index of the search window (inclusive) in local (to the windowHeader) coordinates
|
||||
* @param to end window header index of the search window (exclusive) in local (to the windowHeader) coordinates
|
||||
* @param variantSite boolean array with true marking variant regions
|
||||
* @return a list with start/stops of variant regions following findNextVariantRegion description in global coordinates
|
||||
*/
|
||||
|
|
@ -395,10 +397,14 @@ public class SlidingWindow {
|
|||
*
|
||||
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
|
||||
*
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @return a list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||
* @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated.
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected List<GATKSAMRecord> addToSyntheticReads(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||
LinkedList<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
|
||||
if (start < end) {
|
||||
|
|
@ -450,7 +456,7 @@ public class SlidingWindow {
|
|||
* Finalizes one or more synthetic reads.
|
||||
*
|
||||
* @param type the synthetic reads you want to close
|
||||
* @return the GATKSAMRecords generated by finalizing the synthetic reads
|
||||
* @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads
|
||||
*/
|
||||
private List<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
|
||||
GATKSAMRecord read = null;
|
||||
|
|
@ -479,7 +485,7 @@ public class SlidingWindow {
|
|||
*
|
||||
* @param start beginning of the filtered region
|
||||
* @param upTo limit to search for another consensus element
|
||||
* @return next position with consensus data or empty
|
||||
* @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonConsensusElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
|
@ -501,7 +507,7 @@ public class SlidingWindow {
|
|||
*
|
||||
* @param start beginning of the region
|
||||
* @param upTo limit to search for
|
||||
* @return next position with no filtered data
|
||||
* @return next position in local coordinates (relative to the windowHeader) with no filtered data; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonFilteredDataElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
Iterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
|
@ -523,7 +529,7 @@ public class SlidingWindow {
|
|||
*
|
||||
* @param start beginning of the region
|
||||
* @param upTo limit to search for
|
||||
* @return next position with non-empty element
|
||||
* @return next position in local coordinates (relative to the windowHeader) with non-empty element; otherwise, the start position
|
||||
*/
|
||||
private int findNextNonEmptyElement(LinkedList<HeaderElement> header, int start, int upTo) {
|
||||
ListIterator<HeaderElement> headerElementIterator = header.listIterator(start);
|
||||
|
|
@ -544,12 +550,16 @@ public class SlidingWindow {
|
|||
/**
|
||||
* Adds bases to the filtered data synthetic read.
|
||||
*
|
||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||
* bases.
|
||||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases.
|
||||
*
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||
* @return a non-null list of GATKSAMRecords representing finalized filtered consensus data. Empty list if no consensus was generated.
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
@Ensures("result != null")
|
||||
private List<GATKSAMRecord> addToFilteredData(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||
List<GATKSAMRecord> result = new ArrayList<GATKSAMRecord>(0);
|
||||
|
||||
|
|
@ -585,9 +595,12 @@ public class SlidingWindow {
|
|||
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
|
||||
* bases.
|
||||
*
|
||||
* @param header the window header
|
||||
* @param start the first header index to add to consensus
|
||||
* @param end the first header index NOT TO add to consensus
|
||||
* @param isNegativeStrand should the synthetic read be represented as being on the negative strand?
|
||||
*/
|
||||
@Requires({"start >= 0 && (end >= start || end == 0)"})
|
||||
private void addToRunningConsensus(LinkedList<HeaderElement> header, int start, int end, boolean isNegativeStrand) {
|
||||
if (runningConsensus == null)
|
||||
runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities, isNegativeStrand);
|
||||
|
|
@ -621,6 +634,16 @@ public class SlidingWindow {
|
|||
syntheticRead.add(base, count, qual, insQual, delQual, rms);
|
||||
}
|
||||
|
||||
/**
|
||||
* Method to compress a variant region and return the associated reduced reads
|
||||
*
|
||||
* @param start the first window header index in the variant region (inclusive)
|
||||
* @param stop the last window header index of the variant region (inclusive)
|
||||
* @param disallowPolyploidReductionAtThisPosition should we disallow polyploid (het) compression here?
|
||||
* @return a non-null list of all reads contained in the variant region
|
||||
*/
|
||||
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||
@Ensures("result != null")
|
||||
private List<GATKSAMRecord> compressVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
||||
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
|
||||
|
||||
|
|
@ -684,11 +707,13 @@ public class SlidingWindow {
|
|||
/**
|
||||
* Finalizes a variant region, any adjacent synthetic reads.
|
||||
*
|
||||
* @param start the first window header index in the variant region (inclusive)
|
||||
* @param stop the last window header index of the variant region (inclusive)
|
||||
* @return all reads contained in the variant region plus any adjacent synthetic reads
|
||||
* @param start the first window header index in the variant region (inclusive)
|
||||
* @param stop the last window header index of the variant region (inclusive)
|
||||
* @param disallowPolyploidReductionAtThisPosition should we disallow polyploid (het) compression here?
|
||||
* @return a non-null list of all reads contained in the variant region plus any adjacent synthetic reads
|
||||
*/
|
||||
@Requires("start <= stop")
|
||||
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||
@Ensures("result != null")
|
||||
protected List<GATKSAMRecord> closeVariantRegion(final int start, final int stop, final boolean disallowPolyploidReductionAtThisPosition) {
|
||||
List<GATKSAMRecord> allReads = compressVariantRegion(start, stop, disallowPolyploidReductionAtThisPosition);
|
||||
|
||||
|
|
@ -733,9 +758,11 @@ public class SlidingWindow {
|
|||
*
|
||||
* It will use the downsampling strategy defined by the SlidingWindow
|
||||
*
|
||||
* @param allReads the reads to select from (all reads that cover the window)
|
||||
* @return a list of reads selected by the downsampler to cover the window to at least the desired coverage
|
||||
* @param allReads a non-null list of reads to select from (all reads that cover the window)
|
||||
* @return a non-null list of reads selected by the downsampler to cover the window to at least the desired coverage
|
||||
*/
|
||||
@Requires({"allReads != null"})
|
||||
@Ensures("result != null")
|
||||
protected List<GATKSAMRecord> downsampleVariantRegion(final List<GATKSAMRecord> allReads) {
|
||||
int nReads = allReads.size();
|
||||
if (nReads == 0)
|
||||
|
|
@ -755,8 +782,9 @@ public class SlidingWindow {
|
|||
* regions that still exist regardless of being able to fulfill the
|
||||
* context size requirement in the end.
|
||||
*
|
||||
* @return All reads generated
|
||||
* @return A non-null set/list of all reads generated
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
|
||||
// mark variant regions
|
||||
Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
|
||||
|
|
@ -780,7 +808,7 @@ public class SlidingWindow {
|
|||
/**
|
||||
* generates the SAM record for the running consensus read and resets it (to null)
|
||||
*
|
||||
* @return the read contained in the running consensus
|
||||
* @return the read contained in the running consensus or null
|
||||
*/
|
||||
protected GATKSAMRecord finalizeRunningConsensus() {
|
||||
GATKSAMRecord finalizedRead = null;
|
||||
|
|
@ -798,7 +826,7 @@ public class SlidingWindow {
|
|||
/**
|
||||
* generates the SAM record for the filtered data consensus and resets it (to null)
|
||||
*
|
||||
* @return the read contained in the running consensus
|
||||
* @return the read contained in the running consensus or null
|
||||
*/
|
||||
protected GATKSAMRecord finalizeFilteredDataConsensus() {
|
||||
GATKSAMRecord finalizedRead = null;
|
||||
|
|
@ -813,9 +841,19 @@ public class SlidingWindow {
|
|||
return finalizedRead;
|
||||
}
|
||||
|
||||
|
||||
|
||||
private List<GATKSAMRecord> createPolyploidConsensus(int start, int stop, int nHaplotypes, int hetRefPosition) {
|
||||
/**
|
||||
* Finalizes a variant region, any adjacent synthetic reads.
|
||||
*
|
||||
* @param start the first window header index in the variant region (inclusive)
|
||||
* @param stop the last window header index of the variant region (inclusive)
|
||||
* @param nHaplotypes the number of haplotypes to use
|
||||
* @param hetRefPosition reference position (in global coordinates) of the het site
|
||||
* @return a non-null list of all reads contained in the variant region as a polyploid consensus
|
||||
*/
|
||||
// TODO -- Why do we need the nHaplotypes argument? It is not enforced at all... [EB]
|
||||
@Requires({"start >= 0 && (stop >= start || stop == 0)"})
|
||||
@Ensures("result != null")
|
||||
private List<GATKSAMRecord> createPolyploidConsensus(final int start, final int stop, final int nHaplotypes, final int hetRefPosition) {
|
||||
// we will create two (positive strand, negative strand) headers for each contig
|
||||
List<LinkedList<HeaderElement>> headersPosStrand = new ArrayList<LinkedList<HeaderElement>>();
|
||||
List<LinkedList<HeaderElement>> headersNegStrand = new ArrayList<LinkedList<HeaderElement>>();
|
||||
|
|
@ -902,7 +940,7 @@ public class SlidingWindow {
|
|||
* @param read the incoming read to be added to the sliding window
|
||||
* @param removeRead if we are removing the read from the header or adding
|
||||
*/
|
||||
private void updateHeaderCounts(LinkedList<HeaderElement> header, GATKSAMRecord read, boolean removeRead) {
|
||||
private void updateHeaderCounts(final LinkedList<HeaderElement> header, final GATKSAMRecord read, final boolean removeRead) {
|
||||
byte[] bases = read.getReadBases();
|
||||
byte[] quals = read.getBaseQualities();
|
||||
byte[] insQuals = read.getExistingBaseInsertionQualities();
|
||||
|
|
|
|||
|
|
@ -47,9 +47,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
@ -62,12 +59,12 @@ import java.util.List;
|
|||
* Basic unit test for BaseCounts in reduced reads
|
||||
*/
|
||||
public class BaseCountsUnitTest extends BaseTest {
|
||||
private class SingleTest {
|
||||
private class BaseCountsTest {
|
||||
public String bases;
|
||||
public byte mostCountBase;
|
||||
public int mostCommonCount;
|
||||
|
||||
private SingleTest(String bases, char mostCountBase, int mostCommonCount) {
|
||||
private BaseCountsTest(String bases, char mostCountBase, int mostCommonCount) {
|
||||
this.mostCommonCount = mostCommonCount;
|
||||
this.mostCountBase = (byte)mostCountBase;
|
||||
this.bases = bases;
|
||||
|
|
@ -77,30 +74,28 @@ public class BaseCountsUnitTest extends BaseTest {
|
|||
|
||||
@DataProvider(name = "data")
|
||||
public Object[][] createData1() {
|
||||
List<SingleTest> params = new ArrayList<SingleTest>();
|
||||
List<BaseCountsTest> params = new ArrayList<BaseCountsTest>();
|
||||
|
||||
params.add(new SingleTest("A", 'A', 1 ));
|
||||
params.add(new SingleTest("AA", 'A', 2 ));
|
||||
params.add(new SingleTest("AC", 'A', 1 ));
|
||||
params.add(new SingleTest("AAC", 'A', 2 ));
|
||||
params.add(new SingleTest("AAA", 'A', 3 ));
|
||||
params.add(new SingleTest("AAAN", 'A', 3 ));
|
||||
params.add(new SingleTest("AAANNNN", 'N', 4 ));
|
||||
params.add(new SingleTest("AACTG", 'A', 2 ));
|
||||
params.add(new SingleTest("D", 'D', 1 ));
|
||||
params.add(new SingleTest("DDAAD", 'D', 3));
|
||||
params.add(new SingleTest("", (char)BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0 ));
|
||||
params.add(new SingleTest("AAIIIAI", 'I', 4 ));
|
||||
params.add(new BaseCountsTest("A", 'A', 1 ));
|
||||
params.add(new BaseCountsTest("AA", 'A', 2 ));
|
||||
params.add(new BaseCountsTest("AC", 'A', 1 ));
|
||||
params.add(new BaseCountsTest("AAC", 'A', 2 ));
|
||||
params.add(new BaseCountsTest("AAA", 'A', 3 ));
|
||||
params.add(new BaseCountsTest("AAAN", 'A', 3 ));
|
||||
params.add(new BaseCountsTest("AAANNNN", 'N', 4 ));
|
||||
params.add(new BaseCountsTest("AACTG", 'A', 2 ));
|
||||
params.add(new BaseCountsTest("D", 'D', 1 ));
|
||||
params.add(new BaseCountsTest("DDAAD", 'D', 3));
|
||||
params.add(new BaseCountsTest("", (char)BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0 ));
|
||||
params.add(new BaseCountsTest("AAIIIAI", 'I', 4 ));
|
||||
|
||||
List<Object[]> params2 = new ArrayList<Object[]>();
|
||||
for ( SingleTest x : params ) params2.add(new Object[]{x});
|
||||
for ( BaseCountsTest x : params ) params2.add(new Object[]{x});
|
||||
return params2.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Test(dataProvider = "data", enabled = true)
|
||||
public void testCounting(SingleTest params) {
|
||||
public void testCounting(BaseCountsTest params) {
|
||||
BaseCounts counts = new BaseCounts();
|
||||
|
||||
for ( byte base : params.bases.getBytes() )
|
||||
|
|
@ -110,5 +105,44 @@ public class BaseCountsUnitTest extends BaseTest {
|
|||
Assert.assertEquals(counts.totalCount(), params.bases.length(), name);
|
||||
Assert.assertEquals(counts.countOfBase(counts.baseIndexWithMostCounts()), params.mostCommonCount, name);
|
||||
Assert.assertEquals((char)counts.baseWithMostCounts(), (char)params.mostCountBase, name);
|
||||
|
||||
// test the static creation
|
||||
final int[] countsArray = new int[] { counts.countOfBase(BaseIndex.A), counts.countOfBase(BaseIndex.C),
|
||||
counts.countOfBase(BaseIndex.G), counts.countOfBase(BaseIndex.T)};
|
||||
final BaseCounts countsFromArray = BaseCounts.createWithCounts(countsArray);
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.C), countsFromArray.countOfBase(BaseIndex.C));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.G), countsFromArray.countOfBase(BaseIndex.G));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.T), countsFromArray.countOfBase(BaseIndex.T));
|
||||
Assert.assertEquals(ACGTcounts(counts), countsFromArray.totalCount());
|
||||
|
||||
// test addition
|
||||
counts.add(countsFromArray);
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.A), 2 * countsFromArray.countOfBase(BaseIndex.A));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.C), 2 * countsFromArray.countOfBase(BaseIndex.C));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.G), 2 * countsFromArray.countOfBase(BaseIndex.G));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.T), 2 * countsFromArray.countOfBase(BaseIndex.T));
|
||||
Assert.assertEquals(ACGTcounts(counts), 2 * countsFromArray.totalCount());
|
||||
|
||||
// test subtraction
|
||||
counts.sub(countsFromArray);
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.C), countsFromArray.countOfBase(BaseIndex.C));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.G), countsFromArray.countOfBase(BaseIndex.G));
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.T), countsFromArray.countOfBase(BaseIndex.T));
|
||||
Assert.assertEquals(ACGTcounts(counts), countsFromArray.totalCount());
|
||||
|
||||
// test decrementing
|
||||
if ( counts.countOfBase(BaseIndex.A) > 0 ) {
|
||||
counts.decr((byte)'A');
|
||||
Assert.assertEquals(counts.countOfBase(BaseIndex.A), countsFromArray.countOfBase(BaseIndex.A) - 1);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
private static int ACGTcounts(final BaseCounts baseCounts) {
|
||||
return baseCounts.totalCountWithoutIndels() - baseCounts.countOfBase(BaseIndex.N);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -47,6 +47,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
|
|
@ -258,24 +261,48 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
|
||||
// then add the permuted reads
|
||||
for ( final GenomeLoc loc : locs )
|
||||
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality));
|
||||
myReads.add(createVariantRead(loc, readsShouldBeLowQuality, variantBaseShouldBeLowQuality, CigarOperator.M));
|
||||
}
|
||||
|
||||
private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality, final boolean variantBaseShouldBeLowQuality) {
|
||||
private ConsensusCreationTest(final List<GenomeLoc> locs, final CigarOperator operator, final int expectedNumberOfReads) {
|
||||
this.expectedNumberOfReads = expectedNumberOfReads;
|
||||
|
||||
// first, add the basic reads to the collection
|
||||
myReads.addAll(basicReads);
|
||||
|
||||
// then add the permuted reads
|
||||
for ( final GenomeLoc loc : locs )
|
||||
myReads.add(createVariantRead(loc, false, false, operator));
|
||||
}
|
||||
|
||||
private GATKSAMRecord createVariantRead(final GenomeLoc loc, final boolean readShouldBeLowQuality,
|
||||
final boolean variantBaseShouldBeLowQuality, final CigarOperator operator) {
|
||||
|
||||
final int startPos = loc.getStart() - 50;
|
||||
|
||||
final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead" + startPos, 0, startPos, readLength);
|
||||
|
||||
final byte[] bases = Utils.dupBytes((byte) 'A', readLength);
|
||||
// create a mismatch
|
||||
bases[50] = 'C';
|
||||
// create a mismatch if requested
|
||||
if ( operator == CigarOperator.M )
|
||||
bases[50] = 'C';
|
||||
read.setReadBases(bases);
|
||||
|
||||
final byte[] baseQuals = Utils.dupBytes((byte) 30, readLength);
|
||||
if ( variantBaseShouldBeLowQuality )
|
||||
baseQuals[50] = (byte)10;
|
||||
read.setBaseQualities(baseQuals);
|
||||
final byte mappingQual = readShouldBeLowQuality ? (byte)10 : (byte)30;
|
||||
read.setMappingQuality(mappingQual);
|
||||
|
||||
if ( operator != CigarOperator.M ) {
|
||||
final List<CigarElement> elements = new ArrayList<CigarElement>(3);
|
||||
elements.add(new CigarElement(operator == CigarOperator.D ? 50 : 51, CigarOperator.M));
|
||||
elements.add(new CigarElement(1, operator));
|
||||
elements.add(new CigarElement(operator == CigarOperator.D ? 50 : 48, CigarOperator.M));
|
||||
read.setCigar(new Cigar(elements));
|
||||
}
|
||||
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -315,6 +342,16 @@ public class SlidingWindowUnitTest extends BaseTest {
|
|||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), true, false, 2)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc1100), false, true, 3)});
|
||||
|
||||
// test I/D operators
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.D, 9)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.D, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.D, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.D, 11)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290), CigarOperator.I, 9)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc295), CigarOperator.I, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc309), CigarOperator.I, 10)});
|
||||
tests.add(new Object[]{new ConsensusCreationTest(Arrays.<GenomeLoc>asList(loc290, loc310), CigarOperator.I, 11)});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue