Clear ReduceReads name cache after each set of reads produced by ReduceReadsStash.

The name cache was filling up with the names of all reads in the entire file, which for a large file
eventually consumes all available memory.  Keep the read name cache only for the reads that are together
in one variant region, so that a pair of reads within the same variant region will still be joined via
read name.  For reads in different variant regions, the ability to connect a read to its mate is lost.

Update the MD5s in the integration tests to reflect the altered output.
Add a new integration test that confirms that a pair within a variant region is joined by read name.
This commit is contained in:
Alec Wysoker 2013-03-19 14:03:40 -04:00
parent c813259283
commit bccc9d79e5
2 changed files with 25 additions and 7 deletions

View File

@ -408,6 +408,13 @@ public class ReduceReads extends ReadWalker<ObjectArrayList<GATKSAMRecord>, Redu
for (GATKSAMRecord compressedRead : stash.compress(readReady))
outputRead(compressedRead);
// We only care about maintaining the link between read pairs if they are in the same variant
// region. Since an entire variant region's worth of reads is returned in a single call to
// stash.compress(), the readNameHash can be cleared after the for() loop above.
// The advantage of clearing the hash is that otherwise it holds all reads that have been encountered,
// which can use a lot of memory and cause RR to slow to a crawl and/or run out of memory.
readNameHash.clear();
}
} else
stash.add(read);

View File

@ -64,6 +64,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
// Test inputs. Each BAM path is a file name appended to validationDataLocation or privateTestDir,
// both of which are defined outside this view.
final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
// Two -L interval arguments passed to ReduceReads by testCoReduction.
final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
// Input for testReadOffContig.
final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam";
// Input for testPairedReadsInVariantRegion.
final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam";
// Input for testInsertionsAtEdgeOfConsensus.
final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam";
private void RRTest(String testName, String args, String md5) {
@ -74,29 +75,29 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testDefaultCompression() {
// Calls RRTest with the shared argument string L and an MD5 string (RRTest's third parameter is named md5).
// NOTE(review): the two calls below differ only in the MD5. This looks like a diff rendering that lost
// its -/+ markers (first call = removed line, second call = its replacement) — confirm against the
// checked-out file before relying on either value.
RRTest("testDefaultCompression ", L, "17908e8515217c4693d303ed68108ccc");
RRTest("testDefaultCompression ", L, "16d97a47b8dbfae4ea64fbdf522b693c");
}
@Test(enabled = true)
public void testInsertionsAtEdgeOfConsensus() {
// Command line: ReduceReads with -npt, reference REF and INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM as input.
// The trailing "%s" after -o is appended after String.format, so it stays a literal placeholder in base.
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s ";
// NOTE(review): the two executeTest calls below differ only in the MD5. This looks like a diff rendering
// that lost its -/+ markers (first call = removed line, second call = its replacement) — confirm against
// the checked-out file before relying on either value.
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("3103667fc68c3136a8cfa8e22429f94e")));
executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("f7a9a27c5eaf791b67a768fff960a9e1")));
}
@Test(enabled = true)
public void testMultipleIntervals() {
// Seven -L interval arguments on contig 20, passed to RRTest in place of the shared L argument string.
String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
// NOTE(review): the two RRTest calls below differ only in the MD5. This looks like a diff rendering
// that lost its -/+ markers (first call = removed line, second call = its replacement) — confirm against
// the checked-out file before relying on either value.
RRTest("testMultipleIntervals ", intervals, "497c5e36c2beaad2fcdbd02a0b9c121b");
RRTest("testMultipleIntervals ", intervals, "8886ba383e21883241b386882e8e5063");
}
@Test(enabled = true)
public void testHighCompression() {
// Calls RRTest with " -cs 10 -minvar 0.3 -mindel 0.3 " prepended to the shared argument string L.
// NOTE(review): the two calls below differ only in the MD5. This looks like a diff rendering that lost
// its -/+ markers (first call = removed line, second call = its replacement) — confirm against the
// checked-out file before relying on either value.
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "0ff4142e4d7b6a9a9c76012246ad9e2d");
RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "54253f25d363852a1182aff33e500b92");
}
@Test(enabled = true)
public void testLowCompression() {
// Calls RRTest with " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " prepended to the shared
// argument string L.
// NOTE(review): the two calls below differ only in the MD5. This looks like a diff rendering that lost
// its -/+ markers (first call = removed line, second call = its replacement) — confirm against the
// checked-out file before relying on either value.
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "7890a37444a0e05b902f63a83238ce37");
RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "1d7d2d28900db57dad65a8beef64b8cb");
}
@Test(enabled = true)
@ -137,7 +138,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testCoReduction() {
// Command line: ReduceReads over the COREDUCTION_L intervals with two input BAMs (A and B) in one run.
// The trailing "%s" after -o is appended after String.format, so it stays a literal placeholder in base.
String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
// NOTE(review): the two executeTest calls below differ only in the MD5. This looks like a diff rendering
// that lost its -/+ markers (first call = removed line, second call = its replacement) — confirm against
// the checked-out file before relying on either value.
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("13c44a9afa92ae728bf55b7075cc5de3")));
executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("81312c31b9910a42bff6acb5167592ab")));
}
/**
@ -147,8 +148,18 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
@Test(enabled = true)
public void testReadOffContig() {
// Command line: ReduceReads with -npt, reference REF and OFFCONTIG_BAM as input.
// The trailing "%s" after -o is appended after String.format, so it stays a literal placeholder in base.
String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
// NOTE(review): the two executeTest calls below differ only in the MD5. This looks like a diff rendering
// that lost its -/+ markers (first call = removed line, second call = its replacement) — confirm against
// the checked-out file before relying on either value.
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("c57cd191dc391983131be43f6cc2e381")));
executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("b4dc66445ddf5f467f67860bed023ef8")));
}
/**
 * Checks that when both ends of a pair lie in the same variant region, the two ends keep the same
 * compressed read name. The check is made by running ReduceReads on
 * BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM and handing executeTest a spec with a single MD5 string.
 */
@Test(enabled = true)
public void testPairedReadsInVariantRegion() {
    // Walker, reference and input; only these two placeholders are filled by String.format.
    final String inputArgs = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM);
    // The "%s" after -o is appended after formatting, so it remains a literal placeholder in the spec.
    final String commandLine = inputArgs + " -o %s --downsample_coverage 250 -dcov 50 ";
    final WalkerTestSpec spec = new WalkerTestSpec(commandLine, Arrays.asList("9bed260b6245f5ff47db8541405504aa"));
    executeTest("testPairedReadsInVariantRegion", spec);
}
}