From d804bdf2101f998311e449306fb3522dddf0ef9a Mon Sep 17 00:00:00 2001 From: asivache Date: Thu, 11 Mar 2010 21:15:03 +0000 Subject: [PATCH] New option: --maxReadsInRam. When using ON_DISK sorting option, the tool may still run out of memory in regions of pathologically deep coverage because of the generous memory usage limit set in the underlying samtools' sorting SAM writers. With this option, the user can lower the number of reads the writer keeps in memory before spilling them to disk. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2985 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/walkers/indels/IndelRealigner.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 77208e8d0..170524f5b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -69,9 +69,16 @@ public class IndelRealigner extends ReadWalker { @Argument(fullName="maxReadsForConsensuses", shortName="greedy", doc="max reads used for finding the alternate consensuses (necessary to improve performance in deep coverage)", required=false) protected int MAX_READS_FOR_CONSENSUSES = 120; - @Argument(fullName="maxReadsForRealignment", shortName="maxReads", doc="max reads allowed at an interval for realignment", required=false) + @Argument(fullName="maxReadsForRealignment", shortName="maxReads", doc="max reads allowed at an interval for realignment; "+ + "if this value is exceeded, realignment is not attempted and the reads are passed to the output file(s) as-is", required=false) protected int MAX_READS = 20000; + @Argument(fullName="maxReadsInRam", shortName="maxInRam", doc="max reads allowed to be kept in memory at a time "+ + "when using ON_DISK sorting option. 
If too low, the tool may run out of system file descriptors needed to perform sorting; "+ + "if too high, the tool may run out of memory in the regions of unusually deep coverage (consider also increasing VM heap size if this happens)", + required=false) + protected int MAX_RECORDS_IN_RAM = 500000; + @Argument(fullName="writerWindowSize", shortName="writerWindowSize", doc="the window over which the writer will store reads when --sortInMemory is enabled", required=false) protected int SORTING_WRITER_WINDOW = 100; @@ -126,6 +133,7 @@ public class IndelRealigner extends ReadWalker { writers = new HashMap(); Map> readGroupMap = getToolkit().getFileToReadGroupIdMapping(); SAMFileWriterFactory factory = new SAMFileWriterFactory(); + factory.setMaxRecordsInRam(MAX_RECORDS_IN_RAM); if ( NWAY_OUTPUT ) { List ids = getToolkit().getDataSource().getReaderIDs();