From 96344c6b62df84d4727bbbd8307d03f88da0f0c1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 31 Oct 2012 12:35:45 -0400 Subject: [PATCH 01/26] Add note to realigner docs --- .../sting/gatk/walkers/indels/IndelRealigner.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 998894fbf..8f2528e23 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -236,6 +236,8 @@ public class IndelRealigner extends ReadWalker { * then extensions (".bam" or ".sam") will be stripped from the input file names and the provided string value will be pasted on instead; 2) if the * value ends with a '.map' (e.g. input_output.map), then the two-column tab-separated file with the specified name must exist and list unique output * file name (2nd column) for each input file name (1st column). + * + * Note that some GATK arguments do NOT work in conjunction with nWayOut (e.g. --disable_bam_indexing). */ @Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file") protected String N_WAY_OUT = null; From f8af8a2355fd70bd0b07694d62081826f317e08c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 31 Oct 2012 21:28:07 -0400 Subject: [PATCH 02/26] Moving UG integration tests to protected since they use protected-only contamination filtering. Adding a new UGLite integration test to confirm that contamination filtering is ignored in lite. 
--- .../UnifiedGenotyperIntegrationTest.java | 0 .../UnifiedGenotyperLiteIntegrationTest.java | 35 +++++++++++++++++++ 2 files changed, 35 insertions(+) rename {public => protected}/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java (100%) create mode 100755 public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java new file mode 100755 index 000000000..f9a921a86 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java @@ -0,0 +1,35 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +// ********************************************************************************** // +// Note that this class also serves as an integration test for the VariantAnnotator! 
// +// ********************************************************************************** // + +public class UnifiedGenotyperLiteIntegrationTest extends WalkerTest { + + private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + + // -------------------------------------------------------------------------------------------------------------- + // + // testing contamination down-sampling gets ignored + // + // -------------------------------------------------------------------------------------------------------------- + + @Test + public void testContaminationDownsampling() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, + Arrays.asList("9addd225a985178339a0c49dc5fdc220")); + executeTest("test contamination_percentage_to_filter gets ignored", spec); + } + +} From 47a0f5859e48a64a5d466f3370ee6d69126dbf88 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 31 Oct 2012 22:56:38 -0400 Subject: [PATCH 03/26] Don't run these tests if not GAKT lite --- .../genotyper/UnifiedGenotyperLiteIntegrationTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java index f9a921a86..783a8d7fc 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLiteIntegrationTest.java @@ -1,14 +1,11 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; 
-import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.classloader.GATKLiteUtils; +import org.testng.SkipException; import org.testng.annotations.Test; -import java.io.File; import java.util.Arrays; -import java.util.Collections; -import java.util.List; // ********************************************************************************** // // Note that this class also serves as an integration test for the VariantAnnotator! // @@ -26,6 +23,9 @@ public class UnifiedGenotyperLiteIntegrationTest extends WalkerTest { @Test public void testContaminationDownsampling() { + if ( !GATKLiteUtils.isGATKLite() ) + throw new SkipException("Only want to test for GATK lite"); + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, Arrays.asList("9addd225a985178339a0c49dc5fdc220")); From 9cd04c335c9f5662bb02d1581ec2ad970f7abbd7 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 29 Oct 2012 19:06:05 -0400 Subject: [PATCH 04/26] Work on GSA-508 / CachingIndexedFastaReader should internally upper case bases loading data -- As one might expect, CachingIndexedFastaSequenceFile now internally upper cases the FASTA reference bases. This is now done by default, unless requested explicitly to preserve the original bases. -- This is really the correct place to do this for a variety of reasons. First, you don't need to worry about upper casing bases throughout the code. Second, the cache is only upper cased once, no matter how often the bases are accessed, which walkers cannot optimize themselves. Finally, this uses the fastest function for this -- Picard's toUpperCase(byte[]) which is way better than String.toUpperCase() -- Added unit tests to ensure this functionality works correctly.
-- Removing unnecessary upper casing of bases in some core GATK tools, now that RefContext guarantees that the reference bases are all upper case. -- Added contracts to ensure this is the case. -- Remove a ton of sh*t from BaseUtils that was so old I had no idea what it was doing any longer, and didn't have any unit tests to ensure it was correct, and wasn't used anywhere in our code --- .../sting/gatk/contexts/ReferenceContext.java | 14 +- .../gatk/walkers/indels/IndelRealigner.java | 8 +- .../broadinstitute/sting/utils/BaseUtils.java | 215 +----------------- .../CachingIndexedFastaSequenceFile.java | 140 +++++++++--- ...chingIndexedFastaSequenceFileUnitTest.java | 65 +++++- 5 files changed, 185 insertions(+), 257 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index af330bba9..c8bf1e3e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.contexts; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import net.sf.samtools.util.StringUtil; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -39,10 +38,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; * @author hanna * @version 0.1 */ - public class ReferenceContext { - final public static boolean UPPERCASE_REFERENCE = true; - /** * Facilitates creation of new GenomeLocs. */ @@ -59,7 +55,8 @@ public class ReferenceContext { final private GenomeLoc window; /** - * The bases in the window around the current locus. If null, then bases haven't been fetched yet + * The bases in the window around the current locus. If null, then bases haven't been fetched yet.
+ * Bases are always upper cased */ private byte[] basesCache = null; @@ -81,7 +78,7 @@ public class ReferenceContext { * * @return */ - @Ensures("result != null") + @Ensures({"result != null", "BaseUtils.isUpperCase(result)"}) public byte[] getBases(); } @@ -146,7 +143,6 @@ public class ReferenceContext { private void fetchBasesFromProvider() { if ( basesCache == null ) { basesCache = basesProvider.getBases(); - if (UPPERCASE_REFERENCE) StringUtil.toUpperCase(basesCache); } } @@ -176,6 +172,7 @@ public class ReferenceContext { * Get the base at the given locus. * @return The base at the given locus from the reference. */ + @Ensures("BaseUtils.isUpperCase(result)") public byte getBase() { return getBases()[(locus.getStart() - window.getStart())]; } @@ -185,7 +182,7 @@ public class ReferenceContext { * @return All bases available. If the window is of size [0,0], the array will * contain only the base at the given locus. */ - @Ensures({"result != null", "result.length > 0"}) + @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"}) public byte[] getBases() { fetchBasesFromProvider(); return basesCache; @@ -194,6 +191,7 @@ public class ReferenceContext { /** * All the bases in the window from the current base forward to the end of the window. 
*/ + @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"}) public byte[] getForwardBases() { final byte[] bases = getBases(); final int mid = locus.getStart() - window.getStart(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java index 8f2528e23..345f79b2b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.indels; -import net.sf.picard.reference.IndexedFastaSequenceFile; +import com.google.java.contract.Requires; import net.sf.samtools.*; import net.sf.samtools.util.RuntimeIOException; import net.sf.samtools.util.SequenceUtil; @@ -276,7 +276,7 @@ public class IndelRealigner extends ReadWalker { protected String OUT_SNPS = null; // fasta reference reader to supplement the edges of the reference sequence - private IndexedFastaSequenceFile referenceReader; + private CachingIndexedFastaSequenceFile referenceReader; // the intervals input by the user private Iterator intervals = null; @@ -1603,7 +1603,8 @@ public class IndelRealigner extends ReadWalker { public List getReads() { return reads; } - public byte[] getReference(IndexedFastaSequenceFile referenceReader) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) { // set up the reference if we haven't done so yet if ( reference == null ) { // first, pad the reference to handle deletions in narrow windows (e.g. 
those with only 1 read) @@ -1611,7 +1612,6 @@ public class IndelRealigner extends ReadWalker { int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength()); loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight); reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); - StringUtil.toUpperCase(reference); } return reference; diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 69920ece4..53a49d8b2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -24,33 +24,6 @@ public class BaseUtils { public final static byte[] BASES = {'A', 'C', 'G', 'T'}; public final static byte[] EXTENDED_BASES = {'A', 'C', 'G', 'T', 'N', 'D'}; - public enum Base { - A('A', 0), - C('C', 1), - G('G', 2), - T('T', 3); - - byte b; - int index; - - private Base(char base, int index) { - this.b = (byte) base; - this.index = index; - } - - public byte getBase() { return b; } - - public char getBaseAsChar() { return (char) b; } - - public int getIndex() { return index; } - - public boolean sameBase(byte o) { return b == o; } - - public boolean sameBase(char o) { return b == (byte) o; } - - public boolean sameBase(int i) { return index == i; } - } - static private final int[] baseIndexMap = new int[256]; static { Arrays.fill(baseIndexMap, -1); @@ -130,6 +103,17 @@ public class BaseUtils { return false; } + public static boolean isUpperCase(final byte[] bases) { + for ( byte base : bases ) + if ( ! 
isUpperCase(base) ) + return false; + return true; + } + + public static boolean isUpperCase(final byte base) { + return base >= 'A' && base <= 'Z'; + } + /** * Converts a IUPAC nucleotide code to a pair of bases * @@ -271,59 +255,6 @@ public class BaseUtils { } } - /** - * Converts a base index to a base index representing its cross-talk partner - * - * @param baseIndex 0, 1, 2, 3 - * @return 1, 0, 3, 2, or -1 if the index can't be understood - */ - static public int crossTalkPartnerIndex(int baseIndex) { - switch (baseIndex) { - case 0: - return 1; // A -> C - case 1: - return 0; // C -> A - case 2: - return 3; // G -> T - case 3: - return 2; // T -> G - default: - return -1; - } - } - - /** - * Converts a base to the base representing its cross-talk partner - * - * @param base [AaCcGgTt] - * @return C, A, T, G, or '.' if the base can't be understood - */ - @Deprecated - static public char crossTalkPartnerBase(char base) { - return (char) baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base))); - } - - /** - * Return the complement of a base index. - * - * @param baseIndex the base index (0:A, 1:C, 2:G, 3:T) - * @return the complementary base index - */ - static public byte complementIndex(int baseIndex) { - switch (baseIndex) { - case 0: - return 3; // a -> t - case 1: - return 2; // c -> g - case 2: - return 1; // g -> c - case 3: - return 0; // t -> a - default: - return -1; // wtf? - } - } - /** * Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base). 
* @@ -350,7 +281,7 @@ public class BaseUtils { } @Deprecated - static public char simpleComplement(char base) { + static private char simpleComplement(char base) { return (char) simpleComplement((byte) base); } @@ -370,22 +301,6 @@ public class BaseUtils { return rcbases; } - /** - * Complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form) - * - * @param bases the byte array of bases - * @return the complement of the base byte array - */ - static public byte[] simpleComplement(byte[] bases) { - byte[] rcbases = new byte[bases.length]; - - for (int i = 0; i < bases.length; i++) { - rcbases[i] = simpleComplement(bases[i]); - } - - return rcbases; - } - /** * Reverse complement a char array of bases * @@ -403,23 +318,6 @@ public class BaseUtils { return rcbases; } - /** - * Complement a char array of bases - * - * @param bases the char array of bases - * @return the complement of the base char array - */ - @Deprecated - static public char[] simpleComplement(char[] bases) { - char[] rcbases = new char[bases.length]; - - for (int i = 0; i < bases.length; i++) { - rcbases[i] = simpleComplement(bases[i]); - } - - return rcbases; - } - /** * Reverse complement a String of bases. Preserves ambiguous bases. * @@ -431,17 +329,6 @@ public class BaseUtils { return new String(simpleReverseComplement(bases.getBytes())); } - /** - * Complement a String of bases. Preserves ambiguous bases. - * - * @param bases the String of bases - * @return the complement of the String - */ - @Deprecated - static public String simpleComplement(String bases) { - return new String(simpleComplement(bases.getBytes())); - } - /** * Returns the uppercased version of the bases * @@ -543,82 +430,4 @@ public class BaseUtils { return randomBaseIndex; } - - /** - * Return a random base (A, C, G, T). 
- * - * @return a random base (A, C, G, T) - */ - @Deprecated - static public byte getRandomBase() { - return getRandomBase('.'); - } - - /** - * Return a random base, excluding some base. - * - * @param excludeBase the base to exclude - * @return a random base, excluding the one specified (A, C, G, T) - */ - @Deprecated - static public byte getRandomBase(char excludeBase) { - return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase))); - } - - /** - * Computes the smallest period >= minPeriod for the specified string. The period is defined as such p, - * that for all i = 0... seq.length-1, seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p). - * The sequence does not have to contain whole number of periods. For instance, "ACACACAC" has a period - * of 2 (it has a period of 4 as well), and so does - * "ACACA"; similarly, smallest periods of "CTCCTC", "CTCCT", and "CTCC" are all equal to 3. The "trivial" period is - * the length of the string itself, and it will always be returned if no smaller period can be found in the specified period range - * or if specified minPeriod is greater than the sequence length. - * - * @param seq - * @return - */ - public static int sequencePeriod(byte[] seq, int minPeriod) { - int period = (minPeriod > seq.length ? seq.length : minPeriod); - // we assume that bases [0,period-1] repeat themselves and check this assumption - // until we find correct period - - for (int pos = period; pos < seq.length; pos++) { - - int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period' - // if our current hypothesis holds, base[pos] must be the same as base[offset] - - if (Character.toUpperCase(seq[pos]) != Character.toUpperCase(seq[offset])) { - - // period we have been trying so far does not work. - // two possibilities: - // A) offset = 0, i.e. 
current position pos must be start of the next repeat, but it is not; - // in this case only bases from start up to the current one, inclusive, may form a repeat, if at all; - // so period is at least pos+1 (remember, pos is 0-based), then on the next loop re-entrance - // pos will be autoincremented and we will be checking next base - // B) offset != 0, i.e. the current base breaks the repeat, but maybe it starts a new one? - // hence we should first check if it matches the first base of the sequence, and to do that - // we set period to pos (thus trying the hypothesis that bases from start up to the current one, - // non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base - // on the next loop re-entrance after pos is autoincremented) - if (offset == 0) - period = pos + 1; - else - period = pos--; - - } - } - return period; - } } - -/* code snippet for testing sequencePeriod(): - * - * String str = "CCTTG"; - int p = 0; - System.out.print("Periods of " + str +" are:"); - while ( p < str.length() ) { - p = sequencePeriod(str, p+1); - System.out.print(" "+p); - } - System.out.println(); System.exit(1); -*/ diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java index db54851dd..0e8a3ea70 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java +++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java @@ -29,6 +29,7 @@ import net.sf.picard.reference.FastaSequenceIndex; import net.sf.picard.reference.IndexedFastaSequenceFile; import net.sf.picard.reference.ReferenceSequence; import net.sf.samtools.SAMSequenceRecord; +import net.sf.samtools.util.StringUtil; import org.apache.log4j.Priority; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -40,6 +41,8 @@ import 
java.util.Arrays; * A caching version of the IndexedFastaSequenceFile that avoids going to disk as often as the raw indexer. * * Thread-safe! Uses a thread-local cache + * + * Automatically upper-cases the bases coming in, unless the flag preserveCase is explicitly set */ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class); @@ -54,10 +57,15 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { public static final long DEFAULT_CACHE_SIZE = 1000000; /** The cache size of this CachingIndexedFastaSequenceFile */ - final long cacheSize; + private final long cacheSize; /** When we have a cache miss at position X, we load sequence from X - cacheMissBackup */ - final long cacheMissBackup; + private final long cacheMissBackup; + + /** + * If true, we will preserve the case of the original base in the genome, rather than converting it to upper case + */ + private final boolean preserveCase; // information about checking efficiency long cacheHits = 0; @@ -84,37 +92,17 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { /** * Same as general constructor but allows one to override the default cacheSize * - * @param fasta - * @param index - * @param cacheSize + * @param fasta the file we will read our FASTA sequence from.
+ * @param index the index of the fasta file, used for efficient random access + * @param cacheSize the size in bp of the cache we will use for this reader + * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case */ - public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) { + public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize, final boolean preserveCase) { super(fasta, index); if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0"); this.cacheSize = cacheSize; this.cacheMissBackup = Math.max(cacheSize / 1000, 1); - } - - /** - * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. - * - * @param fasta The file to open. - * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk. - * @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found. - */ - public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) { - this(fasta, index, DEFAULT_CACHE_SIZE); - } - - /** - * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. - * - * Looks for a index file for fasta on disk - * - * @param fasta The file to open. - */ - public CachingIndexedFastaSequenceFile(final File fasta) throws FileNotFoundException { - this(fasta, DEFAULT_CACHE_SIZE); + this.preserveCase = preserveCase; } /** @@ -124,12 +112,76 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { * Uses provided cacheSize instead of the default * * @param fasta The file to open. 
+ * @param cacheSize the size of the cache to use in this CachingIndexedFastaReader, must be >= 0 + * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case */ - public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize ) throws FileNotFoundException { + public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize, final boolean preserveCase ) throws FileNotFoundException { super(fasta); if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0"); this.cacheSize = cacheSize; this.cacheMissBackup = Math.max(cacheSize / 1000, 1); + this.preserveCase = preserveCase; + } + +// /** +// * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. +// * +// * @param fasta The file to open. +// * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk. +// * @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found. +// */ +// public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) { +// this(fasta, index, DEFAULT_CACHE_SIZE); +// } + + /** + * Same as general constructor but allows one to override the default cacheSize + * + * By default, this CachingIndexedFastaReader converts all incoming bases to upper case + * + * @param fasta the file we will read our FASTA sequence from. + * @param index the index of the fasta file, used for efficient random access + * @param cacheSize the size in bp of the cache we will use for this reader + */ + public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) { + this(fasta, index, cacheSize, false); + } + + /** + * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. + * + * Looks for a index file for fasta on disk. 
+ * This CachingIndexedFastaReader will convert all FASTA bases to upper cases under the hood + * + * @param fasta The file to open. + */ + public CachingIndexedFastaSequenceFile(final File fasta) throws FileNotFoundException { + this(fasta, false); + } + + /** + * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. + * + * Looks for a index file for fasta on disk + * + * @param fasta The file to open. + * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case + */ + public CachingIndexedFastaSequenceFile(final File fasta, final boolean preserveCase) throws FileNotFoundException { + this(fasta, DEFAULT_CACHE_SIZE, preserveCase); + } + + /** + * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. + * + * Looks for a index file for fasta on disk + * Uses provided cacheSize instead of the default + * + * @param fasta The file to open. + * @param cacheSize the size of the cache to use in this CachingIndexedFastaReader, must be >= 0 + */ + public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize ) throws FileNotFoundException { + this(fasta, cacheSize, false); } /** @@ -168,6 +220,25 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { return cacheSize; } + /** + * Is this CachingIndexedFastaReader keeping the original case of bases in the fasta, or is + * everything being made upper case? + * + * @return true if the bases coming from this reader are in the original case in the fasta, false if they are all upper cased + */ + public boolean isPreservingCase() { + return preserveCase; + } + + /** + * Is uppercasing bases? + * + * @return true if bases coming from this CachingIndexedFastaSequenceFile are all upper cased, false if this reader are in the original case in the fasta + */ + public boolean isUppercasingBases() { + return ! 
isPreservingCase(); + } + /** * Gets the subsequence of the contig in the range [start,stop] * @@ -177,8 +248,10 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { * @param contig Contig whose subsequence to retrieve. * @param start inclusive, 1-based start of region. * @param stop inclusive, 1-based stop of region. - * @return The partial reference sequence associated with this range. + * @return The partial reference sequence associated with this range. If preserveCase is false, then + * all of the bases in the ReferenceSequence returned by this method will be upper cased. */ + @Override public ReferenceSequence getSubsequenceAt( final String contig, final long start, final long stop ) { final ReferenceSequence result; final Cache myCache = cache.get(); @@ -186,6 +259,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { if ( (stop - start) >= cacheSize ) { cacheMisses++; result = super.getSubsequenceAt(contig, start, stop); + if ( ! preserveCase ) StringUtil.toUpperCase(result.getBases()); } else { // todo -- potential optimization is to check if contig.name == contig, as this in generally will be true SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig); @@ -198,7 +272,9 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { myCache.start = Math.max(start - cacheMissBackup, 0); myCache.stop = Math.min(start + cacheSize + cacheMissBackup, contigInfo.getSequenceLength()); myCache.seq = super.getSubsequenceAt(contig, myCache.start, myCache.stop); - //System.out.printf("New cache at %s %d-%d%n", contig, cacheStart, cacheStop); + + // convert all of the bases in the sequence to upper case if we aren't preserving cases + if ( ! 
preserveCase ) StringUtil.toUpperCase(myCache.seq.getBases()); } else { cacheHits++; } @@ -215,8 +291,10 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile { } } + // for debugging -- print out our efficiency if requested if ( PRINT_EFFICIENCY && (getCacheHits() + getCacheMisses()) % PRINT_FREQUENCY == 0 ) printEfficiency(Priority.INFO); + return result; } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java index 736162300..bcd846184 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java @@ -30,6 +30,7 @@ import java.util.concurrent.Executors; public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { private File simpleFasta = new File(publicTestDir + "/exampleFASTA.fasta"); private static final int STEP_SIZE = 1; + private final static boolean DEBUG = false; //private static final List QUERY_SIZES = Arrays.asList(1); private static final List QUERY_SIZES = Arrays.asList(1, 10, 100); @@ -53,9 +54,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { return cacheSizeRequested == -1 ? CachingIndexedFastaSequenceFile.DEFAULT_CACHE_SIZE : cacheSizeRequested; } - @Test(dataProvider = "fastas", enabled = true) + @Test(dataProvider = "fastas", enabled = true && ! 
DEBUG) public void testCachingIndexedFastaReaderSequential1(File fasta, int cacheSize, int querySize) throws FileNotFoundException { - final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize)); + final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true); SAMSequenceRecord contig = caching.getSequenceDictionary().getSequence(0); logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d", @@ -64,6 +65,8 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { } private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException { + Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers"); + final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta); SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0); @@ -92,10 +95,10 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { } // Tests grabbing sequences around a middle cached value. - @Test(dataProvider = "fastas", enabled = true) + @Test(dataProvider = "fastas", enabled = true && ! 
DEBUG) public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException { final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta); - final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize)); + final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true); SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0); @@ -123,11 +126,6 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { @DataProvider(name = "ParallelFastaTest") public Object[][] createParallelFastaTest() { List params = new ArrayList(); -// for ( int nt : Arrays.asList(1, 2, 3) ) { -// for ( int cacheSize : CACHE_SIZES ) { -// params.add(new Object[]{simpleFasta, cacheSize, 10, nt}); -// } -// } for ( File fasta : Arrays.asList(simpleFasta) ) { for ( int cacheSize : CACHE_SIZES ) { @@ -143,9 +141,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { } - @Test(dataProvider = "ParallelFastaTest", enabled = true, timeOut = 60000) + @Test(dataProvider = "ParallelFastaTest", enabled = true && ! 
DEBUG, timeOut = 60000) public void testCachingIndexedFastaReaderParallel(final File fasta, final int cacheSize, final int querySize, final int nt) throws FileNotFoundException, InterruptedException { - final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize)); + final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true); logger.warn(String.format("Parallel caching index fasta reader test cacheSize %d querySize %d nt %d", caching.getCacheSize(), querySize, nt)); for ( int iterations = 0; iterations < 1; iterations++ ) { @@ -163,4 +161,49 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest { executor.shutdownNow(); } } + + // make sure some bases are lower case and some are upper case + @Test(enabled = true) + public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException { + final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA)); + final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true); + final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA)); + + int nMixedCase = 0; + for ( SAMSequenceRecord contig : original.getSequenceDictionary().getSequences() ) { + nMixedCase += testCases(original, casePreserving, allUpper, contig.getSequenceName(), -1, -1); + + final int step = 100; + for ( int lastPos = step; lastPos < contig.getSequenceLength(); lastPos += step ) { + testCases(original, casePreserving, allUpper, contig.getSequenceName(), lastPos - step, lastPos); + } + } + + Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file. 
Unexpected test state"); + } + + private int testCases(final IndexedFastaSequenceFile original, + final IndexedFastaSequenceFile casePreserving, + final IndexedFastaSequenceFile allUpper, + final String contig, final int start, final int stop ) { + final String orig = fetchBaseString(original, contig, start, stop); + final String keptCase = fetchBaseString(casePreserving, contig, start, stop); + final String upperCase = fetchBaseString(allUpper, contig, start, stop).toUpperCase(); + + final String origToUpper = orig.toUpperCase(); + if ( ! orig.equals(origToUpper) ) { + Assert.assertEquals(keptCase, orig, "Case preserving operation not equal to the original case for contig " + contig); + Assert.assertEquals(upperCase, origToUpper, "All upper case reader not equal to the uppercase of original case for contig " + contig); + return 1; + } else { + return 0; + } + } + + private String fetchBaseString(final IndexedFastaSequenceFile reader, final String contig, final int start, final int stop) { + if ( start == -1 ) + return new String(reader.getSequence(contig).getBases()); + else + return new String(reader.getSubsequenceAt(contig, start, stop).getBases()); + } } From 1444cd753bfcdb0084afdadae3e7c45d257d8f91 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 30 Oct 2012 16:58:55 -0400 Subject: [PATCH 05/26] Bugfix for GSA-647 HaplotypeCaller misses good variant because the active region doesn't trigger for an exome -- The logic for determining active regions was a bit broken in the HC when intervals were used in the system -- TraverseActiveRegions now uses the AllLocus view, since we always want to see all reference sites, not just those covered. Simplifies logic of TAR -- Non-overlapping intervals are always treated as separate objects for determining active / inactive state. This means that each exon will stand on its own when deciding if it should be active or inactive -- Misc. 
cleanup, docs of some TAR infrastructure to make it safer and easier to debug in the future. -- Committing the SingleExomeCalling script that I used to find this problem, and will continue to use in evaluating calling of a single exome with the HC -- Make sure to get all of the reads into the set of potentially active reads, even for genomic locations that themselves don't overlap the engine intervals but may have reads that overlap the regions -- Remove excessively expensive calls to check bases are upper cased in ReferenceContext -- Update md5s after a lot of manual review and discussion with Ryan --- .../haplotypecaller/HaplotypeCaller.java | 16 +- .../HaplotypeCallerIntegrationTest.java | 6 +- .../sting/gatk/contexts/ReferenceContext.java | 10 +- .../traversals/TraverseActiveRegions.java | 199 +++++++++--------- .../targets/FindCoveredIntervals.java | 2 +- .../utils/activeregion/ActiveRegion.java | 20 +- .../utils/activeregion/ActivityProfile.java | 54 ++++- .../activeregion/ActivityProfileResult.java | 52 ++++- .../activeregion/ActivityProfileUnitTest.java | 2 +- 9 files changed, 226 insertions(+), 135 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 5aba23faa..a185ba6af 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; -import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -41,7 +40,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; 
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; -import org.broadinstitute.sting.gatk.walkers.genotyper.*; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; +import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; import org.broadinstitute.sting.utils.clipping.ReadClipper; @@ -212,7 +214,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem private VariantAnnotatorEngine annotationEngine; // fasta reference reader to supplement the edges of the reference sequence - private IndexedFastaSequenceFile referenceReader; + private CachingIndexedFastaSequenceFile referenceReader; // reference base padding size private static final int REFERENCE_PADDING = 900; @@ -324,15 +326,15 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } } if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) { - return new ActivityProfileResult(1.0); + return new ActivityProfileResult(ref.getLocus(), 1.0); } } if( USE_ALLELES_TRIGGER ) { - return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); + return new ActivityProfileResult( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 
1.0 : 0.0 ); } - if( context == null ) { return new ActivityProfileResult(0.0); } + if( context == null ) { return new ActivityProfileResult(ref.getLocus(), 0.0); } final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied noCall.add(Allele.NO_CALL); @@ -369,7 +371,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); - return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); + return new ActivityProfileResult( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? 
ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 86f3748ce..d00f5b61d 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "d86fae2d1b504b422b7b0cfbbdecc2c4"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -69,8 +69,8 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { - final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939")); + final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f6326adfdf5bc147626b30a89ce06d56")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } diff --git 
a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java index c8bf1e3e8..34627b973 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java @@ -78,7 +78,7 @@ public class ReferenceContext { * * @return */ - @Ensures({"result != null", "BaseUtils.isUpperCase(result)"}) + @Ensures({"result != null"}) public byte[] getBases(); } @@ -143,6 +143,9 @@ public class ReferenceContext { private void fetchBasesFromProvider() { if ( basesCache == null ) { basesCache = basesProvider.getBases(); + + // must be an assertion that only runs when the bases are fetch to run in a reasonable amount of time + assert BaseUtils.isUpperCase(basesCache); } } @@ -172,7 +175,6 @@ public class ReferenceContext { * Get the base at the given locus. * @return The base at the given locus from the reference. */ - @Ensures("BaseUtils.isUpperCase(result)") public byte getBase() { return getBases()[(locus.getStart() - window.getStart())]; } @@ -182,7 +184,7 @@ public class ReferenceContext { * @return All bases available. If the window is of size [0,0], the array will * contain only the base at the given locus. */ - @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"}) + @Ensures({"result != null", "result.length > 0"}) public byte[] getBases() { fetchBasesFromProvider(); return basesCache; @@ -191,7 +193,7 @@ public class ReferenceContext { /** * All the bases in the window from the current base forward to the end of the window. 
*/ - @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"}) + @Ensures({"result != null", "result.length > 0"}) public byte[] getForwardBases() { final byte[] bases = getBases(); final int mid = locus.getStart() - window.getStart(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 5d38df0f5..a2c37944a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfile; import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; @@ -46,99 +45,127 @@ public class TraverseActiveRegions extends TraversalEngine activeRegions = new LinkedList(); + ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); - ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); + ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); - // We keep processing while the next reference location is within the interval - GenomeLoc prevLoc = null; - while( locusView.hasNext() ) { - final AlignmentContext locus = locusView.next(); - GenomeLoc location = locus.getLocation(); + // We keep processing while the next reference location is within the interval + GenomeLoc prevLoc = null; + while( locusView.hasNext() ) { + final 
AlignmentContext locus = locusView.next(); + final GenomeLoc location = locus.getLocation(); - if(prevLoc != null) { - // fill in the active / inactive labels from the stop of the previous location to the start of this location - // TODO refactor to separate function - for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) { - final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii); - if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) { - profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 )); - } - } + // Grab all the previously unseen reads from this pileup and add them to the massive read list + // Note that this must occur before we leave because we are outside the intervals because + // reads may occur outside our intervals but overlap them in the future + // TODO -- this whole HashSet logic should be changed to a linked list of reads with + // TODO -- subsequent pass over them to find the ones overlapping the active regions + for( final PileupElement p : locus.getBasePileup() ) { + final GATKSAMRecord read = p.getRead(); + if( !myReads.contains(read) ) { + myReads.add(read); } - dataProvider.getShard().getReadMetrics().incrementNumIterations(); - - // create reference context. Note that if we have a pileup of "extended events", the context will - // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). 
- final ReferenceContext refContext = referenceView.getReferenceContext(location); - - // Iterate forward to get all reference ordered data covering this location - final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); - - // Call the walkers isActive function for this locus and add them to the list to be integrated later - if( initialIntervals == null || initialIntervals.overlaps( location ) ) { - profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location)); - } - - // Grab all the previously unseen reads from this pileup and add them to the massive read list - for( final PileupElement p : locus.getBasePileup() ) { - final GATKSAMRecord read = p.getRead(); - if( !myReads.contains(read) ) { - myReads.add(read); - } - - // If this is the last pileup for this shard calculate the minimum alignment start so that we know - // which active regions in the work queue are now safe to process - minStart = Math.min(minStart, read.getAlignmentStart()); - } - - prevLoc = location; - - printProgress(locus.getLocation()); + // If this is the last pileup for this shard calculate the minimum alignment start so that we know + // which active regions in the work queue are now safe to process + minStart = Math.min(minStart, read.getAlignmentStart()); } - updateCumulativeMetrics(dataProvider.getShard()); + // skip this location -- it's not part of our engine intervals + // TODO -- this is dangerously slow with current overlaps implementation : GSA-649 / GenomeLocSortedSet.overlaps is crazy slow + if ( outsideEngineIntervals(location) ) + continue; - // Take the individual isActive calls and integrate them into contiguous active regions and - // add these blocks of work to the work queue - // band-pass filter the list of isActive probabilities and turn into active regions - final ActivityProfile bandPassFiltered = profile.bandPassFilter(); - final List activeRegions = 
bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ); - - // add active regions to queue of regions to process - // first check if can merge active regions over shard boundaries - if( !activeRegions.isEmpty() ) { - if( !workQueue.isEmpty() ) { - final ActiveRegion last = workQueue.getLast(); - final ActiveRegion first = activeRegions.get(0); - if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { - workQueue.removeLast(); - activeRegions.remove(first); - workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) ); - } - } - workQueue.addAll( activeRegions ); + if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) { + // we've move across some interval boundary, restart profile + profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); } - logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + dataProvider.getShard().getReadMetrics().incrementNumIterations(); - // now go and process all of the active regions - sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig()); + // create reference context. Note that if we have a pileup of "extended events", the context will + // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). 
+ final ReferenceContext refContext = referenceView.getReferenceContext(location); + + // Iterate forward to get all reference ordered data covering this location + final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext); + + // Call the walkers isActive function for this locus and add them to the list to be integrated later + profile.add(walkerActiveProb(walker, tracker, refContext, locus, location)); + + prevLoc = location; + + printProgress(locus.getLocation()); } + updateCumulativeMetrics(dataProvider.getShard()); + + if ( ! profile.isEmpty() ) + incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize); + + // add active regions to queue of regions to process + // first check if can merge active regions over shard boundaries + if( !activeRegions.isEmpty() ) { + if( !workQueue.isEmpty() ) { + final ActiveRegion last = workQueue.getLast(); + final ActiveRegion first = activeRegions.get(0); + if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) { + workQueue.removeLast(); + activeRegions.remove(first); + workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) ); + } + } + workQueue.addAll( activeRegions ); + } + + logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + + // now go and process all of the active regions + sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig()); + return sum; } + /** + * Is the loc outside of the intervals being requested for processing by the GATK? + * @param loc + * @return + */ + private boolean outsideEngineIntervals(final GenomeLoc loc) { + return engine.getIntervals() != null && ! 
engine.getIntervals().overlaps(loc); + } + + /** + * Take the individual isActive calls and integrate them into contiguous active regions and + * add these blocks of work to the work queue + * band-pass filter the list of isActive probabilities and turn into active regions + * + * @param profile + * @param activeRegions + * @param activeRegionExtension + * @param maxRegionSize + * @return + */ + private ActivityProfile incorporateActiveRegions(final ActivityProfile profile, + final List activeRegions, + final int activeRegionExtension, + final int maxRegionSize) { + if ( profile.isEmpty() ) + throw new IllegalStateException("trying to incorporate an empty active profile " + profile); + + final ActivityProfile bandPassFiltered = profile.bandPassFilter(); + activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize )); + return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() ); + } + // -------------------------------------------------------------------------------- // @@ -150,7 +177,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine walker, final LocusShardDataProvider dataProvider ) { - final DataSource dataSource = WalkerManager.getWalkerDataSource(walker); - if( dataSource == DataSource.READS ) - return new CoveredLocusView(dataProvider); - else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers ) - return new AllLocusView(dataProvider); - else if( dataSource == DataSource.REFERENCE_ORDERED_DATA ) - return new RodLocusView(dataProvider); - else - throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource); - } - /** * Special function called in LinearMicroScheduler to empty out the work queue. 
* Ugly for now but will be cleaned up when we push this functionality more into the engine diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index e17c6cdb7..85b7159e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -57,7 +57,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker { int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); // note the linear probability scale - return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1)); + return new ActivityProfileResult(ref.getLocus(), Math.min(depth / coverageThreshold, 1)); } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index decc54d47..0d12d53cc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -1,11 +1,11 @@ package org.broadinstitute.sting.utils.activeregion; -import net.sf.picard.reference.IndexedFastaSequenceFile; -import net.sf.samtools.util.StringUtil; +import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.clipping.ReadClipper; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.ArrayList; @@ -54,27 +54,31 @@ public class ActiveRegion implements HasGenomeLocation { public ArrayList getReads() { 
return reads; } - public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader ) { return getActiveRegionReference(referenceReader, 0); } - public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) { return getReference( referenceReader, padding, extendedLoc ); } - public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader ) { return getFullReference(referenceReader, 0); } - public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) { + @Requires("referenceReader.isUppercasingBases()") + public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) { return getReference( referenceReader, padding, fullExtentReferenceLoc ); } - private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) { + @Requires("referenceReader.isUppercasingBases()") + private byte[] getReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) { final byte[] reference = referenceReader.getSubsequenceAt( genomeLoc.getContig(), Math.max(1, genomeLoc.getStart() - padding), Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases(); - StringUtil.toUpperCase(reference); return reference; } diff --git 
a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 73f3cc487..e96eb843d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -24,11 +24,11 @@ package org.broadinstitute.sting.utils.activeregion; +import com.google.java.contract.Requires; import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.Collections; @@ -45,6 +45,7 @@ public class ActivityProfile { final GenomeLocParser parser; final boolean presetRegions; GenomeLoc regionStartLoc = null; + GenomeLoc regionStopLoc = null; final List isActiveList; private static final int FILTER_SIZE = 80; private static final double[] GaussianKernel; @@ -71,19 +72,49 @@ public class ActivityProfile { this.regionStartLoc = regionStartLoc; } - public void add(final GenomeLoc loc, final ActivityProfileResult result) { - if ( loc.size() != 1 ) - throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" ); - isActiveList.add(result); - if( regionStartLoc == null ) { + @Override + public String toString() { + return "ActivityProfile{" + + "start=" + regionStartLoc + + ", stop=" + regionStopLoc + + '}'; + } + + /** + * Add the next ActivityProfileResult to this profile. 
+ * + * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown + * + * @param result a well-formed ActivityProfileResult result to incorporate into this profile + */ + @Requires("result != null") + public void add(final ActivityProfileResult result) { + final GenomeLoc loc = result.getLoc(); + + if ( regionStartLoc == null ) { regionStartLoc = loc; + regionStopLoc = loc; + } else { + if ( regionStopLoc.getStart() != loc.getStart() - 1 ) + throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc ); + regionStopLoc = loc; } + + isActiveList.add(result); } public int size() { return isActiveList.size(); } + public boolean isEmpty() { + return isActiveList.isEmpty(); + } + + public boolean hasPresetRegions() { + return presetRegions; + } + /** * Band pass this ActivityProfile, producing a new profile that's band pass filtered * @return a new ActivityProfile that's the band-pass filtered version of this profile @@ -104,14 +135,21 @@ public class ActivityProfile { } iii++; } - final double[] filteredProbArray = new double[activeProbArray.length]; + + final double[] filteredProbArray; if( !presetRegions ) { + // if we aren't using preset regions, actually apply the band pass filter for activeProbArray into filteredProbArray + filteredProbArray = new double[activeProbArray.length]; for( iii = 0; iii < activeProbArray.length; iii++ ) { final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); } + } else { + // otherwise we simply use the activeProbArray directly + filteredProbArray = activeProbArray; } + iii = 0; for( 
final double prob : filteredProbArray ) { final ActivityProfileResult result = isActiveList.get(iii++); @@ -119,6 +157,7 @@ public class ActivityProfile { result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE; result.resultValue = null; } + return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc); } @@ -166,6 +205,7 @@ public class ActivityProfile { private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); } + private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List returnList) { if( !isActive || curEnd - curStart < maxRegionSize ) { final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java index 8dc29aa3c..273c2e785 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java @@ -1,12 +1,16 @@ package org.broadinstitute.sting.utils.activeregion; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.GenomeLoc; + /** * Created with IntelliJ IDEA. 
* User: rpoplin * Date: 7/27/12 */ - public class ActivityProfileResult { + private GenomeLoc loc; public double isActiveProb; public ActivityProfileResultState resultState; public Number resultValue; @@ -16,16 +20,52 @@ public class ActivityProfileResult { HIGH_QUALITY_SOFT_CLIPS } - public ActivityProfileResult( final double isActiveProb ) { - this.isActiveProb = isActiveProb; - this.resultState = ActivityProfileResultState.NONE; - this.resultValue = null; + /** + * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb + * + * @param loc the position of the result profile (for debugging purposes) + * @param isActiveProb the probability of being active (between 0 and 1) + */ + @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"}) + public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb ) { + this(loc, isActiveProb, ActivityProfileResultState.NONE, null); } - public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + /** + * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb that maintains some + * information about the result state and value (TODO RYAN -- what do these mean?) 
+ * + * @param loc the position of the result profile (for debugging purposes) + * @param isActiveProb the probability of being active (between 0 and 1) + */ + @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"}) + public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + // make sure the location of that activity profile is 1 + if ( loc.size() != 1 ) + throw new IllegalArgumentException("Location for an ActivityProfileResult must have to size 1 bp but saw " + loc); + + this.loc = loc; this.isActiveProb = isActiveProb; this.resultState = resultState; this.resultValue = resultValue; } + /** + * Get the genome loc associated with the ActivityProfileResult + * @return the location of this result + */ + @Ensures("result != null") + public GenomeLoc getLoc() { + return loc; + } + + @Override + public String toString() { + return "ActivityProfileResult{" + + "loc=" + loc + + ", isActiveProb=" + isActiveProb + + ", resultState=" + resultState + + ", resultValue=" + resultValue + + '}'; + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index f7c564c74..57dd19888 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -123,7 +123,7 @@ public class ActivityProfileUnitTest extends BaseTest { for ( int i = 0; i < cfg.probs.size(); i++ ) { double p = cfg.probs.get(i); GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(loc, new ActivityProfileResult(p)); + profile.add(new ActivityProfileResult(loc, p)); } Assert.assertEquals(profile.regionStartLoc, 
genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); From 872abddfcec844f2217fafb3ac3f4451b9cc844b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 31 Oct 2012 19:52:25 -0400 Subject: [PATCH 07/26] Add custom TestNGTestTransformer that adds a maximum test runtime of 10 minutes to all testng tests -- Closes GSA-494 / Add maximum runtime for integration tests, running them in timeout thread -- Needed to debug locking issues -- Needed to debug excessively long running integrationtests -- Added build.xml maximum runtime for all testng tests of 10 hours. We will ultimately fail the build if it goes on for more than 10 hours --- build.xml | 4 +- .../sting/TestNGTestTransformer.java | 37 +++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java diff --git a/build.xml b/build.xml index c6b1afc56..7702be7e4 100644 --- a/build.xml +++ b/build.xml @@ -1174,14 +1174,16 @@ + + listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter"> diff --git a/public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java b/public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java new file mode 100644 index 000000000..6a1a37de9 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java @@ -0,0 +1,37 @@ +package org.broadinstitute.sting; + +import org.apache.log4j.Logger; +import org.testng.IAnnotationTransformer; +import org.testng.annotations.ITestAnnotation; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; + +/** + * Provide default @Test values for GATK testng tests. + * + * Currently only sets the maximum runtime to 10 minutes, if it's not been specified. 
+ * + * See http://beust.com/weblog/2006/10/18/annotation-transformers-in-java/ + * + * @author depristo + * @since 10/31/12 + * @version 0.1 + */ +public class TestNGTestTransformer implements IAnnotationTransformer { + public static final long DEFAULT_TIMEOUT = 1000 * 60 * 10; // 10 minutes max per test + + final static Logger logger = Logger.getLogger(TestNGTestTransformer.class); + + public void transform(ITestAnnotation annotation, + Class testClass, + Constructor testConstructor, + Method testMethod) + { + if ( annotation.getTimeOut() == 0 ) { + logger.warn("test " + testMethod.toString() + " has no specified timeout, adding default timeout " + DEFAULT_TIMEOUT / 1000 / 60 + " minutes"); + annotation.setTimeOut(DEFAULT_TIMEOUT); + } + } +} + From 386b45e94db43e5e68b2b0fec3cac8fd97d1d6fc Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 1 Nov 2012 15:44:41 -0400 Subject: [PATCH 08/26] This VE eval module isn't useful anymore. --- .../evaluators/VariantQualityScore.java | 249 ------------------ 1 file changed, 249 deletions(-) delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java deleted file mode 100755 index 347ca56b8..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to 
permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; - -import java.util.ArrayList; -import java.util.HashMap; - -/** - * @author rpoplin - * @since Apr 6, 2010 - */ - -//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score") -@Deprecated -public class VariantQualityScore { - // TODO - this should really be a stratification - -// public class VariantQualityScore extends VariantEvaluator { -// -// // a mapping from quality score histogram bin to Ti/Tv ratio -// @DataPoint(description = "the Ti/Tv ratio broken out by variant quality") -// 
TiTvStats titvStats = null; -// -// @DataPoint(description = "average variant quality for each allele count") -// AlleleCountStats alleleCountStats = null; -// -// static class TiTvStats extends TableType { -// final static int NUM_BINS = 20; -// final HashMap> qualByIsTransition = new HashMap>(); // A hashMap holds all the qualities until we are able to bin them appropriately -// final long transitionByQuality[] = new long[NUM_BINS]; -// final long transversionByQuality[] = new long[NUM_BINS]; -// final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out -// -// public Object[] getRowKeys() { -// return new String[]{"sample"}; -// } -// -// public Object[] getColumnKeys() { -// final String columnKeys[] = new String[NUM_BINS]; -// for( int iii = 0; iii < NUM_BINS; iii++ ) { -// columnKeys[iii] = "titvBin" + iii; -// } -// return columnKeys; -// } -// -// public String getCell(int x, int y) { -// return String.valueOf(titvByQuality[y]); -// } -// -// public String toString() { -// StringBuffer returnString = new StringBuffer(); -// // output the ti/tv array -// returnString.append("titvByQuality: "); -// for( int iii = 0; iii < NUM_BINS; iii++ ) { -// returnString.append(titvByQuality[iii]); -// returnString.append(" "); -// } -// return returnString.toString(); -// } -// -// public void incrValue( final double qual, final boolean isTransition ) { -// final Integer qualKey = Math.round((float) qual); -// final long numTransition = (isTransition ? 1L : 0L); -// final long numTransversion = (isTransition ? 
0L : 1L); -// if( qualByIsTransition.containsKey(qualKey) ) { -// Pair transitionPair = qualByIsTransition.get(qualKey); -// transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion); -// qualByIsTransition.put(qualKey, transitionPair); -// } else { -// qualByIsTransition.put(qualKey, new Pair(numTransition,numTransversion)); -// } -// } -// -// public void organizeTiTvTables() { -// for( int iii = 0; iii < NUM_BINS; iii++ ) { -// transitionByQuality[iii] = 0L; -// transversionByQuality[iii] = 0L; -// titvByQuality[iii] = 0.0; -// } -// -// int maxQual = 0; -// -// // Calculate the maximum quality score in order to normalize and histogram -// for( final Integer qual : qualByIsTransition.keySet() ) { -// if( qual > maxQual ) { -// maxQual = qual; -// } -// } -// -// final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); -// -// for( final Integer qual : qualByIsTransition.keySet() ) { -// final int index = (int)Math.floor( ((double) qual) / binSize ); -// if( index >= 0 ) { // BUGBUG: why is there overflow here? 
-// Pair transitionPair = qualByIsTransition.get(qual); -// transitionByQuality[index] += transitionPair.getFirst(); -// transversionByQuality[index] += transitionPair.getSecond(); -// } -// } -// -// for( int iii = 0; iii < NUM_BINS; iii++ ) { -// if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio -// titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]); -// } else { -// titvByQuality[iii] = 0.0; -// } -// } -// -// } -// } -// -// class AlleleCountStats extends TableType { -// final HashMap> qualityListMap = new HashMap>(); -// final HashMap qualityMap = new HashMap(); -// -// public Object[] getRowKeys() { -// final int NUM_BINS = qualityListMap.keySet().size(); -// final String rowKeys[] = new String[NUM_BINS]; -// int iii = 0; -// for( final Integer key : qualityListMap.keySet() ) { -// rowKeys[iii] = "AC" + key; -// iii++; -// } -// return rowKeys; -// -// } -// -// public Object[] getColumnKeys() { -// return new String[]{"alleleCount","avgQual"}; -// } -// -// public String getCell(int x, int y) { -// int iii = 0; -// for( final Integer key : qualityListMap.keySet() ) { -// if(iii == x) { -// if(y == 0) { return String.valueOf(key); } -// else { return String.valueOf(qualityMap.get(key)); } -// } -// iii++; -// } -// return null; -// } -// -// public String toString() { -// String returnString = ""; -// // output the quality map -// returnString += "AlleleCountStats: "; -// //for( int iii = 0; iii < NUM_BINS; iii++ ) { -// // returnString += titvByQuality[iii] + " "; -// //} -// return returnString; -// } -// -// public void incrValue( final double qual, final int alleleCount ) { -// ArrayList list = qualityListMap.get(alleleCount); -// if(list==null) { list = new ArrayList(); } -// list.add(qual); -// qualityListMap.put(alleleCount, list); -// } -// -// public void organizeAlleleCountTables() { -// for( final Integer key 
: qualityListMap.keySet() ) { -// final ArrayList list = qualityListMap.get(key); -// double meanQual = 0.0; -// final double numQuals = (double)list.size(); -// for( Double qual : list ) { -// meanQual += qual / numQuals; -// } -// qualityMap.put(key, meanQual); -// } -// } -// } -// -// //public VariantQualityScore(VariantEvalWalker parent) { -// //super(parent); -// //} -// -// public String getName() { -// return "VariantQualityScore"; -// } -// -// public int getComparisonOrder() { -// return 1; // we only need to see each eval track -// } -// -// public String toString() { -// return getName(); -// } -// -// public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { -// final String interesting = null; -// -// if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) -// if( titvStats == null ) { titvStats = new TiTvStats(); } -// titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); -// -// if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); } -// int alternateAlleleCount = 0; -// for (final Allele a : eval.getAlternateAlleles()) { -// alternateAlleleCount += eval.getCalledChrCount(a); -// } -// alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount); -// } -// -// return interesting; // This module doesn't capture any interesting sites, so return null -// } -// -// public void finalizeEvaluation() { -// if( titvStats != null ) { -// titvStats.organizeTiTvTables(); -// } -// if( alleleCountStats != null ) { -// alleleCountStats.organizeAlleleCountTables(); -// } -// } -} \ No newline at end of file From 6185e8c43255d7213f4d4c464c71ff3b5e1a09d4 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 1 Nov 2012 17:48:58 -0400 Subject: [PATCH 09/26] Allow large-scale tests 5 hours each to run --- 
.../walkers/genotyper/UnifiedGenotyperLargeScaleTest.java | 6 +++--- .../gatk/walkers/indels/IndelRealignerLargeScaleTest.java | 4 ++-- .../indels/RealignerTargetCreatorLargeScaleTest.java | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java index 109088875..c5a5dcc21 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java @@ -7,7 +7,7 @@ import java.util.ArrayList; public class UnifiedGenotyperLargeScaleTest extends WalkerTest { - @Test + @Test( timeOut = 18000000 ) public void testUnifiedGenotyperWholeGenome() { WalkerTestSpec spec = new WalkerTestSpec( "-R " + hg18Reference + @@ -22,7 +22,7 @@ public class UnifiedGenotyperLargeScaleTest extends WalkerTest { executeTest("testUnifiedGenotyperWholeGenome", spec); } - @Test + @Test( timeOut = 18000000 ) public void testUnifiedGenotyperWholeExome() { WalkerTestSpec spec = new WalkerTestSpec( "-R " + hg18Reference + @@ -37,7 +37,7 @@ public class UnifiedGenotyperLargeScaleTest extends WalkerTest { executeTest("testUnifiedGenotyperWholeExome", spec); } - @Test + @Test( timeOut = 18000000 ) public void testUnifiedGenotyperWGParallel() { WalkerTestSpec spec = new WalkerTestSpec( "-R " + hg18Reference + diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java index 4526fc0d7..2dd5a66fd 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java @@ 
-6,7 +6,7 @@ import org.testng.annotations.Test; import java.util.ArrayList; public class IndelRealignerLargeScaleTest extends WalkerTest { - @Test + @Test( timeOut = 18000000 ) public void testHighCoverage() { WalkerTestSpec spec = new WalkerTestSpec( @@ -21,7 +21,7 @@ public class IndelRealignerLargeScaleTest extends WalkerTest { executeTest("testIndelRealignerHighCoverage", spec); } - @Test + @Test( timeOut = 18000000 ) public void testRealigner() { WalkerTestSpec spec1 = new WalkerTestSpec( diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java index 3203ee100..e32afd06b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java @@ -6,7 +6,7 @@ import org.testng.annotations.Test; import java.util.ArrayList; public class RealignerTargetCreatorLargeScaleTest extends WalkerTest { - @Test + @Test( timeOut = 18000000 ) public void testRealignerTargetCreator() { WalkerTestSpec spec1 = new WalkerTestSpec( From f8a0a947e34cc7f3874e6adf43175cda5b2d7e26 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 2 Nov 2012 09:09:32 -0400 Subject: [PATCH 10/26] Critical bugfix for GSA-652 / Multi-threaded VCF -> BCF writing produces invalid intermediate file that fails on merging -- New tribble library now uses 64 bit sizes. The 26K VCF has so much data that low-level tribble block indices where overflowing their int size values. 
This includes a to-be-committed tribble jar that fixes this problem -- See https://jira.broadinstitute.org/browse/GSA-652 -- Minor cleanup of error messages that were useful on the way to solving this monster problem --- .../utils/codecs/vcf/AbstractVCFCodec.java | 2 +- .../writer/BCF2FieldWriterManager.java | 1 - .../{tribble-110.jar => tribble-119.jar} | Bin 313966 -> 319935 bytes .../{tribble-110.xml => tribble-119.xml} | 2 +- 4 files changed, 2 insertions(+), 3 deletions(-) rename settings/repository/org.broad/{tribble-110.jar => tribble-119.jar} (78%) rename settings/repository/org.broad/{tribble-110.xml => tribble-119.xml} (79%) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 043e5e185..652f7f96f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -587,7 +587,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR); if ( nParts != genotypeParts.length ) - generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo); + generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records at " + chr + ":" + pos, lineNo); ArrayList genotypes = new ArrayList(nParts); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java index 7b8224568..9c63a69e7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java @@ -76,7 +76,6 @@ public class BCF2FieldWriterManager { if ( map.containsKey(field) ) throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders"); map.put(field, writer); - if ( logger.isDebugEnabled() ) logger.debug(writer); } // ----------------------------------------------------------------- diff --git a/settings/repository/org.broad/tribble-110.jar b/settings/repository/org.broad/tribble-119.jar similarity index 78% rename from settings/repository/org.broad/tribble-110.jar rename to settings/repository/org.broad/tribble-119.jar index f8e312ad94d7c96eba2e9093a65a2470204911fb..ab456938aba23540eb8aedbac454841f220c6bba 100644 GIT binary patch delta 34968 zcmcG%2Y6J~);GTPnUkEE$w^2iJ){y6QXvHhgqF}dp%V~EfB>Ne2nep)Q@D1N96nk(W&H@Wv;qIbT|U)-@S+U&~?g_m!EVr8=t!l0yF-VZTYHroMs`<5x1v#cy&8?nSjXntq zTvWAq$=q5&!#pR`Q{>bkBE_>u3--7Z({0I=;t`)kdMe_g(@jdIB__pF0$^TMYtT}Y zuAyZnB~p?}eWyLnA$9 zQLZSHN@xg2xzS9ANyBM`AoAQ35eXy+^9)ML_AI`=lgG0xTrNnlMP=ABGRFyu{x9=@ znx`$8S5rN=N{)1C2{P{?5fI#BrsTe-v;m%V&qd1l*NYCGX(Nk0XOiOFalkbKO1QX( z281s)`0A5v|Uil+-D31bpK z=@d%Y6iXebEn=c92$Y6+{uf8>D8o&HGFv2OH6~^wEeBs3fkJ^GU6a~m><60X#kV_p=lEJn^>3p<~dtxHS%Edh?!Y`6U z1EOsPbup-`LB%8t>PFp>q;vm!;MWtqv#Fu}i{6F(Z+i!@fj~2pO3{Vh0N>}z&bj}e z_D*%qn!;WPm!KWi6wbl$@?b{08I>@Nu3<+gyqu>p!r`UJy-2gLiOChwAac+U)%Iko zT;n7}`zFCbA?RU3i%Ez^E}jH6(yA>kUa-D4jKEuSBiWJl6zSRZ6xCE~AaWCpXok0r z#u^>I8k;7Y(^}TU=-Ll3+5pN>t_SbHEYuNpMICOM*sPMk#!3PeMp_EL;m3(cyq25%NvTy?X-3u&w+gxmvYAVAuICs(2{XX6U3`5UBj z#oQlbh`bpcTSdL_-H&cljb<7Uh?p`OiDGC)WAg{lDAi?-8?9Ep4QMq6AzL#V z!1=jHOap?+c9{aP_C=T909Nyj;@D&whg6Nmw-hF7`Ya6n83X#W=Gk{P#`9oXQ~qa- zXmwz8>VGs#fEb>>T?DfL@x*ufW6uG`bF%VbE!I<}8=i!WHd-BIcd0O`k^9&gpDnIxU zP=4u!`8H z{NgL3Py5X(Qa1YUn~H(f{Of@t>7LM85%T+o^}quoX7u&tmesfWLT%^u@P(>sG+$`W z;+aM;OLOo1F!g+B-pT^PHHm0uB{&Tu1)w#E;I5bmVSm zXmDcox^NWeb~l5%ADDmLav^$nzPkSI?%~waq*fGaP%mcYr3Ur(2?YJ9)TA(SnOLa^ 
zfsYzAzy!(+;*V5=27AsQ=;!(6hPU;>gEI%qu=%#g^;=q}BQY2)WP~7w%&lNJc=9q2 zrwF7(dNSPEW25k`IG9*Lx>!jLaTA4!n<-YTqG+U9bTW=&6n|%K%kTtJ;uNRXz~^~2 z2ylCW+|21CJv-ewZicMrpSVpyvX*CSAEY)&1^$Nj7gqpCWdKNg9EEvt6@sb;;-}S< zQB|Zz>5dH#P=NMC!YxsH1UU{-5dPoRS!KzNj|{9Q^9dBgifroIT@)hjrbw}lqJ@Xr zi4Cej%+vtCv1^4`t!b)jg%B;;slh9x41{bL%rvY>7D?KH&Sc>ks0bE1c!{D}*zpg7 z22q6vJ|hPs#PSb>R%igzs5C zt>-uoKQ{1VBR@6;k)(S$wAqQr7Jl5vpZ5pR1N0z=wi>j}poa|F?nHy{veU!-uCvo4 zoaIse*kPxg{IQEa9&=Kdn;s9MCulc6o-}BW6SKLOAN!oNpAOh4an%Zi%K`uX7^89T zrWH4cL`(z6CV=Y;rk%An`%beeO# z!1cdq&`a4)$V+Dpdf86roEWWFxY}pz^ePv4oI82mpw|p~-Jmx-3sVygTzgZtEf^7R z8uXSyZzIvO|CR&#JLtQodi5B{NP9d-SI^d;LuH;}_b1BY-6GaA_14p4$I>{H#?vyC zHR)aIXV7~ly-$BLL4n7ZAit2W!OTp^3IhSzq}6n*$+X)iuHV2i`(PSk&^so5Kp*nj z>?c3&AUesKfqIPR%x&HCKcWi|#*0n5NFST@5q*NMT$4WKug|#1c>3I+FHD+8^G#Yn zw|nyITd!Xo5NFbt^p#0p(^m%l-K2leKMnfEq;Kgv&%)d9&KXbNbJ-u1lyA~4+>{>~ z#J}h#gZ^#O&-5P*h)KWDubzE#(gIBSjeeIEH;cBOYu7~EFEQ|s=&~pOn(m$p#nHC$ z^oQrWH3RVe-P3h#s%-()$d5N_k)9L#GY>qlHc^X)x-eK!=*+5GXkLqI%N8uIW(~_z zd1KcDL3jOKxJ!Yv0^*PZIFG?oxQ`LRo4MLR^~O-*t?{rS?@$#UhHvK39M7E5A3qA; zmJ~ zAPV6zyMbjM6`$LbpdkUdXb#FUX$~m|h1%6YlS_K|+P@Sap|f(en2B`IXB!H*L7knaX{LG~X8m?EP2#x2 zlql!~B@KnZn(Wx{AoWIU06tRcsf{a@VcCNlDG$9Ql>al}b4Lw2y<*vw{&^&t`--l8m zkX>T1-g#tpZc6IMtC}fGENlS0B)RWtf2x~f{C&5REo2bZToCet^`LCIr_>qlZtBOP zX8q9q1dC%BmPKW63oT{-;_FW--rRzKta>W3G^|FjU@}c4DBw?E%Q%YR+I*1L&{}+H zbO&9HcO2qN!R{v`oQd}#ki{;*s59hfDfYB-ZS6P&g6NO~-^x0pby~kgixwAaa;W_tAAIGy;yU$@-PI(TU87^M%7-wfX zj$s0iL#{pQx#nz+(tT2XFTW~098@tJ8dRbC0|!hw%bZvj%MH4gMgHrYh`HW@m>cYL zqZ3%af>3PTD{lIkd^3agyU9|7~W`FOo&S&2&FM9&plww3Xww@#7(eyWL>f??I~Lm`Awe zqjuWC@12~y%b>>$dfcEVJjYHa9XS2+co7&_yI@Y$d>J-P#L8a%^-xjN%z%o4mC3tw zr|9Ns{mONogRhKopa!(tAbBA5)m&i!9@gq^&zSQy%}uZJ#CZ|qIo}Dt^VRkzS>NeI z-94qRrB!vJJtpm?8WVzeM_3=Ps=YoVIXly!eI`quCX1P@4V9QwPhJyTRLgd zvjq9@lu6IiX_H={7g18@+(kmQ#mCfYmO?r)9H|Y(QAKzjD z@hY0^xo25;GS>^4@Lfxw^a|43Ppz76(%-mAy{N>q^7WH#{%;JgU0WDlVT=AhwWfFT zl)TZoZGf`9!-SM!S>5MW&Y#(#&#cNtqpKD!shU5ns@tGUS^0w&F3)~q>v|yc&3xhU zM5KF4-&!j&Kj?PZL5cmvn-uM<}aAXT&pIv1jck;U$eZ 
zvWE4~>JQV;C`by?)DKbw3%Y(l+d%ScDBcSV4T9FWg#yIhY-KmzLXp`n@Z^v!WM;cU5!g&Y%4|1Y z>;?C5WJ8;EDJxsJMTU8-EMrn3#;ha8sR+xo3#`OlG4kErc=b?COSbB4Y+5g0%esP^ zS)y)#fPz)jj5Abo2Uz{qN)YKl(%0F?dc+58SBQ@U>!we2Nt-ZT`~0(TWrJz#v1h$7jM*2T(*HSt-ByD z8gcOu=s10EyuA)$iyh{*AbcYQ17#0Q-a(F7!&Xm;o$ZnK(A;el5*xUmk{|-^pajHZ z`(hv}BE}uWscjIB4+XsN>`;4wRr?`It*5r}p+R*NmId1#S_Bh7nwQd1H@ZHaWRzd#>40--}dbVjLuwg%}MtwgmXFyk=@1)$s>rCJ3-Xd;Mb<|T59KiIf5 zUcj7@X3&Ij*fxX&7K^BUh;oOEuwTiD3}?XxW8+`;K5MOwEiTQM(VeQ3)^gMu+o}0* zl|_`v$Pa_2cpZ4lma4~dLIayHF(4Z+U{0t5f%XxdjY#7twJWw;FGo(Ovmn+U znsbOU#^;0v6~sEMaI6k2@?7_RJ9k7yXEV}_wMPaqqzZj{=iuB(Gcq_bATlu0Sx=cN z>!ajY=AdrbqC(1HrBI7_3=5>###1Q+r5Eyuxn*%1uEi82aZxsz4}+ z2cmJf9#<2`boKE@E%tFB<2Xz-#|=k09S|P?Zt}E7qnXnY3PeNKlDi5)1MOV}r0oZ6 zhk!ye5pn>)SSY6*flEC9`4rfInw-#Rh0nAZEF`;hA$KUhHXx7i{NYCcsQ$K5&((E965t94u7MoQgCrnmwx|4(*p|ZWfngsW({vVv545@%9%O~m z|8YmK6{`PbyG>qTFIdTLOr;+>^#jPF9k1_lhYo~FXYrF$y zoN|6laW7SaB?84* zHSoq?jIRn8D&wtm4T345pkN&uvt-pOPJwa-AmQ`uW1+6Cs!kSj9QEYXD|$ezvBEtQ z;rO0S0foFLS+{g7FF7NDgF#;{7m4_R>+nZ z>qaOPp(Kt9^S6qUWA&y%90&D}K=}>Odk2JzNC*~5U>{kqd36ElFM(VzT=9f)pkU*H z!Q~KBCSnU>61dz|^f0*felV?*U{`0rsNMh*`UI>d8&KWMc`&Ap^ZmJ+0)a}-19)3Z zMiQL{1#XK8=|Z~|_14t{6;XiWGE9V`1V*zi6o73pgL*LIfy|E0um+TE5eX|>FM~=z zX$ve2M8z^ZN<=h6!dCwU2}>CXRY|6?jD#(ihC(?FBjQKq{|<*;H8e0EUO+yBcPRC5 zNt3-tW5nyN6kSEW#(!~&uCCjyJLItp;jqZ^*$h!0Wd$>OiKQ-8>in9G*Yt&y$G;p^ z>f%6QmkDEq>3w&k7-Luk^2)_mzF>T%*l7`Z`C_%`toC%b4*p?SA{JVrXIpiwyf|B= z%B!E$1LcW%!YzCD(jvTw+h}d#dY8`=)2wYzd9g-xvMPvOC6N;1ud5S&mf1x z5sW_;I+-%_qt*%HIcOS3F5A1sqw;&V80vAn8e$Vv+Kh>OzgjqC#Yzz>i&uz! 
zZ7qWe9MS--J$81L%UVBp+o7KSi3KID6pwVwXTAV&hglFah&2e5n85u!W)gQWH(CrH zb0^(pg4(v{yglUkIigrj+9_hN`-5$1+4m+fwga{lO_=5Gfs2M^Wb2X4`~*BEpFGS@ z5ZcHVCTT*z-OTn3IF;qoW`nkvbRXSs^7f)ZTlr&~3CVW53A#{c&?8*>QG<4H%1#s7 z&totunS|X$2JoZ_Wuw6J(9$H??`9EW%cs3wSXq9s#byIBcnz^zR(lmfzwi$9l)N1+ z--{GxHd`7jW|R!#l>*Y|@{?u5g6V z3h)T9kq9_49akE36j;SDs5~)8D7GB-`#4#18>nCFSUoZ@JCd0h^NLZc0>r97s4|}^ z_GK$jI(`DU1=M<|+)0NhS#dkwhEZBO2XdiFGas{HFyFF5vAUPq?j43rgtQ}+-q_Z5 zO)YeziD*p5an6Ljo4?HD&!Z;d2y|nR2|7X-vD9qf7_5|s;@Jy>Q-T5PgIVnhlSe@{Kwj`FPGd$k0&Bettf#mcrUr4BEwQP!M|>EaKaz1qWg6wf z+7|ptA)}@Ss#&SDk*ZC+Vah9M42Fc?`QwU!l|M6kSTpC0IyQb3-3!h6GG;%HR6-!ZmM)bYs?ss_r=9tI?ZC8z^7 zrvwepJxuw$`S-l6JSmdJL-MARB0C`|)_#}@cq4FLu_G31Y`$Z|7UE5 zzaC>b1oZ4*2(e@2!}JsV8?&DYL(I>ZTOH!aSyh6?&QGArOT0PsAMBO0-`e~1&p7%9 zgZCVwc}-hDezkzeub&l#qK!A}+riv;=GNWH(jWtR=k$E!3`zWKbED{YTy z-9wxUYG+j~>OFVCG^jZp<+{@%KG;Q_mB$b}2ZhQfKM?KY#nYmn9X>{EIP5BOUl6Hw z5I_?otGmy+KrVYhJll#F8Do$O`@Di8pdx1Rkl%^|-7P%Ew-sc`ECB|86-tu4HOxrkp;w~7c@*W$P?r0|v@)`;3k(||j+%H8;~ z9DXdkPHYCj>8ljZ;L6m%5LRLIxIx{LP+Z$^KNB1sR4D-H7%0nL5~=QkWV38IP2EaE zm(sDqGBB-Kss@W}cWdlcH<({T(Y7SOMwgg?CpR{mJ%+j20o>#(Tb$Rd#cYI$<#7_= zR&DU)Cf|)tbCV0Dc}6793}+xY_+b?oI@AnhJ+mnp-I@fp+jK%}IwP(~LANHMu(7p; zpgyd|MWWSdm>3qadE9jaJ|4*irI!d<@~{?RE%nFF_?P;P2ljvUJlwPSmv1fgvU8%1 zyPp&0!o3by4UNA8_{>$?iTQ+1+O(yZel!qe0-1mb;Ig zPBM&Vomfq$*a66L__5IoFky1aOLjWLiD%*RgHsA_u5JvSGuZJbkv)$R+4U!p&O4xj zycPuW=^OCZaiY&iYs+bEt+cne+}loihu#gM_vn3o{Ech)fD3&X1aFrMLEsS|^WzhP zKIKM!#%Z6!0mn{XIx*5;K}G+XJ&^Wt!rz17cXPp@e+Getf5Z8|W%%DYnf7olH059L zwsC@o?6uR+{QeJ*-!B~cl^?&cBNtqFerKZxT0Dd<@#8W-{@_Q0fjukO*vM8rwGsiF zycny+%J#2{r9$^U@~T*&>j8rH%axg08@W!`GUWI-L@Sx_jwqLZds9?6Q5^pR4Pkiu zy(LbF0QwqD_B!7Y|3vtA^i=--u6QqKwy>K}-KH3jN>9FE>i`$S3f{|tUB!eQ8;=<@ zlYO7Yo3II1o7kbh$z&^y$wbnmM_I6g&xawLctZsl_P&Uz2ok0tf=vOtkSRh%n4qZs z{YuA-9o28j(0;=Qj2Sp(^q9U=`t}=V3YRc}r^EV{4xi%BGOl#gpwi)e`k5k}6NCNn zeMXEOK8Artc-9Sz2rxw?jhEj~vqj3hF|gvCIIJ7dCahyPMuEL_Q^5RS2xNuU(=qy*(*dje86T{@*Iz3Si-6h(}x@nm0V;_oQSCQyqimvb#6Gft% zDY}aun?4dzuAZWo3B5Sm6s4lKDf)<{`W{AGB;#Zt+xoFec 
zd3Sy!KGS5#XwC4fniVBIpNJ)%19Q?mQ$}=^ql&Z?nfWPNdtoU|f90QwHIqjIXW2f2 zgG)UBcoI)M;@BxbgVIloIULP6quxLp^#KGH0Lo2g8^TqIM~$-=Sa7cp19@R?}qo`)pM=)=-m37AnS6g={=q437y;f$>) z%>GJ?(MTrHzzjUuIGuwxr)ipHfo7-{f9a#3SsY69K@&V}R>>~^L(#r}S(N>uxWC&! z7aP#uqAx`DSZ=Uo7rD8y!8bzzosOKX(Qr1f*uan`vpx#$kcY9j0UBulc8e_bY2{jh zTpF)PJm*U97veB#SoozF;N}`eVdi4MAYf1gj4ogfx4@(rfWa6*x2m{X!4_^)aRiSX zHm>u(osC1cQ_X<}INQ|B+ZyY)VkV>h+fgEjv`aL?pf$fk;AZ5A)u4Y0UL4l2iP5Mx z5D8DqvaiI!Fz8fviwE<_w;{W{?`zT8{RnpLA628sqq7tE!u=0`;MRuCoahB0G`90; zSUqO~KF8gIvC~vr(kP(Pp!E=j);FMGHiJ9{ZBRZ}>-9@~G-l&(V7`ic@vwmxL%~y0 ztNR0ZQNhVT-%i0ts8hMCvzLmFvriOUp^5{LSS-*&sN2QZMN~5FX4=Uih_xrEJMUi} zhJ^}xN8)Dc<8HFx)*N_JplaGRERgfSho24KPwwHq1BYe zQeeboid4QN8!yoqgX%i`M(MvHqeY1xK?sZpPhQ-|@MfE1!4VpQkr_HX7ux%(-mb$da^PMR(6bUVj5B=bMd)$$o_X#ZrLoFiGS% zCX&2ZvVgj~&^114b%R>K+*ymjasCy^Z{>OnId}oVo$?a384tq=2OM)P2RX~#0ATA=e_*B zkiyQfD+2aR^4|X=_B2l)nr}n5AxAAjrm}4T$>61{y zRkEAVhKLGztI&pt8S*Wm^%ZkuoTd#3sDZ$;P|nb_wFyfoNL)jqVi~1~H3qa=J|$r;MSYKh$6QJ7@#{dRbEIx_3m141Ed;YOt>TXcO_9>{_0P zm-%+BCtfvnZJr|m6PpMbK3;xd*K*XDw2oN%p*PNqKCXbQmcq8!9plSj^0pw`heW_1s?a4KSXMC#Xxy=6u=CX z_eE*f1ebv+jRfs2gK$1dCPiy62d5!_I<)n6utl_$zeH;tmBsYP`|Dqy(FW>m8_$Vf zAmS7Wmy~_=5VwFCa|%6Yr&CULPcrBQgGFa}Cb9l_)<(v8ta6=RHrQj{NoV;1>3J#( z$5ZLNlf9JSEgu9u0)jIfl{mxO@LG3Z1-=WfcPB6w@ZaY*MCK^^z)2s{M+RN6)5oxl z#w&T*pwFCG$Dca@^b0;a42gL%eZ`+&J3y5G&Y%C_D*wrkZw&g@&a(1U`rZkQhQK_P zehl(CsI#jaCtGMh~bvK&7oMkhy$!-(HcV{;_V`VAt%~Jl3gSlA_Ze4+88jT?~c{lgo(B|3_NK5 z^r~fJme*84nDj!9H?^oY43UnB!DvGeH(0#Pz|*V6Yq_=n0q?e7yd9IYBq7#X5ta(~*6_yam!dOpBBoUKg>Vr)+pv#BI7y*ge~x zicRU(vm3_A5Lr-c43Q02vl##}X2Iy%MexfM)WLHyAp$3;?Q;Jcw#Zn6Q&j~)(`Hpo zn=^XBB2*%10Cy+TBVSc{wajty;tV}*(~sC_9-g9wgh6J5&eMM8wHOeH)gsrkVqcEKir$kF|pV0pfc*4Z{o^pl|j z^(dK?swHL568%jvfK?#a#rbisDF)(G45B(Vd2cr{DQMIX>;mkY(@w5S)jCBE6Q$ht z;iecN$}oBnkBW# zGjO6idES!6waK$8msTZDTQI-2dghV^OBN?jQ<$-M!Mv))?V*b$kFLBX**Ck%rkF0O zJo$ITXP6Kpo)HwDJY&J!xeKnTnw~tj635@+sGE$%nOr%4dNRyZ^OJoiOCqZc)}>}x z!!gqovqZHiW{Wwdm@BaL&ic-L9=)Do0lsogQ6nIj4Qtle>cz?P7t|&%R+z>0Eavkk 
z$^JFbKG`|GDtU3`HS&fJM4YVsK%^?QjVY*ZiiJX!#@eD}Mw)gcB)I9YJ8lkW)b6|= zr)lGCa#=5^*ef!%f49;z2JzW=V^-zj;Z@6O<#62=EjMIo=|&*VzhDV@O7BiJgLv>N ztLHBsQnlPOvMgV=zAU=Sht6xI^20a5NVfbSIyvnA12eL8uAbtVbf-g(%+$M1fn+)v zbe#=EPR3G%S|M!dGPCi;TjVTl1Z3fWyyfvY2cOn1NG$kxTO|YeO>BIcg!L}Vab~u+ zK3mHYZZ2^xcpsrE4Z52S86prWe=88|cBr-!RHvSR(--36U_clH&0sS0u<6ic=0Z=Y zg;;Y9Wb@?^ZZ<$Kk+8aNhIIHSQg%XB+K<$ykoOtnIEB0~An!}a`x(MtlH0ACSOZSk z7p#NTI~~=;0l*>_aX{i(Qwv1l*3i7JLF#6(>SZdl6J1?~6deGqR8~*FX0HmoHGqd} z1x84Ry&qe6hzZ2Gsui>nqS8&sF`9z^a8o-R7BJLaUT|llH#Xm<;5Qumc|#1ktpVd@ zH|X}pJSYa;1`o~|&)d%W70C|XZbPj+Lp@Lv3%`!6Y`7$1*Mx;i4O*{X#awIk-;xI% zygP>^o+NC-0S3dm#|Mj#A+h-ERIuU?VW$OO&4F!FV`oKYtpn4ix z=x))(w7+QLLkjADZ=vU}TbTA&E!_R57AA3b|Eh`DX5@~`YddIh3P~-U{LueTYe0Z~ z<2A@PUT~((!Dds&UkuoD&6*R`IAB4I0~Q1{lo+r`2h(&P+&#ccRw??*j6AKQ8+un> zIo}GAAFCgt0-h6_nqX@q@D-rTs}Sv8!*=!S5MAC-Ray#v-^MEYHdfi!S0&)9*|MQr zEbw?Udc!yidovrTw?OC87{%oKdD=KNKG2+z{tlwwLy7mXnm&Z(^COi15CY3bun>RT zth+^OZJ+>T*ZYBW?5EI)_igd!dJ@UjfTB>Du{gHks3h-B-FAr=!?3b5d}ZzD>4p4Ec*S|iBLk}#M2^mM0uM&om0^{BwZG+L)vefW-sO!Q_vfo?WRU#bP+l1= zer}x6pP|U^#iViEJ`64I?f(R3@g!z67&QC&mP z2+WRa93J=@FL1PEf~(y6ny9|&$sKnEDHf4xoGi|p2a^ix?oPF-XswgyF&s?HYqL+& zA{dhZ7YFq4+)mRF02(3^PoUst8hM09z_g>#&NT^D`_rsuOr%Hd|29 zW}u`WpDb$|BZ*pjA`Z%WYg#N4o6(2b#+j;Bv%;fNtLD+74*?{82dT0H=d1ZTA?9{% z)Yze;;WYUWU44KexhBz!-Nj*iPa;l(vwJ1pY{tlgI8gyWQ}Lb#GsbkhtMHCT>P6ts z2+9)S&CM9>zS`+q`r~7h_>eRdWf^%RZen6_&ON4Tac%o)&}$Z-`QzzhsG@*RKPAy< zgT~ltEXzjY9BAoyJ5As>dm8b3lEL1kP8!W0lbvkLwo^qAS~=C{SXxCh_!ACB)7Yi7 zimLf@b`ZKZiyw30yBWaFg#2%~6ZC05=UTw9__`~D7CEpi7W1PPhxUNgmcZJ(YPw9x z7SY+QmdtOA=?@`XP}ub9#Y+C3&+@kwXU9RIJ~Fwl$n^9v`pPLb-RgP*cOB|)WG&u( z7D4H%;(MVg7b%@nR^*5r&*Ie9((?#(9c_n}rIb7QevU9b^KQv-x405(DK=_HErH-a zuWB5$LfoHKIv*l-rMfW-&fG&6ESlMYE5b3M%BGYKzBWWRwL#TB*4Hru+d|EhV5p9R z=FhKM)CcorTU@nRUWkSiT@$1wI9RyWYoRF1e5cka%2l|2^c>vS6#;o%wc1yn z)pbNiWn@m0`7=Z~us!X+^}lbkW?(M#`IYl9jn2^vmMof9WzAn?3)qaVta6yJAZ1}Z zl=@MX}vZr&P6HTsaTd!dXYgGWpUkZ1D<;Ynj`RO;)xv z&XLqIhlO31g16YpmQHPP_AF(f2_}(&lO;a)WF4wsfk8K#5TjQZmVYu!&Fr1Ly3r|l 
zjdC?pPG*MB6WQlbfNM$Cv-8+m+MtwVsE2AS<;Xo+q+Rkw3irZ3cA)?pR7x!ygN|Do zcKSZpA%GL{kVQ5tosoB}7G15YoDouw*JgMM>yxGJTkNZ;4Yvr}=&D*z`?9p4uqHf} zdt>zk&s*L|dEK`nHf3@3wU`-PSuhBr4subosJ7q2C6#kA(*YTSpxntR+i;@sUmw}R z`aX=?*=!@^Oh+IYwD1BicZ z5fcidp7mxgYN%}>jhJ*iv+=bMC78$-u`&l)Nh^2p(YAxo?$k%1`Y@~yyZUh8!wFd= z=pdPv`iH|mY+PpXl)W>;@r-~d7l|68l#jK=k%~YwtuuOgsGmhlqOXvj%T>VTfPELt zLlivJ6*3GAq!qYPJ%t{E#~C<}O9R^s)2(>;E91aSc8(s3jg3g1+{1N9QBeX}B64kq zDVl|%m|{JG1su31AEJ1R`{baiEYMp%4s1euYX$pgJer!0!7HR>sGKSA*GYv2*cOUu znrgfyHCOl=Zz2soix{+4Fre~LLyda6G5UPg(1t{IZ|Qn3_7!8v zMW|M4S5(R#HGBxBKMqJugAdqX>%xVhf7t@QE$pvb&>j_ZXx0LEh;MZM*DV+Z_ipQ6 znn4hd7$cJcj#6T|O+QLW<+lBlJbHAw?*B*`J=zj|cs`+_@y>8zGfYeJHHXKUJ?#0G ztupSahB;$7fKQ&WLy-oqcr}%R?+<`ZKk!O^FLMgDZptmqJEu_l)oul6b=OW4%nNo7bTaiolT%py-o7QV_g5(n`wEH~US4D7Bv%FhZYU?xu z;tKp_Z;&}VwPJ70Ds7^$1FhA;XLWOv>K2TBSOC^}VO$-ld>TM#JJF*iw3waPBQqh< z?ebI(ijozpwQE8-Z9klLd?%$l$-l4G+6?b$(6c6X_~220iqAK9H?h^%mk&AD^W`za zOw1Y{FVayy?FKZ1@b-vLnp!L-d{Ym9y{A;#xJ;IvWz{966<{9zC+@ zIB#E|X%Rr=04}c#>*~4Y$F=4+QX58Yt2O;NC)`B&K=1&DAb-C@`wXqWc&9eP&8@!{ zv}zPqGS`HY4IqNJwhIk?04r%Q7K;N4dT%(O4nv~|CIo*k-{9#{EqR0I2(ZH-Sn~}z zHekkfz#NUi>xgZ}F-Q%7IyM1rpCbWzCDO(LvQeR&Ie7T2qI`9Zm8~4vcuO^h;3t-7 zKtle5_g$}oUO4$TO8bKy7a=uYEiO?mUcXT=ANR95I|YrZ02$`*%dUkSX}|#?hLNa21*qiqYUP!VP#xXO3vaOh62GIE1vze83f* zLq|(t1GTsX`xmhBRkcwB0zDn9(@v7ttkV*)NwR63mL7kLmojk+O|cy$8)|z!;R+Vj zWSnS`@2u0Z-NWIcFdjPRM5u$@bau^X3v}hFv?+GNQ^}4V?Vwfht(Be8p}B~I*Dz3l zk%ZA;Qe(#^!8FSk1;qft79gM14P3HxiDICoHSYl&=xb0`gSf#00Mp%UphtVa<@2lf z*W?e(#MJxbJ=&QdMJ`7>Ztr_5W`%#9nOwG&^S5cOWZ5C?tUmjI_76=A_F}yUXpZ4( zR0*4x&uquVu!$c?#kmVEJ^-Q5}`0*`2zBA~1JMVfH z(2xB6ub|G{41<2+8vxPwfAbrApegJ#Sj0YqDeN=&CjF+|1$)sYgDzw1(__Av=$-k9 zc1jbUdciR=g^L=Xbu`nJz-I3GR`imO?!p{k1%E2VW7=4qZ|ss|9@DNBuX~!_T5@?Bpm8Zpwn<>0A-+ed{0JSaFdvxBlt`sgciR340#4pe?w@du+dr*I9p)~ zT?8mEI=(F$UIo0(YKkC!m?GGx6JVPv1V>FoD}IDBun}s{m0kTJ3=yfE_TaIHy08fu zXHWyY^w@_lUL=?zQ6yoH*A&Tw{jC(Pvkk4oW-FiX_~9|F5L^t`&X`P9_%sWSv@Ag; z$dY(?SPM}uiw}TDing43KgVsOhZO6SD|Y~4OZRF~_7QLnME@sPdxY2R)e582u!9Ik 
z57gTZ4=lv1ls_OJgu#B|2%NGMBcZeGzEw+=m-lLyH|+Rt zvUgpO{N*XF^^j65-rm4(J}TB1IHZA$_E&EVuXZam7I`x;4A{TM|E0IHq=d!R!Q;Z~sB9H}d`apqAZ%^H~yXeq+9T)fVP* z`KmQ+7(<^`*#^6aYBvf=7B8&>oAFn~dp1@@7#5ZJj(RPj6KA&A`A&#HkEsF{&t@^+ zDqzC!b2Wj95PrU;Dixf6E<(&`_BD>wJ`jACCpd0CLYf86gSc=|Ryj)R6$i=5 z+};9Vq0a>j&&6h7f>d*&S$nf?n?uJ$EJm1FZW{?N;Q@NRZ;lwOG-H-e{n`qNrB_YzpF8Q=Lz&HCzhKNKR z_uaK`(Ph3SjF)}QGg`Hqu|_%Gfj<~v5AYnPNHDf0S~a1uQ4_Fs*t;|mH0v%<6L!{R zf~1Z(^g~lh7%8nSRg5TBsb%ZPb(Fv+-c}(7pGZ<@5sN^)a4WRXaa0-@QCWfma!I?n z8w#7PcD085A5Y05fwEyb=XUY7RWgR|9-xGd`jZq&uoRl>Yb%Re>oL51eFe7=lBQ|R zKHpgYzJw>YunUW^yc9xOp@nH{=HQur>w-OqX||w58&$1E;zN8@2<%lVFf#g>mefQ( zMfzW4)|d@)opSOqt+QAoHyqQF!4?jpXtf{9J76N#!#-z#K*g)h1;XaiNp+5~qkrQ> z^vAN|eNr9xz6Bu*C!$EdOs#+cH(us*t@qi~)gZj%1E#Ag?{Qp5^=R^Dl1HUt9%_siRZA7uhAT`Mvg;|U~btER)U?@@n|=92^SV#A~wd> zKrOWWMsXUt2XlkfJu!nE@|BZXRPRnhvoVSeuoQmduq^LqVy$(C?+(m}SZiB&o45cw zTSYKpc5!vBr($I3hF3jxM<-?avsx#2j~x_`qpz-V>?4QchkFrtNrKYxy^0NcvY8R< zYF!pXcOx+s^D+&zemVwV2IYvEa2lOOqs1KPu5)RIn1`!87tqb3hSrM3R3~a_hgjM; zC^JCVcVYe7;4+d9NyW`enGj`7ste2Iaf}5_qM&+%K=C!~b6V1m6~nH&JWfdzc$v4I zKnHKU#9k)%mEd=YLh$^RVtCY^#3rbPnyhv+op7~&CSX`j+#$ks61{i>y~NGkv(~(| zTX!w?r?IaJ|JNo`=)ztq6M80@`!Q#{0z=dusupU;E~S5t9Jy zJOYPN*zm%Z%ggSiWpYh!{Ms0zwK1NO z)3DItz>d4+o}EZ#{Z2YD7xy|rQ?P~ClkN)wy?7vq?=IxWHe6iIxwbn&aUW(e>k-cN zC_i@aC$6pDMGxTOLeB8G1JktIiN}-b&guvTT*n8lB50q3+$qX1#!*l49RLS|FaRj) z<(NYZ;4s$?;OSxqO52IUsaN^r?n1p!f9;??+$wZaax-!oJS1ht836j@2D5GABET> zbGM0(-sNZDhipKz#h&>?@|m-+)qOHUkC7QKYvH*{@q?WNPQE5)@l(Fo8he}{@wL>S z1C)sy#A?(f)P9w#E#F4{cfN}HQ=Bg|aYXi8F7zE=TaBDJ5@*nlGHjt6j>=M4IT@6Rby$KhDh!eKH2ofFsM*p#{{KMvJ8 zz?&M!($t;z5WM8@SF|zmjcFn>=wnj^^06(uaQKh7zWR!mJj*G93}Hei6(LYWMW`Xd z_`?Mq6xE3c92j9mG+0ELBASL9B1Y+=Ciq()>>lOkcgsU_WnU zuz3n?#4?QY?L=4#17c?@hu_W*T zpe=*IXDyn?EY7-m8-{(542x2o0x@H*(wYCd3U6a!wLZ3ZRx0EUK6a+#oepJmk|LU&eJwzJ&^afgPaMJy zK3Y?Outm~1trBTd5rb2(><$9SYZ~qkF{lc6G83>cjo8algMh{tjkBqb3EoNk(GJy} zLiY1A!My%=ePKQ_3V=pKNE`#J&sa5z+^^)uewoesRp{>*|HYu$psC!kLe<6Q9pjiv 
zG_|o~EckNAtiHAA*c`yXGRBiT=D&pv_gi9)c-XD$0j!ChfQFT;CgSfiu$~y`Zx2G2 z#nFEY8w0x!K$(RVL#PSH1(&ubcxG{*rP@Fo3Jg$FACG{V`p{uHn15^~Zsxo@8a~!7 zDT^`9+QOU+38ez`vXc8h6?=KpRcn$_O(rIlokcZdZKDu@$EMiVKpu%;(jAw`#Kb24 zhHN^t-dbO`tt+PB!?~YakSQ!Jm8(?(C8oUzxmh(D$4Uy6TXw+(BU+vU(i_7ct91&;)p1Gr-Bsnre_1HVhj}KIB^l;H*kobHo=(z~cZbBAk3T z4>dtt)C{8TgD8ADC+0+Cpx>+r&)H8gn`s7L1!6TX&WjtqeJxc=FyC0^n;|$d`lcwM znALbktUCaTCofhUb!&%9L5huBK3$d!J#$CH*&~!vo`t`6)rMj0&9cNXZlfiWuExCI z0o4unJc_$$fVdkLvUM~;;KnHYfsGmBUikTMQT<>`_yFuf+yJx-1Ql{X1S^NE+|>Cw)Zq;LR$`WTdCl<+0*7t_nG%fzz{+2Vyo?=T{I_vZ)!Lu& z8x>*m0$Hzv4r~p2fp3QSlG;@GceS;Iu*QKGyrbeLEO0AA?TjL;pd2dz0+)NKfqy02 zm1a9i=@9M5Hy>`6fYox?gMeBN32HhZ->QMuZJh&{9%d+738s=)PZwNNE8i;Zko zR;V4|%Wb*Xlw}t}>{I8AE>~sYZrFZwc_4%7pkj^40QI&ekh~HC!fl!CjorEq5zZ!ipUY!v$Gr*q;!-x)aiboFnMrpb=5jsGoH9A@2JQTIu%iIA9l>*L1Cc(c@@c&O zUTK`zSJ z$vZGB=^zzcK?8*sq#joa^JM>-2HpeXD-H^w6CNMl!~;10)AtjGf>ELoWx$c_&~``q za*p!lY#&Z3hp6xXb#!%dbv{Hzhp0b47bk$Y8^@} z!Yp(HOD_RM>6^L6zko3L3`c6&ds7P zwSF3g5Num?lzaN)pu@$lwGFrmV9DQcPQc;a_IIs8`_nb*)-`1(E+gM7-)O#T%Kr9^ zc2k&jpG0MYR%rd%rGxj{s;rP#Xtv-;1uAmR#T#D%*Wh0f-(BX(8<^yYF(*WvjDFp9}cn z>8a?{)BxoUP?KnD?JaC?TV(xtOz*BjTWF58D|K{=Enl4>TyuJI&`l`9zX9^(PFtM3 zSYgZZ_NlO~_Z54u%64ap*UjL+;VJW8#?_nWpVeL7>~pq9GE|lxFFki5F9^=8L6#!~vUo0DXO|tGgU8@E zPum>cwIX_~72n2Y!Bd&nJ*T&l3-WY__kN*2-pcCHkTCs~P%D_w zMsF2j1@rQBcc>b`dpB)ZI0ZAxzij#Ozin+c^|l4dkms>e_-%<^ZLZ>pE4cBW! 
z^?laGq~6sd^h?6ZuzIBalW!1j9H;-`%dnzcf7!|)%O>fatgN?B(*L8YuH1gp+hut$ z2{{XoWt%U+->((jL&q zSt0iWdX*LW>H&ScLj0z`(<+BOhzmOTq^{iZpdP0!%<$~%7@__~n|w@nSt zr6xU5mOP~Uu5cZ^6)7WfRSN!Co8&Ud46W&}wPCXsOU(bTZsmyfzTBp)e!!Re@rmc> zm4M6gFUy}5@4A%(^R3)bE}6Z}-=5NKsP%j&f32QZLj&cFNL4itH?NxaI?D1dy#=hV zx~LL+3M1qPJ9Sg#Zt-{FTIBwtD{^;l*4MI!Q0wSHh)r**EpbxJ$fH0;{w1|2@17*X zw(G6sg@=3%b5`Fq9nSvj@DDYr>0jN-%VPgPLXv~xW980z7l-*cFO)vcT|%YqU*Jy_oIupi2O4+F~NIld(8 z57b%1nfB=Pr#hl@{Ifh@tVY$X{1Jblf!ninj!Tx;`5`T+10?q%t35c{n5Wuv&9P1G z{Ow_nIRC%8l_!=oX0_uKWVkGO#E)EMj{wx^Wt`qtu6qQ-*y4}ifrj??!@;|5W&R2o zwnOi*{;6=OKkCcf_y^9SNU<(|fx4ArSF7BfyVr)uj$3`%8~-qfi*=Z$e|0N84_Mic zZV8eX{LuUVG5@8T!-@D;*pwS>Dc;N3OI7EaC15wi|1XrjDGfQLFmm1ZL7`_8itx%vU{8@=-VV{ix0DscQ4GID_6>aaQg)e%{;Q;*WV zZc44$si&)^HT_|cE0^iS*BW6Vw_U!y(?7%i+=+(m`K&P+e~cqu5iMD=3n>e~OZM=S5SRAOO}OciZ+nDE zwCP{n$|vpq?7F|9f^rYold6|J!7 z;LyhbyiH0Y@JL1Pth5ggBkh+wD@|Rldhv1H;yGnS~Tg-0gZ*?)D)=vE$(G)k>8lunBTgmRes~IWWA?*3XZbyaTe^qHT|nw zxv{dT&?cNwmxFivhqGcgTE4Q{N=L79tXg6l_OIHYCy_R@DNWt> zYV~txP1Q#(&=LL>HMIak{}HGF!zx<thSmh&J6B%cnk?{?)CV3o^kiQ#Yz6@70r3e)?kX z`3F!X|5{etV?(q2xPi5mT)P)_D_mE|neiZJi2GAdKKZ%&au&!n|5}pltF01zA7YG5 z+=yfB-S+v|SBqN{o0^Rg{a~A_rtuO*YX%xG3S`6 zRP$Th>(L_r_#hcpkJJDDYd)+YZ% zOG&B_`fSQGPoWQtDJ>5nHC0l#@|xa_+2i;mgSETSoW?0kyZ98aylw>NO_fOp{lvDz zK}3J5qO;}FgMPfcgQG7_;N02j>N0r&X{sf`AML#M6ZDjS>}u)%SGTfm2Iuc@VfG}9 zllB1?alPI_K3eYw{0v8@Y_g)23v{Gck5GVH_(=Pq;$64$*ZZ2~XEd;?xZev{8BaE* z1<3F3(E{ZiUJNn!xrG;D3xGc#=0d5BPKAGFqfL?9V!keBl}ZAe{7Cw6^k4Eg^Onm;P;Y#Jicgm3b8L3m`x#%wg*mS+9|HM_ ze=XPL=t5tCnk1VcFLXmG7Wk~LJpBwne&3@hhg06{#or)O{fs+`#7?D6iKa|D?W?az z{l8N7`>=0Q0lPeQRFCnE8bez*xG6DEepCqUmHVg0qFMLhEaU&jC|S4ECdD~htgJq! z2dQyxVM_34#k+3hdz`nzHc)rY@E$M55qRC(lte|f)#Xxn|q733KC;4X}ddD2JH1MF3^`(ax7*K%rqIW4!H)YIgN zZ{hJcVlN8KJ>es{|E&2j_XG;?uTzTwy8AUQ)JF~DsgYgJMWOrrYbknd|G?Ll_-FNY zD(_FL;}6^kF2uiaDGA-uEo}^WLY3KWEF5YoGN|sV39Ol~{AclqJvW zDQrYZR^!=ZIk{3M=^kTcg$c!1+n(nzs0A5T^a>lU_Ww2jSM2|eRc19O>=ZKSU9Ak? 
zrRz>P;gmmZKBq+wu+paF>7jD#bN&fG%V}H2b6Pt#vshEzfBdEu=}=Vo*K*W)PvWd4 zssnN6g;CWI9r)M2#S9#pCbvBgO9c}EdGUEvF>eOPw3Df)^$07v+i8xT#nEZ<#!=|a z@sW6(IqDlp_kE!OM**3C?OH&un#YAw<(_fK|KVwLMb+~S0NQ5O=)5>$mb`^s3`9;Vw`sv;ZuuHhph2a2gw&$Uuf7hJ?^d&g^oKF%}T3 zeNF&Pc-#jPQJ5~!&B8iezk-Dgr3nZ!O{(ko^bTO|0bwl^F9FZv0_(kh0$7-Uee)Y+ zaP6wXTCOCc0NoX;EWD{_o91MVpoyavT w(;KG(6Xl1K(1EGx|4sr!aVkVye)^rMEOKH2-mGjOUClsv3>eXmH-RQh07eh;yZ`_I delta 30841 zcmbV#31AdO^KbR+?9T3HLXr)+6W9PDkOM*>0Rn_@-w3Jn{HEQQoh5b~g!r@BQC<(Vgk(qq@7gy1K3z4!#)s z-TK%@rGsp`o5)Ete8$@)>l$S`OL6bh@yfz=-Dc*s%bk+fA-6-jg4}j_L)sU1DD<}K zQCT^s!aHEz)Hban-I4CP_S|mW+f8(J|E-0(`@JGZC8|rs?RG2mk>iqXrOrpZ6WNHX zzad^67waO5b@|JWB>JNU$N0Zbit5+TO^v9~ppI_RsFRB#sk0lGBEEFtOII%FW>7Id zba#`BN;s#;pdJSG6y%;Wb6&*=`Sd_h;QvXCu5O!>E98>{#1PS|IwGw^_>Z4S^~bhu z>_5?Dkr?7Xyr-xCLeuuP81ne9{yNToXrNO*by|;VVp0tCHYtjtP0FHdlbTYFNf~^J zp+5cxe~t5(#(Uf*Wl|nz_4Ti79H*vHke=;#?~C_8pYD(kj}S%v_nRgAA~7u@K^}&P zl#_Hc@?hk#c$!Hzq=;g&vI8Wt_LEi?Aln|&pGKyQ8spjsccNBg#~7liHO1j6-fDz< z0-n&E5-ACtBsVBa!IRG_OhsWD9yMx$R8#T>`!5al@6O@@*lWnK8?~&C9jHl^TdzZR zxI;IkQ?p=)rmDwf++lP4bmX+aFN3^30ON`Nqa)+;j`IJ%JF+I$5*=m&PZnZl17D0c zUjpM4b8`;aPp-18oEmbw0u)i#I};I6N6=GeH3F|{mk^$6LU5`stVC<9fTpl)O`2;p zoV15&COC#+xceBmE9+U(vunt#>(xSTw|aASg?n`U1G!w*upeH;9L$(;I~e_NH;ydy zaT#8*{{8E9;s|%*z{<7(045o0GF*mEbY3?GZK1WTLTg^ItNyriSDLtdD&y<*qJ?|W zI#D~yN29uKxRB?f1v_X@9n@1nL+(=TYt_MKv5r9YLqete6LT+GSrgm*M^oJ7iSeiA zmgqz!x;)=hOXRI7y#d`(y|&%JAm{n0pm7k;FP%IVplY*dc$5XHd_XIYmAdnaUxM{k zPTHzjdD&BX22)p0`z=^Hcg7#VyxFtYS$WUQzb#mJ=Ynp*)X7C!F!koOR~a$P`c7pZ}XMaXth1m(DSVcgV~@;KFw^YZzzJzqMwselSO z)zP5N1{GDGTfRhyF8=X1-qkgRx|-mPjST8$QZaQmsKlfm)YGJ1)IG@2dUL7?H8rS@ zNqwmwKcpMf-@k5eAOEbI-rkUIbI6}M+G6ism(mh!A_C*$wIT+LD+SpY@2wyk^KZI2 zdo**Zck0GnwB33C#7O|!TA$d`1CoQ(GxC}r#13=+J0P4U!3wzLkpN1_`v8qsy(>Ie{ zdonFfuO(NZolCM)UQ(Es?STXDQ%K85l|v;IkD{cHmac>mq>G8@dJ_!}!huM<;TV;3en;Ht%e> zJ%$eUyB~-rdLUlL+^;3xy{@(YsavCbf4Z>nx4Ef1-R`10XtkT}q&1v(7hl%$WgTDc zjvzm+=hQuJTsH7U^5eY`w2?M(>OO<+H)yj#54dq$-R`6G>R6Xok?#}PlMhv>1}$)#K|^1xXXH5d#Vqhnsg)G 
zWYQ2CYSKblWYQA8^rQX;oiXWMdXM*HPx(WQEzxf-OY;}68P)E6`aqB2wf1C6b zeJ#s3X>tCw>%IQ`yT<2?qHh??w?Zv}zxsD|Z{c^WO>q83pYyR2uPUkM+$61GSQ4qJ^mlOlB>0(T;yPu=^fyT;f;-2uqc zXdr$VvPuJ~;e0+~u>eHT(S#*JxeH~ZXf!t67%-zWl=JQ{&C1>b`ReWs!^BvB>4vtx zameg|UC5G28BM^QQ#>T*b){-B%;QSIB>Q0Ba%v)7i58QPw&X8vIT=q@Svkt4AYDM( zB`mJPps5B;GpIrcgQgQMS6zbiXXWSC6=*0xEt-M0%>SZ;Q=W-5RvOna{%qN#_gIu$ ze9$Bwy~PJ3gQMqB)#Xga6ORR)F`@)y1ZtU!iH-5!DJy&oD4DS$qRB2iVAb(tidc#i zjcK4rP{3ITillBdkLD|c3=Vc<$EsASb*V!GmvM(y*+BHhUB@ySOv3DdhO2q{3%aS9 zK_aRGOza9C5o-QI26`i!10L)?N~8F;m(m0nOMfffM9?8>IvCQ8_W^2$JUf!G@(q^r`p$hPTBdyVAZ=y%UV5G&f zl*3tjDcAq~#x&8!AGxVdo4lNZ5OY1yA!0rGH56wxYF|SgYA9vsekv%-wc6Tp0#xW< zzA2+yTL@?sIOWsBQcxe#5_tA zb07}Qr50ko8VfU#B%JQN$9U`+{}>hLOhyTljgxGR24E9Kpf1i9wB<1tlkoR*N2)z- z&2vG$d20WF$ThcZi)M|m`wpF?kKq$N;Q|R`}F$09xNw*#UTN?`lHR2hVc4_ zJrZ9%>yh`}mSo<4_o-Nw@~0l@Xl0E(GQ*Oy(M|X-9$@19-gk9R!><1GSJGb(|9M;& zwQ}cmwzeA8%8#zIHT4fZ)xvcE-0^v=%t8N>Q$xiI{#Q<2<=TtY-3LLf*5B!Lj*{l5 zT+uaYS_~2YT7-J>=bdr-ZxvC#*IZDd7PukERT;F-@YlOCh7g6s}~_-xW3op3#7Kyf%A z*cZ3KcNRkkkagnqsN>zzPX6(LsO^ zqhJIn0!X1|`k-Gl7KS{tR#6kVKV2Nbb8$Ch8D=ji#hFoZ!(n_Os* zvllwj?As_J$IiKhJ_ny`NqU?;&Vkdd$VhW+qxc+0zNfJ#u7=`^oJmF!Mv;(aY@_5H z1C0_pAuj=WN!!WE<-60I+h}l(6DK1-TVr-cd$*G-$!Wt0S?G#$Mdxm%MoGpqlv+b+ z+bIn>*~sy6Ruh#K!O5mbrbPo@Y)-T@-|D@Vybw#$q9e9p8gcdx4-@NQ29T7c7TqV+}aoEufYXti00)(6F>(G)@4vVO+hD+Jl@v zTnykjYMd^Ba^sliJdj3+u#mC{r~us#P~vyXkEO47Hs}_A}88Wk4kf*nb2Yi z9XZi1l`!HA5OkAsxfvL?H054+#hBX-}2L^fvl_Oi3}VFXbLgycwpOR*5YllBLCx(fZX z`jm{{3~RZOQ;hXv7U~A?Y3a$Vp;m0&v6!nx1CQ6E0e6@N+>mxHerKs*%%){xwlL*W z_X(qU9SJ?9np>Ke-D(b@syoe*%sf5V_YJ!pzaPTr%R9hkqT4{W{dCjHuFkSWnzDS&VZkxhn9SQ+#f!q%3?uG?T?i!XaOa6K*92 z-BYuu$c1x&f7vVsad{&d5ic71{4g`JtDvtN>)+2w{oPD8P8!7b!EWsJp?n#}=V2*d zhVx~FlSXodQ7%x$7`}|<>~VZ4SmY7G)`9V zWjbG=YR@oeX5hJaFgxC z-9*=-Qd<+d;ue!`<*b!_Sw+_xbQ=S1db`Pjo(ZjMjY)U$Wi8iO$CtZlsoZ+AEr|75 z$s*UkGcUrwX1mir^1YVwK(a`XzpNG+0W(Fc(wcj*8Z0>^2CbDX6g{zJQs6ww#I=>Y zVXa8=u^FqNYi_D!6W^xn|%Y)Dg>7w91Q!WUbMavAq+G9x|(vYm-2)5=j7KT`+ 
zutXui!DhB2pXj15+oMB**=WfW4j65RN(+87OvKL!H8SjfiXAOn{{q#X~_;}vusoc${>?ccFecwsC{_QmkRT*Siocx61O z#_kyn-Ms}y*Bpb&$5CI1l#Wh!LR{&D73&Njuy{pWq$s!~{vxjsJ;Wst1&m!?4C;zZ z=H1=c3RHXv;4aUHm8jUD?qG=Rz}Y~97~TIsQ&ImLnkrE=#nwe;lsz#nzV~j(AOm^n z;tk6IB=6O2&E%^7Lq1+<;tKXVBdy zyHiZaUmFaP{BW-cA#9Tgnd^QyQMmB~P^-CQtI0x`K@Xb{#2#VTkD9a{W5`=AI$|(8 z*xVjivs%1nOKGi$ob^VFQEiT;@l9d~ofRxQvxe=F&i}1xG7`ga^tO-GW?t6G6)ZkG9yg@W~+50I123ixg$w*J58fA`kKVL)9o)|npJc|Oag=TjU3ZR?$E%q3vI?XD;u^@Z9l`diaH^C5rVeOc#a1=I@^P{^DuNb5 z6oZsSF@4ZQ2QoYYEWafg2K6;Q-tS zjzVC&Zmd%TU7#P(RXM!N&%=TRX&S5iBaG_~$N+2UC!}!Dg1NwXhxKet&p^FDfEA0w z$1Mu~Zy(rOLlgi5K!%$7ZG&i_Ip0KZ5jRQ=2s^i{rGK?jbEJ zZ4cR=MiBy9#&=jqks^`CMbe z%0-6xmqC9!>5`iRHVq-%upaJ3zR)6s4M7V+kDvjLXhm0oJK1Cr znMF%TmE|+Z@PmiIKLRH4}7%`y7&|ZB^k;#RTq5NLMMh_ju zKwAllVW!!3(ZZQ?+V(H6nlYrj(iE*}GSu?<3$F5a92O-HUDTQ;1g#fK={i$niEKk8 znJ{JLTGDiMnby466m4Ma5N&C)A@WSoPUHi3rf3g)hv>kU0#Rs+j-rzxI-8^ZF%V#=N@SQ~5Z%Cs!B|_B7%Z=TR!er8hz=fV zh+*>3Zjl&QDzId1oe{&u2t$ly{EYIKFYGI$6SZ_X@e5IuH(CJEeFWT9_0TqIK}Cf^ z+c;5Xitzv=t`HMUF;R4vr@s)r#FcV>Bdu#-_?O}{O`aPC9q*3ju*;nD>goQ6M_uVZ z@J*I187b23dezLO6|$U3Xw+AtN_SpWu~44=R7;b6zZQA&-mk^&6UT$w2y8?c?3kEv zC8i?eO+`wBxIj#coXrHN?i~NYe;`A3w+xBIbckn;nSs8B>mup<1}n2z?)ye$W?X|Z zi`lnEXu}fKvbE}ynID&~h+!$B|N^Y!%$xIuB#R&0|%Ud~C%+UYUX6 z3bu+dAmruX&SjcrnQ0lSrJVGF1Qa?9wIDS3{zEN0{U4h4{;y48fa6Z(`ESK~%y;p3 zB6~DNaXO|a6ip_9kH2hT~)++v;#sW z+o9G#1DvgDsgtHf-D-sXUd@J2S5Q=cEp@NMxem@mL^XUkMu9K%TA|gU%MV>C! z6WZ2LS5G%Qg#jR~io?)RqlSv{2w#V3L|_ELXAMUEuLUdrPDryZPX$Lhg=A|uIX>_r3?e9}!e zQbCgSaES8v(jY-AJ3Eu?iO!a@(^e)r=hxEU*|jtzSf_@Dj@?ef^B+)e7 zD{!|V!gdz6$^b}a4?A3xRtO#QM`R463nJq3;GDqWpT);EW&2!S_M7;<_qmhqI2ldEKPb7s39qbtA12bX7e$o6v?wZY_a9WxjjnD!|mNDtr)k2Xl<@51q6`_6);)e8Lj20 z7qBiyKe{k*I9mJG4d>df6f}V$I7RCU;VLavTOE@RUeq3o+yOdR0d$o@xi3|lA-c%s zY1-sCw%&I~jS}dWJ#d)x1pW7t_oiucL|KE{L(%e~ zR$Bi^XXX5=nG0vmhd&bQ+`Y9{DqozaCCbg0MB^AlMnnB+bJbE6Hje0Ue~&;D*&nlLgA z=K%J2dFKnZbgAWNDKQ)^Zo;bcyFq`LusvS%Pg>SZ&d$*~*h=XyS@M$#cTdUX9{)D! 
z68VB!#r3e>gP-_JA=uHmjBYoDhCv9M&=H89DO=}i1&Mb09W8~!gcjyBgv%6e5dj0Y zJfEp$D8}OEa+87HB@aESb&&tc)rulZ1%_QJpeRHEZ;;5_z(r6hVoc%TK4OJ#BG4($ z6!8LzLn3Q7$s$E5B-qX#yD3tk*bUFqPWhu6HxFFbPHSiLKm2Q~Qdi`6zlj#|WC!g( z@>qe^NB&f(O_MM9^{z2}uxlV6fYvShW2AiGJ1t3G=%^XlBT-@*?i0WO36U=$>DJCp zZ#=o+xf_SpBLdAjX<5R@EtW#7Cy1Y#3bUAmonH>S(iB)Wrh#^^g3Wdo%(1gU=W_sY z9%{}<$znWR1M&VkT$kX#rI4bQ;pnY|0CF|<<^milRUp?zkf#>oFu#V5pzh14`6}vu zhV++oy-y9X5{#xd_Fx%I7CO2~hAuK5OkfoD$^>-hK%*24ZW2m2g40b_sU4UqJoCtV zHCW2J%fB&j>jeH1gRH<*b^4rrOqiYn5lSs2#L3v!2>u|3y{6?X%} zM(k^-e4lan288uCIY0z0+9=wd3g{sPk}^OHtQwVchx%h_ETONzqi^hNw1}R z9+*unS-h%r0tE0Hj<(m~DLKVlU3F;bcfGXM*!0^NqgmDhdZtA-mB?g& zl6OCKVTW1Qp}AQQP@#Sm%O1sAtQx7MqkoN--#|zIR*fplY8FO`H;fVv+FGsFAb~xf zK!KZCYp_FBfw{xh2pI3ak>YVH>5+|V5AR;;snhmL|HeZv&MDbTy?r(a*`Xy;!SOgs z40tQ}C`rK4k&Kh037kUA3wg(wU^eixqJQ)OLD!Evr`9XgoiW7$`boh3J&1g;kkAZw z{&UAtpIYi$O>wMA3MPxu;3{00h(;T5*T4b?;NA>i@*oKG2cUtt4}xSo822H#r$TPn zbGaalHsQZ?8M9AoQSZ%q6+7_5FzxqX@YHwXv_n>uRpduImVSaD!cQ8>lBcxx^1@Qh z9{72>_G2T<@ATCot%ui2w%wq;X;;3Mg2U~moQ#1KEu-((#$0)|3xu)2penY~st0)4 zZTwo=VmELJN4t%#g`1sl#(9Le9JFd7Awd9fe zwMsdBv$j!gc|l9|XW!mfo?D@%$kKbMtOpl7gO>qK zoIZ%%&X<)BYt4q@MgJE#FdN|`M>saSi;1@ddU0%a4Zl`5*u380cIv61VJ>5lM|F=^#P9^?k4JG7SVxq+qqO-6%qK%9boI#o5a z4iENJa6^vk=3a&TexEHXZTf$OZM`qoL0;IQCHv|IGYh0TyB+89I_jZ=}Hh7m>sO^iJ2jUbNEvvS5FhcVHJw7)1_ z%^DaefuzM9A|4Vywi8=RY{zoBn46E6e?O@;5#40kPOW81fLaGAtH{X+;D~ugaKtu@ zc+yTS+cyLjpfRu`l)-SqBWLSo3+xGAVjE_Vqn4OCY)i;b6A{1lY_oQiCg*90fh}@^Os$&a*Ne4jXnZrQay!aXFs}Yzy5a8HPZVo6h@MDDF zJ#txoP-`YKvXIl&kB0Z-EFd^yDzD5!6+O8O>iqi@pNocE5w zo;?q%p&okQg$mF?J#^MhAJRu|2q5S9a-OSx5<$LC>9Yt3*`IUf7j8)S|KiKP`SKO~ zTC6L7%LBtDo!x!u^gUlL@Z|@`!3&IB=8fs}BVT^v%g?qhn~ovxYx)mgesR*T2D<^% z!*0Nf^cO?^+dYjgv0{`i;Na8vZgUG=*f|ByUphR0xUi1r6mGo5Eh6~YRD(Cz1K0}~ zV<%uQ_5k)`-(NcW{yr2QL&TyB<@=MBX|^VTLx;5^n)pVYSn}Wz?RyXOJaYHBc4|c> zYtB`04$4C>X&dxJcIc@XgX@oK`Et)u?OO3}pv}wLQcbj%;v22I{MW0RHx9eaWbMpk zJqT2yUi6kLmTQT{WpurX{W!`57el2HRH__h$#f4#2hTD^ib#c*(1amtjv<))aZ@&GNIw!_NjMr05_D 
zOi?I08Y0mYooJdVIx~nO(b53hnS2X${4gOx`CiZCn$6+O8&M7NHZItuCSr#+2i=m7 z(v$3@!(h6>O-B=l&iBrlKkurF1>U(83$B_u?<()2O7E1J^Qye_=Xo=#yw}WJIK!JU z%{#rKeBq)66;7-+%{22?%9uc+a}1~TetEaMHwwRmS42X&Xx6#wb&sj~LC*4{Hn42H)M zivk`z-4sLNc;pyDTbU@8KfDDta^^n}ptd|K;^p-xv^V^(-{g?PUel7r07NR<5(78B zrVYhTD4z;MRkg^^%afHoL=$=FcNk%Odk`8E+uW8ZC+-!K&ku7_56P@Mq z8}$g;_;oF15CjhJR0lXEzemr!lg|#mgJ9JiG^_5QV$>b2(qmB0<_I>YBM2YdTF@18 z>FZj0dwzG`l6W@YSh)k6dL3*M_re$u5|yGMIF;jxqiEPod4)Xry4F)nkTIvU?6zEw zrF{fegzL3agJb^NE_k%DbCVCdEBvQB#AkA!mISk{UO)D5d!SeKv*VPOR=|}l8EyyU z+{aY|OB6H14V3pDO@un5c!+p+4~I~&bQJr$Y95zIh}yAa}oikudAVO zMA5u>gnp8U%YUL@6Ropf)W+%7&aZ&|6Ef z-WY5ciBCgsELB)|8Y(pel6)v0>h0g?zhKc9mslP~0I=XcU1xMzSYSW$GANdeckdak ziNd?JWbXhW@77zg$Z&6w&}Fzc_5#o1rT|xe72K8!xo#Cc_%Ev}OKf^T{O#@7)-6EC5z)+3U%zoDK(AYSzc1hkMffz1e( zwhv5-K>+Up*rgoJ+eCJLOB;~SjxcXWN4z5)q{sr0lc_L0&IuyiOA+7@@s6H|zTJN5 zOla`cZ)xp)6NHN@1){+P%!2~a=78HD$Qli8|4v116QKX|0G-eq@<3Zmi>zOS_{LJ$ z9M)odabTS5P^v*Q>7XWOQ&Y+%{SpiVu$Vyay&E(EpZ6s|P#GDVqag=sOcP!F0V=6K zXB>e{J@WRqwV8_W6f+Z1@CQZ12^0ellSd8NVp8M6gBpkJAl0CHf!&p3P)zN?(6T-l zN(mDu&Q%7F5;kP;hRL>dg+-*rUIJ+hfm>J+MgkJ4!pDR$GzC_h#*@2lzw|{4(=dnt zS8G)GP!e2WP2mb_4GDq~$;KOw{oVwMV3SFjq6m%q367LMDRdu@CW7OGHUI-UFFt=2 z4hMa3e4R+g$BxjjQ543Pv2r%9EmzLpg`9fKOf*y7TGDZ&U>AZ9!8$N?P+BS|ng4{s zK@_3$Ap}Ju1A$NsMlN|*D-ga-^CnHsR0 z#fc!G-Iyp3MO4V|-_w$OgLzf-ZPXk4Pux;VZPq_PoocDAC(k%Q?aFK@&L3H(bFErx zKeCJuB%x9cRIa5CI3x;cs8D~>R!3aQ8YjAd+`E!TbfY$+n94;7O%*-s&9efpdiof( zj7EI3B^ufop9_!>t-*={>|kbsk7O0;KwHPob{<)$Xr~%lv^UK@qn0{b!^0@~OB0#a z*utYUke5|UMI+0cNOe)EXh&!rYJFT1Z6pG4QE(=S_Ds2%+A}PD3uU8m*NxCqDV+yV zOWg+DY!hkGO>o}rr{Xe?iZR)KX`1D)w>DQBoPs3!kxjsA0p2%T45JmI6z`6Vpp{}Y ztrlbGE>T8Xgl{}OB-qno>F6%-fSp(~W>=2@Jr?0T5?n{ldkQGB;lGCvV^9vFVBFyg zAr}>Ix=71ye^4^NG7Z@=h{WkYA;WuYKc%bB*)VSGGXm6OaCTPlv;10xD;s%ke!W65 zlAd8kvWe1plKZI_LKk{_`qWTgR6#gL-x}(N;2Qbx2U>xz|8`2n`xu@wFq&9qHc2{j z9I){k>c9Qcu52b(U9p`am=8!y233@UUrm9leH!J63V6k*!}~vzCWu*hHFq}MAm-9d zVjkTgu0|}*0(>`NVHio%v2UMI?CAy=0jkM21HduAAinTP3Yh-NG5Z|I03&gpX|TCk 
z%)!d3K@Q`sDLMY4k&4N$y2$>ZZ6)|!Iw&%28~@9L9m`{{r|&%1FP18_C_-QzgCxeXnL)z%F= zzHhseKhc)V297pt;7C>m4!*y_7nnGXvG@HLop7VVYxq7J1A3i52nPelY3>oO_iDCr zxadtD^IHb{+uP7PcwfO_BS#y0pS}7Ya1StYw4o23VLQhJm-3-I=~Hh1nHzq^FF5t3 z8y_8jjl)G>xgqa;&23@oI7Z((`K@D^Iq)GguJZ$5e#F~yPPS~|g$lm>VkmottQ@bU z%I?qLwZZE?)^g=r@ghb(_OUildal*_%SjtVW2IvUrk>N<2{+=h*&S}spMeL?Yd;F{ zd|=(DTBR^9s;|%Omd?+$*8f!M4o}pQ<$%w%*tY5ebLzt+;5*8({uevXC-D2h@Q1^W z0@&<0Uk0DKuwAC)w=mhRVnX!#+YpAy@ziK5-0G!d6Yg^p_7v7Wn|n-w5FCTu;<2K! zDdK3kEFY*PM_GPxG)gofEJFk9o;oh6$Fy=-PZn2r=i@ea=F)ODrFiF0_nIP!qkdBm z`ivB0-Dt}suy6*90W;>$nWkze3kXIOwt(>Ho6|(LqQD2=g5TzwATkWmQkH+A#YJX{ zR(Lm0v{tdl{U!OCIdiyZl-=Dni5=wJkmDX^r7&IZR97m6-0hH$V#H@+0( zb1|Yjj1y#QmzQS*1D3g3Z;lei%ZNt1y!;M%Tn4*Eq3DOWWZI%$*bZFyQfse4wMXDZ zd-+whX3D8wX}9|0Zs=@AsJT>3<9A!+u7EAymfEVf|G`nJ)*yS z=WCdtl{xF(Mv?V88Z}ZL^6OpX)Ni$W*pd(^Ru?C-QzzBb?oIAm}xxxovEgBK++ zDsfMSm0&c=#vsi$f@Cn>GNk$S29|oGP{!va-qFVsyH;45u|qltzC6RhX_l_n568j) z#po>Q(hmMP1*L+9O@R8(K@Wt`1k03O$e;4n@y%j{ojL>!&D{%u9n!WJ38w(hb>Z~K zA<0Ku1YgXt+b?}+8QUkxcHe6WzR3W`hW6$dBnKeqxVKaR2z+i727Bieha&K>3n9$U z#z5vEZHkk>wq!~F2@dLRACvuoy@#b^BJ-e_Y zA^-nBPXmoD%gYD=8cAL(MLrf|GV*y|-tfH4dh==@nwJl&u~->ocwKmk^(V$T92gs( z7%MhBF>7uOCUy-F0Ct2cuModD$odDq1%TyYjB7ZtLW?{O1GByr=Cj0Kj$z?dW@8D* zrZ9ovdkH~c_u@D)r`2;)<~H5Qel75FWzW{MDa(TjouWK2Yo?z%`O z9Pv#!fK-QCusk?YHmq@Vm}g=kON&5;gmJ&1iS`o8F=ad&#ed8TW_caX)!|#s6`g>-3;IyVqmQizhsYBt|8^c zlFJf@>L4zt2a$c4)*b}Yv>L-q<$#<|wEr1O+DJ9{Xw`m7##mE4{LLhc6yHf|>S23E zruCV|8fuP85LNLFA&8^!sql9ieY}pD_)O!_;NhCYUx|Rv+YqjOI~0LCV6s>ZW#CS_U96!y#ag;o+)XuNJ?#}6 z6r$MUwFG|1+rUEDMd?CBMg`8~*^pQ706}v=S0_bHI=pxb5KzH!P4|*79oCO27z8jG z#4@MWU?=kjJ1q`lHJFPAOn%tyo&KU!#a$2z(h;9TMBSAtB72tQ6oAd?>xPtj{)S9oP7g5`7Q3BRCxlUhXwR`YInPWZs+0@UAw z^~Jlc@H_Hq+i(o*QT3Pyo(wPS$)L4}ml1Dn!k6g-7r6uh)KEP2UOXAJ5r!PtX24x9 z!LkS=1-LDj@G+txkN}?(uvQ9O)>3aA{4#2f%ebtlM#S^8Y7YNovYWww1=JEB0(gVK zQURR*uyM4VxU(BP#JCDkb*nN-F>P$h2isJ>`Rex)JG~u{s zr|mh2&a;FqOV|SU2-_v=`$_zDkPcQ%&MDn?V~q0be(}JT;_uP@_hfx@9{+SiV0paF zWqn&daA$(8BAEOo(H0qJH8Bu~*5{l~U1M_x=5Mn-5R7Pw*=dUl&E#p@ 
zb;0PR#}3-cf*(C`ylnFaC$;&sEh@t5V)t3wlwim4pV(dtBH+tUZ6CF=D!gjy*9Yqk zOw@n0KF<;;X{DcxR%p+=A${CYEauTj{@_Z6-131QEzL;X6&T-He>u_ulF?81MO(=o zWA$x8e3f3IF9?qQ=v4h_Yi$G9P1i35hjHBu{byax-*;cM{Q!0Y|1!f^l;+b6Rmu{3^&-`stgP1CSgD7qb&ICu`_+0^D=+z3eTl)qq za*pg?gOV>YRY|FC$i?B3)vBabu*8&0_JrDe#3dhRTP4=N?BMZogx%xIPxg2VE5yHy zFx3-nTX9t$*S@;%=uKXFQ$eEy|i^{0lHuHOdUz&|UJ%xbf3 z#UEE%)wj%W`Y&AXmht;SBgol@5&SYGRNPoD-=_z^>9&@Oe#Tp;Jh)T)^h|3ee{%M1 zGp!bbm&MAV`*jQXlSJqyJ1k0abef7@2X&sEj$UhMMUwJ_fQ(E^8@dptPmYo6bb z`8ZneuQ=2M_iZc2Rt8(8%C9jhwPvr+o^m1sreyxLYEXM(71u6QwHo|`8Vzc_vJ^{@ z=f8e=qP+B65N6%KnLr4rs{7lv;{D~Q-K%b~8+?bjt|G?zj#O|z-W;uJTd4Z#08q8$ zag?-`oz8?%KInPm4tXBAjqiRtBByka&>Gv^>Yra?5wLsjzp8MkbDC@s|JBA^cPLv_Q-6c+E`yPeD`pWHX3}zFjO7)Z7U|$)mL9NjF7Ef4vn|R z3mEUN#4y0PdR(o|X_Vd4ip$cnFI)R(s@6ot#=~5TU&9$4Pz_RLNqc;2vek>Bbtrxj zz>>Q&uvB@)aJ(J2DqQvem#rUem93qKcFcGb=IXvtcKIqbE3@k3gJtQe>i0t$y>fPhBjT(w72Cfa%h5CubjjMja!@`#=wqZi{9zY9l#2-@(Ho-BT@S2D2*U-zng zDMUn-6(4!P#Q4`S1Pu3WD`wB-KI0i&bH(^9ay$um+8dQ}-BCU59~;Vk>+zbuAl>t? z;YNtKnp@<^Nk@YnkC>M8`(W@P{&jB9@tEtlYCnY%Tlhn31R=BTZ`+FBuUA8m{M9wt z?Bx*6eX;x0W^~TKh6C7jyH$Vk?fAUJ@^Jk+keAWuru97i#_AI|@2&c2N4p9+@2 zd>AX6zM`k8u{QWrQ9ZcZRt%LEtjbsP3{^G!`5tB(4XVC!q^?{3cSYozuY?Hjt5?wV z{m1IcrT0}mS;1@YZIlM^l3xqfZ-v*M5>JO)OC5FGyzULn%a7OHHu=MGu$l|UbnD;3<5w|d z)8l%)=5(o|4Ia%cuR0#=z3#=x%Y}eui58@TsA|z_yAzm1llGjQEGuUq?z8elXoAbR zY+_wmjC}b-sNIKLHXffD54MYu7ZPC`&U-C5hPu}X2#RRk-?kM^28J6Y%MGY+4dlt! 
z0B}rMxGYZo8ZL`GiL(2a)GI@%f3%!>2%8^#e#f4Ufz!U&BU?IIPtn+k_YKTL!&# zUBg;-+3ocp=x_io$dChvLx0;=%-bApv&N&k9%Ol?j;i^vAyqufe`EkAa$9~ZwWs}n( zQc%alJ>PPL#`t%7T$dje7jCI>b_VwO*YKcyE!ZgdFBe*DyiPa0oI*!lvijia&{m_B zd~8uj_8VBL+exl?M~%TF=e`kKq!VA19Nym;W-0s{65CHARNX7rydSI{H~ZWTC&A45 z*N~y(Xi(4(4UCj6&*&|zj)$H>$C)XrNJ7bNEGOsN47o8Rxwp?by<`w14F2Iggz(?C z6<4NOjSh{lrOGYsZPs?Tphmn2sDHK$L-oolPKD4to6AbF!et2(YPC}?`!H1D7+3gL zZn#36+}aP&-;lB34?g!eE--vMC!>k{sp z&(aAi!;PY3@q<|TU*FbaEL55AFgorGSFlSc`cZO8APDt8=X<|>7>mijhN?!R4cw@| z1?Dvdlevj=noAS^)`r?ZNjLs$HEfk^ue|J_d31->hKp|7 zEq{0ynt#brEkYg)b@NT-kGY3XmwyEfX1-}3*YBkUb7Xku*W!U2{^d8QwezLmVDQZ* zdEqrQcMZ{7nAK09HRHTulvk!M5tUAI?X{5TI0 zCj4tCTOIw6RsVtsxh}kx|DM*-^LP|gCHxu+9{q$|cUDiA^IPdI`4zUE>U+Zn=(<}> zxG1h}0qcw=m$Q$kW>$TeQBtiL9Aq%f@~rHO;q272$bO`RvoqzSvqAjy9lK$9OUR1+ zYlzPwy}4?M!g{j#@raoaviR3FRs`**0 z{Gkfn+KUQ$16xglHLV=O^_$C9ABC2u_#q)at(S-4Dzl|8SJ-(e4jeW()EhBkFhhULsHWXxVQ9^hx~ow`MN - + From 73157ae3d38e1010d13c5374d0fea3e249b432da Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 2 Nov 2012 12:02:59 -0400 Subject: [PATCH 11/26] Allow each pipeline test the max of 10 hours to run The runtime of these tests is extremely variable -- sometimes they will complete almost instantly, other times they will wait in an LSF queue for 5-10+ hours. Minimize timeout errors by setting the timeout for these tests to the maximum of 10 hours. 
--- .../pipeline/DataProcessingPipelineTest.scala | 4 ++-- .../PacbioProcessingPipelineTest.scala | 2 +- .../examples/DevNullOutputPipelineTest.scala | 2 +- .../ExampleCountLociPipelineTest.scala | 2 +- .../ExampleCountReadsPipelineTest.scala | 2 +- .../ExampleReadFilterPipelineTest.scala | 2 +- .../ExampleRetryMemoryLimitPipelineTest.scala | 2 +- .../ExampleUnifiedGenotyperPipelineTest.scala | 8 ++++---- .../examples/HelloWorldPipelineTest.scala | 20 +++++++++---------- 9 files changed, 22 insertions(+), 22 deletions(-) diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala index 944ef7977..60c9d9a59 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -28,7 +28,7 @@ import org.testng.annotations.Test import org.broadinstitute.sting.BaseTest class DataProcessingPipelineTest { - @Test + @Test(timeOut=36000000) def testSimpleBAM { val projectName = "test1" val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam" @@ -45,7 +45,7 @@ class DataProcessingPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testBWAPEBAM { val projectName = "test2" val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala index 3e9af3e68..dd07cbfdc 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -28,7 +28,7 @@ import org.testng.annotations.Test import org.broadinstitute.sting.BaseTest class 
PacbioProcessingPipelineTest { - @Test + @Test(timeOut=36000000) def testPacbioProcessingPipeline { val testOut = "exampleBAM.recal.bam" val spec = new PipelineTestSpec diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala index 92c40acb1..6bc6b56db 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala @@ -53,7 +53,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest class DevNullOutputPipelineTest { - @Test + @Test(timeOut=36000000) def testDevNullOutput() { val spec = new PipelineTestSpec spec.name = "devnulloutput" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala index 9d885dda2..f52632a7f 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala @@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest class ExampleCountLociPipelineTest { - @Test + @Test(timeOut=36000000) def testCountLoci() { val testOut = "count.out" val spec = new PipelineTestSpec diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala index 1b965d8d2..c23c12719 100644 --- 
a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala @@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest class ExampleCountReadsPipelineTest { - @Test + @Test(timeOut=36000000) def testCountReads() { val spec = new PipelineTestSpec spec.name = "countreads" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala index c6e4c3507..4ffaf7b5c 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala @@ -77,7 +77,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest class ExampleReadFilterPipelineTest { - @Test + @Test(timeOut=36000000) def testExampleReadFilter() { val spec = new PipelineTestSpec spec.name = "examplereadfilter" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala index a9a5928fc..0215a389c 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala @@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest class ExampleRetryMemoryLimitPipelineTest { - @Test + @Test(timeOut=36000000) 
def testRetryMemoryLimit() { val spec = new PipelineTestSpec spec.name = "RetryMemoryLimit" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala index f6fcd7c12..67ac68c28 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala @@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} import org.broadinstitute.sting.BaseTest class ExampleUnifiedGenotyperPipelineTest { - @Test + @Test(timeOut=36000000) def testUnifiedGenotyper() { val spec = new PipelineTestSpec spec.name = "unifiedgenotyper" @@ -51,7 +51,7 @@ class ExampleUnifiedGenotyperPipelineTest { Array("vcf_intervals", BaseTest.validationDataLocation + "intervalTest.1.vcf") ).asInstanceOf[Array[Array[Object]]] - @Test(dataProvider = "ugIntervals") + @Test(dataProvider = "ugIntervals", timeOut=36000000) def testUnifiedGenotyperWithIntervals(intervalsName: String, intervalsPath: String) { val spec = new PipelineTestSpec spec.name = "unifiedgenotyper_with_" + intervalsName @@ -64,7 +64,7 @@ class ExampleUnifiedGenotyperPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testUnifiedGenotyperNoGCOpt() { val spec = new PipelineTestSpec spec.name = "unifiedgenotyper_no_gc_opt" @@ -80,7 +80,7 @@ class ExampleUnifiedGenotyperPipelineTest { @DataProvider(name="resMemReqParams") def getResMemReqParam = Array(Array("mem_free"), Array("virtual_free")).asInstanceOf[Array[Array[Object]]] - @Test(dataProvider = "resMemReqParams") + @Test(dataProvider = "resMemReqParams", timeOut=36000000) def testUnifiedGenotyperResMemReqParam(reqParam: String) { val spec = new PipelineTestSpec spec.name = 
"unifiedgenotyper_" + reqParam diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala index a43727ba6..50fc529dd 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala @@ -28,7 +28,7 @@ import org.testng.annotations.Test import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} class HelloWorldPipelineTest { - @Test + @Test(timeOut=36000000) def testHelloWorld() { val spec = new PipelineTestSpec spec.name = "HelloWorld" @@ -37,7 +37,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testHelloWorldWithRunName() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithRunName" @@ -47,7 +47,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testHelloWorldWithMemoryLimit() { val spec = new PipelineTestSpec spec.name = "HelloWorldMemoryLimit" @@ -57,7 +57,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testHelloWorldWithPriority() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithPriority" @@ -67,7 +67,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testHelloWorldWithLsfResource() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithLsfResource" @@ -77,7 +77,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testHelloWorldWithLsfResourceAndMemoryLimit() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithLsfResourceAndMemoryLimit" @@ -87,7 +87,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + 
@Test(timeOut=36000000) def testHelloWorldWithLsfEnvironment() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithLsfEnvironment" @@ -97,7 +97,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testHelloWorldWithGridEngineResource() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithGridEngineResource" @@ -107,7 +107,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testHelloWorldWithGridEngineResourceAndMemoryLimit() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithGridEngineResourceAndMemoryLimit" @@ -117,7 +117,7 @@ class HelloWorldPipelineTest { PipelineTest.executeTest(spec) } - @Test + @Test(timeOut=36000000) def testHelloWorldWithGridEngineEnvironment() { val spec = new PipelineTestSpec spec.name = "HelloWorldWithGridEngineEnvironment" From 0ab4022f2301ffa5b68346835de64a5992356018 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 2 Nov 2012 14:30:20 -0400 Subject: [PATCH 12/26] Final r119 tribble jar --- settings/repository/org.broad/tribble-119.jar | Bin 319935 -> 319935 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/settings/repository/org.broad/tribble-119.jar b/settings/repository/org.broad/tribble-119.jar index ab456938aba23540eb8aedbac454841f220c6bba..c74bea398312b68b2f27a60b2061ef2ebf076ff6 100644 GIT binary patch delta 2458 zcmYjRXHZpF5WeT$1*u9^bPS?05ersCQ9#X7U5Rv*ql5`<=ZnzsNGb$kJrVLJ5on z!9Wmn(TOJG#k}amSxKULuD8SgeV?oUHMVTB3fce`uZP&C&p37(H=ClJGrV9h;$8=5 z2{CKpd8VQ!$gv;IpviB=!Mfx{?Z}I>^@Bq1MlxU+_GSou0Q$SyvFwGw@vu);3 zUf}g?D=eTjniDt#G1zZ{1!7##J03q2d`?7r{8}H5Tf#nYf8@qb+zyY_aJzqV9LKn* zc^oTt0LR+h;e*VPQJB@UNS&TKyZYy=>DoO~6LQc>8@qp*p80WMFiQ}uS)wqEoe;*c zlbWtW`>^Sn!>+>+&z`x!N%>~fTB^2YM6nCHmlsD2LbITJI+}0Fw{Wf2*92mgHup~* zr?~gAwJ{gvOxd=-y-Wqc%T&WEMru5sSZb?Y1)B00>-+U>fci8623#=Nt^jPZLfA?J zwUUx!z#f~Z2iYlw*~{+3nZA6_ni*lJ*_s6!<6-h>R*wqGr$Dlot0S19@&Ddiah=A3 
zhGO37`D`~1qgbwD@kn1uTf?66!_jbd8T-jDQEWCwqNCUjR8e`lgRR4ZyflW*LdTI9 z=EwUt{KV35N@e$9R)s5~_?=)+3^7+F>x7Po6c)=Bm_CN*&wL^DcbN!6mx){*3jBhFa>Gk}!)4A` zEV^NS?_%*Ie}LyJ#X>$@#(L42m+ILedgEuKIGz&k^DcVHq9Yg9>8e=4pG3?Nn=vG} z=ZkYOcZ5zn{TUvz+Qmj2JRIv2_3?wkS93;3B}fNpq(>=d-FpQ==oRP_()k-G`-e(~ zeDb(q(guEbZ6gh_#*2EBrP26hDGt-6nLHFcLrUUr^vO-~=FjNqDP81;CqB|OPCD6N zvY3SZGj>Wd@b<%<(nB7ph?S0yM$hULshfK|&r20tjUm5D!>H9YX)m%@F6B!>e1)Jy zI>~z`)kx-CgsW{*7{6}SEmd(|5C4!JIN+@k6ZnDWEU(QU_GMub&4llr+You zArk$|Zx9yVpu7n=uonHtav&V>eGc(in+wrsH|9bFqW4W$X7JySseJY(e1~4sJlKR7 zn@3qac@T`Y-!1S%JaLPrd3FoD(4LY{(;Uqom@c1K&(J{-#uIcDf1xAxz6GRGY619Q z{80g{K%7@dYE7zvk#cq+Oh9LMAtk#O!MA897m?jpM8yM(iCkteEWmhwF^#(6HiV*` z(4iLVxeXK1u)ae~qVK>uv|H}LDn#cJ@~ezS${8hq+bp+~kcjT3L{Bb-#pv%TrSV+v z!fLdW?vnrcT^i4)j2lpIfP)|_vI9?E2pMYDqs=%_f^m&trf)F=N^Qi|MESW z%(xP^pdDLD&+@F2;!CTDUse?@gIG;Ng;&!sCDo*WDyJ{r-(Ev~YHEnb_*%jPwN&qM zE#-evN6&Dnj(8i^lk(y9G;e7=@fzPiy8PGx-dMk+ftJhlKGjLPPxBZxQeJ2y=~CVZ zftYXCM7nKjqCRy^um=6k&D8HyGwJ-anbys-h5DXrA^rMW=w3)G&6PKRBig86bQ{&F zYa7tJo#r~yPWg}9iHBR&Ah+~5EJ delta 2458 zcmX|BXHZmE6n^);LtQ|M$m$S8#fSyFC<=r&AQ(}z8jZw&fCY^W1jV4yM9qvM!mRO) z!2-rIqsGB3@D$6^6cwcRz9b5k(a|8*klb@G$q)EWyQh5Tyww-k>5J?v7S5BvTo6nJ zAt5NuVyqY+lr}S6bn#28b;kdFpVI$YCo++OE|er2A+{P5h8?OVOVsm*7FZ$fbaj^y zbH<+`6t!V){iudbd?Q-9_(l3&Twb}s2L!z0K-ku@0or5+K)_+S+8Y!|Hgw155|7R zuq+-JR&S3nv560g^lf+<(wT202>2JMHEeg(M(iD|OWM8I$Ry*q;B2LZhMv0XiCb7`&<6@vlIkBOD!pL)cQQO(^b6ESTY~!`}IvIT}`MdBTRNK z0Ci3TS!PNzDS4)(+ky)6^pnIz_8v-%QJM597s;s{cU49Z7Aa*-?75i`~Q z`=)NsDI{zN#vKbH+mT_#ZaGOdK_PP`dBTK4G2{Z)lRaX|EcC?1l6YiMc@j@np&&0x zBs0-)Adv*K`gK2&Ok9MreLtzd6H$VXkjG|pZftfs8rGd8Ni3u%gY0laOLRFYL8=tD z+5y+adSdH@7JV-{I3$}il zyW<1Bw-2CA3s=jj7UCG?vPx93D%Y(=HBzJ04HqwsWOUDXF7z8L1i@e-S4IJQV5?m7 z6JJ|k=UEFxFN(^G1>y%&6rL>;3oTK|S}nQ{LZK&K^mj(V?WkC9go4p=(ajcx>6gVa z6$)aW*n}>*O)t(t!?1ku*e59Dw22LB6jJ&`V=D}yTXV-2%R$=9M|yPfl=llk5MBuU z6Uq-URdx@N%-G}y9i%l(=v7OD>@kzkL}>(mS&HjaX$EtJPm|Kwjy`%x{%noDzS21+ zJPweqFlvs6NVXHOepZ4s4PQS@kRCEmS(0>k1SVc_QtDzB-!oDfV`I>7k^`X2^@_9; 
zyI0QZr7(6vP$K=zYNl67Hta4hw@T6M`QR?8f?)8&Kcr4q6iO`Mdq&h;HS{u8y*x0$HKzpaI&@kA_+9-|!9e!X+;%ew;nG@GSA2xP=%Lb4v@sk_* zkIS57dPK?c@Gi+ouKY_fQ_gON;~0*|>>_7O0yBAYESO`aq*!>zCfl|R5**O-A_d%7 zH76O$S^ej~fF0{mx(E?0&#Fr>8)GvsL8!5q5G23+9X#mog*-tAKD2jvqYfq`X6OKy zEH~=F8&RDN%MfF;VGUwSHhhKXo&!q|59Po@#Eu;J8qq5k<{=))<@VNG_!9N$3M@kW zNr5jATNGH0=zo>_)35S(Jy&5p+Lv78Sa^-cjmv{bw5Q}j4C1>y-cRIph(o>MI;=zV zzX6L)|NAkOPu+lTaZ1aZumLgYCXJ%la}&ZbI`|d@BObX$r=fH|y#;=#Ptx;g4(SJ` z)6=f0e_B3-A*SY2KgH+yy!*fc&QeAJ1mOEe1+WzHvqH|+geq{9a|>Y{8oLU4uxAl` zgZl9zuJ;wuAk)OMPz=QfN-#r_WOH0}d$*P|KDLhXLi7Jn2mYA9q>dk#XFac*S39={uSyqJ{H!6Oq0j*2?R}wbDG) zU(-6kcN?9H>Id3*e0Lk~!?T^&p_dYb{-$%SV%Lpr&h*~#-01B77w Id;`S(3mt0_!vFvP From eae2d019cf6f9b5b359a3bbc0b0066f490464f44 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 2 Nov 2012 19:01:59 -0400 Subject: [PATCH 13/26] Refuse to package the GATK from a non-clean working directory Packaging from a non-clean working directory can result in an incorrect jar. Now that we have external collaborators packaging and distributing the GATK, not enforcing the clean requirement has become far too dangerous. At the same time, invoking "clean" automatically through a direct dependency would also be dangerous -- instead, it's better to error out if a packaging target is invoked from a non-clean working dir. --- build.xml | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/build.xml b/build.xml index c6b1afc56..aa5792419 100644 --- a/build.xml +++ b/build.xml @@ -891,7 +891,7 @@ - + @@ -1011,6 +1011,24 @@ + + + + + + + + + + + + + + + + + + From b07106b3a74505d827a1041870d51616d4a6a24f Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 6 Nov 2012 14:39:58 -0800 Subject: [PATCH 16/26] Reimplement the allele biased downsampling to be smarter. Now we don't blindly pull n% of reads off of each allele. 
Instead, we try all possible genotype conformations for the contaminating sample and choose the one that provides the best genotype for the target sample (based heuristically on allele balance). This method allows us to save some of the reads that belong to the target sample, which should make Daniel M happy. Added unit tests to test the biased downsampling functionality. --- .../AlleleBiasedDownsamplingUtils.java | 114 ++++++++++++++++-- ...AlleleBiasedDownsamplingUtilsUnitTest.java | 108 +++++++++++++++++ 2 files changed, 209 insertions(+), 13 deletions(-) create mode 100755 protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index 59357e1c4..1a7b4da51 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -67,7 +67,6 @@ public class AlleleBiasedDownsamplingUtils { alleleStratifiedElements[baseIndex].add(pe); } - // Down-sample *each* allele by the contamination fraction applied to the entire pileup. // Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later. 
int numReadsToRemove = (int)(pileup.getNumberOfElements() * downsamplingFraction); // floor final TreeSet elementsToKeep = new TreeSet(new Comparator() { @@ -78,12 +77,21 @@ public class AlleleBiasedDownsamplingUtils { } }); + // make a listing of allele counts + final int[] alleleCounts = new int[4]; + for ( int i = 0; i < 4; i++ ) + alleleCounts[i] = alleleStratifiedElements[i].size(); + + // do smart down-sampling + final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove); + for ( int i = 0; i < 4; i++ ) { final ArrayList alleleList = alleleStratifiedElements[i]; - if ( alleleList.size() <= numReadsToRemove ) - logAllElements(alleleList, log); + // if we don't need to remove any reads, keep them all + if ( alleleList.size() <= targetAlleleCounts[i] ) + elementsToKeep.addAll(alleleList); else - elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove, log)); + elementsToKeep.addAll(downsampleElements(alleleList, alleleList.size() - targetAlleleCounts[i], log)); } // clean up pointers so memory can be garbage collected if needed @@ -93,6 +101,59 @@ public class AlleleBiasedDownsamplingUtils { return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList(elementsToKeep)); } + private static int scoreAlleleCounts(final int[] alleleCounts) { + final int maxIndex = MathUtils.maxElementIndex(alleleCounts); + final int maxCount = alleleCounts[maxIndex]; + + int nonMaxCount = 0; + for ( int i = 0; i < 4; i++ ) { + if ( i != maxIndex ) + nonMaxCount += alleleCounts[i]; + } + + // try to get the best score: in the het case the counts should be equal and in the hom case the non-max should be zero + return Math.min(Math.abs(maxCount - nonMaxCount), Math.abs(nonMaxCount)); + } + + /** + * Computes an allele biased version of the given pileup + * + * @param alleleCounts the original pileup + * @param numReadsToRemove fraction of total reads to remove per allele + * @return allele biased pileup + */ + protected static int[] 
runSmartDownsampling(final int[] alleleCounts, final int numReadsToRemove) { + final int numAlleles = alleleCounts.length; + + int maxScore = scoreAlleleCounts(alleleCounts); + int[] alleleCountsOfMax = alleleCounts; + + final int numReadsToRemovePerAllele = numReadsToRemove / 2; + + for ( int i = 0; i < numAlleles; i++ ) { + for ( int j = i; j < numAlleles; j++ ) { + final int[] newCounts = alleleCounts.clone(); + + // split these cases so we don't lose on the floor (since we divided by 2) + if ( i == j ) { + newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemove); + } else { + newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemovePerAllele); + newCounts[j] = Math.max(0, newCounts[j] - numReadsToRemovePerAllele); + } + + final int score = scoreAlleleCounts(newCounts); + + if ( score < maxScore ) { + maxScore = score; + alleleCountsOfMax = newCounts; + } + } + } + + return alleleCountsOfMax; + } + /** * Performs allele biased down-sampling on a pileup and computes the list of elements to keep * @@ -102,7 +163,15 @@ public class AlleleBiasedDownsamplingUtils { * @return the list of pileup elements TO KEEP */ private static List downsampleElements(final ArrayList elements, final int numElementsToRemove, final PrintStream log) { + if ( numElementsToRemove == 0 ) + return elements; + final int pileupSize = elements.size(); + if ( numElementsToRemove == pileupSize ) { + logAllElements(elements, log); + return new ArrayList(0); + } + final BitSet itemsToRemove = new BitSet(pileupSize); for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) { itemsToRemove.set(selectedIndex); @@ -132,15 +201,25 @@ public class AlleleBiasedDownsamplingUtils { for ( final List reads : alleleReadMap.values() ) totalReads += reads.size(); - // Down-sample *each* allele by the contamination fraction applied to the entire pileup. 
int numReadsToRemove = (int)(totalReads * downsamplingFraction); - final List readsToRemove = new ArrayList(numReadsToRemove * alleleReadMap.size()); - for ( final List reads : alleleReadMap.values() ) { - if ( reads.size() <= numReadsToRemove ) { - readsToRemove.addAll(reads); - logAllReads(reads, log); - } else { - readsToRemove.addAll(downsampleReads(reads, numReadsToRemove, log)); + + // make a listing of allele counts + final List alleles = new ArrayList(alleleReadMap.keySet()); + alleles.remove(Allele.NO_CALL); // ignore the no-call bin + final int numAlleles = alleles.size(); + final int[] alleleCounts = new int[numAlleles]; + for ( int i = 0; i < numAlleles; i++ ) + alleleCounts[i] = alleleReadMap.get(alleles.get(i)).size(); + + // do smart down-sampling + final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove); + + final List readsToRemove = new ArrayList(numReadsToRemove); + for ( int i = 0; i < numAlleles; i++ ) { + final List alleleBin = alleleReadMap.get(alleles.get(i)); + + if ( alleleBin.size() > targetAlleleCounts[i] ) { + readsToRemove.addAll(downsampleReads(alleleBin, alleleBin.size() - targetAlleleCounts[i], log)); } } @@ -156,13 +235,22 @@ public class AlleleBiasedDownsamplingUtils { * @return the list of pileup elements TO REMOVE */ private static List downsampleReads(final List reads, final int numElementsToRemove, final PrintStream log) { + final ArrayList readsToRemove = new ArrayList(numElementsToRemove); + + if ( numElementsToRemove == 0 ) + return readsToRemove; + final int pileupSize = reads.size(); + if ( numElementsToRemove == pileupSize ) { + logAllReads(reads, log); + return reads; + } + final BitSet itemsToRemove = new BitSet(pileupSize); for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) { itemsToRemove.set(selectedIndex); } - ArrayList readsToRemove = new ArrayList(pileupSize - numElementsToRemove); for ( int i = 0; i < pileupSize; i++ ) { if ( 
itemsToRemove.get(i) ) { final GATKSAMRecord read = reads.get(i); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java new file mode 100755 index 000000000..7c2f5619a --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2010 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.gatk.downsampling; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + + +/** + * Basic unit test for AlleleBiasedDownsamplingUtils + */ +public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest { + + + @Test + public void testSmartDownsampling() { + + final int[] idealHetAlleleCounts = new int[]{0, 50, 0, 50}; + final int[] idealHomAlleleCounts = new int[]{0, 100, 0, 0}; + + // no contamination, no removal + testOneCase(0, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + testOneCase(0, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts); + + // hom sample, het contaminant, different alleles + testOneCase(5, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts); + testOneCase(0, 0, 5, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts); + testOneCase(0, 0, 0, 5, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts); + + // hom sample, hom contaminant, different alleles + testOneCase(10, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts); + testOneCase(0, 0, 10, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts); + testOneCase(0, 0, 0, 10, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts); + + // het sample, het contaminant, different alleles + testOneCase(5, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + testOneCase(0, 0, 5, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + + // het sample, hom contaminant, different alleles + testOneCase(10, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + testOneCase(0, 0, 10, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + + // hom sample, het contaminant, overlapping alleles + final int[] enhancedHomAlleleCounts = new int[]{0, 105, 0, 0}; + testOneCase(5, 5, 0, 0, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts); + testOneCase(0, 5, 5, 0, 0.1, 100, idealHomAlleleCounts, 
enhancedHomAlleleCounts); + testOneCase(0, 5, 0, 5, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts); + + // hom sample, hom contaminant, overlapping alleles + testOneCase(0, 10, 0, 0, 0.1, 100, idealHomAlleleCounts, new int[]{0, 110, 0, 0}); + + // het sample, het contaminant, overlapping alleles + testOneCase(5, 5, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + testOneCase(0, 5, 5, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + testOneCase(0, 5, 0, 5, 0.1, 100, idealHetAlleleCounts, new int[]{0, 55, 0, 55}); + testOneCase(5, 0, 0, 5, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + testOneCase(0, 0, 5, 5, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + + // het sample, hom contaminant, overlapping alleles + testOneCase(0, 10, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + testOneCase(0, 0, 0, 10, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts); + } + + private static void testOneCase(final int addA, final int addC, final int addG, final int addT, final double contaminationFraction, + final int pileupSize, final int[] initialCounts, final int[] targetCounts) { + + final int[] actualCounts = initialCounts.clone(); + actualCounts[0] += addA; + actualCounts[1] += addC; + actualCounts[2] += addG; + actualCounts[3] += addT; + + final int[] results = AlleleBiasedDownsamplingUtils.runSmartDownsampling(actualCounts, (int)(pileupSize * contaminationFraction)); + Assert.assertTrue(countsAreEqual(actualCounts, targetCounts)); + } + + private static boolean countsAreEqual(final int[] counts1, final int[] counts2) { + for ( int i = 0; i < 4; i++ ) { + if ( counts1[i] != counts2[i] ) + return false; + } + return true; + } +} From 0a2dded09395f7f09f157f86791ad92433403d47 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 6 Nov 2012 16:07:40 -0800 Subject: [PATCH 17/26] Fixes for bugs uncovered by unit tests --- .../AlleleBiasedDownsamplingUtils.java | 25 ++++++++++++------- 
...AlleleBiasedDownsamplingUtilsUnitTest.java | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java index 1a7b4da51..a61614481 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java @@ -102,17 +102,24 @@ public class AlleleBiasedDownsamplingUtils { } private static int scoreAlleleCounts(final int[] alleleCounts) { - final int maxIndex = MathUtils.maxElementIndex(alleleCounts); - final int maxCount = alleleCounts[maxIndex]; + if ( alleleCounts.length < 2 ) + return 0; - int nonMaxCount = 0; - for ( int i = 0; i < 4; i++ ) { - if ( i != maxIndex ) - nonMaxCount += alleleCounts[i]; - } + // sort the counts (in ascending order) + final int[] alleleCountsCopy = alleleCounts.clone(); + Arrays.sort(alleleCountsCopy); - // try to get the best score: in the het case the counts should be equal and in the hom case the non-max should be zero - return Math.min(Math.abs(maxCount - nonMaxCount), Math.abs(nonMaxCount)); + final int maxCount = alleleCountsCopy[alleleCounts.length - 1]; + final int nextBestCount = alleleCountsCopy[alleleCounts.length - 2]; + + int remainderCount = 0; + for ( int i = 0; i < alleleCounts.length - 2; i++ ) + remainderCount += alleleCountsCopy[i]; + + // try to get the best score: + // - in the het case the counts should be equal with nothing else + // - in the hom case the non-max should be zero + return Math.min(maxCount - nextBestCount + remainderCount, Math.abs(nextBestCount + remainderCount)); } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java index 7c2f5619a..be19d3ef4 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java @@ -95,7 +95,7 @@ public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest { actualCounts[3] += addT; final int[] results = AlleleBiasedDownsamplingUtils.runSmartDownsampling(actualCounts, (int)(pileupSize * contaminationFraction)); - Assert.assertTrue(countsAreEqual(actualCounts, targetCounts)); + Assert.assertTrue(countsAreEqual(results, targetCounts)); } private static boolean countsAreEqual(final int[] counts1, final int[] counts2) { From 15b8c08132c59cfa08ed0e7b5a08f1d893da8029 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 6 Nov 2012 20:53:33 -0800 Subject: [PATCH 18/26] Apparently CIGAR elements can have 0 length according to the spec, but 0Ms were causing left alignment of indels to fail. Fixed. 
--- .../src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 4f1e66ba2..585578958 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -646,7 +646,7 @@ public class AlignmentUtils { // get the indel element and move it left one base CigarElement ce = cigar.getCigarElement(indexOfIndel - 1); - elements.add(new CigarElement(ce.getLength() - 1, ce.getOperator())); + elements.add(new CigarElement(Math.max(ce.getLength() - 1, 0), ce.getOperator())); elements.add(cigar.getCigarElement(indexOfIndel)); if (indexOfIndel + 1 < cigar.numCigarElements()) { ce = cigar.getCigarElement(indexOfIndel + 1); From 2da76db9452c50277e1269c3fa4d8248e97dcd8a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 6 Nov 2012 22:23:05 -0800 Subject: [PATCH 19/26] Updating integration tests --- .../UnifiedGenotyperIntegrationTest.java | 68 +++++++++---------- .../HaplotypeCallerIntegrationTest.java | 18 ++--- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 9212d0e53..d3e77e002 100755 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + 
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a")); + Arrays.asList("847605f4efafef89529fe0e496315edd")); executeTest("test MultiSample Pilot1", spec); } @@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de")); + Arrays.asList("5b31b811072a4df04524e13604015f9b")); executeTest("test MultiSample Pilot2 with alleles passed in", spec1); } @@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("24532eb381724cd74e99370da28d49ed")); + Arrays.asList("d9992e55381afb43742cc9b30fcd7538")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("062a946160eec1d0fc135d58ca654ff4")); + Arrays.asList("fea530fdc8677e10be4cc11625fa5376")); executeTest("test SingleSample Pilot2", spec); } @@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { 
public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("94dc17d76d841f1d3a36160767ffa034")); + Arrays.asList("704888987baacff8c7b273b8ab9938d0")); executeTest("test Multiple SNP alleles", spec); } @@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("9106d01ca0d0a8fedd068e72d509f380")); + Arrays.asList("e14c9b1f9f34d6c16de445bfa385be89")); executeTest("test reverse trim", spec); } @@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("d847acf841ba8ba653f996ce4869f439")); + Arrays.asList("fb204e821a24d03bd3a671b6e01c449a")); executeTest("test mismatched PLs", spec); } @@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad"; + private final static String COMPRESSED_OUTPUT_MD5 = "5b8f477c287770b5769b05591e35bc2d"; @Test public void testCompressedOutput() { @@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { 
public void testMinBaseQualityScore() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1, - Arrays.asList("56157d930da6ccd224bce1ca93f11e41")); + Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff")); executeTest("test min_base_quality_score 26", spec); } @@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("6ccb9bd88934e4272d0ce362dd35e603")); + Arrays.asList("55760482335497086458b09e415ecf54")); executeTest("test SLOD", spec); } @@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNDA() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("480437dd6e2760f4ab3194431519f331")); + Arrays.asList("938e888a40182878be4c3cc4859adb69")); executeTest("test NDA", spec); } @@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("22c039412fd387dde6125b07c9a74a25")); + Arrays.asList("7dc186d420487e4e156a24ec8dea0951")); executeTest("test using comp track", spec); } @@ -187,17 +187,17 
@@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testOutputParameterSitesOnly() { - testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb"); + testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9"); } @Test public void testOutputParameterAllConfident() { - testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c"); + testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f"); } @Test public void testOutputParameterAllSites() { - testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841"); + testOutputParameters("--output_mode EMIT_ALL_SITES", "81fff490c0f59890f1e75dc290833434"); } private void testOutputParameters(final String args, final String md5) { @@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("df524e98903d96ab9353bee7c16a69de")); + Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb")); executeTest("test confidence 1", spec1); } @@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" ); + testHeterozosity( 0.01, "8dd37249e0a80afa86594c3f1e720760" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" ); + testHeterozosity( 1.0 / 1850, "040d169e20fda56f8de009a6015eb384" ); } private void testHeterozosity(final double arg, final String md5) { @@ -251,7 +251,7 @@ public class 
UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835")); + Arrays.asList("0e4713e4aa44f4f8fcfea7138295a627")); executeTest(String.format("test multiple technologies"), spec); } @@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52")); + Arrays.asList("46ea5d1ceb8eed1d0db63c3577915d6c")); executeTest(String.format("test calling with BAQ"), spec); } @@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("3593495aab5f6204c65de0b073a6ff65")); + Arrays.asList("50329e15e5139be9e3b643f0b3ba8a53")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("8b486a098029d5a106b0a37eff541c15")); + Arrays.asList("2b85e3bd6bf981afaf7324666740d74b")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("18efedc50cae2aacaba372265e38310b")); + Arrays.asList("a6fd46eff78827060451a62cffd698a7")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f")); + 
Arrays.asList("b8129bf754490cc3c76191d8cc4ec93f")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("578c0540f4f2052a634a829bcb9cc27d")); + Arrays.asList("591332fa0b5b22778cf820ee257049d2")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSampleIndels1() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e")); + Arrays.asList("a4761d7f25e7a62f34494801c98a0da7")); List result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1, - Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352")); + Arrays.asList("c526c234947482d1cd2ffc5102083a08")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("857b8e5df444463ac27f665c4f67fbe2")); + 
Arrays.asList("90adefd39ed67865b0cb275ad0f07383")); executeTest("test minIndelFraction 0.0", spec); } @@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("81d4c7d9010fd6733b2997bc378e7471")); + Arrays.asList("2fded43949e258f8e9f68893c61c1bdd")); executeTest("test minIndelFraction 0.25", spec); } @@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testNsInCigar() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1, - Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f")); + Arrays.asList("d6d40bacd540a41f305420dfea35e04a")); executeTest("test calling on reads with Ns in CIGAR", spec); } @@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba")); + Arrays.asList("c1077662411164182c5f75478344f83d")); executeTest("test calling on a ReducedRead BAM", spec); } @Test public void testReducedBamSNPs() { - testReducedCalling("SNP", "e7fc11baf208a1bca7b462d3148c936e"); + testReducedCalling("SNP", "f5ccbc96d0d66832dd9b3c5cb6507db4"); } @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5"); + testReducedCalling("INDEL", "3c02ee5187933bed44dc416a2e28511f"); } @@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void 
testContaminationDownsampling() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1, - Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae")); + Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb")); executeTest("test contamination_percentage_to_filter 0.20", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index d00f5b61d..6828dbcb5 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e"); + HCTest(CEUTRIO_BAM, "", "56aa4b84606b6b0b7dc78a383974d1b3"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215"); + HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f"); } @Test public void testHaplotypeCallerMultiSampleGGA() { - HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469"); + HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "39da622b309597d7a0b082c8aa1748c9"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends 
WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "d86fae2d1b504b422b7b0cfbbdecc2c4"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "7fbc6b9e27e374f2ffe4be952d88c7c6"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8"); } @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f6326adfdf5bc147626b30a89ce06d56")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("788176e1717bd28fc7cbc8e3efbb6100")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, 
Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("96ab8253d242b851ccfc218759f79784")); executeTest("HCTestStructuralIndels: ", spec); } @@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9")); + Arrays.asList("425f1a0fb00d7145edf1c55e54346fae")); executeTest("HC calling on a ReducedRead BAM", spec); } } From 17ab3a39d55ce389c45f24c81349263e8cd4dad7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 8 Nov 2012 14:35:23 -0500 Subject: [PATCH 20/26] Make the --intermediate_csv_file argument un-hidden. --- .../gatk/walkers/bqsr/RecalibrationArgumentCollection.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java index fc7d8a8a4..e5704a1e2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java @@ -75,8 +75,9 @@ public class RecalibrationArgumentCollection { /** * If not provided, then a temporary file is created and then deleted upon completion. + * For advanced users only. 
*/ - @Hidden + @Advanced @Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false) public File RECAL_CSV_FILE = null; From e9183d9fe0ada286ed91bced4feb1e004e10ad56 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 8 Nov 2012 15:07:47 -0500 Subject: [PATCH 21/26] Fix bugs as reported on the forum: BED needs to be explicitly set as the default output format and the output didn't actually adhere to the BED spec. --- .../sting/gatk/walkers/coverage/CallableLoci.java | 4 ++-- .../coverage/CallableLociWalkerIntegrationTest.java | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java index 58ddd0879..48019efea 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java @@ -191,7 +191,7 @@ public class CallableLoci extends LocusWalker Date: Fri, 9 Nov 2012 08:33:55 -0500 Subject: [PATCH 22/26] Fixed nasty bug in BQSR csv file creation: numbers larger than 999 in the Errors column were printed out with commas (which looks like a separate column). This wasn't caught earlier because there are no integration tests covering the csv. I'll add one into unstable in a sec. 
--- .../broadinstitute/sting/utils/recalibration/RecalDatum.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java index e3348d3de..207988749 100755 --- a/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java +++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/RecalDatum.java @@ -199,7 +199,7 @@ public class RecalDatum { @Override public String toString() { - return String.format("%.2f,%,2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality()); + return String.format("%.2f,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality()); } public String stringForCSV() { From e93d46191004f554bdd75b42d07b098241b9715e Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 9 Nov 2012 09:11:04 -0500 Subject: [PATCH 23/26] Adding integration test to BQSR for the csv file --- .../gatk/walkers/bqsr/BQSRIntegrationTest.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java index b839382dc..f6ec47760 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java @@ -90,6 +90,21 @@ public class BQSRIntegrationTest extends WalkerTest { executeTest("testBQSRFailWithoutDBSNP", spec); } + @Test + public void testBQSRCSV() { + WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( + " -T BaseRecalibrator" + + " -R " + b36KGReference + + " -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" + + " -knownSites " + b36dbSNP129 + + " -L 1:10,000,000-10,200,000" + + " -o /dev/null" + + " 
--plot_pdf_file /dev/null" + + " --intermediate_csv_file %s", + Arrays.asList("d1c38a3418979400630e2bca1140689c")); + executeTest("testBQSR-CSVfile", spec); + } + @Test public void testBQSRFailWithSolidNoCall() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( From 525cf331f4e6568479582c44f7c7b8912cdf7773 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 13 Nov 2012 11:52:47 -0500 Subject: [PATCH 24/26] Don't catch a User Error and re-throw as a Reviewed Exception. That makes Eric unhappy. --- .../sting/gatk/io/storage/VariantContextWriterStorage.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index 31f6d5954..8e4633869 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -198,8 +198,6 @@ public class VariantContextWriterStorage implements Storage Date: Tue, 13 Nov 2012 11:53:12 -0500 Subject: [PATCH 25/26] Having a malformed GATK report is a User Error --- .../broadinstitute/sting/gatk/report/GATKReportVersion.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java index b5a5e0443..b51fb17f0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.report; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; public enum GATKReportVersion { /** @@ -91,6 +92,6 @@ public enum GATKReportVersion { if 
(header.startsWith("#:GATKReport.v1.1")) return GATKReportVersion.V1_1; - throw new ReviewedStingException("Unknown GATK report version in header: " + header); + throw new UserException.BadInput("The GATK report has an unknown/unsupported version in the header: " + header); } } From ba41f65759724b650d8996185bc4761f50bda466 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 13 Nov 2012 11:53:39 -0500 Subject: [PATCH 26/26] Protect against NPEs in SelectVariants by checking for missing Genotypes --- .../sting/gatk/walkers/variantutils/SelectVariants.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index d1b7cb96f..d28fe34d6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -659,7 +659,10 @@ public class SelectVariants extends RodWalker implements TreeR return (g !=null && !g.isHomRef() && (g.isCalled() || (g.isFiltered() && !EXCLUDE_FILTERED))); } - private boolean haveSameGenotypes(Genotype g1, Genotype g2) { + private boolean haveSameGenotypes(final Genotype g1, final Genotype g2) { + if ( g1 == null || g2 == null ) + return false; + if ((g1.isCalled() && g2.isFiltered()) || (g2.isCalled() && g1.isFiltered()) || (g1.isFiltered() && g2.isFiltered() && EXCLUDE_FILTERED))