From d79b5f0931db2a65138c636c23c67d602688e480 Mon Sep 17 00:00:00 2001 From: sathibault Date: Wed, 8 May 2013 11:01:20 -0500 Subject: [PATCH 001/172] Adding Convey HC-1 HMM acceleration --- .../LikelihoodCalculationEngine.java | 38 ++++++++---- .../sting/utils/pairhmm/CnyPairHMM.java | 59 +++++++++++++++++++ .../sting/utils/pairhmm/BatchPairHMM.java | 16 +++++ 3 files changed, 103 insertions(+), 10 deletions(-) create mode 100644 protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 8697833a6..62d4d17fd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -90,7 +90,10 @@ public class LikelihoodCalculationEngine { pairHMM = new Log10PairHMM(false); break; case LOGLESS_CACHING: - pairHMM = new LoglessPairHMM(); + if (CnyPairHMM.isAvailable()) + pairHMM = new CnyPairHMM(); + else + pairHMM = new LoglessPairHMM(); break; default: throw new UserException.BadArgumentValue("pairHMM", "Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, and LOGLESS_CACHING."); @@ -151,6 +154,8 @@ public class LikelihoodCalculationEngine { private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List haplotypes, final List reads) { // first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time) + final BatchPairHMM batchPairHMM = (pairHMM instanceof BatchPairHMM) ? 
(BatchPairHMM)pairHMM : null; + final Vector batchedReads = new Vector(reads.size()); final int numHaplotypes = haplotypes.size(); final Map alleleVersions = new HashMap(numHaplotypes); for ( final Haplotype haplotype : haplotypes ) { @@ -177,16 +182,29 @@ public class LikelihoodCalculationEngine { readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); } - for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { - final Haplotype haplotype = haplotypes.get(jjj); - final boolean isFirstHaplotype = jjj == 0; - final double log10l = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), - read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype); - - perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), log10l); - } + if ( batchPairHMM != null ) { + batchPairHMM.batchAdd(haplotypes, read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP); + batchedReads.add(read); + } else { + for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { + final Haplotype haplotype = haplotypes.get(jjj); + final boolean isFirstHaplotype = jjj == 0; + final double log10l = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), + read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype); + + perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), log10l); + } + } } - + if ( batchPairHMM != null ) { + for( final GATKSAMRecord read : batchedReads ) { + final double[] likelihoods = batchPairHMM.batchResult(); + for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { + final Haplotype haplotype = haplotypes.get(jjj); + perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), likelihoods[jjj]); + } + } + } return perReadAlleleLikelihoodMap; } diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java new file mode 100644 index 
000000000..51611bb08 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java @@ -0,0 +1,59 @@ +package org.broadinstitute.sting.utils.pairhmm; + +import java.util.*; + +import org.broadinstitute.sting.utils.haplotype.Haplotype; + +public final class CnyPairHMM extends PairHMM implements BatchPairHMM { + private static class HmmInput { + public List haplotypes; + public byte[] readBases; + public byte[] readQuals; + public byte[] insertionGOP; + public byte[] deletionGOP; + public byte[] overallGCP; + }; + + private static boolean loaded = false; + private List pending = new LinkedList(); + + static public boolean isAvailable() { + return true; + } + + public void batchAdd(final List haplotypes, + final byte[] readBases, + final byte[] readQuals, + final byte[] insertionGOP, + final byte[] deletionGOP, + final byte[] overallGCP) { + HmmInput test=new HmmInput(); + test.haplotypes=haplotypes; + test.readBases=readBases; + test.readQuals=readQuals; + test.insertionGOP=insertionGOP; + test.deletionGOP=deletionGOP; + test.overallGCP=overallGCP; + pending.add(test); + } + + public double[] batchResult() { + HmmInput test=pending.remove(0); + double[] results=new double[test.haplotypes.size()]; + for (int i=0; i haplotypes, + final byte[] readBases, + final byte[] readQuals, + final byte[] insertionGOP, + final byte[] deletionGOP, + final byte[] overallGCP); + + public double[] batchResult(); +} From 195f0c3e983abbeb22ec1fbbcec4862ca789121e Mon Sep 17 00:00:00 2001 From: sathibault Date: Fri, 17 May 2013 08:30:23 -0500 Subject: [PATCH 002/172] Disable CnyPairHMM --- .../sting/utils/pairhmm/CnyPairHMM.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java index 51611bb08..a8f07a2ea 100644 --- 
a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java @@ -18,7 +18,16 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { private List pending = new LinkedList(); static public boolean isAvailable() { - return true; + return false; + } + + public void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH ) { + if (!loaded) { + // System.loadLibrary("gmvhdl_gatk_hmm"); + // initFpga(); + loaded = true; + } + System.out.println("FPGA HMM Initialized"); } public void batchAdd(final List haplotypes, From de2a2a4cc752e671d7e3a5bb7ddbb19d57257f4f Mon Sep 17 00:00:00 2001 From: sathibault Date: Mon, 3 Jun 2013 07:30:32 -0500 Subject: [PATCH 003/172] Added command-line flag to disable FPGA Completed integration with FPGA driver --- .../haplotypecaller/HaplotypeCaller.java | 6 +- .../LikelihoodCalculationEngine.java | 13 +- .../sting/utils/pairhmm/CnyPairHMM.java | 164 +++++++++++++++--- .../sting/utils/pairhmm/BatchPairHMM.java | 2 +- 4 files changed, 159 insertions(+), 26 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 6ea543f25..cf6fa20ca 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -315,6 +315,10 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Argument(fullName="allowCyclesInKmerGraphToGeneratePaths", shortName="allowCyclesInKmerGraphToGeneratePaths", doc="If specified, we will allow cycles in the kmer graphs to generate paths with multiple copies of the path sequenece rather than just the shortest paths", required = false) protected boolean allowCyclesInKmerGraphToGeneratePaths = 
false; + @Hidden + @Argument(fullName="noFpga", shortName="noFpga", doc="If provided, disables the use of the FPGA HMM implementation", required = false) + protected boolean noFpga = false; + // the UG engines private UnifiedGenotyperEngine UG_engine = null; @@ -435,7 +439,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem if ( graphWriter != null ) assemblyEngine.setGraphWriter(graphWriter); if ( useLowQualityBasesForAssembly ) assemblyEngine.setMinBaseQualityToUseInAssembly((byte)1); - likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM ); + likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM, noFpga ); final MergeVariantsAcrossHaplotypes variantMerger = mergeVariantsViaLD ? new LDMerger(DEBUG, 10, 1) : new MergeVariantsAcrossHaplotypes(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 62d4d17fd..d4bec64c7 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -80,7 +80,11 @@ public class LikelihoodCalculationEngine { */ private final double EXPECTED_ERROR_RATE_PER_BASE = 0.02; - public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType ) { + public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType) { + this(constantGCP, debug, hmmType, false); + } + + public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType, final boolean noFpga) { switch (hmmType) { case EXACT: @@ -90,7 +94,7 @@ public class 
LikelihoodCalculationEngine { pairHMM = new Log10PairHMM(false); break; case LOGLESS_CACHING: - if (CnyPairHMM.isAvailable()) + if (!noFpga && CnyPairHMM.isAvailable()) pairHMM = new CnyPairHMM(); else pairHMM = new LoglessPairHMM(); @@ -191,16 +195,17 @@ public class LikelihoodCalculationEngine { final boolean isFirstHaplotype = jjj == 0; final double log10l = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype); - + System.err.println(Integer.toString(jjj) + ": " + Double.toString(log10l)); perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), log10l); } } } if ( batchPairHMM != null ) { for( final GATKSAMRecord read : batchedReads ) { - final double[] likelihoods = batchPairHMM.batchResult(); + final double[] likelihoods = batchPairHMM.batchGetResult(); for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { final Haplotype haplotype = haplotypes.get(jjj); + System.err.println(Integer.toString(jjj) + ": " + Double.toString(likelihoods[jjj])); perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), likelihoods[jjj]); } } diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java index a8f07a2ea..746c0add1 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java @@ -1,33 +1,91 @@ package org.broadinstitute.sting.utils.pairhmm; +import java.io.File; import java.util.*; +import java.lang.reflect.*; import org.broadinstitute.sting.utils.haplotype.Haplotype; public final class CnyPairHMM extends PairHMM implements BatchPairHMM { private static class HmmInput { - public List haplotypes; public byte[] readBases; public byte[] readQuals; public byte[] insertionGOP; public byte[] deletionGOP; public byte[] overallGCP; + public List 
haplotypes; }; + private static class ResultQueue { + private int offset; + private List batchResults; + + public ResultQueue() { + batchResults = new LinkedList(); + offset = 0; + } + + public void push(double[] results) { + batchResults.add(results); + } + + public double pop() { + double[] results = batchResults.get(0); + double top = results[offset++]; + if (offset == results.length) { + batchResults.remove(0); + offset = 0; + } + return top; + } + } + + final static String libPath = "/opt/convey/personalities/32100.1.1.1.0"; + final static String libName = "gmvhdl_gatk_hmm"; + private static boolean loaded = false; - private List pending = new LinkedList(); + private List batchRequests = new LinkedList(); + private ResultQueue resultQueue = new ResultQueue(); static public boolean isAvailable() { - return false; + if (!loaded) { + File library = new File(libPath + "/lib" + libName + ".so"); + return library.exists(); + } + return true; } + private native void initFpga(); + private native int dequeueRequirement(int reflen, int readlen); + private native int enqueue(byte[] haplotypeBases, + byte[] readBases, + byte[] readQuals, + byte[] insertionGOP, + byte[] deletionGOP, + byte[] overallGCP, + int hapStartIndex, + boolean recacheReadValues); + private native int flushQueue(); + private native int dequeue(double[] results); + private native double softHmm(byte[] haplotypeBases, + byte[] readBases, + byte[] readQuals, + byte[] insertionGOP, + byte[] deletionGOP, + byte[] overallGCP, + int hapStartIndex, + boolean recacheReadValues); + + public native void reportStats(); + public void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH ) { if (!loaded) { - // System.loadLibrary("gmvhdl_gatk_hmm"); - // initFpga(); + addLibraryPath(libPath); + System.loadLibrary(libName); + initFpga(); loaded = true; + System.out.println("FPGA HMM Initialized"); } - System.out.println("FPGA HMM Initialized"); } public void batchAdd(final List haplotypes, @@ -36,21 
+94,44 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) { - HmmInput test=new HmmInput(); - test.haplotypes=haplotypes; - test.readBases=readBases; - test.readQuals=readQuals; - test.insertionGOP=insertionGOP; - test.deletionGOP=deletionGOP; - test.overallGCP=overallGCP; - pending.add(test); + final int numHaplotypes = haplotypes.size(); + HmmInput test = new HmmInput(); + test.readBases = readBases; + test.readQuals = readQuals; + test.insertionGOP = insertionGOP; + test.deletionGOP = deletionGOP; + test.overallGCP = overallGCP; + test.haplotypes = haplotypes; + batchRequests.add(test); + for (int jjj = 0; jjj < numHaplotypes; jjj++) { + final boolean recacheReadValues = (jjj == 0); + final Haplotype haplotype = haplotypes.get(jjj); + enqueuePrepare(haplotype.getBases(), readBases); + if (enqueue(haplotype.getBases(), readBases, readQuals, insertionGOP, deletionGOP, overallGCP, 0, recacheReadValues) == 0) + throw new RuntimeException("FPGA queue overflow in batchAdd"); + } } - public double[] batchResult() { - HmmInput test=pending.remove(0); - double[] results=new double[test.haplotypes.size()]; - for (int i=0; i 0) { + results = new double[n]; + if (dequeue(results) != n) + System.out.println("queue underflow in enqueuePrepare"); + resultQueue.push(results); + } + + final HmmInput test = batchRequests.remove(0); + final int numHaplotypes = test.haplotypes.size(); + results = new double[numHaplotypes]; + for (int jjj = 0; jjj < numHaplotypes; jjj++) { + results[jjj] = resultQueue.pop(); + if (results[jjj]<-60.0) { + final Haplotype haplotype = test.haplotypes.get(jjj); + results[jjj]=softHmm(haplotype.getBases(), test.readBases, test.readQuals, test.insertionGOP, test.deletionGOP, test.overallGCP, 0, true); + } } return results; } @@ -64,5 +145,48 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { final int hapStartIndex, final boolean 
recacheReadValues ) { return 0.0; - } + } + + private void enqueuePrepare(byte[] haplotypeBases, byte[] readBases) { + double[] results = null; + int n = dequeueRequirement(haplotypeBases.length, readBases.length); + if (n>0) { + results = new double[n]; + if (dequeue(results)!=n) + System.out.println("queue underflow in enqueuePrepare"); + } else if (n<0) { + n = flushQueue(); + if (n > 0) { + results = new double[n]; + if (dequeue(results) != n) + System.out.println("queue underflow in enqueuePrepare"); + } + } + + if (results != null) + resultQueue.push(results); + } + + public static void addLibraryPath(String pathToAdd) { + try { + final Field usrPathsField = ClassLoader.class.getDeclaredField("usr_paths"); + usrPathsField.setAccessible(true); + + //get array of paths + final String[] paths = (String[])usrPathsField.get(null); + + //check if the path to add is already present + for(String path : paths) { + if(path.equals(pathToAdd)) { + return; + } + } + + //add the new path + final String[] newPaths = Arrays.copyOf(paths, paths.length + 1); + newPaths[newPaths.length-1] = pathToAdd; + usrPathsField.set(null, newPaths); + } catch (Exception ex) { + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java index 8f9ae22ae..3b794b00e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java @@ -12,5 +12,5 @@ public interface BatchPairHMM { final byte[] deletionGOP, final byte[] overallGCP); - public double[] batchResult(); + public double[] batchGetResult(); } From 3db8908ae8fcbff7b5a0e8073aae9c1b2e240d6d Mon Sep 17 00:00:00 2001 From: sathibault Date: Thu, 20 Jun 2013 08:28:58 -0500 Subject: [PATCH 004/172] Remove debug print statement --- .../walkers/haplotypecaller/LikelihoodCalculationEngine.java | 1 - 1 file changed, 1 deletion(-) diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 2359840dd..60abc6a68 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -250,7 +250,6 @@ public class LikelihoodCalculationEngine { final double[] likelihoods = batchPairHMM.batchGetResult(); for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { final Haplotype haplotype = haplotypes.get(jjj); - System.err.println(Integer.toString(jjj) + ": " + Double.toString(likelihoods[jjj])); if ( haplotype.isNonReference() ) bestNonReflog10L = Math.max(bestNonReflog10L, likelihoods[jjj]); else From e691fa3e1985f558e832f6da26960a64ce6c7f7f Mon Sep 17 00:00:00 2001 From: Scott Thibault Date: Fri, 28 Jun 2013 08:52:09 -0500 Subject: [PATCH 005/172] FPGA null pointer bug fix --- .../walkers/haplotypecaller/LikelihoodCalculationEngine.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 60abc6a68..3cdad37ea 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -180,7 +180,7 @@ public class LikelihoodCalculationEngine { private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List haplotypes, final List reads) { // first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time) - final BatchPairHMM batchPairHMM = 
(pairHMM instanceof BatchPairHMM) ? (BatchPairHMM)pairHMM : null; + final BatchPairHMM batchPairHMM = (pairHMM.get() instanceof BatchPairHMM) ? (BatchPairHMM)pairHMM.get() : null; final Vector batchedReads = new Vector(reads.size()); final int numHaplotypes = haplotypes.size(); final Map alleleVersions = new LinkedHashMap<>(numHaplotypes); @@ -456,4 +456,4 @@ public class LikelihoodCalculationEngine { } throw new ReviewedStingException( "No reference haplotype found in the list of haplotypes!" ); } -} \ No newline at end of file +} From aba55dbb23375fc9b6c40c2d99a594efec3f6fa3 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Wed, 10 Jul 2013 14:29:13 -0400 Subject: [PATCH 006/172] Moved some HC parameters related to active region extensions to command line arguments so that they're more easily modified. Some of these parameters need tinkering in order to call some large indels. See GSA-891 and subtasks for particular examples thereof. --- .../haplotypecaller/HaplotypeCaller.java | 29 ++++++++++++++----- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 18c93f2fb..65d0bf413 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -466,6 +466,27 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="minObservationsForKmerToBeSolid", shortName="minObservationsForKmerToBeSolid", doc = "A k-mer must be seen at least these times for it considered to be solid", required=false) protected int minObservationsForKmerToBeSolid = 20; + /** + * the maximum extent into the full active region extension that we're willing to go in genotyping our events + */ + @Hidden + 
@Argument(fullName="maxDiscARExtension", shortName="maxDiscARExtension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for discovery", required=false) + protected int MAX_DISCOVERY_ACTIVE_REGION_EXTENSION = 25; + + @Hidden + @Argument(fullName="maxGGAARExtension", shortName="maxGGAARExtension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for GGA mode", required=false) + protected int MAX_GGA_ACTIVE_REGION_EXTENSION = 300; + + /** + * Include at least this many bases around an event for calling it + */ + @Hidden + @Argument(fullName="paddingAroundIndels", shortName="paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required=false) + protected int PADDING_AROUND_OTHERS_FOR_CALLING = 150; + + @Hidden + @Argument(fullName="paddingAroundSNPs", shortName="paddingAroundSNPs", doc = "Include at least this many bases around an event for calling snps", required=false) + protected int PADDING_AROUND_SNPS_FOR_CALLING = 20; // ----------------------------------------------------------------------------------------------- // done with Haplotype caller parameters @@ -492,14 +513,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // reference base padding size private static final int REFERENCE_PADDING = 500; - // include at least this many bases around an event for calling it - private final static int PADDING_AROUND_SNPS_FOR_CALLING = 20; - private final static int PADDING_AROUND_OTHERS_FOR_CALLING = 150; - - // the maximum extent into the full active region extension that we're willing to go in genotyping our events - private final static int MAX_DISCOVERY_ACTIVE_REGION_EXTENSION = 25; - private final static int MAX_GGA_ACTIVE_REGION_EXTENSION = 100; - private ActiveRegionTrimmer trimmer = null; private final static int maxReadsInRegionPerSample = 1000; // TODO -- should be an 
argument From 7458b59bb3b973fb3321c76a0ab1e67bfcaa6c5e Mon Sep 17 00:00:00 2001 From: sathibault Date: Thu, 11 Jul 2013 11:08:46 -0500 Subject: [PATCH 007/172] Fixed batching bug. --- .../sting/utils/pairhmm/CnyPairHMM.java | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java index 746c0add1..8de29dc16 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java @@ -113,6 +113,7 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { } public double[] batchGetResult() { + int iii, numHaplotypes; double[] results; int n = flushQueue(); @@ -123,16 +124,27 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { resultQueue.push(results); } - final HmmInput test = batchRequests.remove(0); - final int numHaplotypes = test.haplotypes.size(); + numHaplotypes = 0; + for (Iterator it=batchRequests.listIterator(); it.hasNext(); ) { + HmmInput h = it.next(); + numHaplotypes += h.haplotypes.size(); + } + results = new double[numHaplotypes]; - for (int jjj = 0; jjj < numHaplotypes; jjj++) { - results[jjj] = resultQueue.pop(); - if (results[jjj]<-60.0) { - final Haplotype haplotype = test.haplotypes.get(jjj); - results[jjj]=softHmm(haplotype.getBases(), test.readBases, test.readQuals, test.insertionGOP, test.deletionGOP, test.overallGCP, 0, true); + iii = 0; + while (!batchRequests.isEmpty()) { + HmmInput test = batchRequests.remove(0); + int testSize = test.haplotypes.size(); + for (int jjj = 0; jjj < testSize; jjj++) { + results[iii] = resultQueue.pop(); + if (results[iii]<-60.0) { + final Haplotype haplotype = test.haplotypes.get(jjj); + results[iii] = softHmm(haplotype.getBases(), test.readBases, test.readQuals, test.insertionGOP, test.deletionGOP, 
test.overallGCP, 0, true); + } + iii++; } } + return results; } From 23fe3e449a82a8b82094b1175a339f94a7ac0dc3 Mon Sep 17 00:00:00 2001 From: sathibault Date: Thu, 11 Jul 2013 11:30:37 -0500 Subject: [PATCH 008/172] Revert "Fixed batching bug." This reverts commit 3e56c83d0eec7c374e5f187d1ef124d42ecc071e. --- .../sting/utils/pairhmm/CnyPairHMM.java | 26 +++++-------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java index 8de29dc16..746c0add1 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java @@ -113,7 +113,6 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { } public double[] batchGetResult() { - int iii, numHaplotypes; double[] results; int n = flushQueue(); @@ -124,27 +123,16 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { resultQueue.push(results); } - numHaplotypes = 0; - for (Iterator it=batchRequests.listIterator(); it.hasNext(); ) { - HmmInput h = it.next(); - numHaplotypes += h.haplotypes.size(); - } - + final HmmInput test = batchRequests.remove(0); + final int numHaplotypes = test.haplotypes.size(); results = new double[numHaplotypes]; - iii = 0; - while (!batchRequests.isEmpty()) { - HmmInput test = batchRequests.remove(0); - int testSize = test.haplotypes.size(); - for (int jjj = 0; jjj < testSize; jjj++) { - results[iii] = resultQueue.pop(); - if (results[iii]<-60.0) { - final Haplotype haplotype = test.haplotypes.get(jjj); - results[iii] = softHmm(haplotype.getBases(), test.readBases, test.readQuals, test.insertionGOP, test.deletionGOP, test.overallGCP, 0, true); - } - iii++; + for (int jjj = 0; jjj < numHaplotypes; jjj++) { + results[jjj] = resultQueue.pop(); + if (results[jjj]<-60.0) { + final Haplotype haplotype = 
test.haplotypes.get(jjj); + results[jjj]=softHmm(haplotype.getBases(), test.readBases, test.readQuals, test.insertionGOP, test.deletionGOP, test.overallGCP, 0, true); } } - return results; } From b16c7ce0506ad8c8a93854515a131f3c2dca1ab8 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 28 Jun 2013 07:41:06 -0400 Subject: [PATCH 010/172] A whole slew of improvements to the Haplotype Caller and related code. 1. Some minor refactorings and cleanup (e.g. removing unused imports) throughout. 2. Updates to the KB assessment functionality: a. Exclude duplicate reads when checking to see whether there's enough coverage to make a call. b. Lower the threshold on FS for FPs that would easily be filtered since it's only single sample calling. 3. Make the HC consistent in how it treats the pruning factor. As part of this I removed and archived the DeBruijn assembler. 4. Improvements to the likelihoods for the HC a. We now include a "tristate" correction in the PairHMM (just like we do with UG). Basically, we need to divide e by 3 because the observed base could have come from any of the non-observed alleles. b. We now correct overlapping read pairs. Note that the fragments are not merged (which we know is dangerous). Rather, the overlapping bases are just down-weighted so that their quals are not more than Q20 (or more specifically, half of the phred-scaled PCR error rate); mismatching bases are turned into Q0s for now. c. We no longer run contamination removal by default in the UG or HC. The exome tends to have real sites with off kilter allele balances and we occasionally lose them to contamination removal. 5. Improved the dangling tail merging implementation. 
--- .../StandardCallerArgumentCollection.java | 4 +- .../DiploidSNPGenotypeLikelihoods.java | 3 +- .../haplotypecaller/DeBruijnAssembler.java | 263 ------------------ .../haplotypecaller/DeBruijnGraphBuilder.java | 150 ---------- .../haplotypecaller/HaplotypeCaller.java | 49 ++-- .../haplotypecaller/LocalAssemblyEngine.java | 6 +- .../haplotypecaller/graphs/BaseGraph.java | 25 +- .../graphs/LowWeightChainPruner.java | 2 +- .../{DeBruijnGraph.java => TestGraph.java} | 28 +- .../readthreading/ReadThreadingAssembler.java | 2 +- .../readthreading/ReadThreadingGraph.java | 56 ++-- .../sting/utils/pairhmm/LoglessPairHMM.java | 5 +- ...perGeneralPloidySuite1IntegrationTest.java | 2 +- ...perGeneralPloidySuite2IntegrationTest.java | 2 +- ...dGenotyperIndelCallingIntegrationTest.java | 20 +- .../UnifiedGenotyperIntegrationTest.java | 27 +- ...GenotyperNormalCallingIntegrationTest.java | 20 +- ...dGenotyperReducedReadsIntegrationTest.java | 6 +- .../DeBruijnAssemblerUnitTest.java | 199 ------------- .../DeBruijnAssemblyGraphUnitTest.java | 124 --------- ...lexAndSymbolicVariantsIntegrationTest.java | 8 +- .../HaplotypeCallerGVCFIntegrationTest.java | 12 +- .../HaplotypeCallerIntegrationTest.java | 26 +- ...aplotypeCallerParallelIntegrationTest.java | 2 +- .../LocalAssemblyEngineUnitTest.java | 59 ++-- .../graphs/BaseGraphUnitTest.java | 70 +---- .../graphs/SeqGraphUnitTest.java | 2 +- .../ReadThreadingGraphUnitTest.java | 12 +- .../NanoSchedulerIntegrationTest.java | 2 +- .../sting/utils/pairhmm/PairHMMUnitTest.java | 25 +- .../sting/utils/fragments/FragmentUtils.java | 70 +++++ .../sting/utils/pairhmm/Log10PairHMM.java | 7 +- .../sting/utils/pairhmm/N2MemoryPairHMM.java | 4 + .../smithwaterman/SWPairwiseAlignment.java | 40 ++- .../fragments/FragmentUtilsUnitTest.java | 40 ++- 35 files changed, 347 insertions(+), 1025 deletions(-) delete mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java delete mode 100644 
protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnGraphBuilder.java rename protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/{DeBruijnGraph.java => TestGraph.java} (89%) delete mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java delete mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblyGraphUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index 70ee049f3..c324488c9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -54,8 +54,6 @@ import org.broadinstitute.sting.utils.collections.DefaultHashMap; import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.File; -import java.io.PrintStream; -import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; @@ -147,7 +145,7 @@ public class StandardCallerArgumentCollection { */ @Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false) public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION; - public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05; + public static final double DEFAULT_CONTAMINATION_FRACTION = 0.0; /** * This argument specifies a file with two columns "sample" and "contamination" specifying the contamination level for those samples. 
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java index 93df9f091..f3b26f295 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java @@ -52,6 +52,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fragments.FragmentCollection; +import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.genotyper.DiploidGenotype; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -94,7 +95,7 @@ import static java.lang.Math.pow; */ public class DiploidSNPGenotypeLikelihoods implements Cloneable { - public final static double DEFAULT_PCR_ERROR_RATE = 1e-4; + public final static double DEFAULT_PCR_ERROR_RATE = FragmentUtils.DEFAULT_PCR_ERROR_RATE; protected final static int FIXED_PLOIDY = 2; protected final static int MAX_PLOIDY = FIXED_PLOIDY + 1; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java deleted file mode 100644 index 1a59cdb63..000000000 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssembler.java +++ /dev/null @@ -1,263 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. 
with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012 Broad Institute, Inc. -* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 4. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 5. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 6. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 7. MISCELLANEOUS -* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.sting.gatk.walkers.haplotypecaller; - -import com.google.java.contract.Requires; -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.haplotype.Haplotype; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.broadinstitute.sting.utils.sam.ReadUtils; - -import java.io.File; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedList; -import java.util.List; - -/** - * DeBruijn assembler for the HaplotypeCaller - * - * User: ebanks, rpoplin - * Date: Mar 14, 2011 - */ -public class DeBruijnAssembler extends LocalAssemblyEngine { - private final static Logger logger = Logger.getLogger(DeBruijnAssembler.class); - - // TODO -- this number is very low, and limits our ability to explore low-frequency variants. 
It should - // TODO -- be increased to a large number of eliminated altogether when moving to the bubble caller where - // TODO -- we are no longer considering a combinatorial number of haplotypes as the number of bubbles increases - private final static int NUM_PATHS_PER_GRAPH = 25; - private static final int KMER_OVERLAP = 5; // the additional size of a valid chunk of sequence, used to string together k-mers - private static final int GRAPH_KMER_STEP = 6; - private static final int GGA_MODE_ARTIFICIAL_COUNTS = 1000; - - private final int minKmer; - private final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms; - - protected DeBruijnAssembler() { - this(25, -1); - } - - public DeBruijnAssembler(final int minKmer, final int onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms) { - super(NUM_PATHS_PER_GRAPH); - this.minKmer = minKmer; - this.onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms = onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms; - } - - @Override - protected List assemble(final List reads, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { - final List results = new LinkedList<>(); - - final int maxKmer = ReadUtils.getMaxReadLength(reads) - KMER_OVERLAP - 1; - if( maxKmer < minKmer) { - // Reads are too small for assembly so don't try to create any assembly graphs - return Collections.emptyList(); - } - // create the graph for each possible kmer - for( int kmer = maxKmer; kmer >= minKmer; kmer -= GRAPH_KMER_STEP ) { - if ( debugGraphTransformations && kmer > onlyBuildKmersOfThisSizeWhenDebuggingGraphAlgorithms) - continue; - - if ( debug ) logger.info("Creating de Bruijn graph for " + kmer + " kmer using " + reads.size() + " reads"); - DeBruijnGraph graph = createGraphFromSequences(reads, kmer, refHaplotype, activeAlleleHaplotypes); - if( graph != null ) { // graphs that fail during creation ( for example, because there are cycles in the reference graph ) will show up here as a null graph object - // do a series of steps 
to clean up the raw assembly graph to make it analysis-ready - if ( debugGraphTransformations ) graph.printGraph(new File("unpruned.dot"), pruneFactor); - - if ( shouldErrorCorrectKmers() ) { - throw new UserException("Error correction no longer supported because of the " + - "incredibly naive way this was implemented. The command line argument remains because some" + - " future subsystem will actually go and error correct the reads"); - } - - results.add(cleanupSeqGraph(graph.convertToSequenceGraph())); - - if ( debugGraphTransformations ) // we only want to use one graph size - break; - } - } - - return results; - } - - @Requires({"reads != null", "kmerLength > 0", "refHaplotype != null"}) - protected DeBruijnGraph createGraphFromSequences( final List reads, final int kmerLength, final Haplotype refHaplotype, final List activeAlleleHaplotypes ) { - final DeBruijnGraph graph = new DeBruijnGraph(kmerLength); - final DeBruijnGraphBuilder builder = new DeBruijnGraphBuilder(graph); - - // First pull kmers from the reference haplotype and add them to the graph - if ( ! addReferenceKmersToGraph(builder, refHaplotype.getBases()) ) - // something went wrong, so abort right now with a null graph - return null; - - // add the artificial GGA haplotypes to the graph - if ( ! addGGAKmersToGraph(builder, activeAlleleHaplotypes) ) - // something went wrong, so abort right now with a null graph - return null; - - // now go through the graph already seeded with the reference sequence and add the read kmers to it - if ( ! 
addReadKmersToGraph(builder, reads) ) - // some problem was detected adding the reads to the graph, return null to indicate we failed - return null; - - graph.cleanNonRefPaths(); - return graph; - } - - /** - * Add the high-quality kmers from the artificial GGA haplotypes to the graph - * - * @param builder a debruijn graph builder to add the read kmers to - * @param activeAlleleHaplotypes a list of haplotypes to add to the graph for GGA mode - * @return true if we successfully added the read kmers to the graph without corrupting it in some way - */ - protected boolean addGGAKmersToGraph(final DeBruijnGraphBuilder builder, final List activeAlleleHaplotypes) { - - final int kmerLength = builder.getKmerSize(); - - for( final Haplotype haplotype : activeAlleleHaplotypes ) { - final int end = haplotype.length() - kmerLength; - for( int start = 0; start < end; start++ ) { - builder.addKmerPairFromSeqToGraph( haplotype.getBases(), start, GGA_MODE_ARTIFICIAL_COUNTS ); - } - } - - // always returns true now, but it's possible that we'd add kmers and decide we don't like the graph in some way - return true; - } - - /** - * Add the high-quality kmers from the reads to the graph - * - * @param builder a debruijn graph builder to add the read kmers to - * @param reads a non-null list of reads whose kmers we want to add to the graph - * @return true if we successfully added the read kmers to the graph without corrupting it in some way - */ - protected boolean addReadKmersToGraph(final DeBruijnGraphBuilder builder, final List reads) { - final int kmerLength = builder.getKmerSize(); - - // Next pull kmers out of every read and throw them on the graph - for( final GATKSAMRecord read : reads ) { - final byte[] sequence = read.getReadBases(); - final byte[] qualities = read.getBaseQualities(); - final int[] reducedReadCounts = read.getReducedReadCounts(); // will be null if read is not reduced - if ( sequence.length > kmerLength + KMER_OVERLAP ) { - int lastGood = -1; // the index 
of the last good base we've seen - for( int end = 0; end < sequence.length; end++ ) { - if ( qualities[end] < minBaseQualityToUseInAssembly ) { - lastGood = -1; // reset the last good base - } else if ( lastGood == -1 ) { - lastGood = end; // we're at a good base, the last good one is us - } else if ( end - kmerLength >= lastGood ) { - // end - kmerLength (the start) is after the lastGood base, so that kmer is good - final int start = end - kmerLength; - // how many observations of this kmer have we seen? A normal read counts for 1, but - // a reduced read might imply a higher multiplicity for our the edge - int countNumber = 1; - if ( read.isReducedRead() ) { - // compute mean number of reduced read counts in current kmer span - // precise rounding can make a difference with low consensus counts - // TODO -- optimization: should extend arrayMax function to take start stop values - countNumber = MathUtils.arrayMax(Arrays.copyOfRange(reducedReadCounts, start, end)); - } - - builder.addKmerPairFromSeqToGraph(sequence, start, countNumber); - } - } - } - } - - builder.flushKmersToGraph(false); - - // always returns true now, but it's possible that we'd add reads and decide we don't like the graph in some way - return true; - } - - /** - * Add the kmers from the reference sequence to the DeBruijnGraph - * - * @param builder the graph to add the reference kmers to. 
Must be empty - * @param refSequence the reference sequence from which we'll get our kmers - * @return true if we succeeded in creating a good graph from the reference sequence, false otherwise - */ - protected boolean addReferenceKmersToGraph(final DeBruijnGraphBuilder builder, final byte[] refSequence) { - if ( builder == null ) throw new IllegalArgumentException("graph cannot be null"); - if ( builder.getGraph().vertexSet().size() != 0 ) - throw new IllegalArgumentException("Reference sequences must be added before any other vertices, but got a graph with " + builder.getGraph().vertexSet().size() + " vertices in it already: " + builder.getGraph()); - if ( refSequence == null ) throw new IllegalArgumentException("refSequence cannot be null"); - - final int kmerLength = builder.getKmerSize(); - if( refSequence.length < kmerLength + KMER_OVERLAP ) { - // not enough reference sequence to build a kmer graph of this length, return null - return false; - } - - final int kmersInSequence = refSequence.length - kmerLength + 1; - for( int iii = 0; iii < kmersInSequence - 1; iii++ ) { - builder.addKmerPairFromSeqToGraph(refSequence, iii, 1); - } - builder.flushKmersToGraph(true); - - // we expect that every kmer in the sequence is unique, so that the graph has exactly kmersInSequence vertices - if ( builder.getGraph().vertexSet().size() != kmersInSequence ) { - if( debug ) logger.info("Cycle detected in reference graph for kmer = " + kmerLength + " ...skipping"); - return false; - } - - return true; - } - - @Override - public String toString() { - return "DeBruijnAssembler{" + - "minKmer=" + minKmer + - '}'; - } -} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnGraphBuilder.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnGraphBuilder.java deleted file mode 100644 index 0f66082c6..000000000 --- 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnGraphBuilder.java +++ /dev/null @@ -1,150 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. 
-* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012 Broad Institute, Inc. -* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
-* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 4. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 5. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 6. 
ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 7. MISCELLANEOUS -* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.sting.gatk.walkers.haplotypecaller; - -import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; - -/** - * Fast approach to building a DeBruijnGraph - * - * Follows the model: - * - * for each X that has bases for the final graph: - * addKmer pair (single kmer with kmer size + 1 spanning the pair) - * - * flushKmersToGraph - * - * User: depristo - * Date: 4/7/13 - * Time: 4:14 PM - */ -public class DeBruijnGraphBuilder { - /** The size of the kmer graph we want to build */ - private final int kmerSize; - - /** The graph we're going to add kmers to */ - private final DeBruijnGraph graph; - - /** keeps counts of all kmer pairs added since the last flush */ - private final KMerCounter counter; - - /** - * Create a new builder that will write out kmers to graph - * - * @param graph a non-null graph that can contain already added kmers - */ - public DeBruijnGraphBuilder(final DeBruijnGraph graph) { - if ( graph == null ) throw new IllegalArgumentException("Graph cannot be null"); - this.kmerSize = 
graph.getKmerSize(); - this.graph = graph; - this.counter = new KMerCounter(kmerSize + 1); - } - - /** - * The graph we're building - * @return a non-null graph - */ - public DeBruijnGraph getGraph() { - return graph; - } - - /** - * The kmer size of our graph - * @return positive integer - */ - public int getKmerSize() { - return kmerSize; - } - - /** - * Higher-level interface to #addKmersToGraph that adds a pair of kmers from a larger sequence of bytes to this - * graph. The kmers start at start (first) and start + 1 (second) have have length getKmerSize(). The - * edge between them is added with isRef and multiplicity - * - * @param sequence a sequence of bases from which we want to extract a pair of kmers - * @param start the start of the first kmer in sequence, must be between 0 and sequence.length - 2 - getKmerSize() - * @param multiplicity what's the multiplicity of the edge between these two kmers - */ - public void addKmerPairFromSeqToGraph( final byte[] sequence, final int start, final int multiplicity ) { - if ( sequence == null ) throw new IllegalArgumentException("Sequence cannot be null"); - if ( start < 0 ) throw new IllegalArgumentException("start must be >= 0 but got " + start); - if ( start + 1 + getKmerSize() > sequence.length ) throw new IllegalArgumentException("start " + start + " is too big given kmerSize " + getKmerSize() + " and sequence length " + sequence.length); - final Kmer kmerPair = new Kmer(sequence, start, getKmerSize() + 1); - addKmerPair(kmerPair, multiplicity); - } - - /** - * Add a single kmer pair to this builder - * @param kmerPair a kmer pair is a single kmer that has kmerSize + 1 bp, where 0 -> kmersize and 1 -> kmersize + 1 - * will have an edge added to this - * @param multiplicity the desired multiplicity of this edge - */ - public void addKmerPair(final Kmer kmerPair, final int multiplicity) { - if ( kmerPair.length() != kmerSize + 1 ) throw new IllegalArgumentException("kmer pair must be of length kmerSize + 1 = " + 
kmerSize + 1 + " but got " + kmerPair.length()); - counter.addKmer(kmerPair, multiplicity); - } - - /** - * Flushes the currently added kmers to the graph - * - * After this function is called the builder is reset to an empty state - * - * This flushing is expensive, so many kmers should be added to the builder before flushing. The most - * efficient workflow is to add all of the kmers of a particular class (all ref bases, or all read bases) - * then and do one flush when completed - * - * @param addRefEdges should the kmers present in the builder be added to the graph with isRef = true for the edges? - */ - public void flushKmersToGraph(final boolean addRefEdges) { - for ( final KMerCounter.CountedKmer countedKmer : counter.getCountedKmers() ) { - final byte[] first = countedKmer.getKmer().subKmer(0, kmerSize).bases(); - final byte[] second = countedKmer.getKmer().subKmer(1, kmerSize).bases(); - graph.addKmersToGraph(first, second, addRefEdges, countedKmer.getCount()); - } - counter.clear(); - } -} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 65d0bf413..f18e37480 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -77,6 +77,8 @@ import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.fragments.FragmentCollection; +import org.broadinstitute.sting.utils.fragments.FragmentUtils; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import 
org.broadinstitute.sting.utils.gvcf.GVCFWriter; import org.broadinstitute.sting.utils.haplotype.*; @@ -236,22 +238,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @ArgumentCollection private StandardCallerArgumentCollection SCAC = new StandardCallerArgumentCollection(); - // ----------------------------------------------------------------------------------------------- - // arguments to control internal behavior of the debruijn assembler - // ----------------------------------------------------------------------------------------------- - - @Advanced - @Argument(fullName="useDebruijnAssembler", shortName="useDebruijnAssembler", doc="If specified, we will use the old DeBruijn assembler. Depreciated as of 2.6", required = false) - protected boolean useDebruijnAssembler = false; - - @Advanced - @Argument(fullName="minKmerForDebruijnAssembler", shortName="minKmerForDebruijnAssembler", doc="Minimum kmer length to use in the debruijn assembly graph", required = false) - protected int minKmerForDebruijnAssembler = 11; - - @Advanced - @Argument(fullName="onlyUseKmerSizeForDebruijnAssembler", shortName="onlyUseKmerSizeForDebruijnAssembler", doc="If specified, we will only build kmer graphs with this kmer size in the debruijn", required = false) - protected int onlyUseKmerSizeForDebruijnAssembler = -1; - // ----------------------------------------------------------------------------------------------- // arguments to control internal behavior of the read threading assembler // ----------------------------------------------------------------------------------------------- @@ -349,7 +335,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // ----------------------------------------------------------------------------------------------- @Advanced - @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. 
Paths with <= X supporting kmers are pruned from the graph", required = false) + @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with < X supporting kmers are pruned from the graph", required = false) protected int MIN_PRUNE_FACTOR = 2; @Advanced @@ -621,9 +607,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // create and setup the assembler final int maxAllowedPathsForReadThreadingAssembler = Math.max(maxPathsPerSample * nSamples, MIN_PATHS_PER_GRAPH); - assemblyEngine = useDebruijnAssembler - ? new DeBruijnAssembler(minKmerForDebruijnAssembler, onlyUseKmerSizeForDebruijnAssembler) - : new ReadThreadingAssembler(maxAllowedPathsForReadThreadingAssembler, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples); + assemblyEngine = new ReadThreadingAssembler(maxAllowedPathsForReadThreadingAssembler, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples); assemblyEngine.setErrorCorrectKmers(errorCorrectKmers); assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR); @@ -870,7 +854,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In */ protected AssemblyResult assembleReads(final ActiveRegion activeRegion, final List activeAllelesToGenotype) { // Create the reference haplotype which is the bases from the reference that make up the active region - finalizeActiveRegion(activeRegion); // merge overlapping fragments, clip adapter and low qual tails + finalizeActiveRegion(activeRegion); // handle overlapping fragments, clip adapter and low qual tails final byte[] fullReferenceWithPadding = activeRegion.getActiveRegionReference(referenceReader, REFERENCE_PADDING); final GenomeLoc paddedReferenceLoc = getPaddedLoc(activeRegion); @@ -1097,8 +1081,15 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } } + // TODO -- Performance optimization: we partition the reads by sample 4 times right now; let's unify that code. 
+ + final List downsampledReads = DownsamplingUtils.levelCoverageByPosition(ReadUtils.sortReadsByCoordinate(readsToUse), maxReadsInRegionPerSample, minReadsPerAlignmentStart); + + // handle overlapping read pairs from the same fragment + cleanOverlappingReadPairs(downsampledReads); + activeRegion.clearReads(); - activeRegion.addAll(DownsamplingUtils.levelCoverageByPosition(ReadUtils.sortReadsByCoordinate(readsToUse), maxReadsInRegionPerSample, minReadsPerAlignmentStart)); + activeRegion.addAll(downsampledReads); } private Set filterNonPassingReads( final org.broadinstitute.sting.utils.activeregion.ActiveRegion activeRegion ) { @@ -1138,7 +1129,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In return returnMap; } - /** * Are we emitting a reference confidence in some form, or not? * @return true if we are @@ -1146,4 +1136,17 @@ public class HaplotypeCaller extends ActiveRegionWalker, In private boolean emitReferenceConfidence(){ return emitReferenceConfidence != ReferenceConfidenceMode.NONE; } + + /** + * Clean up reads/bases that overlap within read pairs + * + * @param reads the list of reads to consider + */ + private void cleanOverlappingReadPairs(final List reads) { + for ( final List perSampleReadList : splitReadsBySample(reads).values() ) { + final FragmentCollection fragmentCollection = FragmentUtils.create(perSampleReadList); + for ( final List overlappingPair : fragmentCollection.getOverlappingPairs() ) + FragmentUtils.adjustQualsOfOverlappingPairedFragments(overlappingPair); + } + } } \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 27178c78f..165fb71db 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -77,7 +77,7 @@ public abstract class LocalAssemblyEngine { * If false, we will only write out a region around the reference source */ private final static boolean PRINT_FULL_GRAPH_FOR_DEBUGGING = true; - public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 8; + public static final byte DEFAULT_MIN_BASE_QUALITY_TO_USE = (byte) 10; private static final int MIN_HAPLOTYPE_REFERENCE_LENGTH = 30; protected final int numBestHaplotypesPerGraph; @@ -301,9 +301,7 @@ public abstract class LocalAssemblyEngine { printDebugGraphTransform(seqGraph, new File("sequenceGraph.2.zipped.dot")); // now go through and prune the graph, removing vertices no longer connected to the reference chain - // IMPORTANT: pruning must occur before we call simplifyGraph, as simplifyGraph adds 0 weight - // edges to maintain graph connectivity. - seqGraph.pruneGraph(pruneFactor); + seqGraph.removeSingletonOrphanVertices(); seqGraph.removeVerticesNotConnectedToRefRegardlessOfEdgeDirection(); printDebugGraphTransform(seqGraph, new File("sequenceGraph.3.pruned.dot")); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java index 2b37d90c2..bd179ef41 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java @@ -71,7 +71,7 @@ public class BaseGraph extends Default private final int kmerSize; /** - * Construct a DeBruijnGraph with kmerSize + * Construct a TestGraph with kmerSize * @param kmerSize */ public BaseGraph(final int kmerSize, final EdgeFactory edgeFactory) { @@ -472,28 +472,11 @@ public class BaseGraph extends Default } /** - * Prune all edges from this graph that have multiplicity <= 
pruneFactor and remove all orphaned singleton vertices as well - * - * @param pruneFactor all edges with multiplicity <= this factor that aren't ref edges will be removed - */ - public void pruneGraph( final int pruneFactor ) { - final List edgesToRemove = new ArrayList<>(); - for( final E e : edgeSet() ) { - if( e.getPruningMultiplicity() <= pruneFactor && !e.isRef() ) { // remove non-reference edges with weight less than or equal to the pruning factor - edgesToRemove.add(e); - } - } - removeAllEdges(edgesToRemove); - - removeSingletonOrphanVertices(); - } - - /** - * Prune all chains from this graph where all edges in the path have multiplicity <= pruneFactor + * Prune all chains from this graph where any edge in the path has multiplicity < pruneFactor * * @see LowWeightChainPruner for more information * - * @param pruneFactor all edges with multiplicity <= this factor that aren't ref edges will be removed + * @param pruneFactor all edges with multiplicity < this factor that aren't ref edges will be removed */ public void pruneLowWeightChains( final int pruneFactor ) { final LowWeightChainPruner pruner = new LowWeightChainPruner<>(pruneFactor); @@ -503,7 +486,7 @@ public class BaseGraph extends Default /** * Remove all vertices in the graph that have in and out degree of 0 */ - protected void removeSingletonOrphanVertices() { + public void removeSingletonOrphanVertices() { // Run through the graph and clean up singular orphaned nodes final List verticesToRemove = new LinkedList<>(); for( final V v : vertexSet() ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java index 27b6bd902..520267dee 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/LowWeightChainPruner.java @@ -96,7 +96,7 @@ public class LowWeightChainPruner { } /** - * Traverse the edges in the path and determine if any are either ref edges or have weight above + * Traverse the edges in the path and determine if any are either ref edges or have weight above or equal to * the pruning factor and should therefore not be pruned away. * * @param path the path in question diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeBruijnGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/TestGraph.java similarity index 89% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeBruijnGraph.java rename to protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/TestGraph.java index 0200ce4a2..8c79a5efe 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeBruijnGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/TestGraph.java @@ -49,17 +49,16 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import com.google.java.contract.Ensures; import org.jgrapht.EdgeFactory; -import java.util.Arrays; import java.util.HashMap; import java.util.Map; /** - * A DeBruijn kmer graph + * A Test kmer graph * * User: rpoplin * Date: 2/6/13 */ -public final class DeBruijnGraph extends BaseGraph { +public final class TestGraph extends BaseGraph { /** * Edge factory that creates non-reference multiplicity 1 edges */ @@ -71,33 +70,20 @@ public final class DeBruijnGraph extends BaseGraph { } /** - * Create an empty DeBruijnGraph with default kmer size + * Create an empty TestGraph with default kmer size */ - public DeBruijnGraph() { + public TestGraph() { this(11); } /** - * Create an empty DeBruijnGraph with kmer size + * Create an empty TestGraph 
with kmer size * @param kmerSize kmer size, must be >= 1 */ - public DeBruijnGraph(int kmerSize) { + public TestGraph(int kmerSize) { super(kmerSize, new MyEdgeFactory()); } - /** - * Pull kmers out of the given long sequence and throw them on in the graph - * @param sequence byte array holding the sequence with which to build the assembly graph - * @param KMER_LENGTH the desired kmer length to use - * @param isRef if true the kmers added to the graph will have reference edges linking them - */ - public void addSequenceToGraph( final byte[] sequence, final int KMER_LENGTH, final boolean isRef ) { - if( sequence.length < KMER_LENGTH + 1 ) { throw new IllegalArgumentException("Provided sequence is too small for the given kmer length"); } - final int kmersInSequence = sequence.length - KMER_LENGTH + 1; - for( int iii = 0; iii < kmersInSequence - 1; iii++ ) { - addKmersToGraph(Arrays.copyOfRange(sequence, iii, iii + KMER_LENGTH), Arrays.copyOfRange(sequence, iii + 1, iii + 1 + KMER_LENGTH), isRef, 1); - } - } /** * Add edge to assembly graph connecting the two kmers @@ -129,7 +115,7 @@ public final class DeBruijnGraph extends BaseGraph { @Ensures({"result != null"}) public SeqGraph convertToSequenceGraph() { final SeqGraph seqGraph = new SeqGraph(getKmerSize()); - final Map vertexMap = new HashMap(); + final Map vertexMap = new HashMap<>(); // create all of the equivalent seq graph vertices for ( final DeBruijnVertex dv : vertexSet() ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java index d575f14a5..6500196d3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingAssembler.java @@ -190,7 +190,7 @@ 
public class ReadThreadingAssembler extends LocalAssemblyEngine { // look at all chains in the graph that terminate in a non-ref node (dangling sinks) and see if // we can recover them by merging some N bases from the chain back into the reference - if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(); + if ( recoverDanglingTails ) rtgraph.recoverDanglingTails(pruneFactor); // remove all heading and trailing paths if ( removePathsNotConnectedToRef ) rtgraph.removePathsNotConnectedToRef(); diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index 7d7df2c06..47d14e185 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -58,6 +58,7 @@ import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.sam.AlignmentUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.smithwaterman.SWPairwiseAlignment; +import org.broadinstitute.sting.utils.smithwaterman.SWParameterSet; import org.broadinstitute.sting.utils.smithwaterman.SmithWaterman; import org.jgrapht.EdgeFactory; import org.jgrapht.alg.CycleDetector; @@ -93,6 +94,9 @@ public class ReadThreadingGraph extends BaseGraph 0"); // generate the CIGAR string from Smith-Waterman between the dangling tail and reference paths - final DanglingTailMergeResult danglingTailMergeResult = generateCigarAgainstReferencePath(vertex); + final DanglingTailMergeResult danglingTailMergeResult = generateCigarAgainstReferencePath(vertex, pruneFactor); // if the CIGAR is too complex (or couldn't be computed) then we do not allow the merge into the reference path if ( danglingTailMergeResult == 
null || ! cigarIsOkayToMerge(danglingTailMergeResult.cigar) ) @@ -301,13 +306,14 @@ public class ReadThreadingGraph extends BaseGraph elements = cigar.getCigarElements(); + final int numElements = elements.size(); // don't allow more than a couple of different ops - if ( elements.size() > 3 ) + if ( numElements > MAX_CIGAR_COMPLEXITY ) return false; // the last element must be an M - if ( elements.get(elements.size() - 1).getOperator() != CigarOperator.M ) + if ( elements.get(numElements - 1).getOperator() != CigarOperator.M ) return false; // TODO -- do we want to check whether the Ms mismatch too much also? @@ -334,7 +340,8 @@ public class ReadThreadingGraph extends BaseGraph altPath = findPathToLowestCommonAncestorOfReference(vertex); - if ( altPath == null || isRefSource(altPath.get(0)) ) + final List altPath = findPathToLowestCommonAncestorOfReference(vertex, pruneFactor); + if ( altPath == null || isRefSource(altPath.get(0)) || altPath.size() < MIN_DANGLING_TAIL_LENGTH ) return null; // now get the reference path from the LCA @@ -361,24 +369,32 @@ public class ReadThreadingGraph extends BaseGraph findPathToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex) { + protected List findPathToLowestCommonAncestorOfReference(final MultiDeBruijnVertex vertex, final int pruneFactor) { final LinkedList path = new LinkedList<>(); MultiDeBruijnVertex v = vertex; while ( ! 
isReferenceNode(v) && inDegreeOf(v) == 1 ) { - path.addFirst(v); - v = getEdgeSource(incomingEdgeOf(v)); + final MultiSampleEdge edge = incomingEdgeOf(v); + // if it has too low a weight, don't use it (or previous vertexes) for the path + if ( edge.getPruningMultiplicity() < pruneFactor ) + path.clear(); + // otherwise it is safe to use + else + path.addFirst(v); + v = getEdgeSource(edge); } path.addFirst(v); @@ -453,7 +469,12 @@ public class ReadThreadingGraph extends BaseGraph uniqueKmers.size(); } - public void recoverDanglingTails() { + /** + * Try to recover dangling tails + * + * @param pruneFactor the prune factor to use in ignoring chain pieces + */ + public void recoverDanglingTails(final int pruneFactor) { if ( ! alreadyBuilt ) throw new IllegalStateException("recoverDanglingTails requires the graph be already built"); int attempted = 0; @@ -461,7 +482,7 @@ public class ReadThreadingGraph extends BaseGraph= kmerSize ) { // if the sequence is long enough to get some value out of, add it to the graph final String name = read.getReadName() + "_" + start + "_" + end; - addSequence(name, read.getReadGroup().getSample(), read.getReadBases(), start, stop, reducedReadCounts, false); + addSequence(name, read.getReadGroup().getSample(), read.getReadBases(), start, end, reducedReadCounts, false); } lastGood = -1; // reset the last good base diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java index 184a2689d..49148c152 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java @@ -59,6 +59,9 @@ public final class LoglessPairHMM extends N2MemoryPairHMM { protected static final double INITIAL_CONDITION = Math.pow(2, 1020); protected static final double INITIAL_CONDITION_LOG10 = Math.log10(INITIAL_CONDITION); + // we divide e by 3 because 
the observed base could have come from any of the non-observed alleles + protected static final double TRISTATE_CORRECTION = 3.0; + private static final int matchToMatch = 0; private static final int indelToMatch = 1; private static final int matchToInsertion = 2; @@ -146,7 +149,7 @@ public final class LoglessPairHMM extends N2MemoryPairHMM { for (int j = startIndex; j < haplotypeBases.length; j++) { final byte y = haplotypeBases[j]; prior[i+1][j+1] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? - QualityUtils.qualToProb(qual) : QualityUtils.qualToErrorProb(qual) ); + QualityUtils.qualToProb(qual) : (QualityUtils.qualToErrorProb(qual) / (doNotUseTristateCorrection ? 1.0 : TRISTATE_CORRECTION)) ); } } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index aaa3b1284..460b80121 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -79,6 +79,6 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy1_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "98f4d78aad745c6e853b81b2e4e207b4"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 1", "LSV_INDEL_DISC_NOREF_p1", "INDEL", "4dd1b38f0389e339ce8a05956956aa8a"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 
0eb89adc7..48f36ccc6 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -58,7 +58,7 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testINDEL_maxAltAlleles2_ploidy3_Pools_noRef() { - executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","25902d7a6a0c00c60c2d5845dfaa1a4c"); + executor.PC_LSV_Test_NoRef(" -maxAltAlleles 2 -ploidy 3","LSV_INDEL_DISC_NOREF_p3","INDEL","39f559996f8d429839c585bbab68dbde"); } @Test(enabled = true) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java index 65a569cdc..7fa716168 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIndelCallingIntegrationTest.java @@ -56,8 +56,8 @@ import java.util.List; public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { - private final static String baseCommandIndels = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; + private final static String baseCommandIndels = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 
-minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; // -------------------------------------------------------------------------------------------------------------- // @@ -73,7 +73,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("ef8151aa699da3272c1ae0986d16ca21")); + Arrays.asList("3c8727ee6e2a6f10ab728c4869dd5b92")); executeTest(String.format("test indel caller in SLX"), spec); } @@ -88,7 +88,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -minIndelCnt 1" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("7f88229ccefb74513efb199b61183cb8")); + Arrays.asList("0cbe889e03bab6512680ecaebd52c536")); executeTest(String.format("test indel caller in SLX with low min allele count"), spec); } @@ -101,7 +101,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { " -o %s" + " -L 1:10,000,000-10,500,000", 1, - Arrays.asList("1928ad48bcd0ca180e046bc235cfb3f4")); + Arrays.asList("c6f0fa039ca5672469838bc9f52c72d3")); executeTest(String.format("test indel calling, multiple technologies"), spec); } @@ -111,7 +111,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("6663e434a7b549f23bfd52db90e53a1a")); + Arrays.asList("475f8148123792064130faf9f9030fec")); executeTest("test MultiSample Pilot2 indels with alleles passed in", spec); } @@ -121,7 +121,7 @@ public class 
UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("581c552664e536df6d0f102fb0d10e5a")); + Arrays.asList("a7e4e1bd128424d46cffdd538b220074")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec); } @@ -136,7 +136,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L " + result.get(0).getAbsolutePath(), 1, - Arrays.asList("5596851d19582dd1af3901b7d703ae0a")); + Arrays.asList("8682738c2c66b502cdbf7db466a5c3e2")); executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2); } @@ -176,7 +176,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction0() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.0", 1, - Arrays.asList("862d82c8aa35f1da4f9e67b5b48dfe52")); + Arrays.asList("d3721bee5edaa31fdd35edd7aa75feb3")); executeTest("test minIndelFraction 0.0", spec); } @@ -184,7 +184,7 @@ public class UnifiedGenotyperIndelCallingIntegrationTest extends WalkerTest { public void testMinIndelFraction25() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( assessMinIndelFraction + " -minIndelFrac 0.25", 1, - Arrays.asList("8d9fc96be07db791737ac18135de4d63")); + Arrays.asList("a5b6d7b32953500d936d3dff512a6254")); executeTest("test minIndelFraction 0.25", spec); } diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 532982853..ded8189b3 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -64,8 +64,8 @@ import java.util.Collections; public class UnifiedGenotyperIntegrationTest extends WalkerTest { - private final static String baseCommand = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam"; + private final static String baseCommand = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommandNoCmdLineHeaderStdout = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam"; // -------------------------------------------------------------------------------------------------------------- // @@ -85,7 +85,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSLOD() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 
1:10,000,000-10,010,000", 1, - Arrays.asList("4aa226c00a242047cf427d0919003048")); + Arrays.asList("bc8a4e4ceb46776169b47146805c882a")); executeTest("test SLOD", spec); } @@ -101,7 +101,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testCompTrack() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1, - Arrays.asList("50937942e3d228614d2531c3be237709")); + Arrays.asList("21185d9a7519356ba672757f5a522971")); executeTest("test using comp track", spec); } @@ -175,12 +175,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- @Test public void testHeterozyosity1() { - testHeterozosity( 0.01, "3b66f82dbb746875638e076bf51a1583" ); + testHeterozosity( 0.01, "2f3051caa785c7c1e2a8b23fa4da90b1" ); } @Test public void testHeterozyosity2() { - testHeterozosity( 1.0 / 1850, "714c1795334c7c62c046a75479381ae6" ); + testHeterozosity( 1.0 / 1850, "228df9e38580d8ffe1134da7449fa35e" ); } private void testHeterozosity(final double arg, final String md5) { @@ -196,7 +196,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "6f79205f7ed8006470f056f6805db6c8"; + private final static String COMPRESSED_OUTPUT_MD5 = "eebec02fdde9937bffaf44902ace6207"; @Test public void testCompressedOutput() { @@ -217,24 +217,25 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String 
md5 = "d408b4661b820ed86272415b8ea08780"; + String md5 = "1f3fad09a63269c36e871e7ee04ebfaa"; + final String myCommand = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, + myCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, Arrays.asList(md5)); executeTest("test parallelization (single thread)", spec1); GenomeAnalysisEngine.resetRandomGenerator(); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1, + myCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 2", 1, Arrays.asList(md5)); executeTest("test parallelization (2 threads)", spec2); GenomeAnalysisEngine.resetRandomGenerator(); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - baseCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1, + myCommand + " -dt NONE -G none --contamination_fraction_to_filter 0.0 -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000 -nt 4", 1, Arrays.asList(md5)); executeTest("test parallelization (4 threads)", spec3); } @@ -252,7 +253,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -o 
%s" + " -L 1:10,000,000-10,100,000", 1, - Arrays.asList("31be725b2a7c15e9769391ad940c0587")); + Arrays.asList("9f4e663e3b156b14fd55df3f5f0336a5")); executeTest(String.format("test multiple technologies"), spec); } @@ -271,7 +272,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { " -L 1:10,000,000-10,100,000" + " -baq CALCULATE_AS_NECESSARY", 1, - Arrays.asList("dcc5cec42730567982def16da4a7f286")); + Arrays.asList("260bb73e2900334d5c3ff8123be0d2d8")); executeTest(String.format("test calling with BAQ"), spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java index 1bfbbac17..afec97b84 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperNormalCallingIntegrationTest.java @@ -53,7 +53,7 @@ import java.util.Arrays; public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ - private final static String baseCommand = "-T UnifiedGenotyper --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; + private final static String baseCommand = "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; // -------------------------------------------------------------------------------------------------------------- // @@ -64,7 +64,7 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam 
-o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("a9466c1e3ce1fc4bac83086b25a6df54")); + Arrays.asList("7f26ca78e550afa28df11d593c90ec9a")); executeTest("test MultiSample Pilot1", spec); } @@ -88,22 +88,22 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("aaadb2a355d87344eabb6ac4495a11e4")); + Arrays.asList("02b521fe88a6606a29c12c0885c3debd")); executeTest("test SingleSample Pilot2", spec); } @Test public void testMultipleSNPAlleles() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, - Arrays.asList("06c85e8eab08b67244cf38fc785aca22")); + "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1, + Arrays.asList("dd5ad3beaa75319bb2ef1434d2dd9f73")); executeTest("test Multiple SNP alleles", spec); } @Test public void testBadRead() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, + "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH -I " + privateTestDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1, 
Arrays.asList("d915535c1458733f09f82670092fcab6")); executeTest("test bad read", spec); } @@ -111,16 +111,16 @@ public class UnifiedGenotyperNormalCallingIntegrationTest extends WalkerTest{ @Test public void testReverseTrim() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, - Arrays.asList("f3da1ff1e49a831af055ca52d6d07dd7")); + "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1, + Arrays.asList("a973298b2801b80057bea88507e2858d")); executeTest("test reverse trim", spec); } @Test public void testMismatchedPLs() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, - Arrays.asList("20ff311f363c51b7385a76f6f296759c")); + "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1, + Arrays.asList("8d91d98c4e79897690d3c6918b6ac761")); executeTest("test mismatched PLs", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java index 33810e255..3b5690046 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java +++ 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperReducedReadsIntegrationTest.java @@ -62,7 +62,7 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { @Test public void testReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, + "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, Arrays.asList("ffde0d5e23523e4bd9e7e18f62d37d0f")); executeTest("test calling on a ReducedRead BAM", spec); } @@ -74,13 +74,13 @@ public class UnifiedGenotyperReducedReadsIntegrationTest extends WalkerTest { @Test public void testReducedBamINDELs() { - testReducedCalling("INDEL", "4b4902327fb132f9aaab3dd5ace934e1"); + testReducedCalling("INDEL", "942930038cf7fc9a80b969461aaa9aa6"); } private void testReducedCalling(final String model, final String md5) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-10,500,000 -glm " + model, 1, + "-T UnifiedGenotyper --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.reduced.bam -o %s -L 20:10,000,000-10,500,000 -glm " + model, 1, Arrays.asList(md5)); executeTest("test calling on a ReducedRead BAM with " + model, spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java deleted file mode 100644 index 95592241d..000000000 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblerUnitTest.java +++ /dev/null @@ -1,199 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012 Broad Institute, Inc. 
-* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 4. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 5. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 6. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 7. MISCELLANEOUS -* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.sting.gatk.walkers.haplotypecaller; - -/** - * Created by IntelliJ IDEA. 
- * User: rpoplin - * Date: 3/27/12 - */ - -import net.sf.samtools.*; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; -import org.broadinstitute.sting.utils.haplotype.Haplotype; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.sam.AlignmentUtils; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.*; - -public class DeBruijnAssemblerUnitTest extends BaseTest { - private final static boolean DEBUG = false; - - @Test(enabled = !DEBUG) - public void testReferenceCycleGraph() { - String refCycle = "ATCGAGGAGAGCGCCCCGAGATATATATATATATATTTGCGAGCGCGAGCGTTTTAAAAATTTTAGACGGAGAGATATATATATATGGGAGAGGGGATATATATATATCCCCCC"; - String noCycle = "ATCGAGGAGAGCGCCCCGAGATATTATTTGCGAGCGCGAGCGTTTTAAAAATTTTAGACGGAGAGATGGGAGAGGGGATATATAATATCCCCCC"; - final DeBruijnGraph g1 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList(), 10, new Haplotype(refCycle.getBytes(), true), Collections.emptyList()); - final DeBruijnGraph g2 = new DeBruijnAssembler().createGraphFromSequences(new ArrayList(), 10, new Haplotype(noCycle.getBytes(), true), Collections.emptyList()); - - Assert.assertTrue(g1 == null, "Reference cycle graph should return null during creation."); - Assert.assertTrue(g2 != null, "Reference non-cycle graph should not return null during creation."); - } - - private static class MockBuilder extends DeBruijnGraphBuilder { - public final List addedPairs = new LinkedList(); - - private MockBuilder(final int kmerSize) { - super(new DeBruijnGraph(kmerSize)); - } - - @Override - public void addKmerPair(Kmer kmerPair, int multiplicity) { - logger.info("addKmerPair" + kmerPair); - addedPairs.add(kmerPair); - } - - @Override - public void flushKmersToGraph(boolean addRefEdges) 
{ - // do nothing - } - } - - @DataProvider(name = "AddReadKmersToGraph") - public Object[][] makeAddReadKmersToGraphData() { - List tests = new ArrayList(); - - // this functionality can be adapted to provide input data for whatever you might want in your data - final String bases = "ACGTAACCGGTTAAACCCGGGTTT"; - final int readLen = bases.length(); - final List allBadStarts = new ArrayList(readLen); - for ( int i = 0; i < readLen; i++ ) allBadStarts.add(i); - - for ( final int kmerSize : Arrays.asList(3, 4, 5) ) { - for ( final int nBadQuals : Arrays.asList(0, 1, 2) ) { - for ( final List badStarts : Utils.makePermutations(allBadStarts, nBadQuals, false) ) { - tests.add(new Object[]{bases, kmerSize, badStarts}); - } - } - } - - return tests.toArray(new Object[][]{}); - } - - @Test(dataProvider = "AddReadKmersToGraph", enabled = ! DEBUG) - public void testAddReadKmersToGraph(final String bases, final int kmerSize, final List badQualsSites) { - final int readLen = bases.length(); - final DeBruijnAssembler assembler = new DeBruijnAssembler(); - final MockBuilder builder = new MockBuilder(kmerSize); - - final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000); - - final byte[] quals = Utils.dupBytes((byte)20, bases.length()); - for ( final int badSite : badQualsSites ) quals[badSite] = 0; - final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "myRead", 0, 1, readLen); - read.setReadBases(bases.getBytes()); - read.setBaseQualities(quals); - - final Set expectedBases = new HashSet(); - final Set expectedStarts = new LinkedHashSet(); - for ( int i = 0; i < readLen; i++) { - boolean good = true; - for ( int j = 0; j < kmerSize + 1; j++ ) { // +1 is for pairing - good &= i + j < readLen && quals[i+j] >= assembler.getMinBaseQualityToUseInAssembly(); - } - if ( good ) { - expectedStarts.add(i); - expectedBases.add(bases.substring(i, i + kmerSize + 1)); - } - } - - assembler.addReadKmersToGraph(builder, Arrays.asList(read)); - 
Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size()); - for ( final Kmer addedKmer : builder.addedPairs ) { - Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases); - } - } - - @DataProvider(name = "AddGGAKmersToGraph") - public Object[][] makeAddGGAKmersToGraphData() { - List tests = new ArrayList(); - - // this functionality can be adapted to provide input data for whatever you might want in your data - final String bases = "ACGTAACCGGTTAAACCCGGGTTT"; - final int readLen = bases.length(); - final List allBadStarts = new ArrayList(readLen); - for ( int i = 0; i < readLen; i++ ) allBadStarts.add(i); - - for ( final int kmerSize : Arrays.asList(3, 4, 5) ) { - tests.add(new Object[]{bases, kmerSize}); - } - - return tests.toArray(new Object[][]{}); - } - - @Test(dataProvider = "AddGGAKmersToGraph", enabled = ! DEBUG) - public void testAddGGAKmersToGraph(final String bases, final int kmerSize) { - final int readLen = bases.length(); - final DeBruijnAssembler assembler = new DeBruijnAssembler(); - final MockBuilder builder = new MockBuilder(kmerSize); - - final Set expectedBases = new HashSet(); - final Set expectedStarts = new LinkedHashSet(); - for ( int i = 0; i < readLen; i++) { - boolean good = true; - for ( int j = 0; j < kmerSize + 1; j++ ) { // +1 is for pairing - good &= i + j < readLen; - } - if ( good ) { - expectedStarts.add(i); - expectedBases.add(bases.substring(i, i + kmerSize + 1)); - } - } - - assembler.addGGAKmersToGraph(builder, Arrays.asList(new Haplotype(bases.getBytes()))); - Assert.assertEquals(builder.addedPairs.size(), expectedStarts.size()); - for ( final Kmer addedKmer : builder.addedPairs ) { - Assert.assertTrue(expectedBases.contains(new String(addedKmer.bases())), "Couldn't find kmer " + addedKmer + " among all expected kmers " + expectedBases); - } - } -} diff --git 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblyGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblyGraphUnitTest.java deleted file mode 100644 index a13618dae..000000000 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/DeBruijnAssemblyGraphUnitTest.java +++ /dev/null @@ -1,124 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
-* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012 Broad Institute, Inc. 
-* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 4. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 5. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
-* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 6. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 7. MISCELLANEOUS -* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.sting.gatk.walkers.haplotypecaller; - -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.DeBruijnGraph; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -/** - * Created with IntelliJ IDEA. 
- * User: rpoplin - * Date: 2/8/13 - */ - -public class DeBruijnAssemblyGraphUnitTest { - private class GetReferenceBytesTestProvider extends BaseTest.TestDataProvider { - public byte[] refSequence; - public byte[] altSequence; - public int KMER_LENGTH; - - public GetReferenceBytesTestProvider(String ref, String alt, int kmer) { - super(GetReferenceBytesTestProvider.class, String.format("Testing reference bytes. kmer = %d, ref = %s, alt = %s", kmer, ref, alt)); - refSequence = ref.getBytes(); - altSequence = alt.getBytes(); - KMER_LENGTH = kmer; - } - - public byte[] expectedReferenceBytes() { - return refSequence; - } - - public byte[] calculatedReferenceBytes() { - DeBruijnGraph graph = new DeBruijnGraph(); - graph.addSequenceToGraph(refSequence, KMER_LENGTH, true); - if( altSequence.length > 0 ) { - graph.addSequenceToGraph(altSequence, KMER_LENGTH, false); - } - return graph.getReferenceBytes(graph.getReferenceSourceVertex(), graph.getReferenceSinkVertex(), true, true); - } - } - - @DataProvider(name = "GetReferenceBytesTestProvider") - public Object[][] GetReferenceBytesTests() { - new GetReferenceBytesTestProvider("GGTTAACC", "", 3); - new GetReferenceBytesTestProvider("GGTTAACC", "", 4); - new GetReferenceBytesTestProvider("GGTTAACC", "", 5); - new GetReferenceBytesTestProvider("GGTTAACC", "", 6); - new GetReferenceBytesTestProvider("GGTTAACC", "", 7); - new GetReferenceBytesTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", "", 6); - new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "", 66); - new 
GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "", 76); - - new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 3); - new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 4); - new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 5); - new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 6); - new GetReferenceBytesTestProvider("GGTTAACC", "GGTTAACC", 7); - new GetReferenceBytesTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", "GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", 6); - new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 66); - new 
GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 76); - - new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 3); - new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 4); - new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 5); - new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 6); - new GetReferenceBytesTestProvider("GGTTAACC", "AAAAAAAAAAAAA", 7); - new GetReferenceBytesTestProvider("GGTTAACCATGCAGACGGGAGGCTGAGCGAGAGTTTT", "AAAAAAAAAAAAA", 6); - new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", 
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 66); - new GetReferenceBytesTestProvider("AATACCATTGGAGTTTTTTTCCAGGTTAAGATGGTGCATTGAATCCACCCATCTACTTTTGCTCCTCCCAAAACTCACTAAAACTATTATAAAGGGATTTTGTTTAAAGACACAAACTCATGAGGACAGAGAGAACAGAGTAGACAATAGTGGGGGAAAAATAAGTTGGAAGATAGAAAACAGATGGGTGAGTGGTAATCGACTCAGCAGCCCCAAGAAAGCTGAAACCCAGGGAAAGTTAAGAGTAGCCCTATTTTCATGGCAAAATCCAAGGGGGGGTGGGGAAAGAAAGAAAAACAGAAAAAAAAATGGGAATTGGCAGTCCTAGATATCTCTGGTACTGGGCAAGCCAAAGAATCAGGATAACTGGGTGAAAGGTGATTGGGAAGCAGTTAAAATCTTAGTTCCCCTCTTCCACTCTCCGAGCAGCAGGTTTCTCTCTCTCATCAGGCAGAGGGCTGGAGAT", "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", 76); - - return GetReferenceBytesTestProvider.getTests(GetReferenceBytesTestProvider.class); - } - - @Test(dataProvider = "GetReferenceBytesTestProvider", enabled = true) - public void testGetReferenceBytes(GetReferenceBytesTestProvider cfg) { - Assert.assertEquals(cfg.calculatedReferenceBytes(), cfg.expectedReferenceBytes(), "Reference sequences do not match"); - } -} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 1f7236c39..74f7929ee 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -57,14 +57,14 @@ import static 
org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCal public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends WalkerTest { private void HCTestComplexVariants(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4"; + final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering -R %s -I %s", REF, bam) + " -L 20:10028767-10028967 -L 20:10431524-10431924 -L 20:10723661-10724061 -L 20:10903555-10903955 --no_cmdline_in_header -o %s -minPruning 4"; final WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCallerComplexVariants: args=" + args, spec); } @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "12ed9d67139e7a94d67e9e6c06ac6e16"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "df7be117bd3d256c4a5fbde925ecd19b"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -88,12 +88,12 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "b7a01525c00d02b3373513a668a43c6a"); + "b787be740423b950f8529ccc838fabdd"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "a2a42055b068334f415efb07d6bb9acd"); + "8e6a2002c59eafb78bdbf1db9660164b"); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 8ab2c0779..c7b242249 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -63,12 +63,12 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "2b54e4e948144030a829175bcd295e47"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "ba1bb72caa06c1962a202b2012c266cb"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "a841d9e94fb832066a04f13bdc62b101"}); - tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "6cc95c47368a568fb9e1eb8578f96b0b"}); - tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "2703f1c0c27b3c977689604b5f78b61f"}); - tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "b54e36bbb4dc6c3b786349fa267d1f6c"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "55faaae5617857e2b29848367999aa3e"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "e32b7fc4de29ed141dcafc0d789d5ed6"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "ecac86e8ef4856e6dfa306c436e9b545"}); + tests.add(new 
Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "7cb1e431119df00ec243a6a115fa74b8"}); + tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "7828256b82df377cc3a26a55dbf68f91"}); + tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "e41e0acf172a994e938a150390badd39"}); return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 43095dcf3..007a5838c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -71,19 +71,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { final static String INTERVALS_FILE = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.test.intervals"; private void HCTest(String bam, String args, String md5) { - final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; + final String base = String.format("-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering -R %s -I %s -L %s", REF, bam, INTERVALS_FILE) + " --no_cmdline_in_header -o %s -minPruning 3"; final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); executeTest("testHaplotypeCaller: args=" + args, spec); } @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "baa5a2eedc8f06ce9f8f98411ee09f8a"); + HCTest(CEUTRIO_BAM, "", "c0b1b64c6005cd3640ffde5dbc10174b"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", 
"f09e03d41238697b23f95716a12667cb"); + HCTest(NA12878_BAM, "", "439ce9024f04aad08eab1526d887e295"); } @Test(enabled = false) // can't annotate the rsID's yet @@ -94,7 +94,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "130d36448faeb7b8d4bce4be12dacd3a"); + "b09437f11db40abd49195110e50692c2"); } @Test @@ -110,7 +110,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "7c20aa62633f4ce8ebf12950fbf05ec0"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "c57c463542304fb7b2576e531faca89e"); } private void HCTestNearbySmallIntervals(String bam, String args, String md5) { @@ -147,7 +147,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerNearbySmallIntervals() { - HCTestNearbySmallIntervals(NA12878_BAM, "", "0ddc56f0a0fbcfefda79aa20b2ecf603"); + HCTestNearbySmallIntervals(NA12878_BAM, "", "75820a4558a559b3e1636fdd1b776ea2"); } // This problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -157,7 +157,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller --disableDithering -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("0689d2c202849fd05617648eaf429b9a")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("976463812534ac164a64c5d0c3ec988a")); 
executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @@ -185,16 +185,16 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("5fe9310addf881bed4fde2354e59ce34")); + "-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, + Arrays.asList("277aa95b01fa4d4e0086a2fabf7f3d7e")); executeTest("HC calling on a ReducedRead BAM", spec); } @Test public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("26a9917f6707536636451266de0116c3")); + "-T HaplotypeCaller --contamination_fraction_to_filter 0.05 --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, + Arrays.asList("6a9222905c740b9208bf3c67478514eb")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } @@ -208,7 +208,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - 
Arrays.asList("c5c63d03e1c4bbe32f06902acd4a10f9")); + Arrays.asList("58a0089e6ebf7cee414adb7a6002d43f")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -217,7 +217,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-11,000,000 -D " + b37dbSNP132 + " -L " + hg19Intervals + " -isr INTERSECTION", 1, - Arrays.asList("f0b2a96040429908cce17327442eec29")); + Arrays.asList("1352cbe1404aefc94eb8e044539a9882")); executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index 3b17725f9..aabc41f36 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "e800f6bb3a820da5c6b29f0195480796"}); + tests.add(new Object[]{nct, "6f8c3cac54eb1460e2c65fe00978b1c1"}); } return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java index 2fda56665..9dcc18275 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java +++ 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngineUnitTest.java @@ -87,15 +87,6 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { header = ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary()); } - private enum Assembler {DEBRUIJN_ASSEMBLER, READ_THREADING_ASSEMBLER} - private LocalAssemblyEngine createAssembler(final Assembler type) { - switch ( type ) { - case DEBRUIJN_ASSEMBLER: return new DeBruijnAssembler(); - case READ_THREADING_ASSEMBLER: return new ReadThreadingAssembler(); - default: throw new IllegalStateException("Unexpected " + type); - } - } - @DataProvider(name = "AssembleIntervalsData") public Object[][] makeAssembleIntervalsData() { List tests = new ArrayList(); @@ -107,12 +98,10 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { final int stepSize = 200; final int nReadsToUse = 5; - for ( final Assembler assembler : Assembler.values() ) { - for ( int startI = start; startI < end; startI += stepSize) { - final int endI = startI + windowSize; - final GenomeLoc refLoc = genomeLocParser.createGenomeLoc(contig, startI, endI); - tests.add(new Object[]{assembler, refLoc, nReadsToUse}); - } + for ( int startI = start; startI < end; startI += stepSize) { + final int endI = startI + windowSize; + final GenomeLoc refLoc = genomeLocParser.createGenomeLoc(contig, startI, endI); + tests.add(new Object[]{new ReadThreadingAssembler(), refLoc, nReadsToUse}); } return tests.toArray(new Object[][]{}); @@ -130,13 +119,11 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { final int variantStepSize = 1; final int nReadsToUse = 5; - for ( final Assembler assembler : Assembler.values() ) { - for ( int startI = start; startI < end; startI += stepSize) { - final int endI = startI + windowSize; - final GenomeLoc refLoc = genomeLocParser.createGenomeLoc(contig, startI, endI); - for ( int variantStart = windowSize / 2 - 10; variantStart < windowSize / 2 + 10; variantStart += 
variantStepSize ) { - tests.add(new Object[]{assembler, refLoc, nReadsToUse, variantStart}); - } + for ( int startI = start; startI < end; startI += stepSize) { + final int endI = startI + windowSize; + final GenomeLoc refLoc = genomeLocParser.createGenomeLoc(contig, startI, endI); + for ( int variantStart = windowSize / 2 - 10; variantStart < windowSize / 2 + 10; variantStart += variantStepSize ) { + tests.add(new Object[]{new ReadThreadingAssembler(), refLoc, nReadsToUse, variantStart}); } } @@ -144,7 +131,7 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { } @Test(dataProvider = "AssembleIntervalsData") - public void testAssembleRef(final Assembler assembler, final GenomeLoc loc, final int nReadsToUse) { + public void testAssembleRef(final ReadThreadingAssembler assembler, final GenomeLoc loc, final int nReadsToUse) { final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); final List reads = new LinkedList(); @@ -163,7 +150,7 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { } @Test(dataProvider = "AssembleIntervalsWithVariantData") - public void testAssembleRefAndSNP(final Assembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { + public void testAssembleRefAndSNP(final ReadThreadingAssembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); final Allele refBase = Allele.create(refBases[variantSite], true); final Allele altBase = Allele.create((byte)(refBase.getBases()[0] == 'A' ? 
'C' : 'A'), false); @@ -172,7 +159,7 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { } @Test(dataProvider = "AssembleIntervalsWithVariantData") - public void testAssembleRefAndDeletion(final Assembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { + public void testAssembleRefAndDeletion(final ReadThreadingAssembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); for ( int deletionLength = 1; deletionLength < 10; deletionLength++ ) { final Allele refBase = Allele.create(new String(refBases).substring(variantSite, variantSite + deletionLength + 1), true); @@ -183,7 +170,7 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { } @Test(dataProvider = "AssembleIntervalsWithVariantData") - public void testAssembleRefAndInsertion(final Assembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { + public void testAssembleRefAndInsertion(final ReadThreadingAssembler assembler, final GenomeLoc loc, final int nReadsToUse, final int variantSite) { final byte[] refBases = seq.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases(); for ( int insertionLength = 1; insertionLength < 10; insertionLength++ ) { final Allele refBase = Allele.create(refBases[variantSite], false); @@ -193,7 +180,7 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { } } - private void testAssemblyWithVariant(final Assembler assembler, final byte[] refBases, final GenomeLoc loc, final int nReadsToUse, final VariantContext site) { + private void testAssemblyWithVariant(final ReadThreadingAssembler assembler, final byte[] refBases, final GenomeLoc loc, final int nReadsToUse, final VariantContext site) { final String preRef = new String(refBases).substring(0, site.getStart()); final String postRef = new String(refBases).substring(site.getEnd() + 1, 
refBases.length); final byte[] altBases = (preRef + site.getAlternateAllele(0).getBaseString() + postRef).getBytes(); @@ -217,7 +204,7 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { } - private List assemble(final Assembler assembler, final byte[] refBases, final GenomeLoc loc, final List reads) { + private List assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final GenomeLoc loc, final List reads) { final Haplotype refHaplotype = new Haplotype(refBases, true); final Cigar c = new Cigar(); c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); @@ -225,9 +212,8 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { final ActiveRegion activeRegion = new ActiveRegion(loc, null, true, genomeLocParser, 0); activeRegion.addAll(reads); - final LocalAssemblyEngine engine = createAssembler(assembler); // logger.warn("Assembling " + activeRegion + " with " + engine); - return engine.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.emptyList(), null); + return assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.emptyList(), null); } @DataProvider(name = "SimpleAssemblyTestData") @@ -239,30 +225,25 @@ public class LocalAssemblyEngineUnitTest extends BaseTest { final int windowSize = 200; final int end = start + windowSize; - final Map edgeExcludesByAssembler = new EnumMap<>(Assembler.class); - edgeExcludesByAssembler.put(Assembler.DEBRUIJN_ASSEMBLER, 26); - edgeExcludesByAssembler.put(Assembler.READ_THREADING_ASSEMBLER, 25); // TODO -- decrease to zero when the edge calling problem is fixed + final int excludeVariantsWithinXbp = 25; // TODO -- decrease to zero when the edge calling problem is fixed final String ref = new String(seq.getSubsequenceAt(contig, start, end).getBases()); final GenomeLoc refLoc = genomeLocParser.createGenomeLoc(contig, start, end); - for ( final Assembler assembler : Assembler.values() ) { - final int excludeVariantsWithXbp = 
edgeExcludesByAssembler.get(assembler); for ( int snpPos = 0; snpPos < windowSize; snpPos++) { - if ( snpPos > excludeVariantsWithXbp && (windowSize - snpPos) >= excludeVariantsWithXbp ) { + if ( snpPos > excludeVariantsWithinXbp && (windowSize - snpPos) >= excludeVariantsWithinXbp ) { final byte[] altBases = ref.getBytes(); altBases[snpPos] = altBases[snpPos] == 'A' ? (byte)'C' : (byte)'A'; final String alt = new String(altBases); - tests.add(new Object[]{"SNP at " + snpPos, assembler, refLoc, ref, alt}); + tests.add(new Object[]{"SNP at " + snpPos, new ReadThreadingAssembler(), refLoc, ref, alt}); } } - } return tests.toArray(new Object[][]{}); } @Test(dataProvider = "SimpleAssemblyTestData") - public void testSimpleAssembly(final String name, final Assembler assembler, final GenomeLoc loc, final String ref, final String alt) { + public void testSimpleAssembly(final String name, final ReadThreadingAssembler assembler, final GenomeLoc loc, final String ref, final String alt) { final byte[] refBases = ref.getBytes(); final byte[] altBases = alt.getBytes(); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraphUnitTest.java index f9cbc6c73..1c539e404 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraphUnitTest.java @@ -49,7 +49,6 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import org.broadinstitute.sting.BaseTest; import org.testng.Assert; import org.testng.annotations.BeforeMethod; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -231,7 +230,7 @@ public class BaseGraphUnitTest extends BaseTest { final File tmp = File.createTempFile("tmp", "dot"); tmp.deleteOnExit(); new 
SeqGraph().printGraph(tmp, 10); - new DeBruijnGraph().printGraph(tmp, 10); + new TestGraph().printGraph(tmp, 10); } @Test @@ -248,71 +247,6 @@ public class BaseGraphUnitTest extends BaseTest { Assert.assertEquals(actualSet, expectedSet); } - @Test(enabled = true) - public void testPruneGraph() { - DeBruijnGraph graph = new DeBruijnGraph(); - DeBruijnGraph expectedGraph = new DeBruijnGraph(); - - DeBruijnVertex v = new DeBruijnVertex("ATGG"); - DeBruijnVertex v2 = new DeBruijnVertex("ATGGA"); - DeBruijnVertex v3 = new DeBruijnVertex("ATGGT"); - DeBruijnVertex v4 = new DeBruijnVertex("ATGGG"); - DeBruijnVertex v5 = new DeBruijnVertex("ATGGC"); - DeBruijnVertex v6 = new DeBruijnVertex("ATGGCCCCCC"); - - graph.addVertex(v); - graph.addVertex(v2); - graph.addVertex(v3); - graph.addVertex(v4); - graph.addVertex(v5); - graph.addVertex(v6); - graph.addEdge(v, v2, new BaseEdge(false, 1)); - graph.addEdge(v2, v3, new BaseEdge(false, 3)); - graph.addEdge(v3, v4, new BaseEdge(false, 5)); - graph.addEdge(v4, v5, new BaseEdge(false, 3)); - graph.addEdge(v5, v6, new BaseEdge(false, 2)); - - expectedGraph.addVertex(v2); - expectedGraph.addVertex(v3); - expectedGraph.addVertex(v4); - expectedGraph.addVertex(v5); - expectedGraph.addEdge(v2, v3, new BaseEdge(false, 3)); - expectedGraph.addEdge(v3, v4, new BaseEdge(false, 5)); - expectedGraph.addEdge(v4, v5, new BaseEdge(false, 3)); - - graph.pruneGraph(2); - - Assert.assertTrue(BaseGraph.graphEquals(graph, expectedGraph)); - - graph = new DeBruijnGraph(); - expectedGraph = new DeBruijnGraph(); - - graph.addVertex(v); - graph.addVertex(v2); - graph.addVertex(v3); - graph.addVertex(v4); - graph.addVertex(v5); - graph.addVertex(v6); - graph.addEdge(v, v2, new BaseEdge(true, 1)); - graph.addEdge(v2, v3, new BaseEdge(false, 3)); - graph.addEdge(v3, v4, new BaseEdge(false, 5)); - graph.addEdge(v4, v5, new BaseEdge(false, 3)); - - expectedGraph.addVertex(v); - expectedGraph.addVertex(v2); - expectedGraph.addVertex(v3); - 
expectedGraph.addVertex(v4); - expectedGraph.addVertex(v5); - expectedGraph.addEdge(v, v2, new BaseEdge(true, 1)); - expectedGraph.addEdge(v2, v3, new BaseEdge(false, 3)); - expectedGraph.addEdge(v3, v4, new BaseEdge(false, 5)); - expectedGraph.addEdge(v4, v5, new BaseEdge(false, 3)); - - graph.pruneGraph(2); - - Assert.assertTrue(BaseGraph.graphEquals(graph, expectedGraph)); - } - @Test(enabled = true) public void testGetBases() { @@ -324,7 +258,7 @@ public class BaseGraphUnitTest extends BaseTest { vertexes.add(new DeBruijnVertex(testString.substring(i, i + kmerSize))); } - final String result = new String(new DeBruijnGraph().getBasesForPath(vertexes)); + final String result = new String(new TestGraph().getBasesForPath(vertexes)); Assert.assertEquals(result, testString.substring(kmerSize - 1)); } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraphUnitTest.java index c72f426be..473df8fb0 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraphUnitTest.java @@ -72,7 +72,7 @@ public class SeqGraphUnitTest extends BaseTest { } public SeqGraph calcGraph() { - final DeBruijnGraph deBruijnGraph = new DeBruijnGraph(); + final TestGraph deBruijnGraph = new TestGraph(); final int kmersInSequence = sequence.length - KMER_LENGTH + 1; for (int i = 0; i < kmersInSequence - 1; i++) { // get the kmers diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java index ed91cccb3..1c59f9f7e 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraphUnitTest.java @@ -212,8 +212,8 @@ public class ReadThreadingGraphUnitTest extends BaseTest { tests.add(new Object[]{"CCAAAAAAAAAA", "AAAAAAAAAA", "1M2D10M", true, 10}); // deletion tests.add(new Object[]{"AAAAAAAA", "CAAAAAAA", "9M", true, 7}); // 1 snp tests.add(new Object[]{"AAAAAAAA", "CAAGATAA", "9M", true, 2}); // several snps - tests.add(new Object[]{"AAAAA", "C", "1M4D1M", true, -1}); // funky SW alignment - tests.add(new Object[]{"AAAAA", "CA", "1M3D2M", true, 1}); // very little data + tests.add(new Object[]{"AAAAA", "C", "1M4D1M", false, -1}); // funky SW alignment + tests.add(new Object[]{"AAAAA", "CA", "1M3D2M", false, 1}); // very little data tests.add(new Object[]{"AAAAAAA", "CAAAAAC", "8M", true, -1}); // ends in mismatch tests.add(new Object[]{"AAAAAA", "CGAAAACGAA", "1M2I4M2I2M", false, 0}); // alignment is too complex @@ -253,7 +253,13 @@ public class ReadThreadingGraphUnitTest extends BaseTest { Assert.assertTrue(altSink != null, "We did not find a non-reference sink"); // confirm that the SW alignment agrees with our expectations - final ReadThreadingGraph.DanglingTailMergeResult result = rtgraph.generateCigarAgainstReferencePath(altSink); + final ReadThreadingGraph.DanglingTailMergeResult result = rtgraph.generateCigarAgainstReferencePath(altSink, 0); + + if ( result == null ) { + Assert.assertFalse(cigarIsGood); + return; + } + Assert.assertTrue(cigar.equals(result.cigar.toString()), "SW generated cigar = " + result.cigar.toString()); // confirm that the goodness of the cigar agrees with our expectations diff --git a/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java 
index f9a4985b0..337f23afe 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerIntegrationTest.java @@ -67,7 +67,7 @@ public class NanoSchedulerIntegrationTest extends WalkerTest { for ( final int nct : Arrays.asList(1, 2) ) { // tests.add(new Object[]{ "SNP", "a1c7546f32a8919a3f3a70a04b2e8322", nt, nct }); //// tests.add(new Object[]{ "INDEL", "0a6d2be79f4f8a4b0eb788cc4751b31b", nt, nct }); - tests.add(new Object[]{ "BOTH", "aad3a398273ec795e363268997247bd8", nt, nct }); + tests.add(new Object[]{ "BOTH", "a80925b58735828158491f77ae64998b", nt, nct }); } return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java index 2499183a6..7334eec3f 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/PairHMMUnitTest.java @@ -56,6 +56,7 @@ import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.Utils; import org.testng.Assert; +import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -68,11 +69,18 @@ public class PairHMMUnitTest extends BaseTest { private final static boolean ALLOW_READS_LONGER_THAN_HAPLOTYPE = true; private final static boolean DEBUG = false; final static boolean EXTENSIVE_TESTING = true; - final PairHMM exactHMM = new Log10PairHMM(true); // the log truth implementation - final PairHMM originalHMM = new Log10PairHMM(false); // the reference implementation - final PairHMM loglessHMM = new LoglessPairHMM(); + final N2MemoryPairHMM exactHMM = new Log10PairHMM(true); // the log truth implementation + final 
N2MemoryPairHMM originalHMM = new Log10PairHMM(false); // the reference implementation + final N2MemoryPairHMM loglessHMM = new LoglessPairHMM(); - private List getHMMs() { + @BeforeClass + public void initialize() { + exactHMM.doNotUseTristateCorrection(); + originalHMM.doNotUseTristateCorrection(); + loglessHMM.doNotUseTristateCorrection(); + } + + private List getHMMs() { return Arrays.asList(exactHMM, originalHMM, loglessHMM); } @@ -592,8 +600,13 @@ public class PairHMMUnitTest extends BaseTest { public Object[][] makeUninitializedHMMs() { List tests = new ArrayList(); - tests.add(new Object[]{new LoglessPairHMM()}); - tests.add(new Object[]{new Log10PairHMM(true)}); + final LoglessPairHMM myLoglessPairHMM = new LoglessPairHMM(); + myLoglessPairHMM.doNotUseTristateCorrection(); + tests.add(new Object[]{myLoglessPairHMM}); + + final Log10PairHMM myLog10PairHMM = new Log10PairHMM(true); + myLog10PairHMM.doNotUseTristateCorrection(); + tests.add(new Object[]{myLog10PairHMM}); return tests.toArray(new Object[][]{}); } diff --git a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java index 5d882ba8c..e88065c4a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/fragments/FragmentUtils.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.fragments; import com.google.java.contract.Ensures; import com.google.java.contract.Requires; +import net.sf.picard.util.QualityUtil; import net.sf.samtools.Cigar; import net.sf.samtools.CigarElement; import net.sf.samtools.CigarOperator; @@ -60,6 +61,11 @@ import java.util.*; * Time: 10:09 PM */ public final class FragmentUtils { + + public final static double DEFAULT_PCR_ERROR_RATE = 1e-4; + public final static int DEFAULT_PCR_ERROR_QUAL = QualityUtil.getPhredScoreFromErrorProbability(DEFAULT_PCR_ERROR_RATE); + public final static int 
HALF_OF_DEFAULT_PCR_ERROR_QUAL = DEFAULT_PCR_ERROR_QUAL / 2; + protected final static byte MIN_QUAL_BAD_OVERLAP = 16; private FragmentUtils() {} // private constructor @@ -189,6 +195,70 @@ public final class FragmentUtils { return create(reads, reads.size(), SamRecordGetter); } + public static void adjustQualsOfOverlappingPairedFragments( final List overlappingPair ) { + if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); } + + final GATKSAMRecord firstRead = overlappingPair.get(0); + final GATKSAMRecord secondRead = overlappingPair.get(1); + + if ( secondRead.getSoftStart() < firstRead.getSoftStart() ) { + adjustQualsOfOverlappingPairedFragments(secondRead, firstRead); + } else { + adjustQualsOfOverlappingPairedFragments(firstRead, secondRead); + } + } + + /** + * Adjust, in place, the base qualities of two overlapping reads from the same fragment + * + * firstRead and secondRead must be part of the same fragment (only their read names are checked to match). + * Over the overlapping bases, qualities where the two reads agree are capped at HALF_OF_DEFAULT_PCR_ERROR_QUAL + * and qualities where they disagree are set to 0. Reads that do not overlap are left untouched. + * + * Assumes that firstRead starts before secondRead (according to their soft clipped starts) + * + * @param clippedFirstRead the left most read + * @param clippedSecondRead the right most read + * + * @throws IllegalArgumentException if either read is null or the two reads have different names + */ + public static void adjustQualsOfOverlappingPairedFragments(final GATKSAMRecord clippedFirstRead, final GATKSAMRecord clippedSecondRead) { + if ( clippedFirstRead == null ) throw new IllegalArgumentException("clippedFirstRead cannot be null"); + if ( clippedSecondRead == null ) throw new IllegalArgumentException("clippedSecondRead cannot be null"); + if ( ! 
clippedFirstRead.getReadName().equals(clippedSecondRead.getReadName()) ) throw new IllegalArgumentException("attempting to merge two reads with different names " + clippedFirstRead + " and " + clippedSecondRead); + + // don't adjust fragments that do not overlap + if ( clippedFirstRead.getAlignmentEnd() < clippedSecondRead.getAlignmentStart() || clippedFirstRead.getReferenceIndex() != clippedSecondRead.getReferenceIndex() ) + return; + + final Pair pair = ReadUtils.getReadCoordinateForReferenceCoordinate(clippedFirstRead, clippedSecondRead.getAlignmentStart()); + final int firstReadStop = ( pair.getSecond() ? pair.getFirst() + 1 : pair.getFirst() ); + final int numOverlappingBases = Math.min(clippedFirstRead.getReadLength() - firstReadStop, clippedSecondRead.getReadLength()); + + final byte[] firstReadBases = clippedFirstRead.getReadBases(); + final byte[] firstReadQuals = clippedFirstRead.getBaseQualities(); + final byte[] secondReadBases = clippedSecondRead.getReadBases(); + final byte[] secondReadQuals = clippedSecondRead.getBaseQualities(); + + for ( int i = 0; i < numOverlappingBases; i++ ) { + final int firstReadIndex = firstReadStop + i; + final byte firstReadBase = firstReadBases[firstReadIndex]; + final byte secondReadBase = secondReadBases[i]; + + if ( firstReadBase == secondReadBase ) { + firstReadQuals[firstReadIndex] = (byte) Math.min(firstReadQuals[firstReadIndex], HALF_OF_DEFAULT_PCR_ERROR_QUAL); + secondReadQuals[i] = (byte) Math.min(secondReadQuals[i], HALF_OF_DEFAULT_PCR_ERROR_QUAL); + } else { + // TODO -- use the proper statistical treatment of the quals from DiploidSNPGenotypeLikelihoods.java + firstReadQuals[firstReadIndex] = 0; + secondReadQuals[i] = 0; + } + } + + clippedFirstRead.setBaseQualities(firstReadQuals); + clippedSecondRead.setBaseQualities(secondReadQuals); + } + public static List mergeOverlappingPairedFragments( final List overlappingPair ) { if( overlappingPair.size() != 2 ) { throw new ReviewedStingException("Found overlapping 
pair with " + overlappingPair.size() + " reads, but expecting exactly 2."); } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java index ddc1a4559..a75c9426c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/Log10PairHMM.java @@ -32,6 +32,8 @@ import org.broadinstitute.sting.utils.QualityUtils; import java.util.Arrays; +import static java.lang.Math.log10; + /** * Util class for performing the pair HMM for local alignment. Figure 4.3 in Durbin 1998 book. * @@ -51,6 +53,9 @@ public final class Log10PairHMM extends N2MemoryPairHMM { private static final int matchToDeletion = 4; private static final int deletionToDeletion = 5; + // we divide e by 3 because the observed base could have come from any of the non-observed alleles + protected final static double log10_3 = log10(3.0); + /** * Create an uninitialized PairHMM * @@ -148,7 +153,7 @@ public final class Log10PairHMM extends N2MemoryPairHMM { for (int j = startIndex; j < haplotypeBases.length; j++) { final byte y = haplotypeBases[j]; prior[i+1][j+1] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ? - QualityUtils.qualToProbLog10(qual) : QualityUtils.qualToErrorProbLog10(qual) ); + QualityUtils.qualToProbLog10(qual) : (QualityUtils.qualToErrorProbLog10(qual) - (doNotUseTristateCorrection ? 
0.0 : log10_3)) ); } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java index a091a0716..1b277d3d8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/N2MemoryPairHMM.java @@ -44,6 +44,10 @@ abstract class N2MemoryPairHMM extends PairHMM { protected double[][] insertionMatrix = null; protected double[][] deletionMatrix = null; + // only used for debugging purposes + protected boolean doNotUseTristateCorrection = false; + protected void doNotUseTristateCorrection() { doNotUseTristateCorrection = true; } + /** * Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths * diff --git a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java index 1abf9f836..e730870c6 100644 --- a/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java +++ b/public/java/src/org/broadinstitute/sting/utils/smithwaterman/SWPairwiseAlignment.java @@ -69,10 +69,20 @@ public class SWPairwiseAlignment implements SmithWaterman { * Add softclips for the overhangs */ SOFTCLIP, + /* * Treat the overhangs as proper insertions/deletions */ INDEL, + + /* + * Treat the overhangs as proper insertions/deletions for leading (but not trailing) overhangs. + * This is useful e.g. when we want to merge dangling tails in an assembly graph: because we don't + * expect the dangling tail to reach the end of the reference path we are okay ignoring trailing + * deletions - but leading indels are still very much relevant. 
+ */ + LEADING_INDEL, + /* * Just ignore the overhangs */ @@ -125,10 +135,11 @@ public class SWPairwiseAlignment implements SmithWaterman { * * @param seq1 the first sequence we want to align * @param seq2 the second sequence we want to align + * @param parameters the SW parameters to use * @param strategy the overhang strategy to use */ - public SWPairwiseAlignment(final byte[] seq1, final byte[] seq2, final OVERHANG_STRATEGY strategy) { - this(SWParameterSet.ORIGINAL_DEFAULT.parameters); + public SWPairwiseAlignment(final byte[] seq1, final byte[] seq2, final SWParameterSet parameters, final OVERHANG_STRATEGY strategy) { + this(parameters.parameters); overhang_strategy = strategy; align(seq1, seq2); } @@ -226,7 +237,7 @@ public class SWPairwiseAlignment implements SmithWaterman { final int[] gap_size_h = new int[n+1]; // we need to initialize the SW matrix with gap penalties if we want to keep track of indels at the edges of alignments - if ( overhang_strategy == OVERHANG_STRATEGY.INDEL ) { + if ( overhang_strategy == OVERHANG_STRATEGY.INDEL || overhang_strategy == OVERHANG_STRATEGY.LEADING_INDEL ) { // initialize the first row sw[1] = parameters.w_open; double currentValue = parameters.w_open; @@ -371,7 +382,7 @@ public class SWPairwiseAlignment implements SmithWaterman { p1 = refLength; p2 = altLength; } else { - // look for largest score. we use >= combined with the traversal direction + // look for the largest score on the rightmost column. 
we use >= combined with the traversal direction // to ensure that if two scores are equal, the one closer to diagonal gets picked for ( int i = 1, data_offset = altLength+1+altLength ; i < refLength+1 ; i++, data_offset += (altLength+1) ) { // data_offset is the offset of [i][m] @@ -380,18 +391,21 @@ public class SWPairwiseAlignment implements SmithWaterman { } } - for ( int j = 1, data_offset = refLength*(altLength+1)+1 ; j < altLength+1 ; j++, data_offset++ ) { - // data_offset is the offset of [n][j] - if ( sw[data_offset] > maxscore || sw[data_offset] == maxscore && Math.abs(refLength-j) < Math.abs(p1 - p2)) { - p1 = refLength; - p2 = j ; - maxscore = sw[data_offset]; - segment_length = altLength - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment + // now look for a larger score on the bottom-most row + if ( overhang_strategy != OVERHANG_STRATEGY.LEADING_INDEL ) { + for ( int j = 1, data_offset = refLength*(altLength+1)+1 ; j < altLength+1 ; j++, data_offset++ ) { + // data_offset is the offset of [n][j] + if ( sw[data_offset] > maxscore || sw[data_offset] == maxscore && Math.abs(refLength-j) < Math.abs(p1 - p2)) { + p1 = refLength; + p2 = j ; + maxscore = sw[data_offset]; + segment_length = altLength - j ; // end of sequence 2 is overhanging; we will just record it as 'M' segment + } } } } - List lce = new ArrayList(5); + final List lce = new ArrayList(5); if ( segment_length > 0 && overhang_strategy == OVERHANG_STRATEGY.SOFTCLIP ) { lce.add(makeElement(State.CLIP, segment_length)); @@ -452,7 +466,7 @@ public class SWPairwiseAlignment implements SmithWaterman { } else if ( overhang_strategy == OVERHANG_STRATEGY.IGNORE ) { lce.add(makeElement(state, segment_length + p2)); alignment_offset = p1 - p2; - } else { // overhang_strategy == OVERHANG_STRATEGY.INDEL + } else { // overhang_strategy == OVERHANG_STRATEGY.INDEL || overhang_strategy == OVERHANG_STRATEGY.LEADING_INDEL // take care of the actual alignment 
lce.add(makeElement(state, segment_length)); diff --git a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java index e2e253d0f..93de7c9cf 100644 --- a/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/fragments/FragmentUtilsUnitTest.java @@ -224,7 +224,7 @@ public class FragmentUtilsUnitTest extends BaseTest { } @Test(enabled = !DEBUG, dataProvider = "MergeFragmentsTest") - public void testMergingTwoReads(final String name, final GATKSAMRecord read1, GATKSAMRecord read2, final GATKSAMRecord expectedMerged) { + public void testMergingTwoReads(final String name, final GATKSAMRecord read1, final GATKSAMRecord read2, final GATKSAMRecord expectedMerged) { final GATKSAMRecord actual = FragmentUtils.mergeOverlappingPairedFragments(read1, read2); if ( expectedMerged == null ) { @@ -349,4 +349,42 @@ public class FragmentUtilsUnitTest extends BaseTest { read.setReadGroup(new GATKSAMReadGroupRecord("foo")); return read; } + + + private static final byte highQuality = 30; + private static final byte overlappingQuality = 20; + + @DataProvider(name = "AdjustFragmentsTest") + public Object[][] createAdjustFragmentsTest() throws Exception { + List tests = new ArrayList(); + + final String leftFlank = "CCC"; + final String rightFlank = "AAA"; + final String allOverlappingBases = "ACGTACGTGGAACCTTAG"; + for ( int overlapSize = 1; overlapSize < allOverlappingBases.length(); overlapSize++ ) { + final String overlappingBases = allOverlappingBases.substring(0, overlapSize); + final byte[] overlappingBaseQuals = new byte[overlapSize]; + for ( int i = 0; i < overlapSize; i++ ) overlappingBaseQuals[i] = highQuality; + final GATKSAMRecord read1 = makeOverlappingRead(leftFlank, highQuality, overlappingBases, overlappingBaseQuals, "", highQuality, 1); + final GATKSAMRecord read2 = 
makeOverlappingRead("", highQuality, overlappingBases, overlappingBaseQuals, rightFlank, highQuality, leftFlank.length() + 1); + tests.add(new Object[]{read1, read2, overlapSize}); + } + + return tests.toArray(new Object[][]{}); + } + + @Test(enabled = !DEBUG, dataProvider = "AdjustFragmentsTest") + public void testAdjustingTwoReads(final GATKSAMRecord read1, final GATKSAMRecord read2, final int overlapSize) { + FragmentUtils.adjustQualsOfOverlappingPairedFragments(read1, read2); + + for ( int i = 0; i < read1.getReadLength() - overlapSize; i++ ) + Assert.assertEquals(read1.getBaseQualities()[i], highQuality); + for ( int i = read1.getReadLength() - overlapSize; i < read1.getReadLength(); i++ ) + Assert.assertEquals(read1.getBaseQualities()[i], overlappingQuality); + + for ( int i = 0; i < overlapSize; i++ ) + Assert.assertEquals(read2.getBaseQualities()[i], overlappingQuality); + for ( int i = overlapSize; i < read2.getReadLength(); i++ ) + Assert.assertEquals(read2.getBaseQualities()[i], highQuality); + } } From afcf7b96dbf94991ad19a7b76f54b5c530e4ea2f Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Wed, 10 Jul 2013 08:06:54 -0400 Subject: [PATCH 011/172] - Added per-sample AlleleBiasedDownsampling capability to HaplotypeCaller - Added integration test to show that providing a contamination value and providing same value via a file results in the same VCF - overrode default contamination value in test --- .../haplotypecaller/GenotypingEngine.java | 11 +- .../haplotypecaller/HaplotypeCaller.java | 14 +- .../BiasedDownsamplingIntegrationTest.java | 267 +++++------------- 3 files changed, 79 insertions(+), 213 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java index 04173b64f..82029b872 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java @@ -56,6 +56,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.DefaultHashMap; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.haplotype.EventMap; @@ -166,6 +167,8 @@ public class GenotypingEngine { // Walk along each position in the key set and create each event to be outputted final Set calledHaplotypes = new HashSet<>(); final List returnCalls = new ArrayList<>(); + final Map emptyDownSamplingMap = new DefaultHashMap<>(0.0); + for( final int loc : startPosKeySet ) { if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region final List eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype); @@ -197,13 +200,13 @@ public class GenotypingEngine { logger.info("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles()); } - final Map alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION ); + final Map alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().getSampleContamination() ); final GenotypesContext genotypes = calculateGLsForThisEvent( alleleReadMap, mergedVC ); final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), mergedVC.isSNP() ? 
GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL); if( call != null ) { final Map alleleReadMap_annotations = ( USE_FILTERED_READ_MAP_FOR_ANNOTATIONS ? alleleReadMap : - convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, 0.0 ) ); + convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, emptyDownSamplingMap ) ); final Map stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap_annotations, perSampleFilteredReadList, call ); VariantContext annotatedCall = annotationEngine.annotateContextForActiveRegion(tracker, stratifiedReadMap, call); @@ -406,7 +409,7 @@ public class GenotypingEngine { // BUGBUG: ugh, too complicated protected Map convertHaplotypeReadMapToAlleleReadMap( final Map haplotypeReadMap, final Map> alleleMapper, - final double downsamplingFraction ) { + final Map perSampleDownsamplingFraction ) { final Map alleleReadMap = new LinkedHashMap<>(); for( final Map.Entry haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample @@ -423,7 +426,7 @@ public class GenotypingEngine { perReadAlleleLikelihoodMap.add(readEntry.getKey(), alleleMapperEntry.getKey(), maxLikelihood); } } - perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction); // perform contamination downsampling + perReadAlleleLikelihoodMap.performPerAlleleDownsampling(perSampleDownsamplingFraction.get(haplotypeReadMapEntry.getKey())); // perform contamination downsampling alleleReadMap.put(haplotypeReadMapEntry.getKey(), perReadAlleleLikelihoodMap); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index f18e37480..d96450370 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ 
b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -47,7 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import com.google.java.contract.Ensures; -import net.sf.samtools.*; +import net.sf.samtools.SAMFileWriter; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection; @@ -55,6 +55,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.downsampling.AlleleBiasedDownsamplingUtils; import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.downsampling.DownsamplingUtils; import org.broadinstitute.sting.gatk.filters.BadMateFilter; @@ -70,7 +71,10 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.gatk.walkers.genotyper.afcalc.AFCalcFactory; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.readthreading.ReadThreadingAssembler; -import org.broadinstitute.sting.utils.*; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.QualityUtils; +import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; @@ -552,14 +556,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In simpleUAC.exactCallsLog = null; UG_engine_simple_genotyper = new 
UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); - // Currently, per-sample contamination level is only implemented for UG if( UAC.CONTAMINATION_FRACTION_FILE !=null) { - throw new UserException("Per-Sample contamination level not supported in Haplotype Caller at this point"); + UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger)); } - // when we do implement per-sample contamination for HC, this will probably be needed. - // UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, samples, logger)); - // initialize the output VCF header annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/BiasedDownsamplingIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/BiasedDownsamplingIntegrationTest.java index 77c9f96c9..fd1f0de8a 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/BiasedDownsamplingIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/BiasedDownsamplingIntegrationTest.java @@ -48,49 +48,24 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.WalkerTest; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.collections.Pair; +import org.junit.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.File; import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; import java.util.Random; public class BiasedDownsamplingIntegrationTest extends 
WalkerTest { - private final static String baseCommand1 = "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -minIndelFrac 0.0 --dbsnp " + b36dbSNP129; - private final static String baseCommand2 = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:1,000,000-5,000,000"; - private final static String baseCommand3 = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:4,000,000-5,000,000"; + private final static String baseCommandUG = "-T UnifiedGenotyper -R " + hg19Reference + " --no_cmdline_in_header -glm BOTH -L 20:4,000,000-5,000,000"; + private final static String baseCommandHC = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:4,000,000-5,000,000" + " --useFilteredReadsForAnnotations"; + private final String ArtificalBAMLocation = privateTestDir + "ArtificallyContaminatedBams/"; - // -------------------------------------------------------------------------------------------------------------- - // - // testing UnifiedGenotyper contamination down-sampling - // - // -------------------------------------------------------------------------------------------------------------- - - @Test(enabled = false) - public void testContaminationDownsamplingFlat() { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contamination 0.20", 1, - Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb")); - executeTest("test contamination_percentage_to_filter 0.20", spec); - } - - @Test(enabled = false) - public void testContaminationDownsamplingFlatAndPerSample() { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_per_sample_file " + ArtificalBAMLocation + "NA12878.NA19240.contam.txt 
--contamination_fraction_to_filter 0.10", 1, - Arrays.asList("53395814dd6990448a01a294ccd69bd2")); - executeTest("test contamination_percentage_to_filter per-sample and .20 overall", spec); - } - - @Test(enabled = false) - public void testContaminationDownsamplingPerSampleOnly() { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand1 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contaminationFile " + ArtificalBAMLocation + "NA19240.contam.txt", 1, - Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb")); - executeTest("test contamination_percentage_to_filter per-sample", spec); - } - // -------------------------------------------------------------------------------------------------------------- // @@ -98,150 +73,49 @@ public class BiasedDownsamplingIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - @Test(enabled = false) + @Test private void testDefaultContamination() { final String bam1 = "NA11918.with.1.NA12842.reduced.bam"; final String bam2 = "NA12842.with.1.NA11918.reduced.bam"; WalkerTestSpec spec = new WalkerTestSpec( - baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s ", 1, - Arrays.asList("e2e5a8dd313f8d7e382e7d49dfac59a2")); - executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " with default downsampling.", spec); + baseCommandUG + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination .05 ", 1, + Arrays.asList("b13612312ff991cf40ddc44255e76ecd")); + executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " with .05 downsampling.", spec); } - private void testFlatContamination(final String bam1, final String bam2, final Double downsampling, final String md5) { - WalkerTestSpec spec = new WalkerTestSpec( - 
baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination " + downsampling.toString(), 1, - Arrays.asList(md5)); - executeTest("test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " downsampling " + downsampling.toString(), spec); - } - - @Test(enabled = false) - public void testFlatContaminationCase1() { - testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "e2e5a8dd313f8d7e382e7d49dfac59a2"); - } - - @Test(enabled = false) - public void testFlatContaminationCase2() { - testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "549737002f98775fea8f46e7ea174dde"); - } - - @Test(enabled = false) - public void testFlatContaminationCase3() { - testFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "529d82c2a33fcc303a5dc55de2d56979"); - } - - @Test(enabled = false) - public void testFlatContaminationCase4() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.1, "b5689972fbb7d230a372ee5f0da1c6d7"); - } - - @Test(enabled = false) - public void testFlatContaminationCase5() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.2, "9dceee2e921b53fbc1ce137a7e0b7b74"); - } - - @Test(enabled = false) - public void testFlatContaminationCase6() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.2.NA11918.reduced.bam", 0.3, "d6a74061033503af80dcaea065bfa075"); - } - - @Test(enabled = false) - public void testFlatContaminationCase7() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "7d1b5efab58a1b8f9d99fcf5af82f15a"); - } - - @Test(enabled = false) - public void testFlatContaminationCase8() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", 
"NA12842.with.1.NA11918.reduced.bam", 0.2, "a7f8d5c79626aff59d7f426f79d8816e"); - } - - @Test(enabled = false) - public void testFlatContaminationCase9() { - testFlatContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.3, "fcf482398b7c908e3e2d1e4d5da6377b"); - } - - private void testPerSampleContamination(String bam1, String bam2, String persampleFile, final String md5) { - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand2 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contaminationFile " + persampleFile, 1, - Arrays.asList(md5)); - executeTest("test contamination on Artificial Contamination (per-sample) on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase1() { - testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.1.txt", "e00278527a294833259e9e411728e395"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase2() { - testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.2.txt", "a443e793f0b0e2ffce1b751634d706e2"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase3() { - testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.3.txt", "e11d83a7815ce757afbcf7689568cb25"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase4() { - testPerSampleContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.4.txt", "615042eeeffe042bd1c86279d34f80b6"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase5() { - 
testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.1.txt", "9bc99fc79ca34744bf26cb19ee4ef44d"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase6() { - testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.2.txt", "143626fe5fce765d6c997a64f058a813"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase7() { - testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.3.txt", "f2593674cef894eda4e0be9cf3158f57"); - } - - @Test(enabled = false) - public void testPerSampleContaminationCase8() { - testPerSampleContamination("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.4.txt", "fb7ce0740767ae3896b3e552026da1e4"); - } - - - private void testPerSampleEqualsFlat(final String bam1, final String bam2, final String persampleFile, final Double downsampling, final String md5) { - final String command = baseCommand3 + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s "; - - WalkerTestSpec spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList(md5)); - final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); - - rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result - executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); - - spec = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList(md5)); - - rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result - executeTest("test contamination on Artificial Contamination, with 
flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec); - - } // verify that inputing a file with an effectively flat contamination level is equivalent to handing in a flat contamination level - @Test(enabled = false) - public void testPerSampleEqualsFlatContaminationCase1() { - testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.6.txt", 0.0, ""); + + @DataProvider(name="PerSampleEqualFlatContamBams") + public Object[][] makePerSampleEqualFlatContamBams() { + final List tests = new LinkedList(); + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.6.txt", 0.0}) ; + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.7.txt", 0.15}) ; + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.8.txt", 0.3}) ; + + return tests.toArray(new Object[][]{}); } - @Test(enabled = false) - public void testPerSampleEqualsFlatContaminationCase2() { - testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.7.txt", 0.15, ""); - } + @Test(dataProvider = "PerSampleEqualFlatContamBams") + private void testPerSampleEqualsFlat(final String bam1, final String bam2, final String persampleFile, final Double downsampling) { + final String command = baseCommandUG + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s "; - @Test(enabled = false) - public void testPerSampleEqualsFlatContaminationCase3() { - testPerSampleEqualsFlat("NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.8.txt", 0.3, ""); - } + WalkerTestSpec 
spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, Arrays.asList("")); + final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result + Pair, List> test1 = executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); + + spec = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList("")); + + rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result + Pair, List> test2 = executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec); + + //verify that the md5s match up. + Assert.assertEquals(test1.getSecond().get(0),test2.getSecond().get(0)); + } // -------------------------------------------------------------------------------------------------------------- // @@ -250,50 +124,39 @@ public class BiasedDownsamplingIntegrationTest extends WalkerTest { // -------------------------------------------------------------------------------------------------------------- - @Test(enabled = false) - public void testHCContaminationDownsamplingFlat() { - final String baseCommand = "-T HaplotypeCaller -R " + b36KGReference + " --no_cmdline_in_header --dbsnp " + b36dbSNP129; - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -contamination 0.20", 1, - Arrays.asList("c3a253467ead7b1cfe9fd9dd310828b1")); - executeTest("HC calling with contamination_percentage_to_filter 0.20", spec); - } - // HaplotypeCaller can only (currently) use flat contamination reduction, not per-sample. 
Until that is implemented, this test - @Test(enabled = false) - public void testHCCannotProcessPerSampleContamination() { - final String baseCommand = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:3,000,000-5,000,000"; - final String bam1 = "NA11918.with.1.NA12842.reduced.bam"; - final String perSampleFile = ArtificalBAMLocation + "contamination.case.1.txt"; - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand + " -I " + ArtificalBAMLocation + bam1 + " -o %s -contaminationFile " + perSampleFile, 1, - UserException.class); - executeTest("HC should fail on per-Sample contamination removal.", spec); + @DataProvider(name="PerSampleEqualFlatContamBamsHC") + public Object[][] makePerSampleEqualFlatContamBamsHC() { + final List tests = new LinkedList(); + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.6.txt", 0.0 }) ; + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.7.txt", 0.15}) ; + tests.add(new Object[]{"NA11918.with.2.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", ArtificalBAMLocation + "contamination.case.8.txt", 0.3}) ; + + return tests.toArray(new Object[][]{}); } - private void testHCFlatContamination(final String bam1, final String bam2, final Double downsampling, final String md5) { - final String baseCommand = "-T HaplotypeCaller -R " + hg19Reference + " --no_cmdline_in_header -L 20:3,000,000-5,000,000"; + @Test(dataProvider = "PerSampleEqualFlatContamBamsHC") + private void testPerSampleEqualsFlatHC(final String bam1, final String bam2, final String persampleFile, final Double downsampling) { + final String command = baseCommandHC + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s "; + + WalkerTestSpec spec = new WalkerTestSpec( command +" -contaminationFile " + persampleFile, 1, 
Arrays.asList("")); + final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + + rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result + + Pair, List> test1= executeTest("test contamination on Artificial Contamination, with per-sample file on " + bam1 + " and " + bam2 + " with " + persampleFile, spec); + + WalkerTestSpec spec2 = new WalkerTestSpec(command + "-contamination " + downsampling.toString(), 1, Arrays.asList("")); + + rnd.setSeed(123451); // so that the two test cases have a hope of giving the same result + Pair, List> test2=executeTest("test contamination on Artificial Contamination, with flat contamination on " + bam1 + " and " + bam2 + " with " + downsampling.toString(), spec2); // must run spec2 (flat contamination); re-running spec would compare the per-sample run with itself + + //verify that the md5s match up. + Assert.assertEquals(test1.getSecond().get(0),test2.getSecond().get(0)); - WalkerTestSpec spec = new WalkerTestSpec( - baseCommand + " -I " + ArtificalBAMLocation + bam1 + " -I " + ArtificalBAMLocation + bam2 + " -o %s -contamination " + downsampling.toString(), 1, - Arrays.asList(md5)); - executeTest("HC test contamination on Artificial Contamination (flat) on " + bam1 + " and " + bam2 + " downsampling " + downsampling.toString(), spec); } - @Test(enabled = false) - public void testHCFlatContaminationCase1() { - testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.05, "c3e695381d8627e3922d8c642b66c3ce"); - } - @Test(enabled = false) - public void testHCFlatContaminationCase2() { - testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.1, "002d2b45336d88d7c04e19f9f26e29d9"); - } - @Test(enabled = false) - public void testHCFlatContaminationCase3() { - testHCFlatContamination("NA11918.with.1.NA12842.reduced.bam", "NA12842.with.1.NA11918.reduced.bam", 0.2, "1809a33ac112d1a3bd7a071c566794dd"); - } - -} +} \ No newline at end of file From 8c076143212c03e3c41d1755680ce1db89827681 Mon Sep 17 00:00:00 2001 From: 
Mauricio Carneiro Date: Wed, 10 Jul 2013 16:16:28 -0400 Subject: [PATCH 012/172] QualifyMissingIntervals: support different formats Problem ------- Qualify Missing Intervals only accepted GATK-formatted interval files for its coding sequence and bait parameters. Solution ------- There is no reason for such a limitation, so I erased all the code that did the parsing and used IntervalUtils to parse it (therefore, now it handles any type of interval file that the GATK can handle). ps: Also added an average depth column to the output --- .../walkers/diagnostics/missing/Metrics.java | 1 + .../missing/QualifyMissingIntervals.java | 48 +++++-------------- 2 files changed, 12 insertions(+), 37 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java index 5e3da5f4f..9296cc89b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/Metrics.java @@ -91,6 +91,7 @@ final class Metrics { double gccontent() {return refs > 0 ? gccontent/refs : 0.0;} double baseQual() {return reads > 0 ? baseQual/reads : 0.0;} double mapQual() {return reads > 0 ? mapQual/reads : 0.0;} + double depth() {return refs > 0 ? 
(double) reads/refs : 0.0;} /** * Combines two metrics diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java index d0db3ef98..609f11f97 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java @@ -61,14 +61,11 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.HelpConstants; +import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; -import org.broadinstitute.sting.utils.text.XReadLines; -import java.io.File; -import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.List; @@ -115,10 +112,10 @@ public final class QualifyMissingIntervals extends LocusWalker protected PrintStream out; @Argument(shortName = "targets", required = true) - public File targetsFile; + public String targetsFile; @Argument(shortName = "cds", required = false) - public File cdsFile = null; + public String cdsFile = null; GATKReport simpleReport; GenomeLocSortedSet target; @@ -129,13 +126,14 @@ public final class QualifyMissingIntervals extends LocusWalker } public void initialize() { - simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "TP", "CD", "LN"); + // if cds file is not provided, just use the targets file (no harm done) + if (cdsFile == null) + cdsFile = targetsFile; + + 
simpleReport = GATKReport.newSimpleReport("QualifyMissingIntervals", "IN", "GC", "BQ", "MQ", "DP", "TP", "CD", "LN"); final GenomeLocParser parser = getToolkit().getGenomeLocParser(); - target = new GenomeLocSortedSet(parser); - cds = new GenomeLocSortedSet(parser); - parseFile(targetsFile, target, parser); - if (cdsFile != null) - parseFile(cdsFile, cds, parser); + target = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, targetsFile)); + cds = new GenomeLocSortedSet(parser, IntervalUtils.intervalFileToList(parser, cdsFile)); } public Metrics reduceInit() { @@ -183,6 +181,7 @@ public final class QualifyMissingIntervals extends LocusWalker metrics.gccontent(), metrics.baseQual(), metrics.mapQual(), + metrics.depth(), getPositionInTarget(interval), cds.overlaps(interval), interval.size() @@ -192,31 +191,6 @@ public final class QualifyMissingIntervals extends LocusWalker out.close(); } - private static GenomeLoc parseInterval(String s, GenomeLocParser parser) { - if (s.isEmpty()) { - return null; - } - String[] first = s.split(":"); - if (first.length == 2) { - String[] second = first[1].split("\\-"); - return parser.createGenomeLoc(first[0], Integer.decode(second[0]), Integer.decode(second[1])); - } else { - throw new UserException.BadInput("Interval doesn't parse correctly: " + s); - } - } - - private void parseFile(File file, GenomeLocSortedSet set, GenomeLocParser parser) { - try { - for (String s : new XReadLines(file) ) { - GenomeLoc interval = parseInterval(s, parser); - if (interval != null) - set.add(interval, true); - } - } catch (FileNotFoundException e) { - e.printStackTrace(); - } - } - private int getPositionInTarget(GenomeLoc interval) { final List hits = target.getOverlapping(interval); int result = 0; From 5d198d340021c87e17520b76c1adb2f989040494 Mon Sep 17 00:00:00 2001 From: Scott Thibault Date: Mon, 15 Jul 2013 10:16:39 -0500 Subject: [PATCH 014/172] Added write to likelihoods.txt for batch hmm --- 
.../LikelihoodCalculationEngine.java | 38 ++++++++++++++++--- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index e6fa64e52..e44aedd66 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -290,12 +290,39 @@ public class LikelihoodCalculationEngine { final double[] likelihoods = batchPairHMM.batchGetResult(); for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { final Haplotype haplotype = haplotypes.get(jjj); - if ( haplotype.isNonReference() ) - bestNonReflog10L = Math.max(bestNonReflog10L, likelihoods[jjj]); - else - refLog10l = likelihoods[jjj]; + final double log10l = likelihoods[jjj]; - perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), likelihoods[jjj]); + if ( WRITE_LIKELIHOODS_TO_FILE ) { + final byte[] overallGCP = new byte[read.getReadLength()]; + Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data? + // NOTE -- must clone anything that gets modified here so we don't screw up future uses of the read + final byte[] readQuals = read.getBaseQualities().clone(); + final byte[] readInsQuals = read.getBaseInsertionQualities(); + final byte[] readDelQuals = read.getBaseDeletionQualities(); + for( int kkk = 0; kkk < readQuals.length; kkk++ ) { + readQuals[kkk] = (byte) Math.min( 0xff & readQuals[kkk], read.getMappingQuality()); // cap base quality by mapping quality, as in UG + //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? 
readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated + //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated + // TODO -- why is Q18 hard-coded here??? + readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); + } + likelihoodsStream.printf("%s %s %s %s %s %s %f%n", + haplotype.getBaseString(), + new String(read.getReadBases()), + SAMUtils.phredToFastq(readQuals), + SAMUtils.phredToFastq(readInsQuals), + SAMUtils.phredToFastq(readDelQuals), + SAMUtils.phredToFastq(overallGCP), + log10l); + } + + if ( haplotype.isNonReference() ) + bestNonReflog10L = Math.max(bestNonReflog10L, log10l); + else + refLog10l = log10l; + + + perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), log10l); } final double worstRefLog10Allowed = bestNonReflog10L + log10globalReadMismappingRate; @@ -304,6 +331,7 @@ public class LikelihoodCalculationEngine { } } } + return perReadAlleleLikelihoodMap; } From c15751e41eecc9bdf7d2d4da4f39b95870ae27e8 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 15 Jul 2013 13:28:07 -0400 Subject: [PATCH 016/172] SAMReaderID: fix bug with hash code and equals() method -Two SAMReaderIDs that pointed at the same underlying bam file through a relative vs. an absolute path were not being treated as equal, and had different hash codes. This was causing problems in the engine, since SAMReaderIDs are often used as the keys of HashMaps. 
-Fix: explicitly use the absolute path to the encapsulated bam file in hashCode() and equals() -Added tests to ensure this doesn't break again --- .../sting/gatk/GenomeAnalysisEngine.java | 4 +- .../gatk/datasources/reads/SAMReaderID.java | 4 +- .../gatk/GenomeAnalysisEngineUnitTest.java | 18 ++++++- .../reads/SAMReaderIDUnitTest.java | 49 +++++++++++++++++++ 4 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderIDUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index c4f1a286d..f01e8ad62 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -530,8 +530,8 @@ public class GenomeAnalysisEngine { } if ( duplicateSamFiles.size() > 0 ) { - throw new ArgumentException("The following BAM files appear multiple times in the list of input files: " + - duplicateSamFiles + " BAM files may be specified at most once."); + throw new UserException("The following BAM files appear multiple times in the list of input files: " + + duplicateSamFiles + " BAM files may be specified at most once."); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java index 7efab5fb0..72c037707 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderID.java @@ -98,7 +98,7 @@ public class SAMReaderID implements Comparable { if(!(other instanceof SAMReaderID)) return false; SAMReaderID otherID = (SAMReaderID)other; - return this.samFile.equals(otherID.samFile); + return this.getSamFilePath().equals(otherID.getSamFilePath()); } /** @@ -107,7 +107,7 
@@ public class SAMReaderID implements Comparable { */ @Override public int hashCode() { - return samFile.hashCode(); + return samFile.getAbsolutePath().hashCode(); } /** diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index 3f74e0eae..c3bc78551 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -52,7 +52,7 @@ import java.util.List; */ public class GenomeAnalysisEngineUnitTest extends BaseTest { - @Test(expectedExceptions=ArgumentException.class) + @Test(expectedExceptions=UserException.class) public void testDuplicateSamFileHandlingSingleDuplicate() throws Exception { GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); @@ -64,7 +64,7 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { testEngine.checkForDuplicateSamFiles(); } - @Test(expectedExceptions=ArgumentException.class) + @Test(expectedExceptions=UserException.class) public void testDuplicateSamFileHandlingMultipleDuplicates() throws Exception { GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); @@ -78,6 +78,20 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { testEngine.checkForDuplicateSamFiles(); } + @Test(expectedExceptions=UserException.class) + public void testDuplicateSamFileHandlingAbsoluteVsRelativePath() { + GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); + + final File relativePathToBAMFile = new File("public/testdata/exampleBAM.bam"); + final File absolutePathToBAMFile = new File(relativePathToBAMFile.getAbsolutePath()); + Collection samFiles = new ArrayList(); + samFiles.add(new SAMReaderID(relativePathToBAMFile, new Tags())); + samFiles.add(new SAMReaderID(absolutePathToBAMFile, new Tags())); + + testEngine.setSAMFileIDs(samFiles); + testEngine.checkForDuplicateSamFiles(); + } 
+ @Test public void testEmptyIntervalSetHandling() throws Exception { GenomeLocParser genomeLocParser = new GenomeLocParser(ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000).getSequenceDictionary()); diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderIDUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderIDUnitTest.java new file mode 100644 index 000000000..a594573e5 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMReaderIDUnitTest.java @@ -0,0 +1,49 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.gatk.datasources.reads; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.commandline.Tags; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + +public class SAMReaderIDUnitTest extends BaseTest { + + @Test + public void testSAMReaderIDHashingAndEquality() { + // Test to make sure that two SAMReaderIDs that point at the same file via an absolute vs. relative + // path are equal according to equals() and have the same hash code + final File relativePathToBAMFile = new File("public/testdata/exampleBAM.bam"); + final File absolutePathToBAMFile = new File(relativePathToBAMFile.getAbsolutePath()); + final SAMReaderID relativePathSAMReaderID = new SAMReaderID(relativePathToBAMFile, new Tags()); + final SAMReaderID absolutePathSAMReaderID = new SAMReaderID(absolutePathToBAMFile, new Tags()); + + Assert.assertEquals(relativePathSAMReaderID, absolutePathSAMReaderID, "Absolute-path and relative-path SAMReaderIDs not equal according to equals()"); + Assert.assertEquals(relativePathSAMReaderID.hashCode(), absolutePathSAMReaderID.hashCode(), "Absolute-path and relative-path SAMReaderIDs have different hash codes"); + } +} From 9dd109b79ad95fb863e419d0d6f6634a13240a15 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 16 Jul 2013 11:14:48 -0400 Subject: [PATCH 019/172] Last feature request from Reich/Paavo labs: the allSitePLs feature in UG worked but not quite filled requirements. What's needed is the ability to have all 10 PLs for EVERY site, regardless of whether they are variant or not. Previous version only emitted the 10 PLs in reference sites. Problem is that, if all PLs are emitted in all sites and every single site is quad-allelic (only way to have the PLs printed out in a valid way) then the ability to filter variants and to use the INFO fields may be compromised. 
So, compromise solution is to go back to having biallelic PLs but emit a new FORMAT field, called APL, which has the 10 values, but all other statistics and regular PLs are computed as before. Note that integration test had to be disabled, as the BCF2 codec apparently doesn't support writing into genotype fields other than PL,DP,AD,GQ,FT and GT. --- .../SNPGenotypeLikelihoodsCalculationModel.java | 9 ++------- .../sting/gatk/walkers/genotyper/UnifiedGenotyper.java | 3 +++ .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 1 + .../genotyper/UnifiedGenotyperIntegrationTest.java | 5 ++++- 4 files changed, 10 insertions(+), 8 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 360f88e51..f94baf09f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -147,13 +147,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC // if we only want variants, then we don't need to calculate genotype likelihoods if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) return builder.make(); - // if user requires all PLs at all sites, add all possible alt alleles - else if (UAC.annotateAllSitesWithPLs) { - for ( final byte base : BaseUtils.BASES ) { - if ( base != refBase ) - alleles.add(Allele.create(base)); - } - } else // otherwise, choose any alternate allele (it doesn't really matter) @@ -199,6 +192,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC final double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(myLikelihoods, false, true); gb.PL(genotypeLikelihoods); gb.DP(sampleData.depth); + if 
(UAC.annotateAllSitesWithPLs) + gb.attribute(UnifiedGenotyperEngine.PL_FOR_ALL_SNP_ALLELES_KEY,GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(allLikelihoods, false, true))); genotypes.add(gb.make()); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 54fcad1df..82b93aa55 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -318,6 +318,9 @@ public class UnifiedGenotyper extends LocusWalker, Unif headerInfo.add(new VCFInfoHeaderLine(VCFConstants.REFSAMPLE_DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Total reference sample depth")); } + if (UAC.annotateAllSitesWithPLs) { + headerInfo.add(new VCFFormatHeaderLine(UnifiedGenotyperEngine.PL_FOR_ALL_SNP_ALLELES_KEY, 10, VCFHeaderLineType.Integer, "Phred-scaled genotype likelihoods for all 4 possible bases regardless of whether there is statistical evidence for them. 
Ordering is always PL for AA AC CC GA GC GG TA TC TG TT.")); + } VCFStandardHeaderLines.addStandardInfoLines(headerInfo, true, VCFConstants.DOWNSAMPLED_KEY, VCFConstants.MLE_ALLELE_COUNT_KEY, diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 9f3368cf8..ec31e1f2f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -79,6 +79,7 @@ public class UnifiedGenotyperEngine { private static final String GPSTRING = "GENERALPLOIDY"; public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA"; + public static final String PL_FOR_ALL_SNP_ALLELES_KEY = "APL"; public static final double HUMAN_SNP_HETEROZYGOSITY = 1e-3; public static final double HUMAN_INDEL_HETEROZYGOSITY = 1e-4; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index ded8189b3..d64ceb953 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -164,7 +164,10 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void emitPLsAtAllSites() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --output_mode EMIT_ALL_SITES -allSitePLs", 1, - Arrays.asList("7cc55db8693759e059a05bc4398f6f69")); + Arrays.asList("552aced1b1ef7e4a554223f4719f9560")); + // GDA: TODO: BCF encoder/decoder doesn't seem to 
support non-standard values in genotype fields. IE even if there is a field defined in FORMAT and in the header the BCF2 encoder will still fail + spec1.disableShadowBCF(); + executeTest("test all site PLs 1", spec1); } From ba531bd5e6a32db7197b5c22205ffe46410d9942 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 24 Jun 2013 14:06:21 -0400 Subject: [PATCH 020/172] Fixing the 'header is negative' problem in Reduce Reads... again. Previous fixes and tests only covered trailing soft-clips. Now that up front hard-clipping is working properly though, we were failing on those in the tool. Added a patch for this as well as a separate test independent of the soft-clips to make sure that it's working properly. --- .../reducereads/SlidingWindowUnitTest.java | 17 +++++++++++++++-- .../sting/utils/sam/ReadUtils.java | 6 +++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java index c9bb2f084..bf45fc298 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java @@ -94,7 +94,7 @@ public class SlidingWindowUnitTest extends BaseTest { ////////////////////////////////////////////////////////////////////////////////////// @Test(enabled = true) - public void testLeadingClipThenInsertion() { + public void testLeadingSoftClipThenInsertion() { final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 10); read.setReadBases(Utils.dupBytes((byte) 'A', 10)); @@ -104,8 +104,21 @@ public class SlidingWindowUnitTest extends BaseTest { final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 1); slidingWindow.addRead(read); - Pair, CompressionStash> 
result = slidingWindow.close(null); + slidingWindow.close(null); + } + @Test(enabled = true) + public void testLeadingHardClipThenInsertion() { + + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 1, 8); + read.setReadBases(Utils.dupBytes((byte) 'A', 8)); + read.setBaseQualities(Utils.dupBytes((byte)30, 8)); + read.setMappingQuality(30); + read.setCigarString("2H2I6M"); + + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + slidingWindow.addRead(read); + slidingWindow.close(null); } ////////////////////////////////////////////////////////////////////////////////////// diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java index cf1c9cb8e..f9393cc4b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ReadUtils.java @@ -613,15 +613,15 @@ public class ReadUtils { * Checks if a read starts with an insertion. * * @param cigarForRead the CIGAR to evaluate - * @param ignoreClipOps should we ignore S and H operators when evaluating whether an I operator is at the beginning? + * @param ignoreSoftClipOps should we ignore S operators when evaluating whether an I operator is at the beginning? Note that H operators are always ignored. 
* @return the element if it's a leading insertion or null otherwise */ - public static CigarElement readStartsWithInsertion(final Cigar cigarForRead, final boolean ignoreClipOps) { + public static CigarElement readStartsWithInsertion(final Cigar cigarForRead, final boolean ignoreSoftClipOps) { for ( final CigarElement cigarElement : cigarForRead.getCigarElements() ) { if ( cigarElement.getOperator() == CigarOperator.INSERTION ) return cigarElement; - else if ( !ignoreClipOps || (cigarElement.getOperator() != CigarOperator.HARD_CLIP && cigarElement.getOperator() != CigarOperator.SOFT_CLIP) ) + else if ( cigarElement.getOperator() != CigarOperator.HARD_CLIP && ( !ignoreSoftClipOps || cigarElement.getOperator() != CigarOperator.SOFT_CLIP) ) break; } return null; From 605a5ac2e329055c974ad3ee6517ed5c68614379 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Wed, 3 Jul 2013 12:41:01 -0400 Subject: [PATCH 022/172] GATK engine: add ability to do on-the-fly BAM file sample renaming at runtime -User must provide a mapping file via new --sample_rename_mapping_file argument. Mapping file must contain a mapping from absolute bam file path to new sample name (format is described in the docs for the argument). -Requires that each bam file listed in the mapping file contain only one sample in their headers (they may contain multiple read groups for that sample, however). The engine enforces this, and throws a UserException if on-the-fly renaming is requested for a multi-sample bam. -Not all bam files for a traversal need to be listed in the mapping file. -On-the-fly renaming is done as the VERY first step after creating the SAMFileReaders in SAMDataSource (before the headers are even merged), to prevent possible consistency issues. 
-Renaming is done ONCE at traversal start for each SAMReaders resource creation in the SAMResourcePool; this effectively means once per -nt thread -Comprehensive unit/integration tests Known issues: -if you specify the absolute path to a bam in the mapping file, and then provide a path to that same bam to -I using SYMLINKS, the renaming won't work. The absolute paths will look different to the engine due to the symlink being present in one path and not in the other path. GSA-974 #resolve --- .../sting/gatk/GenomeAnalysisEngine.java | 62 ++++- .../arguments/GATKArgumentCollection.java | 9 + .../gatk/datasources/reads/SAMDataSource.java | 83 +++++- .../gatk/EngineFeaturesIntegrationTest.java | 244 +++++++++++++++++- .../gatk/GenomeAnalysisEngineUnitTest.java | 65 ++++- .../sting/gatk/ReadMetricsUnitTest.java | 8 +- .../reads/SAMDataSourceUnitTest.java | 6 +- .../TraverseActiveRegionsUnitTest.java | 2 +- 8 files changed, 459 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index f01e8ad62..27b030060 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -62,9 +62,11 @@ import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; import org.broadinstitute.sting.utils.recalibration.BQSRArgumentSet; +import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor; import java.io.File; +import java.io.FileNotFoundException; import java.util.*; import java.util.concurrent.TimeUnit; @@ -854,6 +856,10 @@ public class GenomeAnalysisEngine { final boolean keepReadsInLIBS = walker instanceof ActiveRegionWalker; + final Map sampleRenameMap 
= argCollection.sampleRenameMappingFile != null ? + loadSampleRenameMap(argCollection.sampleRenameMappingFile) : + null; + return new SAMDataSource( samReaderIDs, threadAllocation, @@ -869,9 +875,63 @@ public class GenomeAnalysisEngine { includeReadsWithDeletionAtLoci(), argCollection.defaultBaseQualities, removeProgramRecords, - keepReadsInLIBS); + keepReadsInLIBS, + sampleRenameMap); } + /** + * Loads a user-provided sample rename map file for use in on-the-fly sample renaming into an in-memory + * HashMap. This file must consist of lines with two whitespace-separated fields: + * + * absolute_path_to_bam_file new_sample_name + * + * The engine will verify that each bam file contains reads from only one sample when the on-the-fly sample + * renaming feature is being used. + * + * @param sampleRenameMapFile sample rename map file from which to load data + * @return a HashMap containing the contents of the map file, with the keys being the bam file paths and + * the values being the new sample names. + */ + protected Map loadSampleRenameMap( final File sampleRenameMapFile ) { + logger.info("Renaming samples from BAM files on-the-fly using mapping file " + sampleRenameMapFile.getAbsolutePath()); + + final Map sampleRenameMap = new HashMap<>((int)sampleRenameMapFile.length() / 50); + + try { + for ( final String line : new XReadLines(sampleRenameMapFile) ) { + final String[] tokens = line.split("\\s+"); + + if ( tokens.length != 2 ) { + throw new UserException.MalformedFile(sampleRenameMapFile, + String.format("Encountered a line with %s fields instead of the required 2 fields. Line was: %s", + tokens.length, line)); + } + + final File bamFile = new File(tokens[0]); + final String newSampleName = tokens[1]; + + if ( ! 
bamFile.isAbsolute() ) { + throw new UserException.MalformedFile(sampleRenameMapFile, "Bam file path not absolute at line: " + line); + } + + final SAMReaderID bamID = new SAMReaderID(bamFile, new Tags()); + + if ( sampleRenameMap.containsKey(bamID) ) { + throw new UserException.MalformedFile(sampleRenameMapFile, + String.format("Bam file %s appears more than once", bamFile.getAbsolutePath())); + } + + sampleRenameMap.put(bamID, newSampleName); + } + } + catch ( FileNotFoundException e ) { + throw new UserException.CouldNotReadInputFile(sampleRenameMapFile, e); + } + + return sampleRenameMap; + } + + /** * Opens a reference sequence file paired with an index. Only public for testing purposes * diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index b38f0fc0b..509b875bb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -281,6 +281,15 @@ public class GATKArgumentCollection { @Argument(fullName = "keep_program_records", shortName = "kpr", doc = "Should we override the Walker's default and keep program records from the SAM header", required = false) public boolean keepProgramRecords = false; + @Advanced + @Argument(fullName = "sample_rename_mapping_file", shortName = "sample_rename_mapping_file", + doc = "Rename sample IDs on-the-fly at runtime using the provided mapping file. This option requires that " + + "each BAM file listed in the mapping file have only a single sample specified in its header (though there " + + "may be multiple read groups for that sample). 
Each line of the mapping file must contain the absolute path " + + "to a BAM file, followed by whitespace, followed by the new sample name for that BAM file.", + required = false) + public File sampleRenameMappingFile = null; + @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) public ValidationExclusion.TYPE unsafe; diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index a36667ec4..ac2ed4a4c 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -31,6 +31,7 @@ import net.sf.samtools.*; import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.RuntimeIOException; import org.apache.log4j.Logger; +import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.ReadMetrics; import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; @@ -47,8 +48,10 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; +import org.broadinstitute.sting.utils.text.XReadLines; import java.io.File; +import java.io.FileNotFoundException; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.*; @@ -131,6 +134,11 @@ public class SAMDataSource { */ private final Map originalToMergedReadGroupMappings = new HashMap(); + /** + * Mapping from bam file ID to new sample name. 
Used only when doing on-the-fly sample renaming. + */ + private Map sampleRenameMap = null; + /** our log, which we want to capture anything from this class */ private static Logger logger = Logger.getLogger(SAMDataSource.class); @@ -202,7 +210,8 @@ public class SAMDataSource { includeReadsWithDeletionAtLoci, (byte) -1, false, - false); + false, + null); } /** @@ -219,6 +228,8 @@ public class SAMDataSource { * bases will be seen in the pileups, and the deletions will be skipped silently. * @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality. * @param keepReadsInLIBS should we keep a unique list of reads in LIBS? + * @param sampleRenameMap Map of BAM file to new sample ID used during on-the-fly runtime sample renaming. + * Will be null if we're not doing sample renaming. */ public SAMDataSource( Collection samFiles, @@ -235,7 +246,9 @@ public class SAMDataSource { boolean includeReadsWithDeletionAtLoci, byte defaultBaseQualities, boolean removeProgramRecords, - final boolean keepReadsInLIBS) { + final boolean keepReadsInLIBS, + final Map sampleRenameMap) { + this.readMetrics = new ReadMetrics(); this.genomeLocParser = genomeLocParser; @@ -261,6 +274,8 @@ public class SAMDataSource { ReadShard.setReadBufferSize(100000); } + this.sampleRenameMap = sampleRenameMap; + resourcePool = new SAMResourcePool(Integer.MAX_VALUE); SAMReaders readers = resourcePool.getAvailableReaders(); @@ -825,8 +840,31 @@ public class SAMDataSource { if ( totalNumberOfFiles > 0 ) logger.info(String.format("Done initializing BAM readers: total time %.2f", timer.getElapsedTime())); Collection headers = new LinkedList(); - for(SAMFileReader reader: readers.values()) - headers.add(reader.getFileHeader()); + + // Examine the bam headers, perform any requested sample renaming on them, and add + // them to the list of headers to pass to the Picard SamFileHeaderMerger: + for ( final Map.Entry readerEntry : readers.entrySet() ) { + final 
SAMReaderID readerID = readerEntry.getKey(); + final SAMFileReader reader = readerEntry.getValue(); + final SAMFileHeader header = reader.getFileHeader(); + + // The remappedSampleName will be null if either no on-the-fly sample renaming was requested, + // or the user's sample rename map file didn't contain an entry for this bam file: + final String remappedSampleName = sampleRenameMap != null ? sampleRenameMap.get(readerID) : null; + + // If we've been asked to rename the sample for this bam file, do so now. We'll check to + // make sure this bam only contains reads from one sample before proceeding. + // + // IMPORTANT: relies on the fact that the Picard SamFileHeaderMerger makes a copy of + // the existing read group attributes (including sample name) when merging + // headers, regardless of whether there are read group collisions or not. + if ( remappedSampleName != null ) { + remapSampleName(readerID, header, remappedSampleName); + } + + headers.add(header); + } + headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,headers,true); // update all read groups to GATKSAMRecordReadGroups @@ -837,6 +875,43 @@ public class SAMDataSource { headerMerger.getMergedHeader().setReadGroups(gatkReadGroups); } + /** + * Changes the sample name in the read groups for the provided bam file header to match the + * remappedSampleName. Blows up with a UserException if the header contains more than one + * sample name. + * + * @param readerID ID for the bam file from which the provided header came + * @param header The bam file header. Will be modified by this call. + * @param remappedSampleName New sample name to replace the existing sample attribute in the + * read groups for the header. 
+ */ + private void remapSampleName( final SAMReaderID readerID, final SAMFileHeader header, final String remappedSampleName ) { + String firstEncounteredSample = null; + + for ( final SAMReadGroupRecord readGroup : header.getReadGroups() ) { + final String thisReadGroupSample = readGroup.getSample(); + + if ( thisReadGroupSample == null ) { + throw new UserException(String.format("On-the fly sample renaming was requested for bam file %s, however this " + + "bam file contains a read group (id: %s) with a null sample attribute", + readerID.getSamFilePath(), readGroup.getId())); + } + else if ( firstEncounteredSample == null ) { + firstEncounteredSample = thisReadGroupSample; + } + else if ( ! firstEncounteredSample.equals(thisReadGroupSample) ) { + throw new UserException(String.format("On-the-fly sample renaming was requested for bam file %s, " + + "however this bam file contains reads from more than one sample " + + "(encountered samples %s and %s in the bam header). The GATK requires that " + + "all bams for which on-the-fly sample renaming is requested " + + "contain reads from only a single sample per bam.", + readerID.getSamFilePath(), firstEncounteredSample, thisReadGroupSample)); + } + + readGroup.setSample(remappedSampleName); + } + } + final private void printReaderPerformance(final int nExecutedTotal, final int nExecutedInTick, final int totalNumberOfFiles, diff --git a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java index aca6cf984..c65d62149 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/EngineFeaturesIntegrationTest.java @@ -26,10 +26,13 @@ package org.broadinstitute.sting.gatk; import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMRecord; import 
org.broad.tribble.readers.AsciiLineReader; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Output; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.filters.MappingQualityUnavailableFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -45,13 +48,12 @@ import org.broadinstitute.variant.vcf.VCFCodec; import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.testng.Assert; +import org.testng.TestException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File; -import java.io.FileInputStream; -import java.io.PrintStream; -import java.util.Arrays; +import java.io.*; +import java.util.*; /** * @@ -278,6 +280,12 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { executeTest("testDefaultBaseQualitiesNoneProvided", testDefaultBaseQualities(null, "")); } + // -------------------------------------------------------------------------------- + // + // Test engine-level cigar consolidation + // + // -------------------------------------------------------------------------------- + @Test public void testGATKEngineConsolidatesCigars() { final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" + @@ -297,4 +305,232 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { // Original cigar was 0M3M0M8M. 
Check that it's been consolidated after running through the GATK engine: Assert.assertEquals(read.getCigarString(), "11M", "Cigar 0M3M0M8M not consolidated correctly by the engine"); } + + // -------------------------------------------------------------------------------- + // + // Test on-the-fly sample renaming + // + // -------------------------------------------------------------------------------- + + // On-the-fly sample renaming test case: one single-sample bam with multiple read groups + @Test + public void testOnTheFlySampleRenamingWithSingleBamFile() throws IOException { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam myNewSampleName")); + + final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" + + " -R " + b37KGReference + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " -o %s", + 1, Arrays.asList("")); // No MD5s; we only want to check the read groups + + final File outputBam = executeTest("testOnTheFlySampleRenamingWithSingleBamFile", spec).first.get(0); + final SAMFileReader reader = new SAMFileReader(outputBam); + + for ( final SAMReadGroupRecord readGroup : reader.getFileHeader().getReadGroups() ) { + Assert.assertEquals(readGroup.getSample(), "myNewSampleName", String.format("Sample for read group %s not renamed correctly", readGroup.getId())); + } + + reader.close(); + } + + // On-the-fly sample renaming test case: three single-sample bams with multiple read groups per bam + @Test + public void testOnTheFlySampleRenamingWithMultipleBamFiles() throws IOException { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam newSampleFor12878", + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12891.HEADERONLY.bam newSampleFor12891", + privateTestDir + 
"CEUTrio.HiSeq.WGS.b37.NA12892.HEADERONLY.bam newSampleFor12892")); + + final Map readGroupToNewSampleMap = new HashMap<>(); + for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) { + final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID)); + final SAMFileReader inputBamReader = new SAMFileReader(inputBam); + final String newSampleName = String.format("newSampleFor%s", inputBamID); + for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { + readGroupToNewSampleMap.put(readGroup.getId(), newSampleName); + } + inputBamReader.close(); + } + + final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" + + " -R " + b37KGReference + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam" + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12891.HEADERONLY.bam" + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12892.HEADERONLY.bam" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " -o %s", + 1, Arrays.asList("")); // No MD5s; we only want to check the read groups + + final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFiles", spec).first.get(0); + final SAMFileReader outputBamReader = new SAMFileReader(outputBam); + + int totalReadGroupsSeen = 0; + for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { + Assert.assertEquals(readGroup.getSample(), readGroupToNewSampleMap.get(readGroup.getId()), + String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId())); + totalReadGroupsSeen++; + } + + Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file"); + + outputBamReader.close(); + } + + // On-the-fly sample renaming test case: three single-sample bams with multiple read groups per bam, + // performing renaming in only SOME of 
the bams + @Test + public void testOnTheFlySampleRenamingWithMultipleBamFilesPartialRename() throws IOException { + // Rename samples for NA12878 and NA12892, but not for NA12891 + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam newSampleFor12878", + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12892.HEADERONLY.bam newSampleFor12892")); + + final Map readGroupToNewSampleMap = new HashMap<>(); + for ( String inputBamID : Arrays.asList("12878", "12891", "12892") ) { + final File inputBam = new File(privateTestDir + String.format("CEUTrio.HiSeq.WGS.b37.NA%s.HEADERONLY.bam", inputBamID)); + final SAMFileReader inputBamReader = new SAMFileReader(inputBam); + + // Special-case NA12891, which we're not renaming: + final String newSampleName = inputBamID.equals("12891") ? "NA12891" : String.format("newSampleFor%s", inputBamID); + + for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { + readGroupToNewSampleMap.put(readGroup.getId(), newSampleName); + } + inputBamReader.close(); + } + + final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" + + " -R " + b37KGReference + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam" + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12891.HEADERONLY.bam" + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12892.HEADERONLY.bam" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " -o %s", + 1, Arrays.asList("")); // No MD5s; we only want to check the read groups + + final File outputBam = executeTest("testOnTheFlySampleRenamingWithMultipleBamFilesPartialRename", spec).first.get(0); + final SAMFileReader outputBamReader = new SAMFileReader(outputBam); + + int totalReadGroupsSeen = 0; + for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { + Assert.assertEquals(readGroup.getSample(), 
readGroupToNewSampleMap.get(readGroup.getId()), + String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId())); + totalReadGroupsSeen++; + } + + Assert.assertEquals(totalReadGroupsSeen, readGroupToNewSampleMap.size(), "Wrong number of read groups encountered in output bam file"); + + outputBamReader.close(); + } + + // On-the-fly sample renaming test case: two single-sample bams with read group collisions + @Test + public void testOnTheFlySampleRenamingWithReadGroupCollisions() throws IOException { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam newSampleFor12878", + privateTestDir + "CEUTrio.HiSeq.WGS.b37.READ_GROUP_COLLISIONS_WITH_NA12878.HEADERONLY.bam newSampleForNot12878")); + + final Set na12878ReadGroups = new HashSet<>(); + final SAMFileReader inputBamReader = new SAMFileReader(new File(privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam")); + for ( final SAMReadGroupRecord readGroup : inputBamReader.getFileHeader().getReadGroups() ) { + na12878ReadGroups.add(readGroup.getId()); + } + inputBamReader.close(); + + final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" + + " -R " + b37KGReference + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.NA12878.HEADERONLY.bam" + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.READ_GROUP_COLLISIONS_WITH_NA12878.HEADERONLY.bam" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " -o %s", + 1, Arrays.asList("")); // No MD5s; we only want to check the read groups + + final File outputBam = executeTest("testOnTheFlySampleRenamingWithReadGroupCollisions", spec).first.get(0); + final SAMFileReader outputBamReader = new SAMFileReader(outputBam); + + int totalReadGroupsSeen = 0; + for ( final SAMReadGroupRecord readGroup : outputBamReader.getFileHeader().getReadGroups() ) { + String expectedSampleName = ""; + if ( 
na12878ReadGroups.contains(readGroup.getId()) ) { + expectedSampleName = "newSampleFor12878"; + } + else { + expectedSampleName = "newSampleForNot12878"; + } + + Assert.assertEquals(readGroup.getSample(), expectedSampleName, + String.format("Wrong sample for read group %s after on-the-fly renaming", readGroup.getId())); + totalReadGroupsSeen++; + } + + Assert.assertEquals(totalReadGroupsSeen, na12878ReadGroups.size() * 2, "Wrong number of read groups encountered in output bam file"); + + outputBamReader.close(); + } + + // On-the-fly sample renaming test case: a multi-sample bam (this should generate a UserException) + @Test + public void testOnTheFlySampleRenamingWithMultiSampleBam() throws IOException { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "CEUTrio.HiSeq.WGS.b37.MERGED.HEADERONLY.bam myNewSampleName")); + + final WalkerTestSpec spec = new WalkerTestSpec(" -T PrintReads" + + " -R " + b37KGReference + + " -I " + privateTestDir + "CEUTrio.HiSeq.WGS.b37.MERGED.HEADERONLY.bam" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " -o %s", + 1, + UserException.class); // expecting a UserException here + + executeTest("testOnTheFlySampleRenamingWithMultiSampleBam", spec); + } + + // On-the-fly sample renaming test case: ensure that walkers can see the remapped sample names in individual reads + @Test + public void testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInReads() throws IOException { + final File sampleRenameMapFile = createTestSampleRenameMapFile( + Arrays.asList(privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam myNewSampleName")); + + final WalkerTestSpec spec = new WalkerTestSpec(" -T OnTheFlySampleRenamingVerifyingTestWalker" + + " -R " + b37KGReference + + " -I " + privateTestDir + "NA12878.HiSeq.b37.chr20.10_11mb.bam" + + " --sample_rename_mapping_file " + sampleRenameMapFile.getAbsolutePath() + + " --newSampleName myNewSampleName" + + " -L 
20:10000000-10001000", + 1, Arrays.asList("")); + + // Test is a success if our custom walker doesn't throw an exception + executeTest("testOnTheFlySampleRenamingVerifyWalkerSeesNewSamplesInReads", spec); + } + + private File createTestSampleRenameMapFile( final List contents ) throws IOException { + final File mapFile = createTempFile("TestSampleRenameMapFile", ".tmp"); + final PrintWriter writer = new PrintWriter(mapFile); + + for ( final String line : contents ) { + writer.println(line); + } + writer.close(); + + return mapFile; + } + + public static class OnTheFlySampleRenamingVerifyingTestWalker extends ReadWalker { + @Argument(fullName = "newSampleName", shortName = "newSampleName", doc = "", required = true) + String newSampleName = null; + + public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker metaDataTracker) { + if ( ! newSampleName.equals(read.getReadGroup().getSample()) ) { + throw new IllegalStateException(String.format("Encountered read with the wrong sample name. 
Expected %s found %s", + newSampleName, read.getReadGroup().getSample())); + } + + return 1; + } + + public Integer reduceInit() { return 0; } + public Integer reduce(Integer value, Integer sum) { return value + sum; } + } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index c3bc78551..84bc6e080 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -42,10 +42,9 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.*; /** * Tests selected functionality in the GenomeAnalysisEngine class @@ -104,6 +103,64 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { testEngine.validateSuppliedIntervals(); } + @Test + public void testLoadWellFormedSampleRenameMapFile() throws IOException { + final File mapFile = createTestSampleRenameMapFile(Arrays.asList("/foo/bar/first.bam newSample1", + "/foo/bar/second.bam newSample2", + "/foo/bar2/third.bam newSample3")); + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + final Map renameMap = engine.loadSampleRenameMap(mapFile); + + Assert.assertEquals(renameMap.size(), 3, "Sample rename map was wrong size after loading from file"); + + final Iterator expectedResultsIterator = Arrays.asList("/foo/bar/first.bam", "newSample1", "/foo/bar/second.bam", "newSample2", "/foo/bar2/third.bam", "newSample3").iterator(); + while ( expectedResultsIterator.hasNext() ) { + final String expectedKey = expectedResultsIterator.next(); + final String expectedValue = expectedResultsIterator.next(); + + 
Assert.assertNotNull(renameMap.get(new SAMReaderID(expectedKey, new Tags())), String.format("Entry for %s not found in sample rename map", expectedKey)); + Assert.assertEquals(renameMap.get(new SAMReaderID(expectedKey, new Tags())), expectedValue, "Wrong value in sample rename map for " + expectedKey); + } + } + + @DataProvider(name = "MalformedSampleRenameMapFileDataProvider") + public Object[][] generateMalformedSampleRenameMapFiles() throws IOException { + final List tests = new ArrayList(); + + tests.add(new Object[]{"testLoadSampleRenameMapFileNonExistentFile", + new File("/foo/bar/nonexistent")}); + tests.add(new Object[]{"testLoadSampleRenameMapFileMalformedLine1", + createTestSampleRenameMapFile(Arrays.asList("/path/to/foo.bam"))}); + tests.add(new Object[]{"testLoadSampleRenameMapFileMalformedLine2", + createTestSampleRenameMapFile(Arrays.asList("/path/to/foo.bam newSample extraField"))}); + tests.add(new Object[]{"testLoadSampleRenameMapFileNonAbsoluteBamPath", + createTestSampleRenameMapFile(Arrays.asList("relative/path/to/foo.bam newSample"))}); + tests.add(new Object[]{"testLoadSampleRenameMapFileDuplicateBamPath", + createTestSampleRenameMapFile(Arrays.asList("/path/to/dupe.bam newSample1", + "/path/to/dupe.bam newSample2"))}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "MalformedSampleRenameMapFileDataProvider", expectedExceptions = UserException.class) + public void testLoadMalformedSampleRenameMapFile( final String testName, final File mapFile ) { + logger.info("Executing test " + testName); + + final GenomeAnalysisEngine engine = new GenomeAnalysisEngine(); + final Map renameMap = engine.loadSampleRenameMap(mapFile); + } + + private File createTestSampleRenameMapFile( final List contents ) throws IOException { + final File mapFile = createTempFile("TestSampleRenameMapFile", ".tmp"); + final PrintWriter writer = new PrintWriter(mapFile); + + for ( final String line : contents ) { + writer.println(line); + } + 
writer.close(); + + return mapFile; + } /////////////////////////////////////////////////// // Test the ReadTransformer ordering enforcement // diff --git a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java index 56725147e..02d0c66b9 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/ReadMetricsUnitTest.java @@ -158,7 +158,7 @@ public class ReadMetricsUnitTest extends BaseTest { new ValidationExclusion(), new ArrayList(), new ArrayList(), - false, (byte)30, false, true); + false, (byte)30, false, true, null); engine.setReadsDataSource(dataSource); @@ -193,7 +193,7 @@ public class ReadMetricsUnitTest extends BaseTest { new ValidationExclusion(), new ArrayList(), new ArrayList(), - false, (byte)30, false, true); + false, (byte)30, false, true, null); engine.setReadsDataSource(dataSource); final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); @@ -234,7 +234,7 @@ public class ReadMetricsUnitTest extends BaseTest { new ValidationExclusion(), new ArrayList(), new ArrayList(), - false, (byte)30, false, true); + false, (byte)30, false, true, null); engine.setReadsDataSource(dataSource); final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); @@ -281,7 +281,7 @@ public class ReadMetricsUnitTest extends BaseTest { new ValidationExclusion(), filters, new ArrayList(), - false, (byte)30, false, true); + false, (byte)30, false, true, null); engine.setReadsDataSource(dataSource); diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java index 8d33aa8b6..52285fb2e 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java +++ 
b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java @@ -183,7 +183,8 @@ public class SAMDataSourceUnitTest extends BaseTest { false, (byte) -1, removeProgramRecords, - false); + false, + null); List dontRemoveProgramRecords = data.getHeader().getProgramRecords(); assertEquals(dontRemoveProgramRecords, defaultProgramRecords, "testRemoveProgramRecords: default program records differ from removeProgramRecords = false"); @@ -203,7 +204,8 @@ public class SAMDataSourceUnitTest extends BaseTest { false, (byte) -1, removeProgramRecords, - false); + false, + null); List doRemoveProgramRecords = data.getHeader().getProgramRecords(); assertTrue(doRemoveProgramRecords.isEmpty(), "testRemoveProgramRecords: program records not cleared when removeProgramRecords = true"); diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index e4b6c37cc..30c0c83b5 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -481,7 +481,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { new ValidationExclusion(), new ArrayList(), new ArrayList(), - false, (byte)30, false, true); + false, (byte)30, false, true, null); engine.setReadsDataSource(dataSource); final Set samples = SampleUtils.getSAMFileSamples(dataSource.getHeader()); From c72880f1d0826447f78c0ea9efc7a4455d9a9b41 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 19 Jul 2013 14:35:00 -0400 Subject: [PATCH 024/172] build.xml: make ant -p output only important build targets ant -p outputs only targets that have description attributes. Modify build.xml so only important targets that users might actually want to use are output by ant -p. 
--- build.xml | 52 ++++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/build.xml b/build.xml index 16db1cec1..0844717dd 100644 --- a/build.xml +++ b/build.xml @@ -22,6 +22,7 @@ ~ OTHER DEALINGS IN THE SOFTWARE. --> + @@ -291,7 +292,7 @@ - + @@ -465,7 +466,7 @@ - + @@ -502,7 +503,7 @@ - + Generating Queue GATK extensions... @@ -520,7 +521,7 @@ - + @@ -595,8 +596,7 @@ - + @@ -687,7 +687,7 @@ - + @@ -786,20 +786,20 @@ - + - + - + - + @@ -831,7 +831,7 @@ - + @@ -849,7 +849,7 @@ - + @@ -909,7 +909,7 @@ - + - + @@ -948,17 +948,17 @@ - + - + - + - + - + @@ -994,7 +994,7 @@ - + - + @@ -1367,7 +1367,7 @@ - + @@ -1399,7 +1399,7 @@ - + @@ -1433,7 +1433,7 @@ - + From 6df43f730a0c33f5a02efbbe5cef985c0df51733 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 23 Jul 2013 23:47:15 -0400 Subject: [PATCH 027/172] Fixing ReadBackedPileup to represent mapping qualities as ints, not (signed) bytes. Having them as bytes caused problems for downstream programmers who had data with high MQs. 
--- .../missing/QualifyMissingIntervals.java | 2 +- .../sting/utils/pileup/ReadBackedPileup.java | 2 +- .../utils/pileup/ReadBackedPileupImpl.java | 8 +++---- .../pileup/ReadBackedPileupUnitTest.java | 21 ++++++++++++++++++- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java index d0db3ef98..f67db4187 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/missing/QualifyMissingIntervals.java @@ -156,7 +156,7 @@ public final class QualifyMissingIntervals extends LocusWalker baseQual += qual; } double mapQual = 0.0; - for (byte qual : pileup.getMappingQuals()) { + for (int qual : pileup.getMappingQuals()) { mapQual += qual; } diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java index e1865ba3c..059c41d64 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java @@ -270,7 +270,7 @@ public interface ReadBackedPileup extends Iterable, HasGenomeLoca * Get an array of the mapping qualities * @return */ - public byte[] getMappingQuals(); + public int[] getMappingQuals(); /** * Returns a new ReadBackedPileup that is sorted by start coordinate of the reads. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java index 65c47c23b..455a6aa12 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java +++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileupImpl.java @@ -969,11 +969,11 @@ public class ReadBackedPileupImpl implements ReadBackedPileup { * @return */ @Override - public byte[] getMappingQuals() { - byte[] v = new byte[getNumberOfElements()]; + public int[] getMappingQuals() { + final int[] v = new int[getNumberOfElements()]; int pos = 0; - for (PileupElement pile : pileupElementTracker) { - v[pos++] = (byte) pile.getRead().getMappingQuality(); + for ( final PileupElement pile : pileupElementTracker ) { + v[pos++] = pile.getRead().getMappingQuality(); } return v; } diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java index 18fa8a302..02b11b970 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/ReadBackedPileupUnitTest.java @@ -296,7 +296,6 @@ public class ReadBackedPileupUnitTest { testRBPCounts(pileup, new RBPCountTest(params.nReads + 2, params.nMapq0 + 1, params.nDeletions + 1)); } - private void testRBPCounts(final ReadBackedPileup rbp, RBPCountTest expected) { for ( int cycles = 0; cycles < 3; cycles++ ) { // multiple cycles to make sure caching is working @@ -306,4 +305,24 @@ public class ReadBackedPileupUnitTest { Assert.assertEquals(rbp.getNumberOfMappingQualityZeroReads(), expected.nMapq0); } } + + @Test + public void testRBPMappingQuals() { + + // create a read with high MQ + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, 1, 10); + 
read.setReadBases(Utils.dupBytes((byte) 'A', 10)); + read.setBaseQualities(Utils.dupBytes((byte) 30, 10)); + read.setCigarString("10M"); + read.setMappingQuality(200); // set a MQ higher than max signed byte + + // now create the RBP + final List elts = new LinkedList<>(); + elts.add(new PileupElement(read, 0, read.getCigar().getCigarElement(0), 0, 0)); + final Map pileupsBySample = new HashMap<>(); + pileupsBySample.put("foo", new ReadBackedPileupImpl(loc, elts)); + final ReadBackedPileup pileup = new ReadBackedPileupImpl(loc, pileupsBySample); + + Assert.assertEquals(pileup.getMappingQuals()[0], 200); + } } \ No newline at end of file From 31ab0824b1fb5d774ebff61ff54f0adfe0f6f38c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 24 Jul 2013 14:09:49 -0400 Subject: [PATCH 028/172] quick indentation fixes to FPGA code --- .../LikelihoodCalculationEngine.java | 106 +++--- .../sting/utils/pairhmm/CnyPairHMM.java | 332 ++++++++++-------- .../sting/utils/pairhmm/BatchPairHMM.java | 41 ++- 3 files changed, 276 insertions(+), 203 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index e44aedd66..8bbe23afc 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -90,10 +90,10 @@ public class LikelihoodCalculationEngine { case EXACT: return new Log10PairHMM(true); case ORIGINAL: return new Log10PairHMM(false); case LOGLESS_CACHING: - if (noFpga || !CnyPairHMM.isAvailable()) - return new LoglessPairHMM(); - else - return new CnyPairHMM(); + if (noFpga || !CnyPairHMM.isAvailable()) + return new LoglessPairHMM(); + else + return new CnyPairHMM(); default: throw new UserException.BadArgumentValue("pairHMM", 
"Specified pairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are ORIGINAL, EXACT, CACHING, and LOGLESS_CACHING."); } @@ -132,7 +132,7 @@ public class LikelihoodCalculationEngine { this.constantGCP = constantGCP; this.DEBUG = debug; this.log10globalReadMismappingRate = log10globalReadMismappingRate; - this.noFpga = noFpga; + this.noFpga = noFpga; if ( WRITE_LIKELIHOODS_TO_FILE ) { try { @@ -146,7 +146,7 @@ public class LikelihoodCalculationEngine { } public LikelihoodCalculationEngine( final byte constantGCP, final boolean debug, final PairHMM.HMM_IMPLEMENTATION hmmType, final double log10globalReadMismappingRate ) { - this(constantGCP, debug, hmmType, log10globalReadMismappingRate, false); + this(constantGCP, debug, hmmType, log10globalReadMismappingRate, false); } public LikelihoodCalculationEngine() { @@ -209,8 +209,8 @@ public class LikelihoodCalculationEngine { private PerReadAlleleLikelihoodMap computeReadLikelihoods( final List haplotypes, final List reads) { // first, a little set up to get copies of the Haplotypes that are Alleles (more efficient than creating them each time) - final BatchPairHMM batchPairHMM = (pairHMM.get() instanceof BatchPairHMM) ? (BatchPairHMM)pairHMM.get() : null; - final Vector batchedReads = new Vector(reads.size()); + final BatchPairHMM batchPairHMM = (pairHMM.get() instanceof BatchPairHMM) ? (BatchPairHMM)pairHMM.get() : null; + final Vector batchedReads = new Vector(reads.size()); final int numHaplotypes = haplotypes.size(); final Map alleleVersions = new LinkedHashMap<>(numHaplotypes); Allele refAllele = null; @@ -236,11 +236,11 @@ public class LikelihoodCalculationEngine { readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? 
QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); } - if ( batchPairHMM != null ) { - batchPairHMM.batchAdd(haplotypes, read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP); - batchedReads.add(read); - continue; - } + if ( batchPairHMM != null ) { + batchPairHMM.batchAdd(haplotypes, read.getReadBases(), readQuals, readInsQuals, readDelQuals, overallGCP); + batchedReads.add(read); + continue; + } // keep track of the reference likelihood and the best non-ref likelihood double refLog10l = Double.NEGATIVE_INFINITY; @@ -283,54 +283,54 @@ public class LikelihoodCalculationEngine { } } - if ( batchPairHMM != null ) { - for( final GATKSAMRecord read : batchedReads ) { - double refLog10l = Double.NEGATIVE_INFINITY; - double bestNonReflog10L = Double.NEGATIVE_INFINITY; - final double[] likelihoods = batchPairHMM.batchGetResult(); - for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { - final Haplotype haplotype = haplotypes.get(jjj); - final double log10l = likelihoods[jjj]; + if ( batchPairHMM != null ) { + for( final GATKSAMRecord read : batchedReads ) { + double refLog10l = Double.NEGATIVE_INFINITY; + double bestNonReflog10L = Double.NEGATIVE_INFINITY; + final double[] likelihoods = batchPairHMM.batchGetResult(); + for( int jjj = 0; jjj < numHaplotypes; jjj++ ) { + final Haplotype haplotype = haplotypes.get(jjj); + final double log10l = likelihoods[jjj]; - if ( WRITE_LIKELIHOODS_TO_FILE ) { - final byte[] overallGCP = new byte[read.getReadLength()]; - Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data? 
- // NOTE -- must clone anything that gets modified here so we don't screw up future uses of the read - final byte[] readQuals = read.getBaseQualities().clone(); - final byte[] readInsQuals = read.getBaseInsertionQualities(); - final byte[] readDelQuals = read.getBaseDeletionQualities(); - for( int kkk = 0; kkk < readQuals.length; kkk++ ) { - readQuals[kkk] = (byte) Math.min( 0xff & readQuals[kkk], read.getMappingQuality()); // cap base quality by mapping quality, as in UG - //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated - //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated - // TODO -- why is Q18 hard-coded here??? - readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); - } - likelihoodsStream.printf("%s %s %s %s %s %s %f%n", - haplotype.getBaseString(), - new String(read.getReadBases()), - SAMUtils.phredToFastq(readQuals), - SAMUtils.phredToFastq(readInsQuals), + if ( WRITE_LIKELIHOODS_TO_FILE ) { + final byte[] overallGCP = new byte[read.getReadLength()]; + Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data? + // NOTE -- must clone anything that gets modified here so we don't screw up future uses of the read + final byte[] readQuals = read.getBaseQualities().clone(); + final byte[] readInsQuals = read.getBaseInsertionQualities(); + final byte[] readDelQuals = read.getBaseDeletionQualities(); + for( int kkk = 0; kkk < readQuals.length; kkk++ ) { + readQuals[kkk] = (byte) Math.min( 0xff & readQuals[kkk], read.getMappingQuality()); // cap base quality by mapping quality, as in UG + //readQuals[kkk] = ( readQuals[kkk] > readInsQuals[kkk] ? 
readInsQuals[kkk] : readQuals[kkk] ); // cap base quality by base insertion quality, needs to be evaluated + //readQuals[kkk] = ( readQuals[kkk] > readDelQuals[kkk] ? readDelQuals[kkk] : readQuals[kkk] ); // cap base quality by base deletion quality, needs to be evaluated + // TODO -- why is Q18 hard-coded here??? + readQuals[kkk] = ( readQuals[kkk] < (byte) 18 ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] ); + } + likelihoodsStream.printf("%s %s %s %s %s %s %f%n", + haplotype.getBaseString(), + new String(read.getReadBases()), + SAMUtils.phredToFastq(readQuals), + SAMUtils.phredToFastq(readInsQuals), SAMUtils.phredToFastq(readDelQuals), SAMUtils.phredToFastq(overallGCP), log10l); - } + } - if ( haplotype.isNonReference() ) - bestNonReflog10L = Math.max(bestNonReflog10L, log10l); - else - refLog10l = log10l; + if ( haplotype.isNonReference() ) + bestNonReflog10L = Math.max(bestNonReflog10L, log10l); + else + refLog10l = log10l; - perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), log10l); - } + perReadAlleleLikelihoodMap.add(read, alleleVersions.get(haplotype), log10l); + } - final double worstRefLog10Allowed = bestNonReflog10L + log10globalReadMismappingRate; - if ( refLog10l < (worstRefLog10Allowed) ) { - perReadAlleleLikelihoodMap.add(read, refAllele, worstRefLog10Allowed); - } - } - } + final double worstRefLog10Allowed = bestNonReflog10L + log10globalReadMismappingRate; + if ( refLog10l < (worstRefLog10Allowed) ) { + perReadAlleleLikelihoodMap.add(read, refAllele, worstRefLog10Allowed); + } + } + } return perReadAlleleLikelihoodMap; } diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java index 746c0add1..8353d3282 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java @@ -1,43 +1,91 @@ -package 
org.broadinstitute.sting.utils.pairhmm; +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. 
LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ -import java.io.File; -import java.util.*; -import java.lang.reflect.*; +package org.broadinstitute.sting.utils.pairhmm; import org.broadinstitute.sting.utils.haplotype.Haplotype; +import java.io.File; +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + public final class CnyPairHMM extends PairHMM implements BatchPairHMM { private static class HmmInput { - public byte[] readBases; - public byte[] readQuals; - public byte[] insertionGOP; - public byte[] deletionGOP; - public byte[] overallGCP; - public List haplotypes; + public byte[] readBases; + public byte[] readQuals; + public byte[] insertionGOP; + public byte[] deletionGOP; + public byte[] overallGCP; + public List haplotypes; }; private static class ResultQueue { - private int offset; - private List batchResults; - - public ResultQueue() { - batchResults = new LinkedList(); - offset = 0; - } + private int offset; + private List batchResults; - public void push(double[] results) { - batchResults.add(results); - } - - public double pop() { - double[] results = batchResults.get(0); - double top = results[offset++]; - if (offset == results.length) { - batchResults.remove(0); - offset = 0; - } - return top; - } + public ResultQueue() { + batchResults = new 
LinkedList(); + offset = 0; + } + + public void push(double[] results) { + batchResults.add(results); + } + + public double pop() { + double[] results = batchResults.get(0); + double top = results[offset++]; + if (offset == results.length) { + batchResults.remove(0); + offset = 0; + } + return top; + } } final static String libPath = "/opt/convey/personalities/32100.1.1.1.0"; @@ -48,145 +96,145 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { private ResultQueue resultQueue = new ResultQueue(); static public boolean isAvailable() { - if (!loaded) { - File library = new File(libPath + "/lib" + libName + ".so"); - return library.exists(); - } - return true; + if (!loaded) { + File library = new File(libPath + "/lib" + libName + ".so"); + return library.exists(); + } + return true; } private native void initFpga(); private native int dequeueRequirement(int reflen, int readlen); private native int enqueue(byte[] haplotypeBases, - byte[] readBases, - byte[] readQuals, - byte[] insertionGOP, - byte[] deletionGOP, - byte[] overallGCP, - int hapStartIndex, - boolean recacheReadValues); + byte[] readBases, + byte[] readQuals, + byte[] insertionGOP, + byte[] deletionGOP, + byte[] overallGCP, + int hapStartIndex, + boolean recacheReadValues); private native int flushQueue(); private native int dequeue(double[] results); private native double softHmm(byte[] haplotypeBases, - byte[] readBases, - byte[] readQuals, - byte[] insertionGOP, - byte[] deletionGOP, - byte[] overallGCP, - int hapStartIndex, - boolean recacheReadValues); - + byte[] readBases, + byte[] readQuals, + byte[] insertionGOP, + byte[] deletionGOP, + byte[] overallGCP, + int hapStartIndex, + boolean recacheReadValues); + public native void reportStats(); public void initialize( final int READ_MAX_LENGTH, final int HAPLOTYPE_MAX_LENGTH ) { - if (!loaded) { - addLibraryPath(libPath); - System.loadLibrary(libName); - initFpga(); - loaded = true; - System.out.println("FPGA HMM Initialized"); 
- } + if (!loaded) { + addLibraryPath(libPath); + System.loadLibrary(libName); + initFpga(); + loaded = true; + System.out.println("FPGA HMM Initialized"); + } } - public void batchAdd(final List haplotypes, - final byte[] readBases, - final byte[] readQuals, - final byte[] insertionGOP, - final byte[] deletionGOP, - final byte[] overallGCP) { + public void batchAdd(final List haplotypes, + final byte[] readBases, + final byte[] readQuals, + final byte[] insertionGOP, + final byte[] deletionGOP, + final byte[] overallGCP) { final int numHaplotypes = haplotypes.size(); - HmmInput test = new HmmInput(); - test.readBases = readBases; - test.readQuals = readQuals; - test.insertionGOP = insertionGOP; - test.deletionGOP = deletionGOP; - test.overallGCP = overallGCP; - test.haplotypes = haplotypes; - batchRequests.add(test); - for (int jjj = 0; jjj < numHaplotypes; jjj++) { - final boolean recacheReadValues = (jjj == 0); - final Haplotype haplotype = haplotypes.get(jjj); - enqueuePrepare(haplotype.getBases(), readBases); - if (enqueue(haplotype.getBases(), readBases, readQuals, insertionGOP, deletionGOP, overallGCP, 0, recacheReadValues) == 0) - throw new RuntimeException("FPGA queue overflow in batchAdd"); - } + HmmInput test = new HmmInput(); + test.readBases = readBases; + test.readQuals = readQuals; + test.insertionGOP = insertionGOP; + test.deletionGOP = deletionGOP; + test.overallGCP = overallGCP; + test.haplotypes = haplotypes; + batchRequests.add(test); + for (int jjj = 0; jjj < numHaplotypes; jjj++) { + final boolean recacheReadValues = (jjj == 0); + final Haplotype haplotype = haplotypes.get(jjj); + enqueuePrepare(haplotype.getBases(), readBases); + if (enqueue(haplotype.getBases(), readBases, readQuals, insertionGOP, deletionGOP, overallGCP, 0, recacheReadValues) == 0) + throw new RuntimeException("FPGA queue overflow in batchAdd"); + } } - + public double[] batchGetResult() { - double[] results; + double[] results; - int n = flushQueue(); - if (n > 0) { - 
results = new double[n]; - if (dequeue(results) != n) - System.out.println("queue underflow in enqueuePrepare"); - resultQueue.push(results); - } + int n = flushQueue(); + if (n > 0) { + results = new double[n]; + if (dequeue(results) != n) + System.out.println("queue underflow in enqueuePrepare"); + resultQueue.push(results); + } - final HmmInput test = batchRequests.remove(0); + final HmmInput test = batchRequests.remove(0); final int numHaplotypes = test.haplotypes.size(); - results = new double[numHaplotypes]; - for (int jjj = 0; jjj < numHaplotypes; jjj++) { - results[jjj] = resultQueue.pop(); - if (results[jjj]<-60.0) { - final Haplotype haplotype = test.haplotypes.get(jjj); - results[jjj]=softHmm(haplotype.getBases(), test.readBases, test.readQuals, test.insertionGOP, test.deletionGOP, test.overallGCP, 0, true); - } - } - return results; + results = new double[numHaplotypes]; + for (int jjj = 0; jjj < numHaplotypes; jjj++) { + results[jjj] = resultQueue.pop(); + if (results[jjj]<-60.0) { + final Haplotype haplotype = test.haplotypes.get(jjj); + results[jjj]=softHmm(haplotype.getBases(), test.readBases, test.readQuals, test.insertionGOP, test.deletionGOP, test.overallGCP, 0, true); + } + } + return results; } protected double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases, - final byte[] readBases, - final byte[] readQuals, - final byte[] insertionGOP, - final byte[] deletionGOP, - final byte[] overallGCP, - final int hapStartIndex, - final boolean recacheReadValues ) { - return 0.0; + final byte[] readBases, + final byte[] readQuals, + final byte[] insertionGOP, + final byte[] deletionGOP, + final byte[] overallGCP, + final int hapStartIndex, + final boolean recacheReadValues ) { + return 0.0; } private void enqueuePrepare(byte[] haplotypeBases, byte[] readBases) { - double[] results = null; - int n = dequeueRequirement(haplotypeBases.length, readBases.length); - if (n>0) { - results = new double[n]; - if (dequeue(results)!=n) - 
System.out.println("queue underflow in enqueuePrepare"); - } else if (n<0) { - n = flushQueue(); - if (n > 0) { - results = new double[n]; - if (dequeue(results) != n) - System.out.println("queue underflow in enqueuePrepare"); - } - } - - if (results != null) - resultQueue.push(results); + double[] results = null; + int n = dequeueRequirement(haplotypeBases.length, readBases.length); + if (n>0) { + results = new double[n]; + if (dequeue(results)!=n) + System.out.println("queue underflow in enqueuePrepare"); + } else if (n<0) { + n = flushQueue(); + if (n > 0) { + results = new double[n]; + if (dequeue(results) != n) + System.out.println("queue underflow in enqueuePrepare"); + } + } + + if (results != null) + resultQueue.push(results); } public static void addLibraryPath(String pathToAdd) { - try { - final Field usrPathsField = ClassLoader.class.getDeclaredField("usr_paths"); - usrPathsField.setAccessible(true); - - //get array of paths - final String[] paths = (String[])usrPathsField.get(null); - - //check if the path to add is already present - for(String path : paths) { - if(path.equals(pathToAdd)) { - return; - } - } - - //add the new path - final String[] newPaths = Arrays.copyOf(paths, paths.length + 1); - newPaths[newPaths.length-1] = pathToAdd; - usrPathsField.set(null, newPaths); - } catch (Exception ex) { - } + try { + final Field usrPathsField = ClassLoader.class.getDeclaredField("usr_paths"); + usrPathsField.setAccessible(true); + + //get array of paths + final String[] paths = (String[])usrPathsField.get(null); + + //check if the path to add is already present + for(String path : paths) { + if(path.equals(pathToAdd)) { + return; + } + } + + //add the new path + final String[] newPaths = Arrays.copyOf(paths, paths.length + 1); + newPaths[newPaths.length-1] = pathToAdd; + usrPathsField.set(null, newPaths); + } catch (Exception ex) { + } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java 
b/public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java index 3b794b00e..6468753d2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/BatchPairHMM.java @@ -1,16 +1,41 @@ -package org.broadinstitute.sting.utils.pairhmm; +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ -import java.util.List; +package org.broadinstitute.sting.utils.pairhmm; import org.broadinstitute.sting.utils.haplotype.Haplotype; +import java.util.List; + public interface BatchPairHMM { - public void batchAdd(final List haplotypes, - final byte[] readBases, - final byte[] readQuals, - final byte[] insertionGOP, - final byte[] deletionGOP, - final byte[] overallGCP); + public void batchAdd(final List haplotypes, + final byte[] readBases, + final byte[] readQuals, + final byte[] insertionGOP, + final byte[] deletionGOP, + final byte[] overallGCP); public double[] batchGetResult(); } From 0a2b5ddadf72a52e2411c81b2b263581421261f5 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 25 Jul 2013 11:02:46 -0400 Subject: [PATCH 029/172] More specific fix for the dangling tail edge case with a single leading deletion. The previous fix was too general (and therefore incorrect) and caused the HC to exception out. Added "unit" test for this exact case. --- .../readthreading/ReadThreadingGraph.java | 9 ++++++- .../HaplotypeCallerIntegrationTest.java | 27 ++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index 47d14e185..2a94ece96 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -340,8 +340,15 @@ public class ReadThreadingGraph extends BaseGraph Date: Sun, 21 Jul 2013 23:35:44 -0400 Subject: [PATCH 030/172] Fully stranded implementation of RR (plus bug fix for insertions and het compression). Now only filtered reads are unstranded. 
All consensus reads have strand, so that we emit 2 consensus reads in general now: one for each strand. This involved some refactoring of the sliding window which cleaned it up a lot. Also included is a bug fix: insertions downstream of a variant region weren't triggering a stop to the compression. --- .../reducereads/HeaderElement.java | 132 ++++-- .../reducereads/SlidingWindow.java | 401 +++++------------- .../gatk/walkers/qc/AssessReducedQuals.java | 6 +- .../reducereads/HeaderElementUnitTest.java | 20 +- .../ReduceReadsIntegrationTest.java | 36 +- .../reducereads/SlidingWindowUnitTest.java | 96 +++-- 6 files changed, 292 insertions(+), 399 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java index ba2c2ae56..5e84076fd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElement.java @@ -59,7 +59,8 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; * out due to mapping or base quality. */ public class HeaderElement { - private BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site. + private BaseAndQualsCounts positiveConsensusBaseCounts; // How many A,C,G,T (and D's) are in this site. + private BaseAndQualsCounts negativeConsensusBaseCounts; // How many A,C,G,T (and D's) are in this site. private BaseAndQualsCounts filteredBaseCounts; // How many A,C,G,T (and D's) were filtered out in this site. 
private int insertionsToTheRight; // How many reads in this site had insertions to the immediate right private int location; // Genome location of this site (the sliding window knows which contig we're at @@ -70,14 +71,20 @@ public class HeaderElement { return location; } - public BaseAndQualsCounts getFilteredBaseCounts() { + /** + * Get the base counts object for the consensus type + * + * @param consensusType the type to use + * @return non-null base counts + */ + public BaseAndQualsCounts getBaseCounts(final SlidingWindow.ConsensusType consensusType) { + if ( consensusType == SlidingWindow.ConsensusType.POSITIVE_CONSENSUS ) + return positiveConsensusBaseCounts; + if ( consensusType == SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS ) + return negativeConsensusBaseCounts; return filteredBaseCounts; } - public BaseAndQualsCounts getConsensusBaseCounts() { - return consensusBaseCounts; - } - /** * Creates a new HeaderElement with the following default values: - empty consensusBaseCounts - empty * filteredBaseCounts - 0 insertions to the right - empty mappingQuality list @@ -85,7 +92,7 @@ public class HeaderElement { * @param location the reference location for the new element */ public HeaderElement(final int location) { - this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location); + this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, location); } /** @@ -95,20 +102,22 @@ public class HeaderElement { * @param location the reference location for the new element */ public HeaderElement(final int location, final int insertionsToTheRight) { - this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location); + this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), new BaseAndQualsCounts(), insertionsToTheRight, location); } /** * Creates a new HeaderElement with all given parameters * - * @param consensusBaseCounts the BaseCounts object for the running consensus synthetic read + * @param 
positiveConsensusBaseCounts the BaseCounts object for the running positive consensus synthetic read + * @param negativeConsensusBaseCounts the BaseCounts object for the running negative consensus synthetic read * @param filteredBaseCounts the BaseCounts object for the filtered data synthetic read * @param insertionsToTheRight number of insertions to the right of this HeaderElement * @param location the reference location of this reference element * HeaderElement */ - public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int location) { - this.consensusBaseCounts = consensusBaseCounts; + public HeaderElement(final BaseAndQualsCounts positiveConsensusBaseCounts, final BaseAndQualsCounts negativeConsensusBaseCounts, final BaseAndQualsCounts filteredBaseCounts, final int insertionsToTheRight, final int location) { + this.positiveConsensusBaseCounts = positiveConsensusBaseCounts; + this.negativeConsensusBaseCounts = negativeConsensusBaseCounts; this.filteredBaseCounts = filteredBaseCounts; this.insertionsToTheRight = insertionsToTheRight; this.location = location; @@ -124,7 +133,8 @@ public class HeaderElement { * @return true if site is variant by any definition. False otherwise. 
*/ public boolean isVariant(final double minVariantPvalue, final double minVariantProportion, final double minIndelProportion) { - return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips()); + return ( hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || hasConsensusData(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS) ) + && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantPvalue, minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips()); } /** @@ -138,13 +148,18 @@ public class HeaderElement { * @param minBaseQual the minimum base qual allowed to be a good base * @param minMappingQual the minimum mapping qual allowed to be a good read * @param isSoftClipped true if the base is soft-clipped in the original read + * @param isNegativeStrand true if the base comes from a read on the negative strand */ - public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { + public void addBase(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQuality, final int minBaseQual, final int minMappingQual, final boolean isSoftClipped, final boolean isNegativeStrand) { // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts - if ( baseMappingQuality >= minMappingQual ) - consensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); - else + if ( baseMappingQuality >= minMappingQual ) { + if ( isNegativeStrand ) + negativeConsensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); + else + 
positiveConsensusBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); + } else { filteredBaseCounts.incr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); + } } /** @@ -158,14 +173,20 @@ public class HeaderElement { * @param minBaseQual the minimum base qual allowed to be a good base * @param minMappingQual the minimum mapping qual allowed to be a good read * @param isSoftClipped true if the base is soft-clipped in the original read + * @param isNegativeStrand true if the base comes from a read on the negative strand */ - public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) { + public void removeBase(final byte base, final byte baseQual, final byte insQual, final byte delQual, final int baseMappingQuality, final int minBaseQual, final int minMappingQual, final boolean isSoftClipped, final boolean isNegativeStrand) { // If the base passes the MQ filter it is included in the consensus base counts, otherwise it's part of the filtered counts - if ( baseMappingQuality >= minMappingQual ) - consensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); - else + if ( baseMappingQuality >= minMappingQual ) { + if ( isNegativeStrand ) + negativeConsensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); + else + positiveConsensusBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual, isSoftClipped); + } else { filteredBaseCounts.decr(base, baseQual, insQual, delQual, baseMappingQuality, baseQual < minBaseQual); + } } + /** * Adds an insertions to the right of the HeaderElement and updates all counts accordingly. All insertions * should be added to the right of the element. 
@@ -177,19 +198,11 @@ public class HeaderElement { /** * Does this HeaderElement contain consensus data? * + * @param consensusType the type to use * @return whether or not this HeaderElement contains consensus data */ - public boolean hasConsensusData() { - return consensusBaseCounts.totalCount() > 0; - } - - /** - * Does this HeaderElement contain filtered data? - * - * @return whether or not this HeaderElement contains filtered data - */ - public boolean hasFilteredData() { - return filteredBaseCounts.totalCount() > 0; + public boolean hasConsensusData(final SlidingWindow.ConsensusType consensusType) { + return getBaseCounts(consensusType).totalCount() > 0; } /** @@ -198,7 +211,7 @@ public class HeaderElement { * @return whether or not this HeaderElement has no data */ public boolean isEmpty() { - return (!hasFilteredData() && !hasConsensusData()); + return !hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) && !hasConsensusData(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS) && !hasConsensusData(SlidingWindow.ConsensusType.FILTERED); } /** @@ -224,7 +237,7 @@ public class HeaderElement { * @return whether or not the HeaderElement is variant due to excess insertions */ private boolean isVariantFromInsertions(double minIndelProportion) { - final int numberOfBases = consensusBaseCounts.totalCount(); + final int numberOfBases = totalCountForBothStrands(); if (numberOfBases == 0) return (insertionsToTheRight > 0); // do we only have insertions? 
@@ -232,13 +245,18 @@ public class HeaderElement { return ((double) insertionsToTheRight / numberOfBases) > minIndelProportion; } + private int totalCountForBothStrands() { + return positiveConsensusBaseCounts.totalCount() + negativeConsensusBaseCounts.totalCount(); + } + /** * Whether or not the HeaderElement is variant due to excess deletions * * @return whether or not the HeaderElement is variant due to excess deletions */ private boolean isVariantFromDeletions(double minIndelProportion) { - return consensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || consensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion; + return positiveConsensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || positiveConsensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion + || negativeConsensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || negativeConsensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion; } /** @@ -249,9 +267,23 @@ public class HeaderElement { * @return whether or not the HeaderElement is variant due to excess mismatches */ protected boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion) { - final int totalCount = consensusBaseCounts.totalCountWithoutIndels(); - final BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostProbabilityWithoutIndels(); - final int countOfOtherBases = totalCount - consensusBaseCounts.countOfBase(mostCommon); + return isVariantFromMismatches(minVariantPvalue, minVariantProportion, SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || + isVariantFromMismatches(minVariantPvalue, minVariantProportion, SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS); + } + + /** + * Whether or not the HeaderElement is variant due to excess mismatches + * + * @param minVariantPvalue the minimum pvalue to call a site variant (used with low coverage). 
+ * @param minVariantProportion the minimum proportion to call a site variant (used with high coverage). + * @param consensusType the consensus type to use + * @return whether or not the HeaderElement is variant due to excess mismatches + */ + private boolean isVariantFromMismatches(final double minVariantPvalue, final double minVariantProportion, final SlidingWindow.ConsensusType consensusType) { + final BaseAndQualsCounts baseAndQualsCounts = getBaseCounts(consensusType); + final int totalCount = baseAndQualsCounts.totalCountWithoutIndels(); + final BaseIndex mostCommon = baseAndQualsCounts.baseIndexWithMostProbabilityWithoutIndels(); + final int countOfOtherBases = totalCount - baseAndQualsCounts.countOfBase(mostCommon); return hasSignificantCount(countOfOtherBases, totalCount, minVariantPvalue, minVariantProportion); } @@ -262,8 +294,20 @@ public class HeaderElement { * @return true if we had more soft clipped bases contributing to this site than matches/mismatches. */ protected boolean isVariantFromSoftClips() { - final int nSoftClippedBases = consensusBaseCounts.nSoftclips(); - return nSoftClippedBases > 0 && nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases); + return isVariantFromSoftClips(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) || isVariantFromSoftClips(SlidingWindow.ConsensusType.NEGATIVE_CONSENSUS); + } + + /** + * This handles the special case where we have more bases that came from soft clips than bases that came from + * normal bases by forcing it to become a variant region. We don't want a consensus based on too little information. + * + * @param consensusType the consensus type to use + * @return true if we had more soft clipped bases contributing to this site than matches/mismatches. 
+ */ + private boolean isVariantFromSoftClips(final SlidingWindow.ConsensusType consensusType) { + final BaseAndQualsCounts baseAndQualsCounts = getBaseCounts(consensusType); + final int nSoftClippedBases = baseAndQualsCounts.nSoftclips(); + return nSoftClippedBases > 0 && nSoftClippedBases >= (baseAndQualsCounts.totalCount() - nSoftClippedBases); } /** @@ -287,9 +331,9 @@ public class HeaderElement { */ public ObjectArrayList getAlleles(final double minVariantPvalue, final double minVariantProportion) { // make sure we have bases at all - final int totalBaseCount = consensusBaseCounts.totalCount(); + final int totalBaseCount = totalCountForBothStrands(); if ( totalBaseCount == 0 ) - return new ObjectArrayList(0); + return new ObjectArrayList<>(0); // next, check for insertions; technically, the insertion count can be greater than totalBaseCount // (because of the way insertions are counted), so we need to account for that @@ -297,9 +341,9 @@ public class HeaderElement { return null; // finally, check for the bases themselves (including deletions) - final ObjectArrayList alleles = new ObjectArrayList(4); + final ObjectArrayList alleles = new ObjectArrayList<>(4); for ( final BaseIndex base : BaseIndex.values() ) { - final int baseCount = consensusBaseCounts.countOfBase(base); + final int baseCount = positiveConsensusBaseCounts.countOfBase(base) + negativeConsensusBaseCounts.countOfBase(base); if ( baseCount == 0 ) continue; @@ -320,7 +364,7 @@ public class HeaderElement { * @return true if there are significant softclips, false otherwise */ public boolean hasSignificantSoftclips(final double minVariantPvalue, final double minVariantProportion) { - return hasSignificantCount(consensusBaseCounts.nSoftclips(), consensusBaseCounts.totalCount(), minVariantPvalue, minVariantProportion); + return hasSignificantCount(positiveConsensusBaseCounts.nSoftclips() + negativeConsensusBaseCounts.nSoftclips(), totalCountForBothStrands(), minVariantPvalue, minVariantProportion); } /* 
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index 5115a6777..e15c68774 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -59,7 +59,6 @@ import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.UnvalidatingGenomeLoc; import org.broadinstitute.sting.utils.collections.Pair; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator; import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -87,12 +86,10 @@ public class SlidingWindow { protected int downsampleCoverage; // Running consensus data - protected SyntheticRead runningConsensus; protected int consensusCounter; protected String consensusReadName; // Filtered Data Consensus data - protected SyntheticRead filteredDataConsensus; protected int filteredDataConsensusCounter; protected String filteredDataReadName; @@ -109,12 +106,12 @@ public class SlidingWindow { private static CompressionStash emptyRegions = new CompressionStash(); /** - * The types of synthetic reads to use in the finalizeAndAdd method + * The types of synthetic reads */ - private enum ConsensusType { - CONSENSUS, - FILTERED, - BOTH + protected enum ConsensusType { + POSITIVE_CONSENSUS, + NEGATIVE_CONSENSUS, + FILTERED } public int getStopLocation() { @@ -144,9 +141,9 @@ public class SlidingWindow { contextSize = 10; - this.windowHeader = new LinkedList(); + this.windowHeader = new LinkedList<>(); windowHeader.addFirst(new HeaderElement(startLocation)); - this.readsInWindow = 
new PriorityQueue(100, new Comparator() { + this.readsInWindow = new PriorityQueue<>(100, new Comparator() { @Override public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { return read1.getSoftEnd() - read2.getSoftEnd(); @@ -168,8 +165,8 @@ public class SlidingWindow { this.MIN_BASE_QUAL_TO_COUNT = minBaseQual; this.MIN_MAPPING_QUALITY = minMappingQuality; - this.windowHeader = new LinkedList(); - this.readsInWindow = new PriorityQueue(1000, new Comparator() { + this.windowHeader = new LinkedList<>(); + this.readsInWindow = new PriorityQueue<>(1000, new Comparator() { @Override public int compare(GATKSAMRecord read1, GATKSAMRecord read2) { return read1.getSoftEnd() - read2.getSoftEnd(); @@ -187,9 +184,6 @@ public class SlidingWindow { this.filteredDataConsensusCounter = 0; this.filteredDataReadName = "Filtered-" + windowNumber + "-"; - this.runningConsensus = null; - this.filteredDataConsensus = null; - this.downsampleStrategy = downsampleStrategy; this.hasIndelQualities = hasIndelQualities; } @@ -209,7 +203,9 @@ public class SlidingWindow { @Ensures("result != null") public CompressionStash addRead(GATKSAMRecord read) { addToHeader(windowHeader, read); // update the window header counts - readsInWindow.add(read); // add read to sliding reads + // no need to track low mapping quality reads + if ( read.getMappingQuality() >= MIN_MAPPING_QUALITY ) + readsInWindow.add(read); // add read to sliding reads return slideWindow(read.getUnclippedStart()); } @@ -296,7 +292,7 @@ public class SlidingWindow { } while (!readsInWindow.isEmpty() && readsInWindow.peek().getSoftEnd() < windowHeaderStartLocation) { - readsInWindow.poll(); + readsInWindow.poll(); } return regions; @@ -413,280 +409,83 @@ public class SlidingWindow { * * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus * - * @param header the window header + * @param header the header to use * @param start the first header index to add to consensus * 
@param end the first header index NOT TO add to consensus - * @param strandType the strandedness that the synthetic read should be represented as having + * @param consensusType the consensus type to use * @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated. */ @Requires({"start >= 0 && (end >= start || end == 0)"}) @Ensures("result != null") - protected ObjectArrayList addToSyntheticReads(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { - final ObjectArrayList reads = new ObjectArrayList(); + protected ObjectArrayList addToSyntheticReads(final LinkedList header, final int start, final int end, final ConsensusType consensusType) { + final ObjectArrayList reads = new ObjectArrayList<>(); - if ( start < end ) { - final ListIterator headerElementIterator = header.listIterator(start); + SyntheticRead consensus = null; + final ListIterator headerElementIterator = header.listIterator(start); + boolean wasInConsensus = false; - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end)); + for ( int currentPosition = start; currentPosition < end; currentPosition++ ) { - HeaderElement headerElement = headerElementIterator.next(); + if ( ! 
headerElementIterator.hasNext() ) + throw new IllegalStateException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, windowHeader.size(), end)); + final HeaderElement headerElement = headerElementIterator.next(); - if (headerElement.hasConsensusData()) { + if ( headerElement.hasConsensusData(consensusType) ) { + wasInConsensus = true; - // find the end of the consecutive consensus data in the window - final int endOfConsensus = findNextNonConsensusElement(header, start, end); - if (endOfConsensus <= start) - throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start)); + // add to running consensus + if ( consensus == null ) + consensus = createNewConsensus(consensusType, headerElement.getLocation()); - // add to running consensus and recurse - addToRunningConsensus(header, start, endOfConsensus, strandType); - reads.addAll(addToSyntheticReads(header, endOfConsensus, end, strandType)); + genericAddBaseToConsensus(consensus, headerElement.getBaseCounts(consensusType)); } else { // add any outstanding consensus data - reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS)); + if ( wasInConsensus ) { + reads.addAll(finalizeAndAdd(consensus, consensusType)); + consensus = null; + } - // find the end of the consecutive empty data in the window - final int endOfEmptyData = findNextConsensusElement(header, start, end); - if (endOfEmptyData <= start) - throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); - - // recurse out of the empty region - reads.addAll(addToSyntheticReads(header, endOfEmptyData, end, strandType)); + wasInConsensus = false; } } + // add any outstanding consensus data + reads.addAll(finalizeAndAdd(consensus, consensusType)); + return reads; } + private SyntheticRead createNewConsensus(final ConsensusType consensusType, final int start) { + if ( consensusType == 
ConsensusType.FILTERED ) + return new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, start, hasIndelQualities, SyntheticRead.StrandType.STRANDLESS); + return new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, start, hasIndelQualities, consensusType == ConsensusType.POSITIVE_CONSENSUS ? SyntheticRead.StrandType.POSITIVE : SyntheticRead.StrandType.NEGATIVE); + } + /** - * Finalizes one or more synthetic reads. + * Finalizes a synthetic read. * + * @param consensus the consensus to finalize * @param type the synthetic reads you want to close - * @return a possibly null list of GATKSAMRecords generated by finalizing the synthetic reads + * @return a possibly empty list of GATKSAMRecords generated by finalizing the synthetic reads */ - private ObjectArrayList finalizeAndAdd(final ConsensusType type) { + private ObjectArrayList finalizeAndAdd(final SyntheticRead consensus, final ConsensusType type) { - final ObjectArrayList list = new ObjectArrayList(); + final ObjectArrayList list = new ObjectArrayList<>(); - if ( type == ConsensusType.CONSENSUS || type == ConsensusType.BOTH ) { - final GATKSAMRecord read = finalizeRunningConsensus(); - if ( read != null ) - list.add(read); - } + final GATKSAMRecord read; + if ( type == ConsensusType.FILTERED ) + read = finalizeFilteredDataConsensus(consensus); + else + read = finalizeRunningConsensus(consensus); - if ( type == ConsensusType.FILTERED || type == ConsensusType.BOTH ) { - final GATKSAMRecord read = finalizeFilteredDataConsensus(); - if ( read != null ) - list.add(read); - } + if ( read != null ) + list.add(read); return list; } - /** - * Looks for the next position without consensus data - * - * @param header the header to check - * @param start beginning of the filtered region - * @param upTo limit to search for another consensus element - * @return next position in local coordinates 
(relative to the windowHeader) with consensus data; otherwise, the start position - */ - private int findNextNonConsensusElement(final LinkedList header, final int start, final int upTo) { - final Iterator headerElementIterator = header.listIterator(start); - int index = start; - while (index < upTo) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("There are no more header elements in this window"); - - if (!headerElementIterator.next().hasConsensusData()) - break; - index++; - } - return index; - } - - /** - * Looks for the next position witho consensus data - * - * @param header the header to check - * @param start beginning of the filtered region - * @param upTo limit to search for another consensus element - * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position - */ - private int findNextConsensusElement(final LinkedList header, final int start, final int upTo) { - final Iterator headerElementIterator = header.listIterator(start); - int index = start; - while (index < upTo) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("There are no more header elements in this window"); - - if (headerElementIterator.next().hasConsensusData()) - break; - index++; - } - return index; - } - - /** - * Adds bases to the filtered data synthetic read. - * - * Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData - * bases. 
- * - * @param header the window header - * @param start the first header index to add to consensus - * @param end the first header index NOT TO add to consensus - * @param strandType the strandedness that the synthetic read should be represented as having - */ - @Requires({"start >= 0 && (end >= start || end == 0)"}) - private void addToRunningConsensus(final LinkedList header, final int start, final int end, final SyntheticRead.StrandType strandType) { - if (runningConsensus == null) - runningConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, header.get(start).getLocation(), hasIndelQualities, strandType); - - final Iterator headerElementIterator = header.listIterator(start); - - for (int index = start; index < end; index++) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist"); - - final HeaderElement headerElement = headerElementIterator.next(); - if (!headerElement.hasConsensusData()) - throw new ReviewedStingException("No CONSENSUS data in " + index); - - genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts()); - } - } - - /** - * Adds bases to the running filtered data accordingly - * - * If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus - * - * @param header the window header - * @param start the first header index to add to consensus - * @param end the first header index NOT TO add to consensus - * @return a non-null list of consensus reads generated by this call. Empty list if no consensus was generated. 
- */ - @Requires({"start >= 0 && (end >= start || end == 0)"}) - @Ensures("result != null") - protected ObjectArrayList addToFilteredReads(final LinkedList header, final int start, final int end) { - final ObjectArrayList reads = new ObjectArrayList(); - - if ( start < end ) { - final ListIterator headerElementIterator = header.listIterator(start); - - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, header.size(), end)); - - HeaderElement headerElement = headerElementIterator.next(); - - if (headerElement.hasFilteredData()) { - - // find the end of the consecutive filtered data in the window - final int endOfFiltered = findNextNonFilteredElement(header, start, end); - if (endOfFiltered <= start) - throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFiltered, start)); - - // add to running filtered consensus and recurse - addToFilteredData(header, start, endOfFiltered); - reads.addAll(addToFilteredReads(header, endOfFiltered, end)); - - } else { - - // add any outstanding filtered data - reads.addAll(finalizeAndAdd(ConsensusType.FILTERED)); - - // find the end of the consecutive empty data in the window - final int endOfEmptyData = findNextFilteredElement(header, start, end); - if (endOfEmptyData <= start) - throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start)); - - // recurse out of the empty region - reads.addAll(addToFilteredReads(header, endOfEmptyData, end)); - } - } - - return reads; - } - - /** - * Looks for the next position without consensus data - * - * @param header the header to check - * @param start beginning of the filtered region - * @param upTo limit to search for another consensus element - * @return next position in local coordinates (relative to the windowHeader) with consensus data; 
otherwise, the start position - */ - private int findNextNonFilteredElement(final LinkedList header, final int start, final int upTo) { - final Iterator headerElementIterator = header.listIterator(start); - int index = start; - while (index < upTo) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("There are no more header elements in this window"); - - if (!headerElementIterator.next().hasFilteredData()) - break; - index++; - } - return index; - } - - /** - * Looks for the next position witho consensus data - * - * @param header the header to check - * @param start beginning of the filtered region - * @param upTo limit to search for another consensus element - * @return next position in local coordinates (relative to the windowHeader) with consensus data; otherwise, the start position - */ - private int findNextFilteredElement(final LinkedList header, final int start, final int upTo) { - final Iterator headerElementIterator = header.listIterator(start); - int index = start; - while (index < upTo) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("There are no more header elements in this window"); - - if (headerElementIterator.next().hasFilteredData()) - break; - index++; - } - return index; - } - - - /** - * Adds bases to the filtered data synthetic read. - * - * Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData bases. 
- * - * @param header the window header - * @param start the first header index to add to consensus - * @param end the first header index NOT TO add to consensus - */ - @Requires({"start >= 0 && (end >= start || end == 0)"}) - @Ensures("result != null") - private void addToFilteredData(final LinkedList header, final int start, final int end) { - - if (filteredDataConsensus == null) - filteredDataConsensus = new SyntheticRead(samHeader, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, header.get(start).getLocation(), hasIndelQualities, SyntheticRead.StrandType.STRANDLESS); - - ListIterator headerElementIterator = header.listIterator(start); - for (int index = start; index < end; index++) { - if (!headerElementIterator.hasNext()) - throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist"); - - final HeaderElement headerElement = headerElementIterator.next(); - - if (!headerElement.hasFilteredData()) - throw new ReviewedStingException("No filtered data in " + index); - - genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts()); - } - } - /** * Generic accessor to add base and qualities to a synthetic read * @@ -734,7 +533,7 @@ public class SlidingWindow { final int refStart = windowHeader.get(start).getLocation(); final int refStop = windowHeader.get(stop).getLocation(); - final ObjectList toRemove = new ObjectArrayList(); + final ObjectList toRemove = new ObjectArrayList<>(); for ( final GATKSAMRecord read : readsInWindow ) { if ( read.getSoftStart() <= refStop ) { if ( read.getAlignmentEnd() >= refStart ) { @@ -814,7 +613,7 @@ public class SlidingWindow { continue; if ( headerElement.hasSignificantSoftclips(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) || - headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) > 
1 ) + headerElement.getNumberOfBaseAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) != 1 ) return true; } @@ -836,13 +635,26 @@ public class SlidingWindow { final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed); result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads); - result.reads.addAll(addToSyntheticReads(windowHeader, 0, allReads.stopPerformed + 1, SyntheticRead.StrandType.STRANDLESS)); - result.reads.addAll(addToFilteredReads(windowHeader, 0, allReads.stopPerformed + 1)); - result.reads.addAll(finalizeAndAdd(ConsensusType.BOTH)); + result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1)); return result; // finalized reads will be downsampled if necessary } + /** + * Adds reads for all possible strands (positive, negative, filtered) from the global windowHeader object + * + * @param start the start position (inclusive) + * @param end the end position (exclusive) + * @return non-null but possibly empty array list with reduced reads + */ + private ObjectArrayList addAllSyntheticReadTypes(final int start, final int end) { + final ObjectArrayList reads = new ObjectArrayList<>(); + reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.POSITIVE_CONSENSUS)); + reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.NEGATIVE_CONSENSUS)); + reads.addAll(addToSyntheticReads(windowHeader, start, end, ConsensusType.FILTERED)); + return reads; + } + /* * @see #closeVariantRegions(CompressionStash, ObjectSortedSet, boolean) with forceCloseFullRegions set to false */ @@ -851,7 +663,7 @@ public class SlidingWindow { } private static final class CloseVariantRegionResult { - final private ObjectList reads = new ObjectArrayList(); + final private ObjectList reads = new ObjectArrayList<>(); private int stopPerformed; public CloseVariantRegionResult(final int stopPerformed) { this.stopPerformed = 
stopPerformed; } @@ -866,7 +678,7 @@ public class SlidingWindow { * @return a non-null set of reduced reads representing the finalized regions */ public ObjectSet closeVariantRegions(final CompressionStash regions, final ObjectSortedSet knownSnpPositions, final boolean forceCloseFullRegions) { - final ObjectAVLTreeSet allReads = new ObjectAVLTreeSet(new AlignmentStartWithNoTiesComparator()); + final ObjectAVLTreeSet allReads = new ObjectAVLTreeSet<>(new AlignmentStartWithNoTiesComparator()); if ( !regions.isEmpty() ) { int windowHeaderStart = getStartLocation(windowHeader); @@ -945,9 +757,9 @@ public class SlidingWindow { if (downsampleCoverage >= nReads) return allReads; - ReservoirDownsampler downsampler = new ReservoirDownsampler(downsampleCoverage); + ReservoirDownsampler downsampler = new ReservoirDownsampler<>(downsampleCoverage); downsampler.submit(allReads); - return new ObjectArrayList(downsampler.consumeFinalizedItems()); + return new ObjectArrayList<>(downsampler.consumeFinalizedItems()); } @@ -962,7 +774,7 @@ public class SlidingWindow { @Ensures("result != null") public Pair, CompressionStash> close(final ObjectSortedSet knownSnpPositions) { // mark variant regions - ObjectSet finalizedReads = new ObjectAVLTreeSet(new AlignmentStartWithNoTiesComparator()); + ObjectSet finalizedReads = new ObjectAVLTreeSet<>(new AlignmentStartWithNoTiesComparator()); CompressionStash regions = new CompressionStash(); if (!windowHeader.isEmpty()) { @@ -970,48 +782,45 @@ public class SlidingWindow { regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), true); finalizedReads = closeVariantRegions(regions, knownSnpPositions, true); - if (!windowHeader.isEmpty()) { - finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), SyntheticRead.StrandType.STRANDLESS)); - finalizedReads.addAll(addToFilteredReads(windowHeader, 0, windowHeader.size())); - finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended 
in running consensus, finish it up - } + if (!windowHeader.isEmpty()) + finalizedReads.addAll(addAllSyntheticReadTypes(0, windowHeader.size())); } - return new Pair, CompressionStash>(finalizedReads, regions); + return new Pair<>(finalizedReads, regions); } /** * generates the SAM record for the running consensus read and resets it (to null) * + * @param runningConsensus the consensus to finalize * @return the read contained in the running consensus or null */ - protected GATKSAMRecord finalizeRunningConsensus() { + protected GATKSAMRecord finalizeRunningConsensus(final SyntheticRead runningConsensus) { GATKSAMRecord finalizedRead = null; - if (runningConsensus != null) { - if (runningConsensus.size() > 0) + + if ( runningConsensus != null ) { + if ( runningConsensus.size() > 0 ) finalizedRead = runningConsensus.close(); else consensusCounter--; - - runningConsensus = null; } + return finalizedRead; } /** * generates the SAM record for the filtered data consensus and resets it (to null) * + * @param filteredDataConsensus the consensus to finalize * @return the read contained in the running consensus or null */ - protected GATKSAMRecord finalizeFilteredDataConsensus() { + protected GATKSAMRecord finalizeFilteredDataConsensus(final SyntheticRead filteredDataConsensus) { GATKSAMRecord finalizedRead = null; if (filteredDataConsensus != null) { if (filteredDataConsensus.size() > 0) finalizedRead = filteredDataConsensus.close(); else filteredDataConsensusCounter--; - - filteredDataConsensus = null; } return finalizedRead; } @@ -1021,7 +830,7 @@ public class SlidingWindow { private final static class SingleStrandConsensusData { final HeaderElementList consensus = new HeaderElementList(); - final ObjectList reads = new ObjectArrayList(); + final ObjectList reads = new ObjectArrayList<>(); } /** @@ -1042,6 +851,7 @@ public class SlidingWindow { // initialize the mapping from base (allele) to header final Byte2IntMap alleleHeaderMap = new Byte2IntArrayMap(2); + 
alleleHeaderMap.defaultReturnValue(-1); for ( final BaseIndex allele : windowHeader.get(hetRefPosition).getAlleles(MIN_ALT_PVALUE_TO_TRIGGER_VARIANT, MIN_ALT_PROPORTION_TO_TRIGGER_VARIANT) ) { final int currentIndex = alleleHeaderMap.size(); if ( currentIndex > 1 ) @@ -1056,7 +866,7 @@ public class SlidingWindow { if ( alleleHeaderMap.size() != 2 ) throw new IllegalStateException("We expected to see 2 alleles when creating a diploid consensus but saw " + alleleHeaderMap.size()); - final ObjectList readsToRemove = new ObjectArrayList(); + final ObjectList readsToRemove = new ObjectArrayList<>(); for ( final GATKSAMRecord read : readsInWindow ) { @@ -1081,10 +891,10 @@ public class SlidingWindow { final byte base = read.getReadBases()[readPosOfHet]; // check which allele this read represents - final Integer allele = alleleHeaderMap.get(base); + final int allele = alleleHeaderMap.get(base); // ignore the read if it represents a base that's not part of the consensus - if ( allele != null ) { + if ( allele != -1 ) { // add to the appropriate polyploid header final SingleStrandConsensusData header = read.getReadNegativeStrandFlag() ? headersNegStrand[allele] : headersPosStrand[allele]; header.reads.add(read); @@ -1096,7 +906,7 @@ public class SlidingWindow { readsInWindow.remove(read); // create the polyploid synthetic reads if we can - final ObjectList hetReads = new ObjectArrayList(); + final ObjectList hetReads = new ObjectArrayList<>(); // sanity check that no new "variant region" exists on just a single consensus strand due to softclips // or multi-allelic sites now that we've broken everything out into their component parts. if one does @@ -1125,10 +935,12 @@ public class SlidingWindow { * @param result list in which to store results */ protected void finalizeHetConsensus(final LinkedList header, final boolean isNegativeStrand, final ObjectList result) { - if ( header.size() > 0 ) - result.addAll(addToSyntheticReads(header, 0, header.size(), isNegativeStrand ? 
SyntheticRead.StrandType.NEGATIVE : SyntheticRead.StrandType.POSITIVE)); - if ( runningConsensus != null ) - result.add(finalizeRunningConsensus()); + if ( header.size() > 0 ) { + if ( isNegativeStrand ) + result.addAll(addToSyntheticReads(header, 0, header.size(), ConsensusType.NEGATIVE_CONSENSUS)); + else + result.addAll(addToSyntheticReads(header, 0, header.size(), ConsensusType.POSITIVE_CONSENSUS)); + } } private void addToHeader(LinkedList header, GATKSAMRecord read) { @@ -1222,6 +1034,7 @@ public class SlidingWindow { final Iterator headerElementIterator = header.listIterator(startIndex); final byte mappingQuality = (byte) read.getMappingQuality(); + final boolean isNegativeStrand = read.getReadNegativeStrandFlag(); // iterator variables int locationIndex = startIndex; @@ -1254,9 +1067,9 @@ public class SlidingWindow { for ( int i = 0; i < nDeletionBases; i++ ) { headerElement = headerElementIterator.next(); if (removeRead) - headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); + headerElement.removeBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false, isNegativeStrand); else - headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false); + headerElement.addBase(BaseUtils.Base.D.base, mappingQuality, mappingQuality, mappingQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false, isNegativeStrand); } locationIndex += nDeletionBases; break; @@ -1279,9 +1092,9 @@ public class SlidingWindow { final byte deletionQuality = readHasIndelQuals ? 
deletionQuals[readBaseIndex] : -1; if ( removeRead ) - headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); + headerElement.removeBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip, isNegativeStrand); else - headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip); + headerElement.addBase(readBases[readBaseIndex], readQuals[readBaseIndex], insertionQuality, deletionQuality, mappingQuality, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, isSoftClip, isNegativeStrand); readBaseIndex++; } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java index 13daee8c9..25f6f874d 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java @@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -133,6 +132,11 @@ public class AssessReducedQuals extends LocusWalker implem return reportLocus ? 
ref.getLocus() : null; } + /** + * Get the quals separated by version and strand + * @param readPileup the pileup + * @return 2x2 array with sum of quals separated by version in 1st dimension and strand in the 2nd + */ private int[] getPileupQuals(final ReadBackedPileup readPileup) { final int[] quals = new int[2]; diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java index 32791dd97..4f5b7477c 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/HeaderElementUnitTest.java @@ -97,15 +97,15 @@ public class HeaderElementUnitTest extends BaseTest { HeaderElement headerElement = new HeaderElement(1000, 0); // first test that if we add and then remove it, we have no data - headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip); + headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false); headerElement.addInsertionToTheRight(); - headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip); + headerElement.removeBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip, false); headerElement.removeInsertionToTheRight(); testHeaderIsEmpty(headerElement); // now, test that the data was added as expected for ( int i = 0; i < 10; i++ ) - headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, test.isClip); + headerElement.addBase(test.base, test.baseQual, test.insQual, test.delQual, test.MQ, minBaseQual, minMappingQual, 
test.isClip, false); testHeaderData(headerElement, test); // test the insertion adding functionality @@ -115,8 +115,8 @@ public class HeaderElementUnitTest extends BaseTest { } private void testHeaderIsEmpty(final HeaderElement headerElement) { - Assert.assertFalse(headerElement.hasConsensusData()); - Assert.assertFalse(headerElement.hasFilteredData()); + Assert.assertFalse(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS)); + Assert.assertFalse(headerElement.hasConsensusData(SlidingWindow.ConsensusType.FILTERED)); Assert.assertFalse(headerElement.hasInsertionToTheRight()); Assert.assertTrue(headerElement.isEmpty()); } @@ -125,9 +125,9 @@ public class HeaderElementUnitTest extends BaseTest { Assert.assertEquals(headerElement.isVariantFromSoftClips(), test.isClip); Assert.assertFalse(headerElement.isEmpty()); Assert.assertFalse(headerElement.hasInsertionToTheRight()); - Assert.assertEquals(headerElement.hasConsensusData(), test.MQ >= minMappingQual); - Assert.assertEquals(headerElement.hasFilteredData(), test.MQ < minMappingQual); - Assert.assertEquals(headerElement.hasConsensusData() ? headerElement.getConsensusBaseCounts().getRMS() : headerElement.getFilteredBaseCounts().getRMS(), (double)test.MQ); + Assert.assertEquals(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS), test.MQ >= minMappingQual); + Assert.assertEquals(headerElement.hasConsensusData(SlidingWindow.ConsensusType.FILTERED), test.MQ < minMappingQual); + Assert.assertEquals(headerElement.getBaseCounts(headerElement.hasConsensusData(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS) ? 
SlidingWindow.ConsensusType.POSITIVE_CONSENSUS : SlidingWindow.ConsensusType.FILTERED).getRMS(), (double)test.MQ); Assert.assertFalse(headerElement.isVariantFromMismatches(0.05, 0.05)); Assert.assertEquals(headerElement.isVariant(0.05, 0.05, 0.05), test.isClip); } @@ -145,7 +145,7 @@ public class HeaderElementUnitTest extends BaseTest { @DataProvider(name = "alleles") public Object[][] createAllelesData() { - List tests = new ArrayList(); + List tests = new ArrayList<>(); final int[] counts = new int[]{ 0, 5, 10, 15, 20 }; final double [] pvalues = new double[]{ 0.0, 0.01, 0.05, 0.20, 1.0 }; @@ -174,7 +174,7 @@ public class HeaderElementUnitTest extends BaseTest { for ( int i = 0; i < test.counts.length; i++ ) { final BaseIndex base = BaseIndex.values()[i]; for ( int j = 0; j < test.counts[i]; j++ ) - headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false); + headerElement.addBase(base.b, byte20, byte10, byte10, byte20, minBaseQual, minMappingQual, false, false); } final int nAllelesSeen = headerElement.getNumberOfBaseAlleles(test.pvalue, test.pvalue); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 4fbbe1d0c..9942821e1 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -158,44 +158,44 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testDefaultCompression() { - RRTest("testDefaultCompression ", L, "fa1cffc4539e0c20b818a11da5dba5b9", false); + RRTest("testDefaultCompression ", L, "0e503f7b79ace4c89d74f0943a0de1c0", false); } @Test(enabled = true) public void 
testDefaultCompressionWithKnowns() { - RRTest("testDefaultCompressionWithKnowns ", L, "d1b5fbc402810d9cdc020bb3503f1325", true); + RRTest("testDefaultCompressionWithKnowns ", L, "6db7ce2733d006f8bd61c42a40d23728", true); } private final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110"; @Test(enabled = true) public void testMultipleIntervals() { - RRTest("testMultipleIntervals ", intervals, "7e9dcd157ad742d4ebae7e56bc4af663", false); + RRTest("testMultipleIntervals ", intervals, "207f2c6d3db956e19412a45a231ca367", false, "043b2838c27d8f9580379b54c18ff40a"); } @Test(enabled = true) public void testMultipleIntervalsWithKnowns() { - RRTest("testMultipleIntervalsWithKnowns ", intervals, "dbb1e95e1bcad956701142afac763717", true); + RRTest("testMultipleIntervalsWithKnowns ", intervals, "f3b11a8a7673b301e27137936fafc6b6", true, "043b2838c27d8f9580379b54c18ff40a"); } @Test(enabled = true) public void testHighCompression() { - RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "8f8fd1a53fa0789116f45e4cf2625906", false); + RRTest("testHighCompression ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "dcc3716b3665aa1c2dbe6b22d6534aef", false); } @Test(enabled = true) public void testHighCompressionWithKnowns() { - RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "52fd2a77802a4677b604abb18e15d96a", true); + RRTest("testHighCompressionWithKnowns ", " -cs 10 -min_pvalue 0.3 -minvar 0.3 -mindel 0.3 " + L, "97ae655bf0e483ea227b1aac67ced024", true); } @Test(enabled = true) public void testLowCompression() { - RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "79c6543d5ce84ebc2ca74404498edbd1", false); + RRTest("testLowCompression ", " -cs 30 -min_pvalue 0.001 -minvar 
0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "a1377eb922e0b09a03a280b691b0b3ff", false); } @Test(enabled = true) public void testLowCompressionWithKnowns() { - RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "271aec358b309603291a974b5ba3bd60", true); + RRTest("testLowCompressionWithKnowns ", " -cs 30 -min_pvalue 0.001 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "bd7c5b0b210694f364ca6a41f5b89870", true); } @Test(enabled = true) @@ -207,7 +207,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testIndelCompression() { - final String md5 = "d20e6012300898a0315c795cab7583d8"; + final String md5 = "9c9305eda5e4e7f22246ec8a4b242c97"; RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, false); RRTest("testIndelCompressionWithKnowns ", " -cs 50 -L 20:10,100,500-10,100,600 ", md5, true); } @@ -215,27 +215,25 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testFilteredDeletionCompression() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s "; - executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("e5da09662708f562c0c617ba73cf4763")), "4f916da29d91852077f0a2fdbdd2c7f6"); + executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("1bda512143be1016dfaca1f7020b6398")), "4f916da29d91852077f0a2fdbdd2c7f6"); } - private static final String COREDUCTION_QUALS_TEST_MD5 = "26d84a2bd549a01a63fcebf8847a1b7d"; - @Test(enabled = true) public void testCoReduction() { String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; - executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), 
Arrays.asList("5f4d2c1d9c010dfd6865aeba7d0336fe")), COREDUCTION_QUALS_TEST_MD5); + executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866"))); } @Test(enabled = true) public void testCoReductionWithKnowns() { String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; - executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("ca48dd972bf57595c691972c0f887cb4")), COREDUCTION_QUALS_TEST_MD5); + executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0"))); } @Test(enabled = true) public void testInsertionsAtEdgeOfConsensus() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM) + " -o %s "; - executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("760500a5b036b987f84099f45f26a804"))); + executeTest("testInsertionsAtEdgeOfConsensus", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("c10653a8c21fb32b5cf580d3704b0edd"))); } /** @@ -249,7 +247,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testAddingReadAfterTailingTheStash() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s "; - executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("67f8a3a647f8ec5212104bdaafd8c862")), "3eab32c215ba68e75efd5ab7e9f7a2e7"); + executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("fddbec29d0945afbbb34b42994614c15")), "3eab32c215ba68e75efd5ab7e9f7a2e7"); } /** @@ -260,7 +258,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void 
testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("4f0ef477c0417d1eb602b323474ef377"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9"))); } /** @@ -270,7 +268,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testReadOffContig() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s "; - executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("0ce693b4ff925998867664e4099f3248"))); + executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("595e5812c37189930cae93e45765def4"))); } /** @@ -280,7 +278,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testPairedReadsInVariantRegion() { String base = String.format("-T ReduceReads -npt -R %s -I %s ", hg19Reference, BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM) + " -o %s --downsample_coverage 250 -dcov 50 "; - executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7e7b358443827ca239db3b98f299aec6")), "2af063d1bd3c322b03405dbb3ecf59a9"); + executeTest("testPairedReadsInVariantRegion", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("b005727119eee27995705959a637085e")), "2af063d1bd3c322b03405dbb3ecf59a9"); } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java index bd0a8933c..3534284cd 100644 --- 
a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java @@ -318,7 +318,7 @@ public class SlidingWindowUnitTest extends BaseTest { this.expectedNumberOfReads = expectedNumberOfReads; this.expectedNumberOfReadsWithHetCompression = expectedNumberOfReadsWithHetCompression; this.expectedNumberOfReadsAtDeepCoverage = expectedNumberOfReadsAtDeepCoverage; - this.description = String.format("%d %d %d", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage); + this.description = String.format("%d %d %d %b %b", expectedNumberOfReads, expectedNumberOfReadsWithHetCompression, expectedNumberOfReadsAtDeepCoverage, readsShouldBeLowQuality, variantBaseShouldBeLowQuality); // first, add the basic reads to the collection myReads.addAll(basicReads); @@ -390,40 +390,40 @@ public class SlidingWindowUnitTest extends BaseTest { List tests = new ArrayList(); // test high quality reads and bases - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, false, 1, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, false, 9, 6, 5 + DEEP_COVERAGE_ITERATIONS)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, false, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, false, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc320), false, false, 11, 10, 4 + (6 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, false, 2, 2, 2)}); + 
tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, false, 11, 8, 7 + DEEP_COVERAGE_ITERATIONS)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, false, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, false, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, false, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc320), false, false, 13, 12, 6 + (6 * DEEP_COVERAGE_ITERATIONS))}); // test low quality reads - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), true, false, 1, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), true, false, 2, 2, 2)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), true, false, 2, 2, 2)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), true, false, 2, 2, 2)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), true, false, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), true, false, 3, 3, 3)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), true, false, 3, 3, 3)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), true, false, 3, 3, 3)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), true, false, 3, 3, 3)}); // test low quality bases - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, true, 1, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, true, 1, 1, 1)}); - tests.add(new Object[]{new 
ConsensusCreationTest(Arrays.asList(loc290, loc295), false, true, 1, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, true, 1, 1, 1)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(), false, true, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), false, true, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), false, true, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), false, true, 2, 2, 2)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), false, true, 2, 2, 2)}); // test mixture - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), true, false, 2, 2, 2)}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), false, true, 1, 1, 1)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), true, false, 3, 3, 3)}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc1100), false, true, 2, 2, 2)}); // test I/D operators - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.D, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.D, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.D, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.I, 9, 9, 2 + (7 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new 
ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.I, 10, 10, 2 + (8 * DEEP_COVERAGE_ITERATIONS))}); - tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.I, 11, 11, 2 + (9 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.D, 11, 11, 4 + (7 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.D, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.D, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.D, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290), CigarOperator.I, 11, 11, 4 + (7 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc295), CigarOperator.I, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc309), CigarOperator.I, 12, 12, 4 + (8 * DEEP_COVERAGE_ITERATIONS))}); + tests.add(new Object[]{new ConsensusCreationTest(Arrays.asList(loc290, loc310), CigarOperator.I, 13, 13, 4 + (9 * DEEP_COVERAGE_ITERATIONS))}); return tests.toArray(new Object[][]{}); } @@ -517,6 +517,39 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(result.getFirst().size(), totalNumReads); // no compression at all } + @Test + public void testConsensusCreationForInsertions() { + + final int totalNumReads = 7; + final ObjectList myReads = new ObjectArrayList<>(totalNumReads); + + // add reads, one with a SNP and one with a SNP and insertion + for 
( int i = 0; i < totalNumReads; i++ ) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "basicRead" + i, 0, globalStartPosition, readLength); + read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); + read.setMappingQuality(30); + read.setReadNegativeStrandFlag(false); + + final byte[] bases = Utils.dupBytes((byte) 'A', readLength); + if ( i < 2 ) + bases[20] = 'C'; + if ( i == 0 ) + bases[80] = 'C'; + read.setReadBases(bases); + + if ( i == 0 ) + read.setCigarString("80M1I19M"); + + myReads.add(read); + } + + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.1, 0.05, 0.05, 20, 20, 100, ReduceReads.DownsampleStrategy.Normal, false); + for ( final GATKSAMRecord read : myReads ) + slidingWindow.addRead(read); + final Pair, CompressionStash> result = slidingWindow.close(null); + Assert.assertEquals(result.getFirst().size(), 3); // no compression at all for SNPs + } + @Test public void testAddingReadPairWithSameCoordinates() { final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10); @@ -739,21 +772,22 @@ public class SlidingWindowUnitTest extends BaseTest { read.setReadBases(Utils.dupBytes((byte) 'A', readLength)); read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); read.setMappingQuality(30); + read.setReadNegativeStrandFlag(false); // add the read final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, 10, ReduceReads.DownsampleStrategy.Normal, false); slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, false, start); for ( int i = 0; i < start; i++ ) - Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); + Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0); for ( int i = 0; i < readLength; i++ ) - 
Assert.assertEquals(windowHeader.get(start + i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 1); + Assert.assertEquals(windowHeader.get(start + i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 1); for ( int i = start + readLength; i < currentHeaderLength; i++ ) - Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); + Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0); // now remove the read slidingWindow.actuallyUpdateHeaderForRead(windowHeader, read, true, start); for ( int i = 0; i < currentHeaderLength; i++ ) - Assert.assertEquals(windowHeader.get(i).getConsensusBaseCounts().countOfBase(BaseUtils.Base.A.base), 0); + Assert.assertEquals(windowHeader.get(i).getBaseCounts(SlidingWindow.ConsensusType.POSITIVE_CONSENSUS).countOfBase(BaseUtils.Base.A.base), 0); } ////////////////////////////////////////////////////////////////////////////////// From 71eb944e629ea2c9bf37ff9a2b59f7c3988c1fcf Mon Sep 17 00:00:00 2001 From: sathibault Date: Thu, 25 Jul 2013 14:19:50 -0500 Subject: [PATCH 031/172] Adding CnyPairHMMUnitTest --- .../sting/utils/pairhmm/CnyPairHMM.java | 2 +- .../utils/pairhmm/CnyPairHMMUnitTest.java | 101 ++++++++++++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 protected/java/test/org/broadinstitute/sting/utils/pairhmm/CnyPairHMMUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java index 8353d3282..0afd4afe2 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/CnyPairHMM.java @@ -64,7 +64,7 @@ public final class CnyPairHMM extends PairHMM implements BatchPairHMM { public List haplotypes; }; - private 
static class ResultQueue { + public static class ResultQueue { private int offset; private List batchResults; diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/CnyPairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/CnyPairHMMUnitTest.java new file mode 100644 index 000000000..bdc06a6f0 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/CnyPairHMMUnitTest.java @@ -0,0 +1,101 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. 
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.utils.pairhmm; + +import org.broadinstitute.sting.BaseTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +public class CnyPairHMMUnitTest extends BaseTest { + private native void puts(String text); + + @Test(enabled = true) + public void testResultQueue() { + final double[] row1 = new double[] { 4.5, 53.1, 6.4 }; + final double[] row2 = new double[] { 1.0, 5.9, 6.9, 6.1, 19.8 }; + final double[] row3 = new double[] { 10.4, 9.101, 89.5, 9.8}; + final double[] row4 = new double[] { 7.3, 1.4, 5.67, 56.32 }; + CnyPairHMM.ResultQueue queue = new CnyPairHMM.ResultQueue(); + + // Test inter-mixed push/pop operations produce the correct output + queue.push(row1); + queue.push(row2); + + for (int i=0; i Date: Thu, 25 Jul 2013 13:22:49 -0400 Subject: [PATCH 032/172] Adding LibraryReadFilter. --Moving LibraryReadFilter which has been part of Mutect into gatk public. --Added an additional check for null values. 
--- .../sting/gatk/filters/LibraryReadFilter.java | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/filters/LibraryReadFilter.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/LibraryReadFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/LibraryReadFilter.java new file mode 100644 index 000000000..39bcb96e1 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/LibraryReadFilter.java @@ -0,0 +1,49 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.gatk.filters; + +import net.sf.samtools.SAMReadGroupRecord; +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.commandline.Argument; +import org.broadinstitute.sting.gatk.filters.ReadFilter; + +/** + * Only use reads from the specified library + * + * @author kcibul + * @since Aug 15, 2012 + * + */ + +public class LibraryReadFilter extends ReadFilter { + @Argument(fullName = "library", shortName = "library", doc="The name of the library to keep, filtering out all others", required=true) + private String LIBRARY_TO_KEEP = null; + + public boolean filterOut( final SAMRecord read ) { + final SAMReadGroupRecord readGroup = read.getReadGroup(); + return ( readGroup == null || readGroup.getLibrary() == null || !readGroup.getLibrary().equals( LIBRARY_TO_KEEP ) ); + } +} From 8c205dda1b2a3b98d8107c2c494a28fd66ed9fc9 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Thu, 25 Jul 2013 12:47:52 -0400 Subject: [PATCH 033/172] Automatically order the annotation dimensions in the VQSR by their standard deviation instead of the order they were specified on the command line. 
--- .../VariantDataManager.java | 91 ++++++++++++++++--- .../VariantDataManagerUnitTest.java | 71 +++++++++++++++ ...ntRecalibrationWalkersIntegrationTest.java | 24 ++--- .../sting/utils/pairhmm/PairHMM.java | 2 +- 4 files changed, 160 insertions(+), 28 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManagerUnitTest.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 40032a886..d3be3de9e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -46,6 +46,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; +import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -71,20 +72,20 @@ import java.util.*; public class VariantDataManager { private ExpandingArrayList data; - private final double[] meanVector; - private final double[] varianceVector; // this is really the standard deviation - public final List annotationKeys; + private double[] meanVector; + private double[] varianceVector; // this is really the standard deviation + public List annotationKeys; private final VariantRecalibratorArgumentCollection VRAC; protected final static Logger logger = Logger.getLogger(VariantDataManager.class); protected final List trainingSets; public VariantDataManager( final List annotationKeys, final VariantRecalibratorArgumentCollection VRAC ) { this.data = null; - this.annotationKeys = new ArrayList( annotationKeys ); + this.annotationKeys = new ArrayList<>( annotationKeys ); this.VRAC = VRAC; meanVector = 
new double[this.annotationKeys.size()]; varianceVector = new double[this.annotationKeys.size()]; - trainingSets = new ArrayList(); + trainingSets = new ArrayList<>(); } public void setData( final ExpandingArrayList data ) { @@ -125,6 +126,73 @@ public class VariantDataManager { } datum.failingSTDThreshold = remove; } + + // re-order the data by increasing standard deviation so that the results don't depend on the order things were specified on the command line + // standard deviation over the training points is used as a simple proxy for information content, perhaps there is a better thing to use here + final List theOrder = calculateSortOrder(varianceVector); + annotationKeys = reorderList(annotationKeys, theOrder); + varianceVector = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(varianceVector), theOrder)); + meanVector = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(meanVector), theOrder)); + for( final VariantDatum datum : data ) { + datum.annotations = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(datum.annotations), theOrder)); + datum.isNull = ArrayUtils.toPrimitive(reorderArray(ArrayUtils.toObject(datum.isNull), theOrder)); + } + } + + /** + * Get a list of indices which give the ascending sort order of the data array + * @param data the data to consider + * @return a non-null list of integers with length matching the length of the input array + */ + protected List calculateSortOrder(final double[] data) { + final List theOrder = new ArrayList<>(data.length); + final List sortedData = new ArrayList<>(data.length); + int count = 0; + for( final double d : data ) { + sortedData.add(new MyStandardDeviation(d, count++)); + } + Collections.sort(sortedData); // sort the data in ascending order + for( final MyStandardDeviation d : sortedData ) { + theOrder.add(d.originalIndex); // read off the sort order by looking at the index field + } + return theOrder; + } + + // small private class to assist in reading off the new ordering of the 
standard deviation array + private class MyStandardDeviation implements Comparable { + final Double standardDeviation; + final int originalIndex; + + public MyStandardDeviation( final double standardDeviation, final int originalIndex ) { + this.standardDeviation = standardDeviation; + this.originalIndex = originalIndex; + } + + @Override + public int compareTo(final MyStandardDeviation other) { + return standardDeviation.compareTo(other.standardDeviation); + } + } + + /** + * Convenience connector method to work with arrays instead of lists. See ##reorderList## + */ + private T[] reorderArray(final T[] data, final List order) { + return reorderList(Arrays.asList(data), order).toArray(data); + } + + /** + * Reorder the given data list to be in the specified order + * @param data the data to reorder + * @param order the new order to use + * @return a reordered list of data + */ + private List reorderList(final List data, final List order) { + final List returnList = new ArrayList<>(data.size()); + for( final int index : order ) { + returnList.add( data.get(index) ); + } + return returnList; } /** @@ -161,15 +229,8 @@ public class VariantDataManager { return false; } - public boolean checkHasKnownSet() { - for( final TrainingSet trainingSet : trainingSets ) { - if( trainingSet.isKnown ) { return true; } - } - return false; - } - public ExpandingArrayList getTrainingData() { - final ExpandingArrayList trainingData = new ExpandingArrayList(); + final ExpandingArrayList trainingData = new ExpandingArrayList<>(); for( final VariantDatum datum : data ) { if( datum.atTrainingSite && !datum.failingSTDThreshold && datum.originalQual > VRAC.QUAL_THRESHOLD ) { trainingData.add( datum ); @@ -184,7 +245,7 @@ public class VariantDataManager { public ExpandingArrayList selectWorstVariants( double bottomPercentage, final int minimumNumber ) { // The return value is the list of training variants - final ExpandingArrayList trainingData = new ExpandingArrayList(); + final 
ExpandingArrayList trainingData = new ExpandingArrayList<>(); // First add to the training list all sites overlapping any bad sites training tracks for( final VariantDatum datum : data ) { @@ -219,7 +280,7 @@ public class VariantDataManager { public ExpandingArrayList getRandomDataForPlotting( int numToAdd ) { numToAdd = Math.min(numToAdd, data.size()); - final ExpandingArrayList returnData = new ExpandingArrayList(); + final ExpandingArrayList returnData = new ExpandingArrayList<>(); for( int iii = 0; iii < numToAdd; iii++) { final VariantDatum datum = data.get(GenomeAnalysisEngine.getRandomGenerator().nextInt(data.size())); if( !datum.failingSTDThreshold ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManagerUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManagerUnitTest.java new file mode 100644 index 000000000..cbb115cf7 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManagerUnitTest.java @@ -0,0 +1,71 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. 
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. 
NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.variantrecalibration; + +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.BaseTest; +import org.junit.Assert; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +/** + * Created with IntelliJ IDEA. + * User: rpoplin + * Date: 7/25/13 + */ + +public class VariantDataManagerUnitTest extends BaseTest { + @Test + public final void testCalculateSortOrder() { + final double[] data = {2.0, 3.0, 0.0, 1.0, 4.0, 68.0, 5.0}; + VariantDataManager vdm = new VariantDataManager(new ArrayList(), new VariantRecalibratorArgumentCollection()); + final List order = vdm.calculateSortOrder(data); + Assert.assertArrayEquals(new int[]{2,3,0,1,4,6,5}, ArrayUtils.toPrimitive(order.toArray(new Integer[order.size()]))); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index e7a3f23a4..b5a541d53 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -72,9 +72,9 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } VRTest lowPass = new VRTest(validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf", - "4d08c8eee61dd1bdea8c5765f34e41f0", // tranches - "ca7de32b6143cce58aa4bc59b311feb7", // recal file - "cc7f413ba50b3d12f11f95aaa31e67d1"); // cut VCF + "583e8f63475dfd09a26bf11579075c8e", // tranches + "39a98f13b26c8c1f363f99ab8cead6ca", // recal file + "d235aefef741a6b2c352ef20af1ca790"); // cut VCF @DataProvider(name = "VRTest") public Object[][] createData1() { @@ -121,9 +121,9 @@ public class 
VariantRecalibrationWalkersIntegrationTest extends WalkerTest { } VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", - "6a1eef4d02857dbb117a15420b5c0ce9", // tranches - "db9faaee11ee5427a81ddee328245f8c", // recal file - "42e0fcd8e048a5f6abc41a4d1c3e97a5"); // cut VCF + "d29356849670aabcc12643a2b68dcc82", // tranches + "8abaf8142a6ee212b6dddc7053605512", // recal file + "d6cd4f61875ae09a030fd9f2d7328246"); // cut VCF @DataProvider(name = "VRBCFTest") public Object[][] createVRBCFTest() { @@ -173,15 +173,15 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest indelUnfiltered = new VRTest( validationDataLocation + "combined.phase1.chr20.raw.indels.unfiltered.sites.vcf", // all FILTERs as . - "b7589cd098dc153ec64c02dcff2838e4", // tranches - "5a9ba210a3c68109289a71039a04509d", // recal file - "d816bd43c844069d65711a7975707437"); // cut VCF + "99c3736dab836ae8b41e344062e01b5a", // tranches + "55d2f89980ea9c6c469314129dbac732", // recal file + "482039de04961876890e125055732450"); // cut VCF VRTest indelFiltered = new VRTest( validationDataLocation + "combined.phase1.chr20.raw.indels.filtered.sites.vcf", // all FILTERs as PASS - "b7589cd098dc153ec64c02dcff2838e4", // tranches - "5a9ba210a3c68109289a71039a04509d", // recal file - "6bcb344511c727c28523825f73c7daee"); // cut VCF + "99c3736dab836ae8b41e344062e01b5a", // tranches + "55d2f89980ea9c6c469314129dbac732", // recal file + "e63e22ae05ad0bd32b943cde00b6e5a9"); // cut VCF @DataProvider(name = "VRIndelTest") public Object[][] createTestVariantRecalibratorIndel() { diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index 85ac97f95..f4f70ac63 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -95,7 +95,7 @@ public abstract class PairHMM { * * @param 
haplotypeBases the full sequence (in standard SAM encoding) of the haplotype, must be >= than read bases in length * @param readBases the bases (in standard encoding) of the read, must be <= haplotype bases in length - * @param readQuals the phred-scaled per base substitition quality scores of read. Must be the same length as readBases + * @param readQuals the phred-scaled per base substitution quality scores of read. Must be the same length as readBases * @param insertionGOP the phred-scaled per base insertion quality scores of read. Must be the same length as readBases * @param deletionGOP the phred-scaled per base deletion quality scores of read. Must be the same length as readBases * @param overallGCP the phred-scaled gap continuation penalties scores of read. Must be the same length as readBases From edbd17b8e048c49d8f2e2d287693091ff7e01fd4 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Tue, 23 Jul 2013 13:27:47 -0400 Subject: [PATCH 034/172] Added note of caution to VQSR gatkdocs for option BOTH of recalibration mode --- .../VariantRecalibratorArgumentCollection.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java index bdd41f37c..ae0b4a347 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorArgumentCollection.java @@ -70,9 +70,9 @@ public class VariantRecalibratorArgumentCollection { throw new ReviewedStingException("VariantRecalibrator mode string is unrecognized, input = " + input); } - @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) 
SNP for recalibrating only snps (emitting indels untouched in the output VCF); 2.) INDEL for indels; and 3.) BOTH for recalibrating both snps and indels simultaneously.", required = false) + @Argument(fullName = "mode", shortName = "mode", doc = "Recalibration mode to employ: 1.) SNP for recalibrating only SNPs (emitting indels untouched in the output VCF); 2.) INDEL for indels (emitting SNPs untouched in the output VCF); and 3.) BOTH for recalibrating both SNPs and indels simultaneously (for testing purposes only, not recommended for general use).", required = false) public VariantRecalibratorArgumentCollection.Mode MODE = VariantRecalibratorArgumentCollection.Mode.SNP; - @Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm", required=false) + @Argument(fullName="maxGaussians", shortName="mG", doc="The maximum number of Gaussians to try during variational Bayes algorithm.", required=false) public int MAX_GAUSSIANS = 10; @Argument(fullName="maxIterations", shortName="mI", doc="The maximum number of VBEM iterations to be performed in variational Bayes algorithm. 
Procedure will normally end when convergence is detected.", required=false) public int MAX_ITERATIONS = 100; From 0ea3f8ca584d8194f609a0aa587c8ef8c542601c Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Thu, 25 Jul 2013 13:58:54 -0400 Subject: [PATCH 035/172] Added function to gatkdocs to specify what VCF field an annotation goes in (INFO or FORMAT) --- .../help/GenericDocumentationHandler.java | 32 +++++++++++++++++-- settings/helpTemplates/generic.template.html | 7 +++- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java index 02c269495..01221cf27 100644 --- a/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java +++ b/public/java/src/org/broadinstitute/sting/utils/help/GenericDocumentationHandler.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2012 The Broad Institute -* +* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without @@ -9,10 +9,10 @@ * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: -* +* * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
-* +* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND @@ -37,6 +37,8 @@ import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.collections.Pair; @@ -295,6 +297,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { // Get annotation info (what type of annotation, standard etc.) final HashSet annotInfo = getAnnotInfo(myClass, new HashSet()); root.put("annotinfo", StringUtils.join(annotInfo, ", ")); + // Get annotation field (whether it goes in INFO or FORMAT) + root.put("annotfield", getAnnotField(myClass)); // Get walker type if applicable root.put("walkertype", getWalkerType(myClass)); // Get partition type if applicable @@ -316,6 +320,7 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { // put empty items to avoid blowups root.put("parallel", new HashSet()); root.put("annotinfo", ""); + root.put("annotfield", ""); root.put("walkertype", ""); root.put("partitiontype", ""); root.put("readfilters", new HashSet>()); @@ -359,6 +364,27 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler { return getParallelism(mySuperClass, parallelOptions); } + /** + * Utility function that looks up whether the annotation goes in INFO or FORMAT field. 
+ * + * @param myClass the class to query for the interfaces + * @return a String specifying the annotation field + */ + private final String getAnnotField(Class myClass) { + // + // Look up superclasses recursively until we find either + // GenotypeAnnotation or InfoFieldAnnotation + final Class mySuperClass = myClass.getSuperclass(); + if (mySuperClass == InfoFieldAnnotation.class) { + return "INFO (variant-level)"; + } else if (mySuperClass == GenotypeAnnotation.class) { + return "FORMAT (sample genotype-level)"; + } else if (mySuperClass.getSimpleName().equals("Object")) { + return ""; + } + return getAnnotField(mySuperClass); + } + /** * Utility function that determines the annotation type for an instance of class c. * diff --git a/settings/helpTemplates/generic.template.html b/settings/helpTemplates/generic.template.html index b05ad65c0..ea9fa2d72 100644 --- a/settings/helpTemplates/generic.template.html +++ b/settings/helpTemplates/generic.template.html @@ -123,8 +123,13 @@ ${partitiontype} + <#if annotfield != "" > +

VCF Field + ${annotfield} +

+ <#if annotinfo != "" > -

Annotation type +

Type ${annotinfo}

From 5ad99c362dc95a767ba1cbe93572faaa3d816952 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Thu, 25 Jul 2013 17:20:02 -0400 Subject: [PATCH 036/172] Added caveat to gatkdocs for MAPQ read transformers & cleaned up AB annotation gatkdocs --- .../filters/ReassignMappingQualityFilter.java | 23 +++++++++++++++---- .../ReassignOneMappingQualityFilter.java | 7 +++--- .../gatk/walkers/annotator/AlleleBalance.java | 7 +++++- .../annotator/AlleleBalanceBySample.java | 2 +- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java index 41ab59845..e576666e1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignMappingQualityFilter.java @@ -32,10 +32,23 @@ import org.broadinstitute.sting.commandline.Argument; * A read filter (transformer) that sets all reads mapping quality to a given value. * *

- * If a BAM file contains erroneous or missing mapping qualities, this 'filter' will set - * all your mapping qualities to a given value. Default being 60. + * If a BAM file contains erroneous or missing mapping qualities (MAPQ), this read transformer will set all your + * mapping qualities to a given value (see arguments list for default value). *

* + *

See also

+ * + *

ReassignOneMappingQualityFilter: reassigns a single MAPQ value, as opposed to all those found in the BAM file.

+ * + *

Caveats

+ * + *

Note that due to the order of operations involved in applying filters, it is possible that other read filters + * (determined either at command-line or internally by the tool you are using) will be applied to your data before + * this read transformation can be applied. If one of those other filters acts on the read mapping quality (MAPQ), + * then you may not obtain the expected results. Unfortunately it is currently not possible to change the order of + * operations from command line. To avoid the problem, we recommend applying this filter separately from any other + * analysis, using PrintReads.

+ * * *

Input

*

@@ -50,9 +63,9 @@ import org.broadinstitute.sting.commandline.Argument; * *

Examples

*
- *    java
- *      -jar GenomeAnalysisTK.jar
- *      -rf ReassignMappingQuality
+ *  java -jar GenomeAnalysisTK.jar \
+ *      -T PrintReads \
+ *      -rf ReassignMappingQuality \
  *      -DMQ 35
  *  
* diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java index f31313a86..232b7ed3d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java +++ b/public/java/src/org/broadinstitute/sting/gatk/filters/ReassignOneMappingQualityFilter.java @@ -32,7 +32,7 @@ import org.broadinstitute.sting.commandline.Argument; * A read filter (transformer) that changes a given read mapping quality to a different value. * *

- * This 'filter' will change a certain read mapping quality to a different value without affecting reads that + * This read transformer will change a certain read mapping quality to a different value without affecting reads that * have other mapping qualities. This is intended primarily for users of RNA-Seq data handling programs such * as TopHat, which use MAPQ = 255 to designate uniquely aligned reads. According to convention, 255 normally * designates "unknown" quality, and most GATK tools automatically ignore such reads. By reassigning a different @@ -46,7 +46,6 @@ import org.broadinstitute.sting.commandline.Argument; * that have no assigned mapping qualities. *

* - * *

Input

*

* BAM file(s) @@ -60,8 +59,8 @@ import org.broadinstitute.sting.commandline.Argument; * *

Examples

*
- *    java
- *      -jar GenomeAnalysisTK.jar
+ *    java -jar GenomeAnalysisTK.jar
+ *      -T PrintReads
  *      -rf ReassignOneMappingQuality
  *      -RMQF 255
  *      -RMQT 60
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
index 6e7bc9805..87323cf87 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
@@ -46,7 +46,12 @@ import java.util.Map;
 
 
 /**
- * The allele balance (fraction of ref bases over ref + alt bases) across all biallelic het-called samples
+ * Allele balance across all samples
+ *
+ * 

The allele balance is the fraction of ref bases over ref + alt bases.

+ * + *

Caveats

+ *

Note that this annotation will only work properly for biallelic samples that are called as heterozygous.

*/ public class AlleleBalance extends InfoFieldAnnotation { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java index 608257b54..f5930078f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java @@ -51,7 +51,7 @@ import java.util.List; *

The allele balance is the fraction of ref bases over ref + alt bases.

* *

Caveats

- *

Note that this annotation will only work properly for biallelic het-called samples.

+ *

Note that this annotation will only work properly for biallelic samples that are called as heterozygous.

*

This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.

*/ public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation { From 660b0759004bfb11e4b05c14d277c635730ee376 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Thu, 25 Jul 2013 17:28:06 -0400 Subject: [PATCH 037/172] Added deprecation notice for SomaticIndelDetector --- .../src/org/broadinstitute/sting/utils/DeprecatedToolChecks.java | 1 + 1 file changed, 1 insertion(+) diff --git a/public/java/src/org/broadinstitute/sting/utils/DeprecatedToolChecks.java b/public/java/src/org/broadinstitute/sting/utils/DeprecatedToolChecks.java index 78c32ed02..9823e524a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/DeprecatedToolChecks.java +++ b/public/java/src/org/broadinstitute/sting/utils/DeprecatedToolChecks.java @@ -46,6 +46,7 @@ public class DeprecatedToolChecks { deprecatedGATKWalkers.put("TableRecalibration", "2.0 (use PrintReads with -BQSR instead; see documentation for usage)"); deprecatedGATKWalkers.put("AlignmentWalker", "2.2 (no replacement)"); deprecatedGATKWalkers.put("CountBestAlignments", "2.2 (no replacement)"); + deprecatedGATKWalkers.put("SomaticIndelDetector", "2.0 (replaced by the standalone tool Indelocator; see Cancer Tools documentation)"); } // Mapping from walker name to major version number where the walker first disappeared and optional replacement options From fc4a8b1dd04603c00197a133450a746137619422 Mon Sep 17 00:00:00 2001 From: Geraldine Van der Auwera Date: Thu, 25 Jul 2013 17:29:09 -0400 Subject: [PATCH 038/172] Fixed example in DoC gatkdoc --- .../sting/gatk/walkers/coverage/DepthOfCoverage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java index c4ef4d23b..ca3255097 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java +++ 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java @@ -102,7 +102,7 @@ import java.util.*; *
  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \
- *   -T Coverage \
+ *   -T DepthOfCoverage \
  *   -o file_name_base \
  *   -I input_bams.list
  *   [-geneList refSeq.sorted.txt] \

From 3063d82797830b0b5905987c82f92d62d524c4f4 Mon Sep 17 00:00:00 2001
From: Geraldine Van der Auwera 
Date: Thu, 25 Jul 2013 17:30:42 -0400
Subject: [PATCH 039/172] Fixed example in CallableLoci gatkdoc

---
 .../sting/gatk/walkers/coverage/CallableLoci.java              | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
index 6af6723f2..e30965925 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
@@ -84,7 +84,8 @@ import java.io.PrintStream;
  * 

*

Examples

*
- *     -T CallableLociWalker \
+ *  java -jar GenomeAnalysisTK.jar \
+ *     -T CallableLoci \
  *     -I my.bam \
  *     -summary my.summary \
  *     -o my.bed

From 464a5b229d81a5a3866b7c5121f0be84fa541a55 Mon Sep 17 00:00:00 2001
From: Chris Hartl 
Date: Mon, 29 Jul 2013 15:48:17 -0700
Subject: [PATCH 040/172] Add <pre> tags to the Genotype Concordance docs.
 Tables were not being displayed properly.

---
 .../gatk/walkers/variantutils/GenotypeConcordance.java | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java
index da8b20c66..724578a09 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java
@@ -69,12 +69,15 @@ import java.util.*;
  *  Genotype Concordance writes a GATK report to the specified file (via -o) , consisting of multiple tables of counts
  *  and proportions. These tables may be optionally moltenized via the -moltenize argument. That is, the standard table
  *
+ *  <pre>
  *  Sample   NO_CALL_HOM_REF  NO_CALL_HET  NO_CALL_HOM_VAR   (...)
  *  NA12878       0.003        0.001            0.000        (...)
  *  NA12891       0.005        0.000            0.000        (...)
+ *  </pre>
  *
  *  would instead be displayed
  *
+ *  <pre>
  *  NA12878  NO_CALL_HOM_REF   0.003
  *  NA12878  NO_CALL_HET       0.001
  *  NA12878  NO_CALL_HOM_VAR   0.000
@@ -82,6 +85,7 @@ import java.util.*;
  *  NA12891  NO_CALL_HET       0.000
  *  NA12891  NO_CALL_HOM_VAR   0.000
  *  (...)
+ *  </pre>
  *
  *
  *  These tables are constructed on a per-sample basis, and include counts of eval vs comp genotype states, and the
@@ -92,8 +96,10 @@ import java.util.*;
  *  counts for EVAL_SUBSET_TRUTH and EVAL_SUPERSET_TRUTH will be generated.
  *
  *  For example, in the following situation
+ *  <pre>
  *    eval:  ref - A   alt - C
  *    comp:  ref - A   alt - C,T
+ *  </pre>
  *  then the site is tabulated as EVAL_SUBSET_TRUTH. Were the situation reversed, it would be EVAL_SUPERSET_TRUTH.
  *  However, in the case where eval has both C and T alternate alleles, both must be observed in the genotypes
  *  (that is, there must be at least one of (0/1,1/1) and at least one of (0/2,1/2,2/2) in the genotype field). If
@@ -106,11 +112,15 @@ import java.util.*;
  *  (if no record exists in the comp VCF).
  *
  *  That is, in the situation
+ *  <pre>
  *   eval:  ref - A   alt - C   genotypes - 0/0  0/0  0/0 ... 0/0
  *   comp:  ref - A   alt - C   ...         0/0  0/0  ...
+ *  </pre>
  *  is equivalent to
+ *  <pre>
  *   eval:  ref - A   alt - .   genotypes - 0/0  0/0  0/0 ... 0/0
  *   comp:  ref - A   alt - C   ...         0/0  0/0  ...
+ *  </pre>
* * When a record is present in the comp VCF the *genotypes* for the monomorphic site will still be used to evaluate * per-sample genotype concordance counts. From 7b731dd596f38d9449af1386c9b67b8bbb55e1fb Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 30 Jul 2013 13:59:58 -0400 Subject: [PATCH 041/172] Removed native method call and fixed indentation. --- .../utils/pairhmm/CnyPairHMMUnitTest.java | 51 ++++++++----------- 1 file changed, 21 insertions(+), 30 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/CnyPairHMMUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/CnyPairHMMUnitTest.java index bdc06a6f0..14ab552e5 100644 --- a/protected/java/test/org/broadinstitute/sting/utils/pairhmm/CnyPairHMMUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/utils/pairhmm/CnyPairHMMUnitTest.java @@ -48,54 +48,45 @@ package org.broadinstitute.sting.utils.pairhmm; import org.broadinstitute.sting.BaseTest; import org.testng.Assert; -import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.util.ArrayList; -import java.util.List; - public class CnyPairHMMUnitTest extends BaseTest { - private native void puts(String text); @Test(enabled = true) public void testResultQueue() { - final double[] row1 = new double[] { 4.5, 53.1, 6.4 }; - final double[] row2 = new double[] { 1.0, 5.9, 6.9, 6.1, 19.8 }; - final double[] row3 = new double[] { 10.4, 9.101, 89.5, 9.8}; - final double[] row4 = new double[] { 7.3, 1.4, 5.67, 56.32 }; - CnyPairHMM.ResultQueue queue = new CnyPairHMM.ResultQueue(); + final double[] row1 = new double[] { 4.5, 53.1, 6.4 }; + final double[] row2 = new double[] { 1.0, 5.9, 6.9, 6.1, 19.8 }; + final double[] row3 = new double[] { 10.4, 9.101, 89.5, 9.8}; + final double[] row4 = new double[] { 7.3, 1.4, 5.67, 56.32 }; + CnyPairHMM.ResultQueue queue = new CnyPairHMM.ResultQueue(); - // Test inter-mixed push/pop operations produce the correct output - 
queue.push(row1); - queue.push(row2); + // Test inter-mixed push/pop operations produce the correct output + queue.push(row1); + queue.push(row2); - for (int i=0; i Date: Thu, 6 Jun 2013 13:33:20 -0400 Subject: [PATCH 042/172] Adding a representation of the hierarchy of flags output by snpEff (Yossi) and a stratifier whose output states are coding regions, genes, stop_gain, stop_lost and splice sites, all determined by the snpEff hierarchy (J. Rose) --- .../walkers/annotator/SnpEffUtilUnitTest.java | 132 +++++++++++++++ .../gatk/walkers/annotator/SnpEffUtil.java | 154 ++++++++++++++++++ .../SnpEffPositionModifier.java | 86 ++++++++++ 3 files changed, 372 insertions(+) create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtil.java create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/SnpEffPositionModifier.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java new file mode 100644 index 000000000..15b370ee4 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java @@ -0,0 +1,132 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +/** + * Created with IntelliJ IDEA. + * User: farjoun + * Date: 6/5/13 + * Time: 2:31 PM + * To change this template use File | Settings | File Templates. + */ + + +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.EffectType; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +public class SnpEffUtilUnitTest { + + + @DataProvider(name="effects") + public Object[][] childParentpairs() { + List tests = new ArrayList(); + + tests.add(new Object[]{EffectType.GENE,EffectType.CHROMOSOME}); + tests.add(new Object[]{EffectType.UTR_3_PRIME,EffectType.TRANSCRIPT}); + tests.add(new Object[]{EffectType.CODON_CHANGE,EffectType.CDS}); + tests.add(new Object[]{EffectType.STOP_GAINED,EffectType.EXON}); + tests.add(new Object[]{EffectType.SYNONYMOUS_START,EffectType.TRANSCRIPT}); + tests.add(new Object[]{EffectType.FRAME_SHIFT,EffectType.CDS}); + tests.add(new Object[]{EffectType.UPSTREAM,EffectType.INTERGENIC}); + tests.add(new Object[]{EffectType.SPLICE_SITE_DONOR,EffectType.INTRON}); + tests.add(new Object[]{EffectType.SPLICE_SITE_ACCEPTOR,EffectType.INTRON}); + tests.add(new 
Object[]{EffectType.STOP_LOST,EffectType.NON_SYNONYMOUS_CODING}); + return tests.toArray(new Object[][]{}); + } + + @DataProvider(name="self") + public Object[][] childEqualsParentpairs() { + List tests = new ArrayList(); + + for(EffectType type:EffectType.values()){ + tests.add(new Object[]{type,type}); + } + return tests.toArray(new Object[][]{}); + } + + @DataProvider(name="noneffects") + public Object[][] nonchildParentpairs() { + List tests = new ArrayList(); + + tests.add(new Object[]{EffectType.START_GAINED,EffectType.NON_SYNONYMOUS_CODING}); + tests.add(new Object[]{EffectType.GENE,EffectType.NONE}); + tests.add(new Object[]{EffectType.UTR_3_PRIME,EffectType.CDS}); + tests.add(new Object[]{EffectType.CODON_CHANGE,EffectType.REGULATION}); + tests.add(new Object[]{EffectType.DOWNSTREAM,EffectType.REGULATION}); + tests.add(new Object[]{EffectType.SPLICE_SITE_ACCEPTOR,EffectType.EXON}); + tests.add(new Object[]{EffectType.START_GAINED,EffectType.SYNONYMOUS_START}); + tests.add(new Object[]{EffectType.NON_SYNONYMOUS_CODING,EffectType.DOWNSTREAM}); + tests.add(new Object[]{EffectType.CODON_DELETION,EffectType.INTRON}); + tests.add(new Object[]{EffectType.UTR_5_PRIME,EffectType.EXON_DELETED}); + tests.add(new Object[]{EffectType.INTRON,EffectType.NONE}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "effects") + public void testSubType(EffectType subType,EffectType parentType) { + Assert.assertTrue(SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); + } + @Test(dataProvider = "self") + public void testSubTypeSelf(EffectType subType,EffectType parentType) { + Assert.assertTrue(SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); + } + @Test(dataProvider = "effects") + public void testNonSubTypeSelf(EffectType parentType,EffectType subType) { + 
Assert.assertTrue(!SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); + } + @Test(dataProvider = "noneffects") + public void testNonSubType(EffectType subType,EffectType parentType) { + Assert.assertTrue(!SnpEffUtil.isSubTypeOf(subType, parentType), String.format("testing that %s is NOT subtype of %s.", subType, parentType)); + Assert.assertTrue(!SnpEffUtil.isSubTypeOf(parentType,subType), String.format("testing that %s is NOT subtype of %s.", parentType,subType)); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtil.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtil.java new file mode 100644 index 000000000..12e923b8f --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtil.java @@ -0,0 +1,154 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.EffectType; + +import java.util.*; +/** + * Created with IntelliJ IDEA. + * User: farjoun + * Date: 6/5/13 + * Time: 12:06 PM + * To change this template use File | Settings | File Templates. + */ + +/* This class holds a tree representation of the annotations used in snpEff, and provides a mechanism for telling if a +given annotation is a descendant of another. +The idea is to be able to stratify effects by large branches and not only the specific +snpEff annotation that a variant might have. For example if we want to know whether a variant is in CDS +but if it's marked SYNONYMOUS_CODING or NON_SYNONYMOUS_CODING (or many other options) still imply that its in the CDS. + +The hierarchy was determined by Yossi Farjoun with input from Pablo (SNPEFF) and Tim Fennel. +*/ + + +public class SnpEffUtil { + + // A map holding for every child, it's parent. + // A node that isn't a key node is a root node. 
+ static private final Map snpEffectGraph = new HashMap<>(); + + //A map from each value of EffectType to a set of it's ancestors + static private final Map> snpEffectAncestorSet = new HashMap<>(); + + static { + + + //INTERGENIC + snpEffectGraph.put(EffectType.UPSTREAM,EffectType.INTERGENIC); + snpEffectGraph.put(EffectType.DOWNSTREAM,EffectType.INTERGENIC); + snpEffectGraph.put(EffectType.INTERGENIC_CONSERVED,EffectType.INTERGENIC); + + //INTRON + snpEffectGraph.put(EffectType.INTRON_CONSERVED,EffectType.INTRON); + snpEffectGraph.put(EffectType.SPLICE_SITE_ACCEPTOR,EffectType.INTRON); + snpEffectGraph.put(EffectType.SPLICE_SITE_DONOR,EffectType.INTRON); + + //CDS + snpEffectGraph.put(EffectType.EXON_DELETED,EffectType.CDS); + snpEffectGraph.put(EffectType.SYNONYMOUS_CODING,EffectType.CDS); + snpEffectGraph.put(EffectType.NON_SYNONYMOUS_CODING,EffectType.CDS); + + //SYNONYMOUS_CODING + snpEffectGraph.put(EffectType.SYNONYMOUS_STOP,EffectType.SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.SYNONYMOUS_START,EffectType.SYNONYMOUS_CODING); + + //NON_SYNONYMOUS_CODING + snpEffectGraph.put(EffectType.START_LOST,EffectType.NON_SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.STOP_GAINED,EffectType.NON_SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.STOP_LOST,EffectType.NON_SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.CODON_CHANGE,EffectType.NON_SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.CODON_INSERTION,EffectType.NON_SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.CODON_DELETION,EffectType.NON_SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.CODON_CHANGE_PLUS_CODON_DELETION,EffectType.NON_SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.CODON_CHANGE_PLUS_CODON_INSERTION,EffectType.NON_SYNONYMOUS_CODING); + snpEffectGraph.put(EffectType.FRAME_SHIFT,EffectType.NON_SYNONYMOUS_CODING); + + //UTRs + snpEffectGraph.put(EffectType.UTR_5_DELETED,EffectType.UTR_5_PRIME); + snpEffectGraph.put(EffectType.UTR_3_DELETED,EffectType.UTR_3_PRIME); + 
snpEffectGraph.put(EffectType.START_GAINED,EffectType.UTR_5_PRIME); + + //EXON + snpEffectGraph.put(EffectType.UTR_5_PRIME,EffectType.EXON); + snpEffectGraph.put(EffectType.UTR_3_PRIME,EffectType.EXON); + snpEffectGraph.put(EffectType.CDS,EffectType.EXON); + + + //TRANSCRIPT + snpEffectGraph.put(EffectType.INTRON,EffectType.TRANSCRIPT); + snpEffectGraph.put(EffectType.EXON,EffectType.TRANSCRIPT); + + //GENE + snpEffectGraph.put(EffectType.TRANSCRIPT,EffectType.GENE); + snpEffectGraph.put(EffectType.REGULATION,EffectType.GENE); + + //CHROMOSOME + snpEffectGraph.put(EffectType.GENE,EffectType.CHROMOSOME); + snpEffectGraph.put(EffectType.INTERGENIC,EffectType.CHROMOSOME); + } + + //A helper function that gets the parent set of the set of children + private static Set getParentSet(final Set children){ + final Set parents=new HashSet<>(); + for(EffectType child:children){ + final EffectType parent = snpEffectGraph.get(child); + if(parent!=null) parents.add(parent); + } + return parents; + } + + //builds the total set of ancestors of a given node + private static Set getAncestorSet(final EffectType child, final boolean isSelfIncluded){ + + final Set ancestors=new HashSet<>(); + if(isSelfIncluded) ancestors.add(child); + + Set untraversedNodes=Collections.singleton(child); + + while(!untraversedNodes.isEmpty()){ + final Set putativeParents = getParentSet(untraversedNodes); //get immediate parents of unexamined set + putativeParents.removeAll(ancestors); //remove all known parents, remaining with previously unknown parents + ancestors.addAll(putativeParents); // add these parents to growing list of ancestors + untraversedNodes=putativeParents; //still need to traverse parents of these nodes + } + return ancestors; + } + + //returns true if the child effect is a subType of the parentEffect (including itself) + public static boolean isSubTypeOf(final SnpEff.EffectType childEffect, final SnpEff.EffectType parentEffect){ + + Set 
ancestorSet=snpEffectAncestorSet.get(childEffect); + + if(ancestorSet==null) { //lazy population of map. + ancestorSet = new HashSet<>(); + ancestorSet.addAll(getAncestorSet(childEffect, true)); //"true" so that a type is considered a subtype of itself + snpEffectAncestorSet.put(childEffect, ancestorSet); + } + return ancestorSet.contains(parentEffect); + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/SnpEffPositionModifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/SnpEffPositionModifier.java new file mode 100644 index 000000000..f393da6ad --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/SnpEffPositionModifier.java @@ -0,0 +1,86 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications; + +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff; +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.EffectType; +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.InfoFieldKey; +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEffUtil; +import org.broadinstitute.variant.variantcontext.VariantContext; + +import java.util.ArrayList; +import java.util.List; + +/** + * Stratifies variants as genes or coding regions, according to the effect modifier, as indicated by snpEff. + * The 'gene' category includes category 'coding region', and additionally includes introns. 'Coding regions' + * includes transcripts and, implicitly, UTRs. + */ +public class SnpEffPositionModifier extends VariantStratifier { + + public enum PositionModifier { + GENE, // EXON + CODING_REGION, // CDS + SPLICE_SITE, // not a straight translation -- see getRelevantStates + STOP_GAINED, // STOP_GAINED + STOP_LOST // STOP_LOST + } + + @Override + public void initialize() { + for (final PositionModifier type : PositionModifier.values()) states.add(type.name()); + } + + @Override + public List getRelevantStates( + final ReferenceContext ref, + final RefMetaDataTracker tracker, + final VariantContext comp, + final String compName, + final VariantContext eval, + final String evalName, + final String sampleName) + { + final List relevantStates = new ArrayList(); + if (eval != null && eval.isVariant() && eval.hasAttribute(InfoFieldKey.EFFECT_KEY.getKeyName())) { + final SnpEff.EffectType effectType = SnpEff.EffectType.valueOf( + eval.getAttribute(InfoFieldKey.EFFECT_KEY.getKeyName()).toString()); + + if (SnpEffUtil.isSubTypeOf(effectType, EffectType.EXON)) relevantStates.add(PositionModifier.GENE.name()); + if (SnpEffUtil.isSubTypeOf(effectType, 
EffectType.CDS)) relevantStates.add(PositionModifier.CODING_REGION.name()); + if (SnpEffUtil.isSubTypeOf(effectType, EffectType.STOP_GAINED)) relevantStates.add(PositionModifier.STOP_GAINED.name()); + if (SnpEffUtil.isSubTypeOf(effectType, EffectType.STOP_LOST)) relevantStates.add(PositionModifier.STOP_LOST.name()); + + if (SnpEffUtil.isSubTypeOf(effectType, EffectType.SPLICE_SITE_ACCEPTOR) || + SnpEffUtil.isSubTypeOf(effectType, EffectType.SPLICE_SITE_DONOR)) + relevantStates.add(PositionModifier.SPLICE_SITE.name()); + } + + return relevantStates; + } +} From 284176cd7bba62eb27d3822052032346a5b61d53 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 30 Jul 2013 17:50:59 -0400 Subject: [PATCH 043/172] moved SnpEffUtilUnitTest to public tree --- .../walkers/annotator/SnpEffUtilUnitTest.java | 132 ------------------ .../walkers/annotator/SnpEffUtilUnitTest.java | 111 +++++++++++++++ 2 files changed, 111 insertions(+), 132 deletions(-) delete mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java deleted file mode 100644 index 15b370ee4..000000000 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java +++ /dev/null @@ -1,132 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
-* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012 Broad Institute, Inc. -* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 4. 
INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 5. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 6. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 7. MISCELLANEOUS -* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. -*/ - -package org.broadinstitute.sting.gatk.walkers.annotator; - -/** - * Created with IntelliJ IDEA. - * User: farjoun - * Date: 6/5/13 - * Time: 2:31 PM - * To change this template use File | Settings | File Templates. - */ - - -import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.EffectType; -import org.testng.Assert; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.util.ArrayList; -import java.util.List; - -public class SnpEffUtilUnitTest { - - - @DataProvider(name="effects") - public Object[][] childParentpairs() { - List tests = new ArrayList(); - - tests.add(new Object[]{EffectType.GENE,EffectType.CHROMOSOME}); - tests.add(new Object[]{EffectType.UTR_3_PRIME,EffectType.TRANSCRIPT}); - tests.add(new Object[]{EffectType.CODON_CHANGE,EffectType.CDS}); - tests.add(new Object[]{EffectType.STOP_GAINED,EffectType.EXON}); - tests.add(new Object[]{EffectType.SYNONYMOUS_START,EffectType.TRANSCRIPT}); - tests.add(new Object[]{EffectType.FRAME_SHIFT,EffectType.CDS}); - tests.add(new Object[]{EffectType.UPSTREAM,EffectType.INTERGENIC}); - tests.add(new Object[]{EffectType.SPLICE_SITE_DONOR,EffectType.INTRON}); - tests.add(new Object[]{EffectType.SPLICE_SITE_ACCEPTOR,EffectType.INTRON}); - tests.add(new 
Object[]{EffectType.STOP_LOST,EffectType.NON_SYNONYMOUS_CODING}); - return tests.toArray(new Object[][]{}); - } - - @DataProvider(name="self") - public Object[][] childEqualsParentpairs() { - List tests = new ArrayList(); - - for(EffectType type:EffectType.values()){ - tests.add(new Object[]{type,type}); - } - return tests.toArray(new Object[][]{}); - } - - @DataProvider(name="noneffects") - public Object[][] nonchildParentpairs() { - List tests = new ArrayList(); - - tests.add(new Object[]{EffectType.START_GAINED,EffectType.NON_SYNONYMOUS_CODING}); - tests.add(new Object[]{EffectType.GENE,EffectType.NONE}); - tests.add(new Object[]{EffectType.UTR_3_PRIME,EffectType.CDS}); - tests.add(new Object[]{EffectType.CODON_CHANGE,EffectType.REGULATION}); - tests.add(new Object[]{EffectType.DOWNSTREAM,EffectType.REGULATION}); - tests.add(new Object[]{EffectType.SPLICE_SITE_ACCEPTOR,EffectType.EXON}); - tests.add(new Object[]{EffectType.START_GAINED,EffectType.SYNONYMOUS_START}); - tests.add(new Object[]{EffectType.NON_SYNONYMOUS_CODING,EffectType.DOWNSTREAM}); - tests.add(new Object[]{EffectType.CODON_DELETION,EffectType.INTRON}); - tests.add(new Object[]{EffectType.UTR_5_PRIME,EffectType.EXON_DELETED}); - tests.add(new Object[]{EffectType.INTRON,EffectType.NONE}); - - return tests.toArray(new Object[][]{}); - } - - @Test(dataProvider = "effects") - public void testSubType(EffectType subType,EffectType parentType) { - Assert.assertTrue(SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); - } - @Test(dataProvider = "self") - public void testSubTypeSelf(EffectType subType,EffectType parentType) { - Assert.assertTrue(SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); - } - @Test(dataProvider = "effects") - public void testNonSubTypeSelf(EffectType parentType,EffectType subType) { - 
Assert.assertTrue(!SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); - } - @Test(dataProvider = "noneffects") - public void testNonSubType(EffectType subType,EffectType parentType) { - Assert.assertTrue(!SnpEffUtil.isSubTypeOf(subType, parentType), String.format("testing that %s is NOT subtype of %s.", subType, parentType)); - Assert.assertTrue(!SnpEffUtil.isSubTypeOf(parentType,subType), String.format("testing that %s is NOT subtype of %s.", parentType,subType)); - } -} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java new file mode 100644 index 000000000..24a274cee --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUtilUnitTest.java @@ -0,0 +1,111 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +/** + * Created with IntelliJ IDEA. + * User: farjoun + * Date: 6/5/13 + * Time: 2:31 PM + * To change this template use File | Settings | File Templates. + */ + + +import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.EffectType; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; + +public class SnpEffUtilUnitTest { + + + @DataProvider(name="effects") + public Object[][] childParentpairs() { + List tests = new ArrayList(); + + tests.add(new Object[]{EffectType.GENE,EffectType.CHROMOSOME}); + tests.add(new Object[]{EffectType.UTR_3_PRIME,EffectType.TRANSCRIPT}); + tests.add(new Object[]{EffectType.CODON_CHANGE,EffectType.CDS}); + tests.add(new Object[]{EffectType.STOP_GAINED,EffectType.EXON}); + tests.add(new Object[]{EffectType.SYNONYMOUS_START,EffectType.TRANSCRIPT}); + tests.add(new Object[]{EffectType.FRAME_SHIFT,EffectType.CDS}); + tests.add(new Object[]{EffectType.UPSTREAM,EffectType.INTERGENIC}); + tests.add(new Object[]{EffectType.SPLICE_SITE_DONOR,EffectType.INTRON}); + tests.add(new Object[]{EffectType.SPLICE_SITE_ACCEPTOR,EffectType.INTRON}); + tests.add(new Object[]{EffectType.STOP_LOST,EffectType.NON_SYNONYMOUS_CODING}); + return tests.toArray(new Object[][]{}); + } + + @DataProvider(name="self") + public Object[][] childEqualsParentpairs() { + List tests = new ArrayList(); + + for(EffectType type:EffectType.values()){ + tests.add(new Object[]{type,type}); + } + return tests.toArray(new Object[][]{}); + } + + @DataProvider(name="noneffects") + public Object[][] nonchildParentpairs() { + List tests = 
new ArrayList(); + + tests.add(new Object[]{EffectType.START_GAINED,EffectType.NON_SYNONYMOUS_CODING}); + tests.add(new Object[]{EffectType.GENE,EffectType.NONE}); + tests.add(new Object[]{EffectType.UTR_3_PRIME,EffectType.CDS}); + tests.add(new Object[]{EffectType.CODON_CHANGE,EffectType.REGULATION}); + tests.add(new Object[]{EffectType.DOWNSTREAM,EffectType.REGULATION}); + tests.add(new Object[]{EffectType.SPLICE_SITE_ACCEPTOR,EffectType.EXON}); + tests.add(new Object[]{EffectType.START_GAINED,EffectType.SYNONYMOUS_START}); + tests.add(new Object[]{EffectType.NON_SYNONYMOUS_CODING,EffectType.DOWNSTREAM}); + tests.add(new Object[]{EffectType.CODON_DELETION,EffectType.INTRON}); + tests.add(new Object[]{EffectType.UTR_5_PRIME,EffectType.EXON_DELETED}); + tests.add(new Object[]{EffectType.INTRON,EffectType.NONE}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "effects") + public void testSubType(EffectType subType,EffectType parentType) { + Assert.assertTrue(SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); + } + @Test(dataProvider = "self") + public void testSubTypeSelf(EffectType subType,EffectType parentType) { + Assert.assertTrue(SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); + } + @Test(dataProvider = "effects") + public void testNonSubTypeSelf(EffectType parentType,EffectType subType) { + Assert.assertTrue(!SnpEffUtil.isSubTypeOf(subType,parentType),String.format("testing that %s is subtype of %s.",subType,parentType)); + } + @Test(dataProvider = "noneffects") + public void testNonSubType(EffectType subType,EffectType parentType) { + Assert.assertTrue(!SnpEffUtil.isSubTypeOf(subType, parentType), String.format("testing that %s is NOT subtype of %s.", subType, parentType)); + Assert.assertTrue(!SnpEffUtil.isSubTypeOf(parentType,subType), String.format("testing that %s is NOT subtype of %s.", 
parentType,subType)); + } +} From 4f3411f3d40871f69335373ec2f0ec1de20334c4 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Wed, 31 Jul 2013 10:48:55 -0400 Subject: [PATCH 044/172] Max number of haplotypes to evaluate no longer grows unbounded with the number of samples. This is necessary for multi-sample calling projects with over 100 samples. --- .../haplotypecaller/HaplotypeCaller.java | 34 ++++++------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 2d492741b..d46c786c9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -231,7 +231,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In */ @Advanced @Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false) - protected List annotationsToExclude = new ArrayList(Arrays.asList(new String[]{"SpanningDeletions", "TandemRepeatAnnotator"})); + protected List annotationsToExclude = new ArrayList<>(Arrays.asList(new String[]{"SpanningDeletions", "TandemRepeatAnnotator"})); /** * Which groups of annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available groups. @@ -258,23 +258,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="numPruningSamples", shortName="numPruningSamples", doc="The number of samples that must pass the minPuning factor in order for the path to be kept", required = false) protected int numPruningSamples = 1; - /** - * Assembly graph can be quite complex, and could imply a very large number of possible haplotypes. 
Each haplotype - * considered requires N PairHMM evaluations if there are N reads across all samples. In order to control the - * run of the haplotype caller we only take maxPathsPerSample * nSample paths from the graph, in order of their - * weights, no matter how many paths are possible to generate from the graph. Putting this number too low - * will result in dropping true variation because paths that include the real variant are not even considered. - */ - @Advanced - @Argument(fullName="maxPathsPerSample", shortName="maxPathsPerSample", doc="Max number of paths to consider for the read threading assembler per sample.", required = false) - protected int maxPathsPerSample = 10; - - /** - * The minimum number of paths to advance forward for genotyping, regardless of the - * number of samples - */ - private final static int MIN_PATHS_PER_GRAPH = 128; - @Hidden @Argument(fullName="dontRecoverDanglingTails", shortName="dontRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false) protected boolean dontRecoverDanglingTails = false; @@ -381,9 +364,16 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="phredScaledGlobalReadMismappingRate", shortName="globalMAPQ", doc="The global assumed mismapping rate for reads", required = false) protected int phredScaledGlobalReadMismappingRate = 45; + /** + * Assembly graph can be quite complex, and could imply a very large number of possible haplotypes. Each haplotype + * considered requires N PairHMM evaluations if there are N reads across all samples. In order to control the + * run of the haplotype caller we only take maxNumHaplotypesInPopulation paths from the graph, in order of their + * weights, no matter how many paths are possible to generate from the graph. Putting this number too low + * will result in dropping true variation because paths that include the real variant are not even considered. 
+ */ @Advanced @Argument(fullName="maxNumHaplotypesInPopulation", shortName="maxNumHaplotypesInPopulation", doc="Maximum number of haplotypes to consider for your population. This number will probably need to be increased when calling organisms with high heterozygosity.", required = false) - protected int maxNumHaplotypesInPopulation = 25; + protected int maxNumHaplotypesInPopulation = 128; @Advanced @Argument(fullName="mergeVariantsViaLD", shortName="mergeVariantsViaLD", doc="If specified, we will merge variants together into block substitutions that are in strong local LD", required = false) @@ -541,7 +531,6 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // get all of the unique sample names Set samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()); samplesList.addAll( samples ); - final int nSamples = samples.size(); // initialize the UnifiedGenotyper Engine which is used to call into the exact model final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user // HC GGA mode depends critically on EMIT_ALL_SITES being set for the UG engine // TODO -- why is this? 
@@ -567,7 +556,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // initialize the output VCF header annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); - Set headerInfo = new HashSet(); + Set headerInfo = new HashSet<>(); // all annotation fields from VariantAnnotatorEngine headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); @@ -610,8 +599,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In } // create and setup the assembler - final int maxAllowedPathsForReadThreadingAssembler = Math.max(maxPathsPerSample * nSamples, MIN_PATHS_PER_GRAPH); - assemblyEngine = new ReadThreadingAssembler(maxAllowedPathsForReadThreadingAssembler, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples); + assemblyEngine = new ReadThreadingAssembler(maxNumHaplotypesInPopulation, kmerSizes, dontIncreaseKmerSizesForCycles, numPruningSamples); assemblyEngine.setErrorCorrectKmers(errorCorrectKmers); assemblyEngine.setPruneFactor(MIN_PRUNE_FACTOR); From 1e396af4d0ff764cad466e517033714ad9aa8ea5 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 31 Jul 2013 10:05:47 -0400 Subject: [PATCH 045/172] Two reduce reads updates/fixes: 1. Removing old legacy code that was capping the positional depth for reduced reads to 127. Unfortunately this cap effectively performs biased down-sampling and throws off e.g. FS numbers. Added end to end unit test that depth counts in RR can be higher than max byte. Some md5s change in the RR tests because depths are now (correctly) no longer capped at 127. 2. Down-sampling in ReduceReads was not safe as it could remove het compressed consensus reads. Refactored it so that it can only remove non-consensus reads.
--- .../reducereads/SlidingWindow.java | 28 ++++---- .../ReduceReadsIntegrationTest.java | 50 ++++++++++++- .../reducereads/SlidingWindowUnitTest.java | 70 +++++++++++++++++++ 3 files changed, 131 insertions(+), 17 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java index e15c68774..964c6b401 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java @@ -494,10 +494,10 @@ public class SlidingWindow { */ private void genericAddBaseToConsensus(final SyntheticRead syntheticRead, final BaseAndQualsCounts baseCounts) { final BaseIndex base = baseCounts.baseIndexWithMostProbability(); - byte count = (byte) Math.min(baseCounts.countOfBase(base), Byte.MAX_VALUE); - byte qual = baseCounts.averageQualsOfBase(base); - byte insQual = baseCounts.averageInsertionQualsOfBase(base); - byte delQual = baseCounts.averageDeletionQualsOfBase(base); + final int count = baseCounts.countOfBase(base); + final byte qual = baseCounts.averageQualsOfBase(base); + final byte insQual = baseCounts.averageInsertionQualsOfBase(base); + final byte delQual = baseCounts.averageDeletionQualsOfBase(base); syntheticRead.add(base, count, qual, insQual, delQual, baseCounts.getRMS()); } @@ -533,20 +533,24 @@ public class SlidingWindow { final int refStart = windowHeader.get(start).getLocation(); final int refStop = windowHeader.get(stop).getLocation(); - final ObjectList toRemove = new ObjectArrayList<>(); + final ObjectList toRemoveFromWindow = new ObjectArrayList<>(); + final ObjectList toEmit = new ObjectArrayList<>(); for ( final GATKSAMRecord read : readsInWindow ) { if ( read.getSoftStart() <= refStop ) { if ( read.getAlignmentEnd() >= refStart ) { - 
allReads.reads.add(read); + toEmit.add(read); removeFromHeader(windowHeader, read); } - toRemove.add(read); + toRemoveFromWindow.add(read); } } // remove all used reads - for ( final GATKSAMRecord read : toRemove ) + for ( final GATKSAMRecord read : toRemoveFromWindow ) readsInWindow.remove(read); + + // down-sample the unreduced reads if needed + allReads.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(toEmit) : toEmit); } return allReads; @@ -632,12 +636,8 @@ public class SlidingWindow { @Ensures("result != null") protected CloseVariantRegionResult closeVariantRegion(final int start, final int stop, final ObjectSortedSet knownSnpPositions) { final CloseVariantRegionResult allReads = compressVariantRegion(start, stop, knownSnpPositions); - - final CloseVariantRegionResult result = new CloseVariantRegionResult(allReads.stopPerformed); - result.reads.addAll(downsampleCoverage > 0 ? downsampleVariantRegion(allReads.reads) : allReads.reads); - result.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1)); - - return result; // finalized reads will be downsampled if necessary + allReads.reads.addAll(addAllSyntheticReadTypes(0, allReads.stopPerformed + 1)); + return allReads; } /** diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java index 9942821e1..067f36d58 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java @@ -46,9 +46,14 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads; +import net.sf.samtools.SAMFileReader; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.WalkerTest; import 
org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory; +import org.testng.Assert; import org.testng.annotations.Test; import java.io.File; @@ -70,6 +75,8 @@ public class ReduceReadsIntegrationTest extends WalkerTest { final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam"; final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057"; final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam"; + final String HIGH_COVERAGE_BAM = privateTestDir + "NA20313.highCoverageRegion.bam"; + final String HIGH_COVERAGE_L = " -L 1:1650830-1650870"; final String BOTH_ENDS_OF_PAIR_IN_VARIANT_REGION_BAM = privateTestDir + "bothEndsOfPairInVariantRegion.bam"; final String INSERTIONS_AT_EDGE_OF_CONSENSUS_BAM = privateTestDir + "rr-too-many-insertions.bam"; @@ -221,13 +228,13 @@ public class ReduceReadsIntegrationTest extends WalkerTest { @Test(enabled = true) public void testCoReduction() { String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s "; - executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("2fdc77ff5139f62db9697427b559f866"))); + executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("58c2bae5a339af2ea3c22a46ce8faa68"))); } @Test(enabled = true) public void testCoReductionWithKnowns() { String base = String.format("-T ReduceReads %s --cancer_mode -npt -R %s -I %s -I %s -known %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B, DBSNP) + " -o %s "; - executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("6db7fca364ba64f7db9510b412d731f0"))); + executeTest("testCoReductionWithKnowns", new WalkerTestSpec(base, 
Arrays.asList("bam"), Arrays.asList("5c251932b49d99a810581e3a6f762878"))); } @Test(enabled = true) @@ -258,7 +265,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest { public void testDivideByZero() { String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s "; // we expect to lose coverage due to the downsampling so don't run the systematic tests - executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("82758efda419011642cb468809a50bf9"))); + executeTestWithoutAdditionalRRTests("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("bam"), Arrays.asList("7dfe2647992ce1154db340fc742d523a"))); } /** @@ -299,5 +306,42 @@ public class ReduceReadsIntegrationTest extends WalkerTest { String cmd = "-T ReduceReads -npt -R " + b37KGReference + " -I " + privateTestDir + "rr_multisample.bam -o /dev/null"; executeTestWithoutAdditionalRRTests("testMultiSampleDoesNotFailWithFlag", new WalkerTestSpec(cmd, 0, UserException.BadInput.class)); } + + /** + * Confirm that compression is not capping coverage counts to max byte + */ + @Test(enabled = true) + public void testCompressionWorksForHighDepth() { + final String base = String.format("-T ReduceReads -npt -R %s -I %s %s", b37KGReference, HIGH_COVERAGE_BAM, HIGH_COVERAGE_L) + " -o %s"; + final File outputBam = executeTestWithoutAdditionalRRTests("testCompressionWorksForHighDepth", + new WalkerTestSpec(base, 1, Arrays.asList(""))).first.get(0); // No MD5s; we only want to check the coverage + + boolean sawHighCoveragePosition = false; + final SAMFileReader reader = new SAMFileReader(outputBam); + reader.setSAMRecordFactory(new GATKSamRecordFactory()); + + for ( final SAMRecord rawRead : reader ) { + final GATKSAMRecord read = (GATKSAMRecord)rawRead; + read.setAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, rawRead.getByteArrayAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG)); + + if ( ! 
read.isReducedRead() ) + continue; + + final int[] decodedCounts = read.getReducedReadCounts(); + for ( final int count : decodedCounts ) { + if ( count > Byte.MAX_VALUE ) { + sawHighCoveragePosition = true; + break; + } + } + + if ( sawHighCoveragePosition ) + break; + } + + reader.close(); + + Assert.assertTrue(sawHighCoveragePosition, "No positions were found with coverage over max byte (127); the coverage is incorrectly being capped somewhere!"); + } } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java index 3534284cd..720f00f98 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindowUnitTest.java @@ -619,6 +619,76 @@ public class SlidingWindowUnitTest extends BaseTest { Assert.assertEquals(result.size(), Math.min(dcov, basicReads.size())); } + @DataProvider(name = "DownsamplingFromClose") + public Object[][] createDownsamplingFromCloseTestData() { + + final ObjectList myReads = new ObjectArrayList<>(20); + for ( int i = 0; i < 21; i++ ) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength); + final byte[] bases = Utils.dupBytes((byte) 'A', readLength); + if ( i < 5 ) + bases[50] = 'C'; + read.setReadBases(bases); + read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); + read.setMappingQuality(30); + read.setReadNegativeStrandFlag(false); + myReads.add(read); + } + + List tests = new ArrayList<>(); + + for ( int i = 1; i < 25; i++ ) + tests.add(new Object[]{myReads, i}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "DownsamplingFromClose", enabled = true) + public void testDownsamplingTestFromClose(final 
ObjectList myReads, final int dcov) { + + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); + for ( final GATKSAMRecord read : myReads ) + slidingWindow.addRead(read); + Pair, CompressionStash> result = slidingWindow.close(new ObjectAVLTreeSet()); // no het compression + + Assert.assertEquals(result.getFirst().size(), Math.min(dcov, myReads.size()), "Down-sampling was not performed correctly"); + } + + @DataProvider(name = "NoDownsamplingForConsensusReads") + public Object[][] createNoDownsamplingForConsensusReadsData() { + + final ObjectList myReads = new ObjectArrayList<>(20); + for ( int i = 0; i < 30; i++ ) { + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read" + i, 0, globalStartPosition, readLength); + final byte[] bases = Utils.dupBytes((byte) 'A', readLength); + if ( i < 10 ) + bases[50] = 'C'; + read.setReadBases(bases); + read.setBaseQualities(Utils.dupBytes((byte)30, readLength)); + read.setMappingQuality(30); + read.setReadNegativeStrandFlag(false); + read.setReadNegativeStrandFlag(i % 2 == 0); + myReads.add(read); + } + + List tests = new ArrayList<>(); + + for ( int i = 0; i < 5; i++ ) + tests.add(new Object[]{myReads, i}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "NoDownsamplingForConsensusReads", enabled = true) + public void testNoDownsamplingForConsensusReads(final ObjectList myReads, final int dcov) { + + final SlidingWindow slidingWindow = new SlidingWindow("1", 0, 10, header, new GATKSAMReadGroupRecord("test"), 0, 0.05, 0.05, 0.05, 20, 20, dcov, ReduceReads.DownsampleStrategy.Normal, false); + for ( final GATKSAMRecord read : myReads ) + slidingWindow.addRead(read); + Pair, CompressionStash> result = slidingWindow.close(null); // allow het compression (so we expect 4 reads) + + Assert.assertEquals(result.getFirst().size(), 4, "Down-sampling 
was performed on consensus reads!"); + } ////////////////////////////////////////////////////////////// //// This section tests the consensus base quals accuracy //// From 285ab2ac62154b2c67c7c155966d9f0e126c8d93 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 1 Aug 2013 20:10:20 -0400 Subject: [PATCH 047/172] Better caching for the HaplotypeCaller Problem ------- The caching strategy is incompatible with the current sorting of the haplotypes, and is rendering the cache nearly useless. Before the PairHMM updates, we realized that a lexicographically sorted list of haplotypes would optimize the use of the cache. This was only true until we added the initial condition to the first row of the deletion matrix, which depends on the length of the haplotype. Because of that, every time the haplotypes differ in length, the cache has to be wiped. A lexicographic sorting of the haplotypes clusters haplotypes of different lengths together, therefore wasting *tons* of re-computation. Solution ------- Very simple. Sort the haplotypes by LENGTH and then in lexicographic order. 
--- .../haplotypecaller/HaplotypeCaller.java | 2 +- ...lexAndSymbolicVariantsIntegrationTest.java | 2 +- .../HaplotypeCallerGVCFIntegrationTest.java | 2 +- .../HaplotypeCallerIntegrationTest.java | 2 +- ...aplotypeCallerParallelIntegrationTest.java | 2 +- ...aplotypeSizeAndBaseComparatorUnitTest.java | 82 +++++++++++++++++++ .../HaplotypeSizeAndBaseComparator.java | 47 +++++++++++ 7 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 protected/java/test/org/broadinstitute/sting/utils/haplotype/HaplotypeSizeAndBaseComparatorUnitTest.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/haplotype/HaplotypeSizeAndBaseComparator.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index d46c786c9..24bb41a94 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -980,7 +980,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In final List trimmedHaplotypes = new ArrayList<>(haplotypeSet); // sort haplotypes to take full advantage of haplotype start offset optimizations in PairHMM - Collections.sort( trimmedHaplotypes, new HaplotypeBaseComparator() ); + Collections.sort( trimmedHaplotypes, new HaplotypeSizeAndBaseComparator() ); if ( DEBUG ) logger.info("Trimmed region to " + trimmedActiveRegion.getLocation() + " size " + trimmedActiveRegion.getLocation().size() + " reduced number of haplotypes from " + haplotypes.size() + " to only " + trimmedHaplotypes.size()); if ( DEBUG ) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 74f7929ee..38ed73c40 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -64,7 +64,7 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleComplex1() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "df7be117bd3d256c4a5fbde925ecd19b"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "88c10027c21712b1fe475c06cadd503c"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index c7b242249..fca8de330 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -63,7 +63,7 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; // this functionality can be adapted to provide input data for whatever you might want in your data - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "55faaae5617857e2b29848367999aa3e"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, 
"3ce9c42e7e97a45a82315523dbd77fcf"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "e32b7fc4de29ed141dcafc0d789d5ed6"}); tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "ecac86e8ef4856e6dfa306c436e9b545"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "7cb1e431119df00ec243a6a115fa74b8"}); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 645ec3c4b..e3f7bd706 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -233,7 +233,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("58a0089e6ebf7cee414adb7a6002d43f")); + Arrays.asList("f3e636d64042e766cc6515987e85a968")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index aabc41f36..8937e8868 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -61,7 +61,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "6f8c3cac54eb1460e2c65fe00978b1c1"}); + tests.add(new Object[]{nct, "e4bf389676fa090c95980349310ba5ca"}); } return tests.toArray(new Object[][]{}); diff --git a/protected/java/test/org/broadinstitute/sting/utils/haplotype/HaplotypeSizeAndBaseComparatorUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/haplotype/HaplotypeSizeAndBaseComparatorUnitTest.java new file mode 100644 index 000000000..6d8fdd6e9 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/utils/haplotype/HaplotypeSizeAndBaseComparatorUnitTest.java @@ -0,0 +1,82 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. 
DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.utils.haplotype; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.Utils; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * User: btaylor + * Date: 8/1/13 + * Time: 11:09 AM + */ +public class HaplotypeSizeAndBaseComparatorUnitTest extends BaseTest { + @Test + public void testComparison() { + // desired ordering is by size first, subordered by lexicographic relationship between bases + final List<String> rawStrings = Arrays.asList("A", "C", "AC", "CC", "CT", "AAT", "ACT", "GAT", "ACGT"); + final List<String> lexStrings = new ArrayList<>(rawStrings); + + for ( final List<String> seqs : Utils.makePermutations(lexStrings, lexStrings.size(), false) ) { + final List<Haplotype> haps = new ArrayList<>(seqs.size()); + for ( final String seq : seqs ) { + haps.add(new Haplotype(seq.getBytes(), false)); + } + + Collections.sort(haps, new HaplotypeSizeAndBaseComparator()); + for ( int i = 0; i < lexStrings.size(); i++ ) + Assert.assertEquals(haps.get(i).getBaseString(), lexStrings.get(i), "Failed sort " + haps + " expected " + lexStrings); + } + } +} \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/haplotype/HaplotypeSizeAndBaseComparator.java b/public/java/src/org/broadinstitute/sting/utils/haplotype/HaplotypeSizeAndBaseComparator.java new file mode 100644 index 000000000..9981ce495 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/haplotype/HaplotypeSizeAndBaseComparator.java @@ -0,0 +1,47 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, 
and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils.haplotype; + +import java.util.Comparator; + +/** + * Compares two haplotypes first by their lengths and then by lexicographic order of their bases. + * + * User: btaylor + * Date: 8/1/13 + * Time: 11:09 AM + */ +public class HaplotypeSizeAndBaseComparator implements Comparator<Haplotype> { + @Override + public int compare( final Haplotype hap1, final Haplotype hap2 ) { + if (hap1.getBases().length < hap2.getBases().length) + return -1; + else if (hap1.getBases().length > hap2.getBases().length) + return 1; + else + return hap1.getBaseString().compareTo(hap2.getBaseString()); + } +} From d5dd3b23db793058cd6dda7a15cf8a395f1c7a46 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 2 Aug 2013 09:16:50 -0400 Subject: [PATCH 049/172] Rev picard to get optimized tribble feature reads -- The previous version of TribbleIndexedFeatureReader.query() would open a RandomAccessFile each time the GATK crossed a shard boundary. When running with -L wex.intervals (or any time there were lots of intervals) we'd be opening and closing enormous numbers of files, radically slowing down the GATK. 
With these patched versions of Tribble we see something like the following performance improvements: SelectVariants with -L wex.intervals on my local machine against non-local file pre-patch => 3 hours post-patch => 30 seconds --- .../repository/net.sf/picard-1.91.1453.xml | 3 --- ...ard-1.91.1453.jar => picard-1.96.1515.jar} | Bin 1645927 -> 1688674 bytes .../repository/net.sf/picard-1.96.1515.xml | 3 +++ settings/repository/net.sf/sam-1.91.1453.xml | 3 --- .../{sam-1.91.1453.jar => sam-1.96.1515.jar} | Bin 620475 -> 623321 bytes settings/repository/net.sf/sam-1.96.1515.xml | 3 +++ ...le-1.91.1453.jar => tribble-1.96.1515.jar} | Bin 265537 -> 288992 bytes ...le-1.91.1453.xml => tribble-1.96.1515.xml} | 2 +- ...nt-1.91.1453.jar => variant-1.96.1515.jar} | Bin 556361 -> 556681 bytes ...nt-1.91.1453.xml => variant-1.96.1515.xml} | 2 +- 10 files changed, 8 insertions(+), 8 deletions(-) delete mode 100644 settings/repository/net.sf/picard-1.91.1453.xml rename settings/repository/net.sf/{picard-1.91.1453.jar => picard-1.96.1515.jar} (81%) create mode 100644 settings/repository/net.sf/picard-1.96.1515.xml delete mode 100644 settings/repository/net.sf/sam-1.91.1453.xml rename settings/repository/net.sf/{sam-1.91.1453.jar => sam-1.96.1515.jar} (88%) create mode 100644 settings/repository/net.sf/sam-1.96.1515.xml rename settings/repository/org.broad/{tribble-1.91.1453.jar => tribble-1.96.1515.jar} (78%) rename settings/repository/org.broad/{tribble-1.91.1453.xml => tribble-1.96.1515.xml} (76%) rename settings/repository/org.broadinstitute/{variant-1.91.1453.jar => variant-1.96.1515.jar} (91%) rename settings/repository/org.broadinstitute/{variant-1.91.1453.xml => variant-1.96.1515.xml} (71%) diff --git a/settings/repository/net.sf/picard-1.91.1453.xml b/settings/repository/net.sf/picard-1.91.1453.xml deleted file mode 100644 index 5d1bf41e8..000000000 --- a/settings/repository/net.sf/picard-1.91.1453.xml +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git 
a/settings/repository/net.sf/picard-1.91.1453.jar b/settings/repository/net.sf/picard-1.96.1515.jar similarity index 81% rename from settings/repository/net.sf/picard-1.91.1453.jar rename to settings/repository/net.sf/picard-1.96.1515.jar index f196ee5a43bcee0abd96f130acb28135c89792a1..8d63a71c3cb1dfe46acd92083151269a8773a024 100644 GIT binary patch delta 164744 zcmeFacVJY-7C(N@?7e$;H#fbI1Of@21PGxMI#Q)~LJtsnPXIw+X^M)}1x5jBDj-;> zSrBQW_*7KvUBH4B73>J|{hYf?QJ?R<-|zeT^QUC)^f`0p%$b>U&dh%PSGO#Ws^lq;Z<=T6y~`VS=bFoxcW~FOoKq*K&hk&{8yQFnl;P4wIq@{- zjF4wISN2HEwzXfK53J_8TvPAd<{GWn9E*(6+4mUHdwQu2>||+lcHr8;6;~G5+*8Gv z>18ra7q>SG2nI@b=wl<)>hMn$5w$f;)QJrkl5Os5Xt_tW1BEQ)V~ zI$n<-VZ=(&kGv-yT2-D%{?-Q0M&<-2wi=-)j!Aaw>xJZV(W#>fyB7?dFn(D6 z=hl+;gF_PH?L4bS(Y$z_P~kQ6{0u)fgu(bJDtcfs&oOAZ~2u zM1WgFi@V+uPjpMXUVb4W zAuw@9OkmlG5>eHMj?FJDeBqBL4-*j}9@;A2Y5l^Z%dQ5ROzVl(S5A9UId%Nh|CLjl zvxzom>q(hjsUs7~YiB%^?IaKRF!J+7DMX7>^sW)^P!nk!EcXdO$+!y@=o7ZU)rKe-hd#&L*RAhm$XfZ?i=YdMTD3^3lVzMPjyE zv`wmW>U_lA^o&!K{1$EZ&9{05W6`r7I^m-ndd^4xqUWW#7o^0CwLNsw zM=#OKQuGz^o$}GE^qP-S>2-_V@X(u*`7IwcrnjZScPx5Wcdg3h_Pn;EmgR}fA3L^N zLH@A7=wM_0$Do+Fz?q-p^s@IkRS)`+J-Xp-$*KC(d^Q8to?!eoi{1k)7QJuL2S74F z)fbjTrUu^Klg=Y_*PbOx`E2@-KCeS8G*Eu@vhHlg3cJH5(CrrrMgbj7lD=gIww!2 zLYuy%4i zv(1XWVX7K9Td73gR4}PDyKFYlOE#&4&2IY0V#{U^d!>v|=hWn~fr|UPMxN#fK*hGr zksK9xVqA^D-u>nE@wM^UI%h@X)IFUJ)Kz+7XHE|sIygi}9ZRgEpXs4$1e!dxPCxjT zr;5(rkx)iYsK-fq&>c~3J;RQy>!%Eidkcq%Q>8QTrBW+uO-yazS5yx^pdW5Kgx&bw ziZ2V&!f`S9c0|rjh18k46jE1&F;?N!2q%>#p$EJezi>jLB>2fC<$6nUAIj@XeG!Y{ z4d=P3>H(5vAezAd!_9Yq8{zME-=>K)3GoJj>YV(ok96`&{wQ>u(33>;q&^_NQKoUYjUuOsW-6XXvyI>`ac<%DZHr;-}O3tG@JZR9X^^v zbDcUcrKAo}(=UJluyI7@UaC+`l`@N|atHBe6;l=bWv-zSf#_qE{h{<4M=7V{Zptza zQPn(`Gmzwpt65BD)#u2nx|MuYi>cOrs$EQVipkr(n6kIhk}MQ1Q%rU7iF%Hz$JKKm zqSQQBD7Jq0JOjRrVuH_vuR$^8cHd788*imLarLZF)}Brc|2}V?TPog9jhs@Y?x)fR zsd3&l;F$@67+fas%fAMBGXPXssz~Jm zgPtGeo)eH+HQPN!Q|ojLkpPh)|>7#%LLB6P|4M>NyEUaFVyv3PYnz@T|K@URkwj# zfLqQ6$M0rQcg;Vm_`tFcD_{w)Og(Huo0 
z%FxSpdlI=xpzHf}ZffVdGe^BlDP%TFp)}oKkEfJBCRG{*y_n!M<*5RQrXoVu>1{VD zOo^jp>_jY6CAcB(SQ_~&Q_4GxwAGD49|~{i#4EVnS>`g0jGPk?xm1 zA)B zQ_7$B))Z-x$D)Y9nNvLjD_*SvZelYzgPwZ52-9DvQSaIm6|aw;_r?cK9j~q@jWpwH z39Qm1g~9dVE4%|lPTs%5_fl-1{MUf)*J5T;6>uxE6!r%@=ME0e3%q>h_f_;meF!?R)D2OfS6ZG*;QV!MmRB~#fNi4 zjGRhV)BO=x3f78mokb5=v>wO@XKy!te8J>fNnh&^nXWbv+YqCx>-v6y?Y+} z>;lI|P7HB}!X6X4O&O2Po8A>|IFtXzWYiZH#4e9zrEHm0zk zD9k=W#r3h;vB~;pZKQBd-8I6OE=(lq`uKvIROuDy{6#NrCS1jHEkSqL>q-y&{zU{g z4YeNA-5VJEQDR`gmkGv3i#7!YpRVTWPCfLlsa(plnI5$1A$oYv^Is}1zePf|2%H2v zSR1>xhBj>_95x=YX*=z(v1jS1V@E0>&m_~PN2#A)o)ec`xjQ{3+^eIFo#h}Ko5|WB zr0NzuZqpOA)23ZiVAF2eqcfh3O-Qum6b4XiEW1IQ_S1nd`PRJF3*>&14%#viEqV%E zn(Ethhz{Fygz_!gXwy+TX3McGaQ5p#+V!)F6^5znw`b7!AWx#6WAfC%2jA?~D_-}M z(&fgfc%3~evYbwRHX>0sz2HvKue6OXuLnnP^}vvC59`3f_%eZ>-?i}zfAZkO2{eEj z>L4VHuP(mq@P$yEY?T{;v~vrwTs11hj7Jy1HA1PfC2s((fYhdviWR9Zrae0cgFjb* z)K-$OHBy0Z@`sXTx8DGNNN$S{5DHg-xvAk>CAB@`F)9G16H*mU4wCDp;$0AlkwLal zhHjYgj16|^5JA}3x)3I69NNqW?)kn^yMDoMm0wHNV0mq2=JZ$`;gOsq3 z5&=w7XeGH(-zbX4#^*-+3E}pc24nNDocg->;Snb8&dfqJvI@9xQo07xWOQCmNzy$h zSGWopH$h;x}TcClffY8vYo0)$pcglRAr>dCc6AS zs^EAk?xRZZU^6e<_!Q#+Wh&~u_39zXgujaTlP$k}N0xs-WjRM0!|8TSbcS%~u`$jW zIDAyW!J|F)$Gx#j9*kY`DC~bHW8-)`cEPi;{|$h(3VLLB+#4H%;WPpx)E*3bB=WjI z;jJ+SV;tnI3L~!ycFAKM3Ip;A9PEXn#sOR7QCT#(GOkdPMH4KVcnS2boWq<#%aAi+ zC3S(iu)1J%tbuP%OD}kL4AuPWDm1!N=}oiB5X~xcO)?b|IDIh{yU0ry*Ol}h$GVJ- zB0Z}&MDc^w%Vnlmt^-t40lBq`sW!%ffuj;z$eS6Lgpd%2fu2JmMz$Uj%~}2l^bA0o ziKuR&s&G1+jSwmYlI}!*#J3RBM*-{|K-k>?z7Ni_SesDUP#Qw_BF;oi9~?TCfKKkm zBDNHZ5{?2WEjy4<$rUIW${)CvU$*{E0UZ;=C3&2#8pG|)DVSkX^~@OlEN2F4nF$Qf z!p?LymBJvF#vnF=P^A+FvMXTh4=5J_hNWozep;qi#PXZYp8HtY>peNayt&^eV-^r=ex1h3X} zD)ZVG z9|VVf*kM+o-Dq7*RGD&UAj}xWHyA62La9u!wjmCsg!qyj??eqlQ~`=WFmsve1ot*z zkBXF2-V>i3I(H1v9t$jsFYO5S^dAkKF58XxxZxn@w!U*exsX6~A>o<`*Q-ypn2Yu5O3INNNmRl-LhuxR+A!fy^PbQ?&zBlF8UeA=H%;)TI?u z>3tL7mZK|b^Xw<(ir|O-MB=z0if=QSj(TU{NI4Uyr&(C?XJZMR2?*z6b(rU{(hVz_@*I4TBlY7&&?_|UC3=Nmn_hgz zy6hlLvVUVp}}LQ`4Nhqe7Pu_5!27kvTboJ@Umgky=Bv|rmjt%L5 
zuJrok?D={`^4C|6X1dvr-1ue%P;m2i{7&M3U?#zprn-{h!2WA5jKckuR6bZ_bnW;i z!3LGpeA5A~>H|GHmrNrnnRe4KeP2zLlWta5X=%>rUwXdt{gBpf*Rdz^6)Ua|?bQK)^s?^foKjU*yP(=^^ku800 zjY`x}W3Owrf}i$LFZ{=j{cF5xljcCb|DF7U7qf^iW?hHA@}H12;$HP*mXr1Oac_Ox z7F}p->a%r103_?DW`ZcgboIo3b!(^xdlP(JG#^$y(1Tuw)i$4_lEYNK{$Gkd`=TnP zvz}Kz2f=6e&#$W+Kd)K=!J;Y*o_}6_5?bqecX{%=?b!8gSI!|;r_WBVa06{+PO3+h zQ+?aGjgRP9BKfN>)n{qz6(y@i;w$R@SSQcq@2fRAkIjbsm6JT_`t%tm&-pXzJCD=P z1l@jH4(51{E7abpGb&mS{6@v-w6C13SzoDIE@xEz^7-U(j&n`D`csZ{spt&vQ z)&3C2{%!cHs}5l%9dbKDFg@p@s*Mc$E~+D;`Sa?n-D>KYzp18(ed;%LuxyAk++NC6 z!-3_IfvX30Mc=zBhvn<<=OkCU-uIABwb^ucjNVw*l^k4><~kNa+5A^C^v}Tc&lH-$ zJLf_RHEwRQrJtB>fk!z~MEzn}R|$RIZN)jVM18u9Ymd{bTGM)OOF^%oDeRP#s8P83 z^)`cFrMpUowhevfCG^md6 zsWctzo9()En=|IaYTi3f_uu9chJZ0IQcmoSO0yr=Rd={#k1~FTYh@KD_WjrAWNP1= zuGWa{`lc(>bRZr5{+5=VfSyj;|D-!nu;bNNPrFL%=TEyLb+ZLG&#%+(xw?cjrH|a6 zW)`7;MMh{;ZWO!iLzmFYogccUh6b?vdv)tv!CG~NSHBCkgPa~+@naYEuEFLXyRL?? zUGCTW=R5}}o>ThOwGp9Qs~`^*+EqsYW3UJ?@+0yAf0o;s>t{DAcjv*c1c)`^z#Lec8`0eQK zD5Bj_p}sg-FaP24{B^yl@r!G3lruCb&&}GRzcG!5sHn2rn3d(k#<(y3tRKrU8YA}W z9HT~PZgtYZA07cdA7R~W5N1r5+uS}~vZ3MqYxRS2jU6HEMm^ScLK1Z2N%4A8XCp)J zyb@*Tjd$IQi1v+)T#o`9@E*Vk5(V*Y;+Ex=S=SFSM~4Rjr!i26ag`rMLxZu zmXV{!^)Qw@(3bmPd1vkJX~;Zm-P1Vd3L&sYo)OZXzcajc_eDV9q7VXiF2+L6oJXhobB|K2Q}Gt>~GE*xsi3ax5+yJCL01)Sy< zmwvSqR(Pk{_alr_;c9ipe4}4*{Rrc$5cy&SODSZ^MFmjL)3EcV|0jF2T4mPs2z1;Yh1K+MyCr>v= ztIb_HKi>>BoBU5!7kYV~5f26>J=ZUG@y6&=(T1nQKiFvKE)N(3^oIM4(JAHrvBtRz zj8v|ryG9yGN&mhc{q0C&P|5$W6VMY!8BI7sKQRgziPhhXGJ0}p9ete%u?(&O6R(T{pOr7ek|cYeK|SatE+v4uG9?9|HxSH?)nrNyqJ8v1RMNi zl*lV6l~+<3UQOlsKB~fNs5-BsTKo{z=PlHjcTx*JOl|lG3$7u?_ zxAU`l@@K|d4PN+vQSFO#E1%SJ&l&lNuaSpe2SjgBGQSDw%Ue{L-`2mJGnS>Cp-4Up zAU~!`{0W4YpHg%Fj5_f-z43FSnS+bQWqM6t4}7n8$+js?r+)+5-1(IesneHXQR?`G zAz118g^};#pY^ULiShb{uZ=8cx3KYzrD@xNX!+})FMMva)1AL@25G=I#u&H0c+r@l zPh2!If!V4s=Lk;z!AOnIREoTOk)rqq%1|z5 z=P2FNGFx)AUSpY!I7Yv1nPt44AW;Ax<|V)uc0 zpx|vGax>R1Ial#;9!1S?b}GbF9WN>kRe~z1Qgn+-$3`N9@>E$Gtjf_8Ri0+5iU6n* z-K#3o1F8z`ga4q)qGwfA`ao5u^Qs10Du*k@z>}nEaX(du2dTO|MAhTrsu7P;4S2N5 
z<-1fvUaT7H7@s+`{ViM$hyK-orYpnbhwj5EFOMT3*;oU(2D6*iLOdXP{R^}XK9fh| zT=oD2R097JN8e74i02-C*k{i1vB*#0JX9{&HNyOr{kM`!wI{dgKwi~}%Bs!~t#zTE z@b^PH(?e|ks_plxapJ*o!N9+gi|sv&e(4WnafIQ^)`&>w07 z4&xI!T1`SnCv!D5g=?#++)z#DW@-kvSGRL#HIw_Q**vJG@}t*tc)Xg+Gu3>arxu|1 zckps`C$CfY@Mg7$i`5c7rk3)vY8jtWEA^#Fb1AP2-Vp1p2ol*u zwbW+ns2&7~Y@{(ro20ht5i#cD4iy`V+B`q54biwZI<}rCO*e}*%lm7pya)<|B)z`Ve!Q_@;Y!hKFzW@JtWS^6+dA`#n6z!*eBZo)nyK@d6lC z@l**wn^T7G6bIUzGJKafcZ+k6IQ~WA-fQtv@C48zrzQf;aPn5&RH8EY22-w|X(Vw|V&y-Y&i! zK7N$_k6HY<5Avg(K5D?be7u|Yh_lz?eI72BE(JZjUw}K{<0ly=V$1m{srr!g@UX>4 zd|+NjEj}g)dfG?r_;>_A!_P{d6XJUgOTP1;+spsr=OyX|abA>ozoayhgp(eANt$`t z;#WL;%EPZl@N4{f1ivA@c#UB~){bF9)_`F`){fs1=UorK=S9r>;(Q>vK8)ax_(2Jt z7UzsOXT|wgu=I(K?%+=)-)9!2bNqRPpC<4Z5pcfrQ8s@i0DLW(zL9!h3fF+oOTWJL zL1_5BI2SCw=;0rvwjTvMKY93P8J=G}{Huq5^YA4fnD*}$UzTcspDU8}4-a4UDCh>V z75iwRQkHUgX|Xc=;+o>PEoA|@$|H^!g_I91s|ZWkP}A{LAVNiYRFp@-yf9Hv`-V8L zsTlDc@~BvemQhi07H0_5LbEE~rxH}6rILKKMV0WVWcj9eRH{dn^r-Ao9+l=%r9BE3 z0bBS%OQm}h3TLabKH8Tiv#;H(7m2ai9xW4li6I`-^1sB6#W z9oqHCW4hd9WI_74QR7FAn=&qa(v}i-7c^$@$|8hfkP{r@o+{J zrcccu>z`6kH67uC$&)8chUq90CQq0)zPi9LtYCQll(9wW6NabPl%#1R^NWyPklt?W z*eQS@zfxg(AsR-;!l9E#4Utx)g6q&%O&>C)D1FrU^of)6hZc<*THqw2)hPw(BawAT zK|##;^a(=>3notm@D6yvacSl>S6ZndL&tUv^}Ur8$S)c-VSIOVzo=lubbVpByQHo$ z))$xZk9mS;)6Ep6YaQpx`chf*2hP=VS9;Tp7SoFg^rV`;Wc|ev7~(l=aI5m>lK9G& zs$!{3%p1%?a1slL_H*UUO4dd2E^q^V_V=jzxr47D45Y0J z)NoskP$O+MicL$6w$&Il)>h-xcw0@7+9s;8dT_Q`(ltr-*RK}Fm(!cF%@SoN+p16% zVRUUZMNPHUGqA^DSsP8e2Dz38?g*n?LeF$4P?LEC+LL$N#czsxGVtA<%4@fA9RI(y)+yRGh5 zYoz#EA;y|fYMpw(R_n#tpdQe3Q$3C2k-E`Vo785T-T}37C+_TM@zI+H)kwV|$INs+ zq#o8MbIb}cThvxtZBvg}YP+p=V6eHSS*H}LKqq6uun#IMm^7te{Llg$0=LI>x0>dT zh{x3vw%Vz7G3=8b%P~`;gzwmDw_tA%Y@W}4T`kzZmg)1BO&I|@i3t-XhgnDK$)U=? 
zB$+%gxSu>7La3M&{X=V`RcT{d{$$u;B#AA@jV2@?M?M#hb0?#JZz&*(UBXdd@gs#6L4DefC)|#0jzP*x=$OW`Ox)Z-9FkAveB9@D-i!v2dIy^`FMp_IM%mkIxp;34}4f|DFny^S<-~ z;z1pMPkl+szAPpL7~{VupOWNP5fA=^#@+xBir#d9ibrf1fVb#vDDw+3*enAcuI8o; z?}jtHh9q#LaE6<@;D28l`Tz+qKo2L2LBiAcLk~~m1HSn8gtLeOul#$$Cx`<73@2P` z32l5P^_@dJ80(E<{%b9LfjDs6znA-3%6)@)u;Cl!t`++h2EAbX4tl^=oiCOI=L?&4 zeB;Ds9R))u{1s@#8sWr)V?$ww2=ER>q{46%BY{aOaocdh3Y|{6&F!TN9J}Q;z+DB=b#y%>Y8Bb;QQ#uZBFr~$73?<%H>qHI$*3K6tjkaR<5Y*2Ij-v{<2!=OtZ0lnmC7Q3mm{0pJzz8i||#W3zM69E@P=o{qBi19HCU9KFA!;Jl(H4;EiG$^_56a7CasKA;6;r$% zn$z~vExE=)N)!_hSWdP)K&_a9)H+v8phFRWKi8e&4)>s;_?#ZJDWk5*$H(93L0oTu+Q)cwD@6m4aCY<28yT*3u`fE7Vme)rj`q9JPl>(O4}WMV=3ehe zyIc!(AxD3P#q|Jm*CTK{16EUfPIt3&y^<@%dw>RtBDg^ch;q=4U@;Bedi4clF(e@$ zLHQ0j0h^%fV-YtLJ@NOk9^HENcFDPyhSe3Tad-;qTCTb>c7S8Jz%c@Fc%^y=jvC@~ z;AoKROLV9BP;_uzccLY}g28pIL{He~*?RS4qI*5}n4HHM39z4~FI z^(Pn$7A9H(<7fxQf2i=!AeMGuyk1ZM8+`-D(IFTSgIdS@KY=k^1Tdo1*kT%YfUp>c zDC5S6G`>C}6LNjX7@lr%6Cs|$v`g_Fph--A2&u$78K!IdsSv|owDszdP`y(UJ?p7+ zqG#bvz)fXZI6+3~A25*qca#<^bh(vinX?0tOOs^3U*3?{|0N_wjiul4q9FgKp zi72M&Td#hSBJCOQJDy6#Gy|4c%q{qK_QOoT5N69W9P@>hFbY`(x!h_Ph1^GJ5MyOQ zfK?5mt4=T)$fFG~CD;UGgv~H8co@b6{w*+o*@oq|!<6+4QCkMT_U7eIN2=h6wjo1WuE^a2OzMTkOA z!j9x6$o5`VW#|>i?M|s$^eRpRuc=n_hHC4lcT`t;7hM8m}9idOvXY`r+kQabO{DFmt9ZM71we4!v*^!m;VwLLW^;uls!f*_8J4( zXXJB)F_NQ;TLJ!b7K?*}ea+K~0l>p{Sb24@dJ~fW&IdNWKE5nJi)hm?i#6h5o0J&qyN(iBX@| z*aqQpX#iszDtNrlq(qwwso9-%9ad{eMd6nCU-A%cUOKECMra@sV1_Rt?gMeWWBB zyR5)I5`(gpC$Ou;W^dJ2y5k_N23)!mqG4Mrdl&2%!)$z{_`;hKvzqK}?o-%YW@KF( ziu-}v6jRXdUlT%eF|F>_bfK*s#5Oz$HeyDD0J|M=8l|!a!Ldha6zP4 zRNOEpCf;(8wg?fe4ARz)RpYi5(<3sCwr!=js&U)Hq3v6N{Z7@MqgvOvcxJSi7318e}a4`L3Ja<)%dmNS2HLAuvAz#`0R}CFy>bk>pm}^xnrkyu7#vFTn zW4tTZn}`{ao$E8|iZM5ehE6@X*q`~sB-i2bxD1Y>Sg3WCNkY*?a-X>a!h5xn*4wiF*uMrgv4njQMDe68bg4*lEG zP5(Z)f(YOt+}%(KnFoo+{S=wvmEP}lSW<3eq8W+;BEnHoGFwt2Q=+gX_Q^zvMs;9c z(tdGDba=M>@ip)hltB(nfvMOV5f3E4&4VbDhd^;W6i15!OoI{F-;czOaumpPG|lHR zmI1kb?#xAP*N z3B}PY-o&%{VfH%|b(G?9ijUOAn(tolMPbVo@65SqOqZ+NoAbay@8)EuKG~dXxyGsC 
z3r#ee*Ra1Y_rr|H1eE&{-cFUL`ETvKn$*Bp{lBkkE7`2sEnU0@|Mv!)X@h zhnHc36r`NRL%h9E_&36WTo0b)=1vPSaK(3(D&rd4-zox3j#!0<#&~!bM%r0u^yCh9 zeB~R98#TFU&YRFwV`GB+BR~b@ie+M_X)C8m}Q0tTUW@04UL3DTk|2 z>!Cku_SXx(cdqF(q7oE}^49VE;xz7a|?z%qa zv<$fnBzNt5Q3=>Pr$hErmippmW;)r2pBZ2V^x)^CJUX+9yM!ZFP5Vb3P{Y1e zdg)2iqG|;n8EEci-mLo%Ht&sn7)IM$AY0vvE!#HSA9_UpHrVXz7@{OE%HON|<(nN~ z_j9RCbdBIU`R4mBUZp#hi=OA?@E)%4oo+JR90WwHA8wWmnL|xmyQaN9HqtDB=~0{| z*DGNyl$caPZ>(XL2o4x!t_$6II1u%(A2g3K`=RuJF=l(0SqmHg+WN8aW*=`|EMN67 zzw7I$31%N|ru(gnt)TCnV3u=GHr`)<^lA*uYBOXXdvsNDW&PU(Q>^B!iRK?JFRohQ zAmN26a#WGo8t;DeEHb-s8NIE@9LnXjJ;l70<8<#SW?RRmJZ z-DVk(ro)t0i25L)(jX+JQaa~?Dir~i8m7kjfdz3*0CKI{&4Af}YEymv z;_YTt2UpLGS};Xlz1_jppqXaF_}KP|PouF6kj1xu|y<@4_iF;`dm=$!HWoD*1 z0Q;GNy3;Ze1Hky5uQo8OYb2NgRHkq>>5JvA=b;pmGKl10;1EuPXTcBv0zS$YJ zg`3TNZmy#nKVsgM(Ew+pRyZx@;`rALitXl9Lbae|bqkDDT2gt{TAzEw)K0Z8|5EDR zPq1Z_zZ=z7*KciiX8vc}&2QuQ>EIiO%we2=f)dnouqS#R#x5_y(B&kJR4>tZ^%l%| z-lp5tX{g}O&}wxS1}-1dA$1O>JfG8>>I+B$zQW1=Ynb%>O5dyVbV2<(8o&i(Az&=OYs|h`;V%4K-^VGjA_jAsHlg9)tW7 zHCJ8;KclI)ilLz@g=VW%3aFBRs8sOOCuTHfR3uwfq9h0_(o{`o`D>v}Z5p8JP?2g& z(^R|S&&*V=(2b&04T&j}C)kFjs}jz_HKE+9qxNxgRUAD0rJ2YwI$wdP z>$*%T3&BZMmxt;?crwuCqronl?sP@c3Re`ZaYfUs5U9NFilet&3G}Wjk-l&x>9OCK zZT>VtuU@W0^m3h$X)1=koKDVl<8vN$PXcZ1DKNf$-8Xy)NkK8ofv50{q2G8U(cIYpaYbsv@EY$;PioE#ry1GOT! 
zHrEjcNBep{NLK1gKu*rs*@ziUs)rlNx3M@)Jls^i&3xFuHuuTC&gX~Q(#Ng1wbY*~ z#oI*S%+^jEoQ&H0uvh3H(KtDE@_~7F7N?6iUB&5Uad(S*c(|tzC!pRQ?&IS;?i<1V zxPJr>;5HTy^zfhvjL~3m@_jr6H}8;lm}DGV;KR0QgvBF0JW66mTRg_YW5qiz0#XW$H@51 z(K%JD1pSC|r}13Pm(3J?M!6GX=P~(uK;@?gLlzkVIc?;C)Lm3x7=3cUKC;#mjul~?- zZ~fnPG5@R_4i5IZKj1P`EWX>~dn{gLiKQF1!;3Ax%i<*#FJ=DiFB_%c#t8Rl{$F&@ zL2smc4(o)cqbsYpBHi^D*hO!Ma^nTAzZlH$t2HgIC(P^?HYmYm8fLY{2j`04}%M zkex2JaUu3Cz5Mxjm>?&)OH|qclQ1%SPaai->-@MM%M|U{IZP=UH8#5kOeH3x1h3FX zb1Y}?QZlBdLK9YoO=FkZ ze1e|?k=SA-Xz>e<(Mj9?$>@Y%I=sEeY7iYU*#I*++C4NM zMlNG91%?$2nKB|+wY>W<`}3&{7NdCVA2F%mOA!>~n<`j#!k}Jk)$fE~;ih?<%(m;{ zyAhU~=3;_?8FxK?3Bq#6T;effoiC>9_4HMU$NUvrQzeu8dcqncV4gb#F#YkxtBm1_ zFbkb9=G684n~@(g|9TjrGhqVP!`sm{1|N77^*jdA+nYFh;HEJ@hX);^V2z;9RCIUo zXQo8&ql857{ZvEb#s&n(xv>dUqFx;2#VL8no4Sumro=)*R1(qnS4^cuB@c6klE{*l zmsP4-N^~{3F;yXbLU3Fs<1&)dbs0u?bB=!gu6Jwnh7>*JI zw^dUh|EszYRs^Ax>v<1QH5Z{mw6!1>lD31A0>xIQ@*_;M5) zv||uRJ?)45_&CJWM{$}w!NZ`3yaRg3wGdxFPA~HbST8*RNM8WqjD<`B&v(K;WfB0z z12VX5Z(zpk1&Lz!%Fm&(V&oNH5Dl{9JAhQERuG3rWFRXOpUf7+@hvC+3Wz-vu^iu1h!Z*7bAWpVFeJXibOaJU@g0RP!tu$p#M>H(llcI<7WiZ)$X$!)oJkt( z_>Q5L7?>6jE{liS+UKX^VVLiQ;YUfME8dQZpFyc+>QqNn3NB5r}oz>yYFzs*lccC;6p% zH-fnH?o=HT*PG!1lnQUeNQF#qLcNXR=t5Ql(D2 z$z2Up+QTGAj2JqGtdX2{8^RjNX}41@ z^jPk2J)NP&LVcoR>Jm0!fVZn-z|akCz&PpYP@O&SDOpTCdvB$L4yZuhV-!dndmW@j z-e|K!R=BTC;R&@V-AAoQDNlM4F4{NrMH1SE#Y8thTI>fn`!j95dUz|%b1aj>ttREV zQ8eXR;eXR^$a2Z)Z7Wf2g}ZB|Sl1fQ_57`MNenmN9SW3wqUvzk52l!Sxe<@-Q5W!+ z*3Ne_#=JKbk$m5T<=`0zCE???824KtW5bksNYRI| zv^xU}ytCjj7a-J*hY~Ff;`l}^N45x@)og&k073yE6R(-LAiy_xB=Rmn-ZjYkB=R0Z z-sd>dA@j|c{8g|>`riiUaADF^#?*Qjv#t-8{P*a6#BGAe_XErd11ze!lj=f$o|nBW z=)Wyu@rEx-oJCb<>0^k0)9_&5CoqY0!L}gVDXTECS~+EzdxJGfJ~!^!RdVuqc!?hj zUK+9ZHXueo5N7nRo2H+*0}VxJY~ zJhT&0PISu_2weqSZb`y_2s%09E>pCVqy1GN$tfs|3~vvIxCaE;&}GXd8+lx2Krszm zLsm)r?WRFHchg`;ZSD}1^OmfuB1O3>QJkyFO&l=ESxKbTBxircPR|TdQ7$}M^gW2p zVN@6l#{aO9kXb7gE2m=cy>tX>{)zEjl$8sme*k3)mzGZ ziKYJg%)fxn{|=Y$PjLMsSYpxo_rl(r3YSItKXpp%U1PKm9oWNyAB{=a9w 
zibf^#cQxFJi8rAO+8@RomZYP1{r5&Ob>-!;$3#{1UmL|-m!<15(G>mn%vN|nH$9c~ z-!pr{U_ejwUj`IY%zJG`#1k~wHsHT*@sz)xhU~$tI7CCmm|~b?Oi|z%Qw;B(2gW|4 z`yLtuV#mM3#^C)l@+ghkLxVy3_;=VCwwp$GKS*Qpf_@sihbqZlksY!PJpp$Mmflui zTk?i;JGk=D=AFocTU@Y6mah$~U;;?d*h=O*G@XadRo zPQ&*$e5V@ER0Gr2!`n^2&_Si*$;a=!h7DJo81S*}SdR1Td z(8vHtb0v1>0kpPSoBbfWTB9fSbGPIIy|16UH9nX7xtrnBxWBuP1G?c`TuDFB-z}=N z-}<}PS&^_ff#sMNXM`xc;kbF2dziNbnoCBVDLT5q-7m5fnn^=1NI`t+?#Pva4zGLpo>#HBFQZY(K6bGU|HhINd9!(FSJCI9L?p$ zsUS{8aVq(^GQ`N}My6zhEV(LI6JK?4vVB~GbHs-S6yiG1!R5!fTuU4nq}7odi;!{8 z4U3h_h642koCXq|>w`r>LrX-sd?8xS;^yLO;p1DlrTAJ&xz-WfhTDp- zoflbd^@)*<_(ZJiCI4LREP1;Km|ex!O(5(pz8>QA6sMQPz45Rz_pvxnK#t{V~7P=Gq%c^5u(HwXSl^9JUr3|bKX%N9_?c&)M40{$K!n1 zCBh|#g=?-Wq_G?#>h9=^-Ncw%-E-y_Z< z!6Dw6og`w~B_3WXUR+Bq%ga3?f(!s~UTN_v0mGpd+)5tE8p27r@(Mb-UpMXS&JGS6 z?RMM$|25ISUK1TL)xApn-;3|{q^jH|h=+@E*<6EHQY-7T4OBTj{|$Hf;E5UT2RUXH z$jaii7Ow+wfVA}VneI=GJMqg9a{u!i53XC;g!@pTYn_fDnK$sp;Ply$p?fxoTp3Rf zikLacWkKcq-Ib&y{r&SsdS?+UKV-vJ2+kI9w(>(3ZxboC*6STh8^)UB4y|(Nc-pBZBdO#d~eu$Hg|R%mX$ysyMa;LsD#9lY)r& zNq)-WLpC4AuX6Aae##O0X4w#6KqikT7etbclw%P3^V59XhG+(T+A9uz_yj*?p?BTp zZtZh4DbnmSa=|l8PaNV-_q@i>$&msQ?ecb#-Z#&k;Qp7*&qKuh$~^Y~B)=%a>sOqs zg^`ws_rucb_naT@(9`C-nky}F${}~?qIFePO zK^%Y1UjRX!Z2nTpe8nH>hZnfpmDs>v|K+x(&EN2GZQkLo7k8e&wQ)Nh&i6nvU*Pq6 zQV(}2y>qDBlN3>*uzH1I)raR77EKb|;I{4${G-0>4)*}S{*#`&&RtCp?%+0|pZHm> z%KpN?3jVY7##(M~u+N?DhIqQGW(N>=OF2kwG_^!l86J^&+DrC{lt;9_*MTG$JTC;MkP!7q+ z_8>kWcesD4{b4D;1@V{y;Z%`PhauY;MiwBIHzaQ~i%21Lm-_FAdz3 z@&r;a6X5=(v(J;4z$fQ>FV61qSi=goA#>g|3tS1&+f=04Mo- zk)x15fU_VW)VJg49~d8cLBd_G$Nvi=2~1JM#yJ;EMJs`ceLejZlsiQwUquElBIO!s z=nf<<3cyDc(D$uy-zFCmOGB_OcDw2OCmn-s}9H zO2}6i68^!BQE@$~xIX^m_fjMce;HC+TB)&c8{!YoOiK!6c2bQfBAV=`rU$8+6uqfd z|6Y=6gVa18&-sLG=2L=n3v>~?sU-x1t>iYN3qxYWwJyd>*(hC1ZQv^=8)8B+uCM6a zN5oY`NIYfGT+X27igX*3IU0I`(wNT`FrTYp=GMZ@ZH2dPV5Tvv!-(OLATdNFmw!8opawf)os z^3a|^>J|R+7@@|{Nq0ZU`T&Ga2SN8w;Y!V z82$PTvFY+#OrdP&9HMLr8Qi(++0G#~)HN2{clta$DZfAttimA@R3AbFFB0zw2dTFV zZ}lMc>42fjgIKsP;P2P*AVpyW`*$+x8i}Iy0RhOUKS<^M)vk>$gk5+{84@oH>;W1C 
z1T8S2iF#eG&kjZ?6I!TzAaDr2LqiuMh6#^;Nrct~B2yloYjUm|*+($2+9=$p6EhNb zDIEE)QzS!#y-txlKg`=iJ5VL3Z-A5>Kio|-KZ~a3jEexvztW#iFuTlX)A^ep?VKdM~dGNxoK=%e_LNo;0OF`r7A$9o%QcnCP26S8BVs&z(zCI9a zE(QLoV={{5(E|VuqKzLxGGhSBH+~cW$-=-g#3*f;%!lNLDclS9)`esHdwKJp%OAuM7FV@R4$_>R=zxp-!cju3Qby+@M)wDBl^-b`!;lRD zZY~C)JJ3H0{t3X}4Co>P5STs!{}Ygf3fCD95q5V zLdreoC-)pneYs+Ukr(^;BnM%#gOm-yjfLKXJV+LLxp{eQr%?l~%6|#*Gtt*7K?w%Wp$ zSbk$mcUkAY=t!O(|8V!NLr@eSiWIB)>N@5nyq-GYA-AdTe!vm-=?C21y^-%g5cV!m z{T`$r@9ST-xbw`9VAp(F_uT5f)%*nL_*DO~9p>`~x4LVYqIvl;6!vX%&rduLnc26v z(Ru-bn2WHqzCe9&%-Fom{Y8aek@YuJdI{pc-!Zo@qdQk1^ZNtFgjZp(cNw;VZ4leIdU)u*3beVV(roFX=~i zy4&+9{nJi}Sx@RtyWAnm{F*;~^{L*u%Mth4yWRhXz4wl=Gf_dfULo^x+*LUKb$ zp(S)e3rO!BDWMlB3IReVlmshsL9sUwbXx%xK~yX#B!CJw9D5r_XY8XhI`%S-Gbr+Y z*FHBz#Ce|g^FDvRXwE%l_pvj&+-uIS3@u98k9hR};+6f#wH`75 ztPEY8beOM?oA`b*zTX_)9~#DwcObOL$`4=ta^1V;+ygejKOG3QcEZn)AhcRPYrIE8 zK4jxv|7v+|5A=mSPWACE$Btk(_RZ~~pt<}}q%xfx6XDD>@_}WHRuMn#!rqa)6`shR z95H#^rOQ3NN`BAu#N=_8J{$BqSL4knRpZSlRi7`HHFAkZ^jbl{OA!RTUHZa^UN6XZ zyAU+#O9b?8mw?{w(wAGoUpUS!$alMZ> za$Q7UuZ`T@Aa`Kk?b0{N1y4r1^eu9^HCZt5 zL|h@ScFE;#DeXOScdwLox7^()m-}78#p@@{^q`oXB1YIFC3{FdK3oNn*AXe5uF#dP z-Y@SD$OW9dZu*gkn8QVogCCN|!>)kjjnfw->o_8WWaJ1>x%z2OABk|H@flD5AtFM7 z2*dIDh?*p@cU$xe66{4!zvStc<@S|i{i=RVF0UgAkgz*Fyje#}6@9BSf`iBUo%>Lz7 zLF7sE#G$o6McCZ@?CE3jTjuIt1m^Azxg6&r)N>0EEsm3{Zgv843Ce2_u|g3}yOoB) z(DS%+!g7hY38cI`Wh~$6q(s1~Omzh-j}viRoV19O?qFBF3q(BKYKQ*bbLvXdd_WsI zNWfF{Q(Qnp6BFSIC(CuRT_?vH?^YZ*9IBJ{|4TAYlw=-xCG_b3fg!S$WLmdiW?Sb_ zgNpB83oX!`UQml`WSaYR{fa(sgtU|Pmg_WNEHe0Ary(6&G5XEW6;ALMQBihGOU^fi z+q~@S8+l3Q$_taS%&hI{85KkR!iuFq-@{F?tVs>9=4260;Mp~D8b_TxC*R!nc4$(Q zCOnDJ6WgwV!=o4cm_(>&_5kV{Lsv~rqTG`9lK&VWO0 zD@Gb$x=y>O)86SIPJo|tokDrQv7z<%D|mPCKgXl6g~vrAnwcSbTC_kq&G+Q%bfU=B zP-oMBUl2-ur&w-7WRp+2PUoo8#p#OVO+6WPy2;%e>X4xH9g1>DdPG!@!=T=2=)>)C zSek|bEG_pF`CiBAi9cbS2|K-x0Y+Hlt2|ofUNk;P==W=nN77 z!RG`v@Hunn-O#yG=FaAomxHk|$qk7LBjw}f-Tq#vVR)!T=F_Usq*q{Yn)YlkCqD|) zLkZvFB8YviGcxLo0;iAoqod9kXM93d>HsMxxO~ne3U%8E-PD}^eyDHb$xdTmR_aWN 
zI;S~PqhKyfk2*7aRKAJk!26*YX)~Q!u2Vni6jNYlHpgJZ%X3Aq4??bPS;y(>I&*z2 zK3(Ks@#7{$sur4$AqYC3v&by`Fw`ZG=PWk2eHiMMmFMI1QJhk8!hEJtRHF{_#j4IS zaB)Sf>Z}k%9``6HdK$$bhG`c9%Adip0=aqw)tDLcuxC{d7$Qe6IKomdHD$hLL z9C-eA>2;&hjDWm2gD0ACPp7v`Ig37W&X&tL&go|3Q>oc0=Q?u)_m9hJ=X^7uVK6V| zI%~lFbJj(j3oPiLbCGn+F!Rxfv=(9KV&@WR-Ahf{$DvMXmpPZa&J_Z^cS@9<-jinh z$Dti5S2zd$Fn-|D^)_R+hw>_l{}#%3%oCr++M93wQa8dR=1|Qy(^67Q+UAr< z#e1J{@&R$uYb&$eWz8f7 ziB4=8WoPfpY#--WklCJ0yc4GUDR}Is1%&RIGIqb(iUj=_wZ1+tUti!Wvnvr-Qh$nF z2611KuP>`tII|4oT$zgd=73c>i=6W*{s{R;ewc||rAW1s_>&~@W=#^30&^h|Fq7Xz zUVoN=SW-kf3DpUR=Vw?}w=_l%xo0<0@I#s@sQEi81JY`}O0huaLpt ze*JsZS13o#S1@<{M*UO@@((^Rpc0=O@SFJhnN&Aerd}*lZwW<5#B&rb{~ zxPBrlaF7#-)bkr#sj7yK(+Fat1j*$p)vfx%2evjstP&q;wZjipb%Bcg_Un&TUm@MU zP4pp&BW!GB zgDTrS!S=DWltqr69D7N{-qM#-uQeM3!Te2W* zlK|q#qH^;oUcQWB{-vKsoSVKePZ zs&goPGm#TEtTtkA>5HaYCV=+*CjjL8*s zjOwvSC*_Wkawp{1dsS!dI)gfa`|sX-IZs7eb(qAT${o--zK!k(DcDH<#2A>C zEZV0;T!fFsF-7tQ%iXcknE~6hcnIhIX|B&8@cn~oh)k~;G^|Y=Qq>88-X!MQRH)Gc z=uR0jycewO?qgc}@7SSyYWpG&0;fQc@Ou`;lFpGKOPH|E)%6*94ag*zVk^4_$2g_(&>EeM5aAbH{mWo#Hen{T~IgUyDtrw@Xf2kH?I!gygGby!b?k7 z2Xz>!v`Ea+mb3=^fZry100l`Re|4iBd)7R-s4c?$)2S7ev#rueTBXYWJECkE3DPp$ z#b(OoG-QQS{c#};MD$s{b#}1`ef!h8vxVqc3HLrVH)*?oE!8Ahf^BkA4r218*gUz+ zuSp6Qn=*d&2&PZ~XqUr~S|GEeb%k0uDr9X0V~b==EyhAHwuC{ovx`AV$1idmGsC?RoW(2R`s&1>Ls01)0&9Y6CJcF zh9Flat8_}0nj(=tY6`Rz4!H(mXG!~un&NE2`f1jvDb68VQB$1Dj5!bKZw`lrR`Yy* zPe7LTb0n2!%$7Ub#cO(oIDt!t#}n!bTQB=&E$Rx3M`;H|z9;H(=&B!uqc>Ex|U+KX(qD$1VkZc0Q0KF#pTg`yCXLKcW|znDp>#6kgpR&Jaf+LF5sDDNb`m9uMkz^g z4iWAeG*#V@jtf>Fh#hg=gMeSbQ}jeX6H;$M-6S&9+G&}Xu2H8xT<}ZYQ%(BW3d2Y@u(Q@3ol?gn0Ndg<9H}eD!0-Wvct+@(h(`k?RIA$6Y!dLo37>%?H;n8i5s78qpoUHLb1FL%#_wu=I>}n>S z4869oYe*t74%@~y@PPVJzHqnkh{`{xt`(ZJu|jR)cug$6S*Y)IRcuE}+t_BENU3IB zv#c1#v{)Rf762X%(KFHY7IUCvwpxgWYdHtW&c{&T3Ox^!IiF#7afW4YBs{Le?V1>qH4$6fnLi6IRR<6qt5(EW zOz>ichtGM}LX_J@BixO`>Yhq*zTQEnxym7+N+h z7y$Mlyt|kV-7RQ?@(YG_Vj&e2#NRT_tgH9kq~tIuu-AY9f{pfo%!pC(XF$KMHTME6 zAP-i}Fajhd;9fO~UfWX*8Muele4l#guzL8!Sz;hNH$We6rVnqWA-8~XxD}YtZOHHM 
zRHy14;C0-EZ>?SGEPaoFn^tih7bhl177UG#r6nARw1fXb%D+g4+EuCcKBR`oqM~i{`Wn6p8hZ)v9E{WV^E+Eb<5+|260M*b!0!R3 zOh0}P@;iv%NBG^&@1yijD#P+?G;(W@#`hwtn~}LsM6S6*b#!_w?F@D{sRN;~GgftT z#_{VS7LWJO`L)|KGOhmCh`UmAdBAOHRdj7zJ70dxtBu@dH$Mm5 zzuQ20M+ZN%CCK@+pjj8@JotuIZo26ob@NP@Bv$}bh9tS$eAjJHyx8D6^JUl-53EBY z?wcW-z`sX6^~oZ*^hK_?Zfj=7dTxrDkuJGZEJ}BmxK_pW&7uZRn)k9@TnU;+IqqvV z>KCWXz7|3XF*R$q;uVv;GfeYbH=U^6a@}E0!f{ADz02L zLM|iaGAe?crYOwDV047N@mP5`&Q;?hyqOT8x3C+}Rg>gyGFx&r#Z{-#b*f0-U_PF$ zFduKErpsjp?u8U4Vn8FefurYFoRQ}))om9<*^4iVVl>fO>?xW@A?Gc{gEsQJ#cGMGmPS>nS{7yF zmoN5~)?G$+P84f1Ofm};a1>1x65UP`p^xfo^w4_E^E<1uDAZ_GRGleFo+X#F6_&Q= z%DeN_Ip%>vw;<42buk_P=H~ki_eK*pE4W&#K_>rUx3L*pgwM~yZ@o10x636+VK}Q| zaZ~puof;{gGpD4itXW}Up;pUGdT&M%O&>9X%j>0?a9`N z25$Vj0`Xq0CyPIq4^u+7t^E#?JtdJe`!{} z3&T(VShdFRi>bpiGl7I#w{Y8w;hn~aszOEfs$`k?4oAxsUC)SBMaU{flja!MkSYmD zZ&zeA}j%OMpTuJX8R!hIvUwQ(mVZ!Rz7Uk~nk z61f2~vO%HfuPLy1)8E#-4J^dlb$#EyP_Y)!nR~+wM~-*YR~+wXpuiQXuQcIMs2n;#5a{b%k86 zbk$X1RZm?l7tVETP#aOcs!btRZ3dHFl%3!HADm2RT;Mj<6@u%t!u$}4)v?nvSZReQEVFz1q49g4YABW1brqUR06Z z&fTb8Dze&D6U?sm?iG5L>C(ZSu8rB)!M)&rL2!QGK-@h3h+Ee*D|FXowu`Dlh1;n@ zbwiZFgEsF*RpwK|LUmIOB^=xg-Cm)(IYE_+)RHLDkO@&{Fqf%nS`-tyd9FYU-J))F z73VDk{72oUZjVASMn@UM{RMI6l62t-w&c>fur|{aRRUF!5_IKvlf>cQkf*3L*h~>FrX!s>O$pK7b>^9P`Oel-=sCS zx>UK+=QiP7TR!BS(ou=x^`Q<@pYTBy5>cfq{HX(h_jOKKT-n^K=uK6V$T87@BoTz| z8%lVo>2NZEj>m<9ds7P^?9`6GN|kDbT%Wg*#tGQQX##0ea3h{t$yg}E3ja*H>MRCm z{A?QhEXuRmmPrH*4d9u^P%l}2yCw?f5o#<_UQP7YGS8@GjD?)ZMi!F@nInkrtoN>( z>{`YQ-i-PmWp%RC(XWw{Lb3=u)saA-Se?F3SMyU>cTv+c5uhDWtueS{hmtOSQ>?Iy zyCgiQ$UAdsH@90{+Hei?OrU(l+&>A;i--0u1EG3D4DFktsw+TM*AW4GXUwU4px*6+ zuB{(ohF}pr0#t`0)MR5Zgq@07Yc~7z1(>~-qt-eHMb?GLx35CJeIwtuQurOHn#6GO z0Ft3Y>X*2BT06F(#Im*_lYBZ2C2nxL*(Q_SWpkG9(<#(MhJrMsE7&rLyCN98GkBLo z0amG*Hm;nM7_VkgCApi(9m;3)cEx;k2>CdLlp*M$V9lPxZP2!NM-@FzzAP?t`QNZf4TlQ__ZQQ$T1}s*Ij2{^9jW7^*(& z3;F$#QUHBXFc1NqhKw#9lie7`a~YgZ&O|^~4@0Xg*!?usU)Q(oAQL?Yk~iPhCB&3o zz#U^2)eB5>o~_m(TWQVxAhn2hGKB}(N((R5-!_MN|pPNiU={*wa8xwn$xEv>?`;t=iy}|8sknVfczWGEFB_zQyM-`BtzA7 
zf-~sWD+j(|+VpY`<;7uLJyt`w#2!n*V-c80H9z^?Rz-mYgV z`q`c3j~BSRwEyO=_3mFQP8;ZECEHD9bpKCxO-NU2Lb~kznwzx)LlJXtk&Shr$eomK zL(PBm0jC8;X%P_z*v$;*`@5Ll-2K4M=8>O|-24dL{)p!TM#CegT_1O|%@t4ETyA{Yeay2}J}~6HYs}1- z+(E!sW5Nw>51DH&3O6+`z2u%@%kf5T%k#IhP;3u^Gfefzeio{m)~w?RsNEO5?C$Wz z7OCH5%rS+3bVpL6)qiwzo7sHcdF|WB&6a<=6M6gnzukWYbw~5rG51BAMU#C`J~t86 z@rgZ5+N}(~{&!_}G<_!pUCeCj%V~9hT~`aR>uMc~fUEuSMtT`fo4SL-Z1%|uYE z=EC0V`VrkgHg@rll2+$ z5|PPjJqzAR7l+l^dbWg_Bi{gl?XKr0$GMis%Xz?Y>jhHKg~;usgo~wK0KP62+t{nc z3VMKE?&=ltdS!$oGn~AirOV}UmDCNF+-Gs}9daWxa z(53#v1wgv;c7@=^t_R08g6#NW33W+CUupqSBNX`xPhaWTgZ9$c*glWZ8>BuPrNDRq zA0yV!W5oJ-fOr5Oqpz1cOrQsd8}I>Q0zF1dpa+N>@G;^Be1N`H?zYHXaI1^^U%{WP z*s8tB|7+*OEq;H~rB$f6>@WIqva=R)>;F4fp-Ypz5jx{)4752{?&5V??W2c5l`z-z zO)9FW>v}gh!R@ZTv*LlUH$9lr+7puAw^>+M@{8)bG+U=z1mhK?#wdqV@&EdCdR->k z-BAR>yxOgI;}ktAPSNGK_!dr}>pebR>d_3ZNB%={mi!@3lPkAv-jd?dl013j)H62V z5_`Yw*;<4#Cy?*h`w;mxQ%h_U~>4+lq^Z|iyuljZ0+c&P+G zY;rQa;UP}dKdzrJ8#e|s8pyXNEgr3_pNet@<49CLqyHc-ICdJ7>75dImW}g^nO?ux zZjI&TF2ynYC)HDOdg}$#sGfIS=JWaqIb{AKjVcJKd!qVf&MIIlm=)Boa<}4Kuah}^ z#LKGVyI#iW!YfhzI_DmGWqGUecI!7lNwp^CFxWLvwi8Oqqx#SKFJ|0L;kp63$?jLD zbL0vTRRt#MJJNpd3Z`lTH1$I{cm82DUaA6pP^;9M6h|#ABgyyl-=q2y{SQ}v7UkSI z#r;Bm$>|RL$aO1{@~zakwH!GAM$VdlC(ZVK6vZjIa(3L+|B=^XV;)6Kd8}ra9FdT} zMD=m~FaPEHz1^_9bih;BRWCodzzs9H9L}<<0(0B;>;khP$6K1Q+X+OSp!L))>Hk5i zR%XBx;U=c^mT)LZQt?cmTYLH^y#c24n@K)+>4_kpQ+Lsi+9+pdCw#rSDTQ63B?v<)#6AI373aRj4dEb(szRYK6+# zivU8R^Oswx>We#C|EWQxYUm>dHWE7wEF@~uZA@~pJaJK*6T(R?zG-=}PuZj;?{vMP zLNya*XRJA2rsTL)C2E15G)4{$_N$f}d)5qM1s+wcD#aQoHf4`02=WBtqRR&i#M(&V zaE6pGYF9rJ8G;G9SGD5}%Ifw~YwA-|x(=j7tqb`PDAq*96ZOX9=~V3F#0*YY`XHzN zL5*VTtk^IHxeK#9O+_(K54}JGbv_%eHSBfPvUggC81y2x7MJpL1$);k87)__SKWXO z+D7#{0!WSk>J)5qZFObD3Y86nvd>{TLe^|^#VK4*s5_SLyg@McMpTQ z2m8vMj1u@g?rx=_lE~_9bZlGLB|XoZN)&G{n`K`L?|<9Mw}ZUFg6lYCWpnq4i}OfV zJ?5%IF4|>G$l~DiJ^r)0+4Vgov-`E<4;hHR+Y^aBCVE}`9h+Qz@$&7q68=uhOVk15 zU-r?CjNe+jMSA7L$oJGlj#CrCfK`bJ_YC>s5j#BMi1CMc7^}|Yn zS&S2~{_;3LAWX$zV35QZY>zp=Tr+n@w~Gyt34wu?Kb2eXgfZQJoitc<*g10$(?jXT 
zwp_#n5yPNiQhyoj)W0i|C55mf$PkD!05a*u#&khTYCnSJn@<}o!u)#`CY7r(soaL4 zGuSn@y+i~$0&&cXd<&9NXTHBG1D1hM)fq=H!4%5Z61mRn&_;)p#@PWIPzOX$Eix8A zkw3}^C+ch9c2q@pg9tK;0TYYHK%I2KG*@pPW$*2i=(m`nf*RqEyLU-Tx~ZF$EkY5C z=%Z7mO^5sTgSHoPt9oHXoNkb2A1S7A$3JRY)h?nvmvr7G_4g3Gz0ZvJ0LjE(ZM8*M z*P1VHGyj4#SYCCtyCDYKLOe=9Tv-Uh2=LzHonvRG*joBAx7xbKCBoL~2zOLRkQGI$ zovIx1H>n=Cm2RA<^!p?W-{n`@IZQ8z9U}tOQG&Exgw>B7g`*R{J%fR+NemZGUidWJ zwn85aQR4n+93#XTi5n|351_Y&hbdC!(FOoaGhC3%zhew^0GMDR<{znvK4O%`Lk9D8d&j*wte zq^ne81oVkWED>X-vP2-AIjw5$++>MPZF2qMuPReTWkw5MWE zJB&p28kEGjRa3B~e%@O;^oCvXoGy*e12=-HmwLY=o_QjD$erf}5BWph(9a%Ms zxD)n)QH1pKAL=tYJRRZa=ZvfX_4pP+>So4KLrj&vgyJRHF(#wa&rDVTKQ$R*ei~m@ zry&Z}xSz<^BqF2jJb|KaE+PlU+c&QI*3Py+QU5o^OWhhwKcW9gNBP(WvXH!!n9w8ksHp{sF*1$qwl99S z^!IkC>NBdK4RZMXYPuan`_znm>U8W}YrPw7h4U}g8W`ABLw|HbSfxe?3>qUo$WvW) zJ~ok!AsWrA=Xwm%_9JVuP$v;&|BYX{^ZO>xZ4RV~$$EvFGkT{MBYxh@l_h}Xyks9JAYyAS=LG!c`3a+9yFlcK zy}!f}+<92Kg`;<>@3Pt*QzV7OlCzk|x;b*~T%nc_vyxk4F0BbV&(aGC z0slu~h+0|`Rd5}Mx{Ro6{HV*NCOzRA!3JUVTFIBW@`Xm4aafi0^bV@>sX3m6VjW$@ z=OKQ?GkJAZPn1??OY*LD;!V=$5TdE&5#x#V^jy|c%Nn1@O@ei;HBuHa*_7^x9o#7h zAv$vizZ*8<-I1sCP#bkm_IiEPUAiwWyZfomF&;AMswiLw}6RJ8zeCu5Rm+{sexP=&5qT*Wf~WuU@7 z`BAlx$u7a}N(|YU3ir+gXud~}oJRRSE940gf1W~`JixO>gUUSBO@-9(?~@F*x>BA0 zknrGk4SGniM)1D?aSRYyy7+x(X@FX<#i(0W2m5{9NxxTrzM$&!g|(wse_rnG>JL{|eYmRTgY=Ssgo*78lNJgQ zMwgm_!m|_PeH@Fya!CKps4O35fq4tSXc@I2FRvQ4UJvbViXes`6`3~bB{Y^{5-_v_6J?aRrGemEl> zfDQ2=6PxF)Ng9gz_%L(bJa0N)1HYZ;jnorOpZOlLShIe<*I&<&r!4c%d~dj(RncOB zm!q4PFudn;N^JpJw?$}F7c(E1u$l9H(Y~RVV^dyc<}UOev2C_{^;yZL)gn*aP+q#o z8|H*W3UG$6 zG|ko{M%-+ct@k$MY@w~TV!FGHz_(%Yce@(KeUaW#k$sVOzMlFZrF@9>Yp+UUTa%6X zZ$9Q)P4z+A^%3Gcitg|+YH)}~ISeN2fvelcIA(D~%?Dm%iGEhC)X&54ynxN| zi);{IQit^`6<=M9Cs*WT|GLB*03u`eOTEQOA3=9MHg{dh`1+dLZz{aYyfiHA|4Au+ z0Mg=LSi%1QB*TA@j~@l<;U`SFkE+>h;TAYrEq5GM<^*u37gV<}gtt0w#rVs;uA1Jz z>MR74@$4&I-gED$i-GVosjw>2OZLX$F_xT%GH*qK7%1^$4e{p6YbdGo2oKq0>_@cY5nM$DS{7`s&@B z_^xpJ>vx?2`mfGF{k1blAH&*onscf%lhfH(wQxo{TREP+-6?WjbH+Mv;OqV^j%B~? 
zOmdDolLLj$l)x#@w2Dp}yatXPvTv+<@@!LIDft^NL$;Ht)6%P3HToaA)|-)$Ge;#k zb5*WWqMA7KReNWF8sjWd)0`!y)eaBuZ03d?-XyS`zuMugOu2wLccDsi*3%3Zo25Iw zGm|!e=CILxy3<>mkEd(rX8cIrLWo=OA-M&QiTIv@k7&ocQ=IMQvb($&)9)hK-KvFi z53lZ3U7h>P{9WEHdQV00Ztpu?T*2r4q2t z)DvAzbG=;NtdKV=K}nW}a(P(g=`-a4xSEUf*`E04+pOc@Bu~}nxu|ULat#zNdYRsW z0lilI46aMo7w8M+;;U&OG!dwcucncJ*w6L0zUwgZt-(Ydp*JMs8+mL=IPr!eY+I$4o`2F7Q0j0 zc}GO!qs`O1Jiuv)aF4tMoaR!!JEHH?_eaDtn}E^q`;h3L9+n7u<+9Jy6`lp7i9oJw z2gVOby${Oe5lQG#DeGflq8*Yj(>#4xVm|KaC*=1@Pd|mlR05f1vqh$niaqP<=RBZk zD8LJz#iWr2DT;_DLB78#X}{*_*FF75ACpGv^(IhTZ@F6hs+k^@-r1(Z?qGxTw1uVR zC8cEx%g-5w0%h>Rm78AuXGVVV?BcSLImJts@B&*X(XAkxLLd=bQC6}-t089H!C;}# zimZyK_j^z2O+dGp%MN;t&Ep5XLtUfQsoDhGxy9wh_70P{@{*OymfMFqRk{yoFLFGksy{ypojz=PVg=wypQFQmvYp;vvC$ z=JURFQsbUA@%BqSd(M)f_;#=GfwLDnkNkyd8G#!sXRpw2Iaa$ZohQ zSs(zDY1e^%(mdsjs;zP{X@^rXO_PCCX~xOL@CfUVN_pAj+FT%aYnJSvCiA_DW4*?-m&6FC}aULje2!9(nbZyz*u)Usl$> z{OocG{I&1sQGX*}-1hSpE}=3}M9hT#R^CNt&Msa#2M3ydjNS6#J2Kt)oR{U@C)zK; zI>sepu33A%mu=ZR7=|AN>i9>iHZvK|d;Kyh^^Xj|sQ$Moua4@UkiwaDeIuh${{4(5 zPajiHfsusj%BrdyC+Y-1ECZZ7J5cEaeW2**3tqeAN-&c2J5dMmi5dKYH#g;b2lN&X zL>33VlzDKrZeU)0!7FUwBs*X&=V`D^Qk+zpLgqtWD>PS59ndHw)btS+_??>^2j5Wi1=&tbVd3co9r>(v z_Os9J)ViNEK?@Af+7iB{EnustuCE(V($<2z)7sZ%=EcJ~i&NVdv}Z=x_^taji#ovZ zNH_A16eSn#T+n%(b`Ibx9Yk+eC~c8BdRciM6DDuqpiy~rdP!N{!qPmga`F}wpIMSu zzMv$ptaxcjUP;w-&NsVX@&>2xl>#N7F{ssyxouj{Xy36}jV{=nKGRDv4;;?)%>6yG zLgxMKa2@kNRxrKd%a^^+L+156F+bXx8|h|>+XRN14bP>9;Da;EzNm5=yJq(14W z!9<@XdN!t~{q^-}F_}2_xkit6r+sD~*k6HomUs+)`^$*8zl>%3%h0vI3|jkpzluM= z&Q11j4twM<+J}d^52I<3-(0jS75ug&jA&J~$a0(+i1OIk=!?Z3R8(;e(t_w{MC{R? 
zK{+BO#~q`w)CyGxFI?#5kU6BFQma&H@+!n`IAvkYE(w(!ff^DB6+=y+nC~|+kN12QB#OLWeQsi* zFq#%T9GCCS_};Yod-JIUu@->})pD;!8g+geRV5I==Y_*C9bN1rzXV2}72>OyQlH>+X> z*KGQKF6xgGgdX$Nhgea5rxs~Yx^zq})lHDHoT8Q^%2}Z&A?_)`+~rJlCX$r1IK`&e z+1pXUNj70y86S4|imc^gB?H*Ogz0RQf?JTa)YU81VeW#B*ecB?%Pk0Wx^X!53GPG- zg=vEfH#7DxUOf)*R;i}WvpH2+HTpUEx;8sDkv8~RvQ%j^x}=AQKzoX0!k0Sw3a+&_ z6<<@9T2)>e>lq6O0_Nk#asX8@J!%(vP;h;KIOvs1(x)!OYFG0D8w!8!x z>SY}1xI9k(U4iQ4D%ArC$uIy~CZJKBOs7e+iyEapg6iiHtcbR0W}-rM7y~b%i_#?F z%iSHi)jta9e|x?9n{US~Ci;P6s=lvQ>gFWq!dr2|`D!JmKN460wcvap+CP+iBc{ei zR;W<|OM%lstN-KGXnAG(AOL2y1MTp80>qM{K;MRb;M>)78>HchK~O~5)I=LaqVa7i z)oP~3Ib_g8@;s=<(+v}16Y(o3E)OU33ucDox;}t8@ofq1m9(gj#V!)-Tj=AhD73dh zZ12E)emf0wC(~dDBX}p6H`@tuk8M=Z8_Yl^^|pFP`oAYr<6VZZwC|Sc0kZ{VmVh#K zM@0EPcYgbd3eS)F0a;4;_n1mmp*|qg8@$a@p;T0O*)FotpH**uj;cl?1@|Yj$S7|| zZDL2Yne$qpZp)fUaQ{gs!FELPGGoqo_g6FKp_2yQ;RmK5HMY#q$P*_@#YrRYJZa>n zwIa)iVM}|`*mooR^CxTVtXtKJZ6hB#Y2iOQLWcdK)! z{(UUP7gv`OST|!e`t1Lx7@)qNtQa3sjK5Nhk88L8Ns95g`XbR9! zyvT9Iu76T&R!v5=#+LKXTJ>y*q&+sxFO{n8!c# z&Wl^xi8^Rfo6H~zt9kTl(axIKwIa{2iELGd6Tl)`ASPP=2(b1N&M~BBzaV!zuG$A2 z)iJ;!jzCZi2qdWyfsh&#;3ymDu1^F~)Cau(dmx@}YbrFhD_R2~tU@S1M~5qHsfUK< zJ5i!?faU;tX;g1mI8<6isPCvDN4Y<%_OANz7g3)6TkGtf)YR;TLOvt1hxMBYizIux zO0{c4J1rEaxQlK====cu#Nw%KVN#b+8CK@DN!7O%YH7{*26};tLa77UDkYGk>ICZB zg6C0trP`sbl)jg{pRL1(5R9@YseB4=p`mcyc^pADJK{u{9+1%?wH<&>S?Q<{tn;$D zeOMA3+^!l{4H$rlCh zyuu-*V#iXog2e|UW@i#oFLB?(78h-qV|g^0u_=oF#4jl=%c?~AkrgD$FUA{E{0_GG z?*Fp*iBn)g=O`@HDR2sqde3sTeox@Ht zHMmQRHF=i~TP{axCdq*V#-wyzM=@|~p%6c0y4K+zCl6em!~gWOYJZ$&4A%Zelt^5} zJBj|CMv4SfjSFI^e^=kAvJykjlEm}nJGuH2(i2y_7aXpP{$m?XGh17yE(5u0lEJ7P!Vv)PG+ zPi(i}$v~RaE&<1P)@nkF2Nx(&bp!Jl-t$$zz(RF;V3Ar9SgckCma1)mQgugQxot|> z`kjuNS2j=~b|^C#!>=;bsCIRm-kIQ*`=LDa+q5qB0{M*LxY+MpVOi*4^%g~<`i!&x zO_*tE@yA2g*8kS6+r8hu+3n_`Y@#xAZ@mlVY= z9UZ%@XRys46@&!zOrrfyg*;r2B}UQ_^*~P2V_VeSIZ0v%_E=Xh&^4SH&h&0q_+7~i zcLLWeloMeYJhM_=u^&AUUT4Bki1(!R8OU4IvYe1Vj1Q}ned?;%rFI^6m!-IsrBxn_cXLat!h|T)ExCGF01vl)FGK#1 
z)qVd*-HNZsAF&2$%~~k{jL+)=W>$b);?L+d+y*hecucnyipXNMSGQvdhU|EdiD%#F z_Pmi%aEb~YCx3i+G)Yi127-)*92pBfvHQ1b))Z7qtgC)fL$ImhbXA9HI#;vzs8sC} zXQOM_@bA0K} z7phxRYiyZ>hSy?j>0%Kl_(V=B?boA6H9OUl(7pvSD`(QlW1p;I6~GlzjGkB}_@xj? zVEGqh0!lL44qjh0)1HvMs~Fu{<*if!VLSg@d1(P6_rYZ3#LAkbYcj(BV~_RqnH*F6 z`L8z6CpOD%6vG1|uYV$&Qxdhwh;6_Tb!_89>e|>Q$P6w(gckXg^D$!lqv8<^2pvo^ zsEraM&1e&h<>7y8PaN(WJ`ZMow2W=l3QXXtTOGsNrfit6Q**83^dY3n%)9 z4kv%@nVw&GvoXE6^$Txg)T)hN8vJVK5%n;Zt7kTTo6O`2 z9%-xaW4{*vv1*+>T;K}qNJy=h&liPxgWB{Gb!jqUsmqh0*jLKsDpy^T41c-76{wLk zb-AmqjmXK72!iPAaOj{o%XWrBQHpm&xw~23-Qrq23?NHRRa-r^O|VODlgsU%xkk2SCU;x!3ARD~sCijR9Y2NC7xngKI8F1viD8`_&2zF+5%++J6W zmJ@5L!c~>jk1DwwhKr|#JD7AQe3J)IhV^i29z50U!Rewxbuh}RbgKE*36E}9s2+*3 z6wQdzkBD|2wcZ9?bvTNk^9d47oZRAwfUBM{D@rr7Lw`_51RsCn1srfY5D2FiJS!)= zo{!SSFH(l6i1Gvgswc&ghgSsbsb`c`_C@pf-I3mbLI_jmV7Qrw-$L;N@JBN%817i_ z4fQ9hHDS#CNs9TFJ(^~A2E$VWZ>ydrkQAO4cul=yUj5F?zy4G=%iNF@?yldhcr_`U zuhVM*jc{J5RqM$W--N<#98>;Ow1*ix(JeIXZi|dETXq5@_F*_YD=u&z@=s=S4VuWg zkaXb&QmYU>H0-23#NZ$J_ooN;br_AMFaO-OU>qKq%kkb;4nT$F>8ndR|BvgOm`|UN2k}{P29G!oWKb(}lh(!rw z8I6JmDfd1>!%VI+sE>k9d!wEwyiSU6r>Q-0#jRm};CbrEi!_ilgxA^6rihR|2e~dfbe!k65M6WUD8f<9OgO4hS_|h>WWiCgu?Y<;~K=` z4fiP^8vw!9=!Q1c(^I>XvK89oEz?sM0*a)O*=gimCrmDpbjW?=A-fna1?qxlx7hL zXJO(e;X=~oRRfqi@a7m6@@^?Z8E#Imh8!(oILa`R!PYKr6JAEbe$@Vakh{g$f@(Xy z=cw>;lyLmX56T?I;wS z)JJD{ARD;6()&P``1`^I;qx7!~$R(F57Z=Fxtx6#MIL;U8nx;4{?X zh1KS(5#brU9XT@mnqQJTSN-GpwV6t-%@l`l4NY1nw}B~qka|l3upM8Dv%2+R#>_>o zx)_C5gkD(B9zuS>H46%^Sx|7z0_sW$2C`W{g$uE363j-b;{1F+#rgSu7TwG;8^Szr zaK4|q5x-svqpVcnIIgiA2X<*{3(lm}Hc-g{;gV;#jU0ttR*vu+{nUxD8KX znAJqxE1&Lg)oxGS=c)UpM7RxWrS6r`54vhkGDC=zu2I-(U9TRN;CtoWK6zImLE)mV zQI%4<{c_jrfU6FA3Tv(Y3TrL=mVl(-=X2Pe8-Y=H(o7!}Zd$Q#boiiilBIarxbQPt z-)d%E7sikFIBX48ObCy{FZp8=!ta=)$t>ol_X%$?KaLF7H_Io5gCRH);O)$-o82}A zfl-BpQO>s=k;^ktIcgYXUOpFPu^bYWmC5z_kF};aH??k(cp5s(d^#!Iq{&NB^onOm z=($mr(&14^1a}l?V-J2^ZVf8>BuHZ?7#g!?7GFXehoeehej$yQMmYcM;UGm1ju&XDxpjG_TpD^8sL zF6H|~3|r1IpeCFei^AW3=Bm%dPZLQlQA-VoYTYZAo)&&JkR>iW#-SM3C|5;q;U96F 
zWG2lBPmBwh4Y2SE8X#v?>D(aQ1|3yxARC2{jlQ(nU>bKK%*7-^_}t4RHJQa){!LP+ zL4cr{+|dOPAi0y}Ul#C5un9$^IhA{#d6AgYcnZqmE9x{U6~N0jiI_c?4D=(5>poS7 zGa5Ao^v{EiLK09!vQ^2&5(TNrK;Do6+z>5Csv`*NsF^&|AJxyZxtpW(Tr=(T@O|-R zP|M|z>=p3cWi&-O)jtzzDbQVK!$q9KVsI|4cplAgKD2WUlyj{ug)~P`VwO;K;jgZ# zZkTIWe1?%r7AeT`Bbb^%Uh^(ct2re6v>YNX;xAdW)a?YHsYMTBN=I%xBm8jua!CFa z6y<8ft=CYAYgwH(QJ~Fe&NuO&u|<*9X&H*5(nK#@mnh0o=IAoJI7uV<8(H}#z2JWn z=p@NMRRue@Jq8>zmX$}5-TF;eCj1sd+x&TEcx?Cv%<~OC0YnwLRNpc zaoX%GvtU+uQTpA4x`+ME{ZNVr7)1}8j|g2yLW9?x+lsJ&v6`&naPO4;ygC4X{D^9x z9yPOy!&w8*rJ^#V8q(scX}Ow$H%t^(O?-AZ*OK_At9rg^ZkrwMBk)DD!_V25J5M`pqu2`8HuSB6spI7l}g zzX-Rk2$h91`~_*v71s_m`DfXcZT4B=dsFPn7R`xW@IB@FUinLI;YHy#lxD_7;U8?i z?{BVmrkQ$icmVm{d~x{d_V#V+jq_TXj(fv{c)MzExYA$T9+|kMa2#vXxJ*B-Z{B3r zIO~aUE}_nQB0M3}<~VrFn1hHF**jU(;OpNCXOm&hTj4+3q_@m^@FH{ct#EH5b^CL; zzJJi<-u^qrIUR5C~gu5dOHbM+VQMANSi(8#Iphj&|>`A42e zU2`|Ami)Cf-+mr$ZszO_3-)o~gYdn+1^Al77hUop&G;ec%vJDA*EdN@HDCYTvK`0% z9{wn33;)8`tMeB^TowlW122{lSl%p@YVy7ar;=!gFT#rhHscxpyy20%lgOCA0g`bC zllDE!$=lCaP^%xm4&Uo{?FEm1y4c+FLs+n!j{OjRu8wVoo;|KgH7_Scgny_TioDs# zcKGDMFK)V3A|G-}^T@q^iOX-UXupOctVuEt&JQ&-54;g{ z%{P_VY3A0-Y`2C5DlCXx2MV+~(mnzhy9soNjNoZ>hsbPO@w?|=-SricdF6y;vT9P$ zg^`bgbmjdU)3eO6&XHESi)qn?WICA@T_Uq>t+w_abGLcBOGHqjT6T@ha5Bd+1II)4 zCqMxvG7~4kGfXx&bdSu{)6B8%6lSLB*@K_C<}CSHXddnn>1xX~;NFirtg5Tjs=9t> zd{CL)*tF>xN%5sGodg#)&+@Zj)k#oc0}3CkodoB!8WlF8){0!lO~_@yX_hzZT>*ow zqs~qiMA&2mu$LvP%heTf5ir^ zS0p1z7n^mDxb02eH&|M~+&7Y>uQ2N_Olo3gt??r8f{|i#Z~w^5ymMf$T#@G>Zb1ff z6*})BXU{2FE{L#Yh!5@_nBVj$J*tLP&Bd5T3khGO`<}`=SUVAE6{s5nfY}s5+Do+*mW>o4GET^dU~1k{*qkRdFYDQyrua)#43o@&A(0uT{fjyi zG9Ad&4NdVsGa8z|4T)?sGY5OQ?6t$oi&vI|6uYd{Rqs|@J2a9SXx)i{Cd*5%6qdVO zc7*aP0OdXHFU`kq$XwmNUO`eiL5s|z?dvr*StBCN;B?}Emwn}cr(x@o0~U^O}l%#^`dUIhL9Y>G63#T@oJG>NQ*E<2Sl?0RY67i?dxAYIJsq|e2wiZ*ZWU4Vt z^EA?PDA)%upd;wf8H7O17f)o1>xx9S;CpU-W=91iK z74#jss1o4rT{YiT3tU(~stTC}7r*vf7k>hLj!UR{O0_vI(QAhx!NvTEg%cbhcMdFG zB*D@#9w~hi-_Rh&+9%mDfM^ADoeqox?(iMgK|0^pYt)SLVYNniBefzG9TiGv^q=Ekn-ohj{98sHbK5=^*#F)? 
zzDj}?Z8g00WHath8O=?%&!dA`30Hp}&Cjcb=?+W{Ah`GEW*sJjzkcPJM?a4?=#VzR z&Lb+pmtXl3K)f3|F;YW`lr`1&ED>ZYCUY+jNxuAA-x5sHW;|u^m&*_oMY#NH$QG{= zvog$G;}fNc^%p!pCzFz8)-h`j!1+qGpd^R-TVmr+Ja;e@UnyEJfC$PA{^v2d0Cn)6f>DE{_K@rVzpF znG*9X&vL2q3NDpxXEM9XHF6Qvp(hOL1?oaiq+Qd6F>Ra>>@`=@BrkM_3q`b7ENa3l zEN*3f-tbYQCKU}IJz(a5QKN8!_A-r+xWml%dqO$?LyI#L>&-7IEm>JyUUFjQ!BzuX4`kLQ%D|lEDQ~4M;(*e* zgO-)fwHz2njm{ahY<_j2Q~e+{wMZ$LP+mM|F>`og@oYI9Sg*LOtYl?*kzZ*);=&S6 z_61s*uQO6lO-?RZxpLXciOUw3l$xwO4k0aDwlYz;p({&PtSTv;v#`W`{Y@|{DPD^$CwR9mQbOSddWb{>I{hQex|w!P2y7HBH;1w#s%O zE7zWWbj4W%?ipw)@zAitx@WMPfv&qiI!Pc~)*^m*ZNM`~qRsM}m*p*+O+TJlGFR@) z7cMmeeoW6K?R&+`0~?L|WV>@^6bTS}mpy8S-8W1dIF!*m*Pc%ElcMW6l~!DyH?M?K zYqJ@S92?6^ObNcMEGb*CY{}fbW%J0VCx29!ZO=_|Ud`#17lrvlb=*}|*u$By-dn8- zTQ)q`*EpU3ePV65J!eeI$~2drnU-e0Jd#?+6b_AK0w*C$W80Ih9##E}e;y$BYSt#x z@o3%bT5Hn57cx@JQ`@}klsaYSl+IbOa#<<9E#{hKmt?gvP45LWqRyL6k~!STjhTl= z>v|^ebFf3bItxqZtSnhtQd-VL8&*JG1Q>05X>+>qh3DOESZ` z;n`4v$qX7M7s_nrsmH*V7iQd{Se7|FEHc>4jC+kdcVXF(rOV6D5z%)&6F;5hedYTZ zLpQzjVz#gBzOyk1owF~=ika;%Wpt0T02DxlMM|850JA-8mZ+B;`1%-py}0ucm95x{ z2%F}hOA}^Pc3^TcL_rWkyj2`k6Oc>smL?i!9)iINl~k!h0&mHIH&56>hwXvS4k|Vf ztU_@FvaMN$3hd6!mtE#t-8kVWo0LFI$bEl2l3zy>5!Tbq4FZ?B(YLLURQ)4F;-|+JLbrj_V*{f$9ZWzMjVb`x zsAd-_2ea%R@>Fj8Eu?@C`uj%^LMS>BuWL%W6B z%Vv%O+$1=nji<)){0Nc&&^h@;K|KrHh(r(!{H-lte4iQ8|9>Kev>mW`&Dle@_`RlUThnxH{KoNH#`bT^N|nD~ zwE{YcE$*vWg|!xT$*;AK890?XmTCBzM<(Y7;0B>N?j-++@3g#LZPVx}vO1R^Wa25)jf`sw||^ z)YYDd-qO@IPXupiB6zDK+DP9aeFCmvB!+V!pKE6+e$cjk)tkA+TL17@TX&9Q#UNsM-hmyI&O}Fu} z0(1VhX!=O0^GmLJ*>>tHw)eKf8nC>Ku3c7E9ZFkgK}qpkup(HE%EoicrqqAKLOp$; z8L%tTtPy-Se3{Pyjs@4EWTjv&u%51(Q@(1YNl!`1s*_r>#IAm2!%O9(>DVNcqicD= zOA3}On^$5E{y8njEL<6>&jEU`Dhj;{jRhH@%wG&Ao3E4ICgzock*Hb!uo%R3&TJxX zz1og=^BqMnf~qa3n#MG5;y^@druD3)ht?}%n&5jRDg2u@Ld zGE15yHwmF6)+!;OJb$>?#C(w9hd)W~Cs0(X3| zlUf*WVbM1M38{9fKM4>7YQQ$)wPXhxaZ%Ik%d|0EZgZ+7POUg^;l$Vf! 
zr1(z4!{KB#45?@oEA#g?!rH~_A~|ut8-#@_ga63uoHu(_IhoEWSt#tAlj$1rbCOngWVoS6=L* zg7sBVu_8*j-)GLIfcXCI=l*fg%$c1tXUa3}nWv(uWo^4DE0sK{2sR*QU;-xs_a0YB zT>&WN^Tn=QOb|0IF@p>tP){%%P&&h`HldpJi_F{NN|!l>u5dSj@mHPwW`dOTJ8{?3 zAs&ExV1?gF*dL@e@Sk|4foTp*37vxQp=NDoQUhEYfA?uEF z*N~6ZGGgVuUeQj@e!-|C9EEc47g2FG*fWoidBcqeH|PO0O)$G_VKF84;x%NJJu+H$ zDs`=viF;gLx$zEH_R2_Cf}HY@E3y{AO4rH%byrh&JbC^vI|Nv0d(WBavdb4%yOZU) zL{BtfeOf<4udf2lpVP6nkCS0*S4rh860(sID-@Xf=&DaQ>{dqXNM?okeA34SYBbb1E zlkr6%Ks6EJB>YzdM5`F2^yyFpzXO&CX2PoI9O%^tA=xq?N53L~BfsTbi<~9Zgy_U@ z^72FqO(Zu#|4u8BIYO>atD&dH!QLqFJq;D_1oe6h8Z;K~;p_7{=8PRgZ$;jo09t$@ z5#+~R$Ee+uEL5(EmIhb(2b4QF3B>cFD@64y%ya@6mX{}z2KYOK_Keq$I0?mb1Hy?d z%0QQkc^A~m?0af;Jb^}Qe2tP`1)&9i~!lor{RV4XY^5Ee97HwgC|n6gry9G zKr9f#T2G?sHr#>gm5+jD(qJJ{?*R$uUX00Nyng^W?gzfM1gP9HjMj31?Lh@)5*Bge zMlcoY2>PTTrxBD15gs1`vJF5i1{s~>J0A;9$8ZS+Q5Z0|LZTrv+5jTSNQC%=wE$4I z<;*pkCRrp)E64>rswP5s6xXc^P~a`iHfTfF_Cvxn0>Y!{m;wu-^wzv0N<0G&7X|_Y z^q1BE&wp4o-MY+Vm(K+<902(lWULQ43=;dHamO9&3TsB33Xl5!R-E49JkU$Ms zwc!Tes3eAB-ObjknykB-pK1!?6%`BLsJO~cu&iv;9(>l9(q5I)Dplm6YO$4lSC>hFI$(KO{O2Q2GH~420 zsVnHZh(Ev@+UKk&VU1d*4+fN7dDuM;v&MtdV{v7J9enb1*BE##`F?DSDOa|QjK-y& z!AgW3+--7J(HH_dO(D3G4HC|9_GAzXe-GyNKFsB(F^{1E0_<91(I3Enhr!Z(9#T8U z!B9Sdd+{m6UPbH;_7e7$m!U8G8h1l4stRQ4XpbR(^V!&0g4lEHDB0R$>?qUD7O0J$ z3|`-b7*+(YjtLN?edjQC74;g0MTK53K_eC-BkaKQcy>1yjJ5CdBDZ~@KqzJ#t{yYs zvl3ziw2P!%$AYq6U}wRLL_7Xn$9kniOfHp3PXE+zPQPjXarFOQYtmsXX%gZ9qWN}! 
zHL*w*G=VMQv@@vkol29~8Cou2Sd^F10KHRX9A-^%#xy&MuZ7yvtwr#DrSF5G0zk>1 z3)K{BJyo#fO3DntR4k#KN?+SUwQJaos1B&LsOM6}b914%#yH(U__&c#7MS~9EOb6d z??XQQJKT}wOm#g|%$;*w&zVY!WNzC#hRN3Vxq9Flx$HjIlU_@(=+`?vSMl*x z)AD|%xMDpwyL@GSmG4+M*;tR#MRA8o&4vd{j$Y=R}oTo?|0R4 zsLI-wq(j=~plcjbyB>0xme5Ypl|c>U@T0ErkZwWhiqhMTx$d3bEt>*(EyIX#6OlSCb5I znYiL3h@_x5`CA7+3yK&@R>Lrq6b1w_vTMrI7kFe1_#!5czx_djpyxm^)I-jH>qA45 zD2)STQa*g!P5#yOkVR3fcfejF#j9{$ML6zISd@qRUSzEfX_dHrDplrw6EV0IP)vde zZSsCWNl@Iqk!~6@%Y8rfvY}Aq)V?oZldw6(<+{=1QnZCZa47VMA=S zN%%$jl*GHDB}abI>5F5FBcoG^y=ASp!C+gJ347Q>pK-_lS(*2?@PnJU&J3#xg37Hj zrm~vy#dT(lYS_?j^`5-1Q?2uVcSQ#)YAv{BX3If)EZRfX2~=X2b!-8u2+|;;`WAJ9g&`WYdR^HGtL;W1n&Rf+IH$9HJis>r058NY zFh#Y3j&>7(stu44E|Oagyj<*#WfWh9fV9+Ey&Q;!(zb{xKs`fzIL3~)w8nk|GAQO? zGA|FxuAAf!&_)#_b((x#MMiHX}0T#vSk3A%Ow+qUZMt%LV6K z+#u(ae{iFL2+L&-(FPW=+sgVjqlelK5}%5-^~HA3*CnU)v_;B!MP|7CbDVPTc+6(> zx6E#@Z9V978Sgb3pr~G6!_eerzu}O1exnr(KX36H{Z#9kxms0S){rs!i&w3aZx_SF zUP6Qc89Gq~Lj4>*hrr@=8IJ?m3?1Bx|-W}}s@5wvb8 znLH%Yvnd75pxfl+&7q76l~PJ*1)WbP&w+D9r*e(xb!vL+T!;nI6j2_HV0dG6CbPdao7m?cqgBr z^Fp0Z#6Ae`hrEbScERfP6v{M}f?_A1=Hk=g^AH(k=zONmXHhzMy1mKwVK1_GI+s3# zT4;Z=rrVa!hwnq3-{pd9#YrwUm@jnkyLk^6Ulio`IQhMl1-586@x@5ued%RiC%?}L zBn%NFwiG=zJpP!LJU_AY|`I)BCqySuPeFn}OdLTxG4`2qTYNwkI^jCXR- zOV_~sE&?cYtbzGm4p)CVKc@5JipfnHsqN&CQO0t5?0wjhFRqU**ii|>s2cGaOi9Y{ z{${G2m>ubq4XYVR&TZUsIw+_7?rSO6BFXgx zBgOd^xez4hf|&`%T zxeI_lRVCiR4<$vT7@W&hCfM9veDhMjbFq7>ilPu|Hyxl#O7lvff05aSQgR*3-wh|$&fP5oD zhj<$YvU!cfU%y1auW`XyqP%^CH$gV3Y4it-u7hRAUz4k98tLhR!!IJ^FqWzbnC+!NOH5r%xMKShw$}yh)I(Kr4k0^zYkqO)@_Vr-|s7h~%&7A`0_D zM9cH0J5D~9Vszv&^0PD}M*6BkLP)>O9V1(h^M;j%ryA*^bU>Q1NCS({lW$2ywZ$?h zrezRUD@`o~jpQFHR!-W7nkf_42)k=~aY4I+!f_MFO_TfqGePd%=CsS#?lxU==Lhz1 zxqrMCE#LOK?XqVeOjm@zXrKa+6;Q`!b)2?jo|Us5@_bbuow^gr)=`~_t78T3v67BCseZ8u=7BojItb=(C? z;p4b~J_#)D84z9mgR9Ypz}PNN7*?5 zjetq_{IMVql8Id%CC(xi901%A&^UychSJ9~u(pq&KYc+zd=4?PN_iW|38ffsaBfL9 z9iK4h!5vosqSy;+bZAawF}?A1jF7HCn19MsUrYD%-nQqtdtw@))zWv&FpT9xJjbc|Aw1>2$m;mL-^{6X}!vdDz? 
znrLx2+L(qmWGX4;*D%*#SEbSXY6^Vy7387$HLy~-Vayg@#nuV!L+RMvRq2}=8J*i# zwCy9z^Rp=Zlk(ESE0tESQfXM2;EYpII672OwHq5jHP%)`zr(BVfwo^j0Y897dlA(8 zUobYms-o#G5Qfy(F-{z*6-^#psgh_SM#7rvi%Hjs7!MFD_~J_(DO4%CSmBCMgCUw6 zOWcrpWNjVSDnZ#1Dw;ljOSO4Wma%YTIdHiFeR>n(^B!1A^T8rdBv8gG@VgjO-sa|^ zmeQ0N5-N^j@ ziYd$@`)NJ_xln~hH#0^S zP@&2?A0GlMdbhzb&Fv6B843?Q!!br9R0SmVL2{BGfrBYP^R(s?jZo+Esyg3T6}Sad z6>tYt)hEr2_G);PqzRvhOT{GEW+=i`n9Sn%G&lvC$`W}oJkm^8RT6>Sf!+L5>?i_L zGZMD9N-?Dq!M-ZnQEFqOI*&#`6^#^ZQ4#GZ&<0U9DNgXqETRpXVW1sADi}`H9dbvu zF;X=|9c3IwsQ6qoVjchr2_+6gRD2<;&KJNb&t32tMiZ|GISoT2o0G|t3_++RG*p~(7=0>&pG@{ljct{ms5mMac1-Mbb56Dgq z7B8yLYPT?Muh>}lM%+#}129`KB3mIly#=QKc7i+ph$@bph$aFEKchW%?6~R55+->% zOoes^jZq{{?cWizp&?z_AFAB~V7XMjglmy)tVp8I?!}%18;*=6s(om04!@4_1--y_ zo+KzoW+EwrPBjYBQ8W23u*BgdLWQDm(lMzzouYT=()Ct+cPq$1A_`Fl%abfuYe zgy|e46aGAA&QaLpI|lWy#AurpqxyNw4;%2Ltu<# zIUz7s!R}oK#{Qv3LJu2oA=HMZdY}fgT_X`HK=3N7YaNi6nd48C7{H_qRKF_ZUUB<({8=YPby@UyHN{{*JtK2?pR2+(7l!}1*_pxOZt!ju%_JLHe8ja1bPYi@q2sk>BGKgSOXkMk;)i&fgUl}a;l%1|T2LjLZkj0GqID^9M; zF*1hG0#UMG{71~8pD<__vGjk&vj0sLV{yom)zA<%GXcjH_0`0V>&3hlp<@Uzua2H# zj&dJZ8U*V=uF+k+sK@RxlPB3Rep`~@~f4XU*+cVlS1W(hsxiTYb>sq zbOIItuvDDF2`6)5@D~M2v@lpZ;bt9#hjkQQRUeI~9auR{HC$yumGPrLgvQk>$76~@ z8@ZZRq`dB=*~A^qX-F}MWq1MXc-kXXSNFFu##d@e0Dk7GvT7ohC5m{KCaSR}B7xa4v;MssSarqH$?fh{G9N28ExaC=q-XTJbQYVrUYr2&UA${W?2S5+H4H7cH_dZNx_ zbsmRyMLVyfS3$0WYZRJk)qqV{Gp3v~10$-d4zSfX%@&cN+G5Q;r`pwNw&2zm`u=eR zFsLd)>=;IiN|-EV5Bt}auytgicRkFr;;{j;20{O#9;jnuoxR{(;u)|VMQ0cehBSm} zSl;y1fMSPmQ&AtTyBe?>qEY!w@Z;!70nU5v&=i0x{mrYX8<@~}XajSj{( zI!K@}QdJ~>T4aRDWz&o}WPM_qvDu+irm8*sLUt1@y(aQpV^>sp&RF^2Y=ex@Y?^H} zv-}uNez#8x`Pn?9DNNPq3*mi3S%&un7WDr?V>EOVh{ zH=y<4iPtybRNrJHGeN59PC0D5h%3ulzlFKQQk2gxzT?peC~tyY<_rfdQMK&(T+@`p zUq{Q_xhAzND%YH7&9fPkdexWrcQDCN!Kn`BN!=R!6+O%bs^yPwINM9U*29GUduen} zbFD|EMqcebPnHff$)3XHp=P{Kr5+#k)dg8?l1bJb@+O&6!&T~}`}(KKkAo&zSxA^~ zeiWfnhxIM-NxsVLgw!sp%xp_fu;88N&dLWin4OXO-Uc%eukzmgZ0B5g>7dyfsZ9=< z&v?`XUwmQg=1~C3D6$$6B`=;eV`Z&Z)JSH%Vis87IMChs8oX6tPqdWlkt;!-eM9B? 
z?hUhyBz*sG78F;l|S+*o4!%LF}~%~G7q zNUE3k6E!|DXS*!eHS1QTnmqQ6=~4Ly{?JP1pqV)~vXzAV->%Rzqx>-nU^>ykArtK2yD4~52kD#e za?rkG`CdvQhjSo#!xjNw<%GvMJmGP!M7f)zAFy1MV72cMWj{;-XxGp{%dpx7mvevf zqm+D%(vA~@;{}SnXz&yKC3RW{j3CHgq3Ek&pz6Nv0)hEW@;LXF&fg}Nd}vt< zayr+7|Az`WL%};be^=-4QOWNU&*1}d<@X`|{0QE);1>(lw)rP;QRRYs>M9rijGr_3 zdHy*CUr^y+Qr@p9_BD7no%lB{{$KvBlYi&r-&5)ZC;!38f28P7PJYqFf2J%y^Is_V zm4e^s-S1S)9~4}opMO&9GQItay81W8uIT)#lV2m2hor>oNEEp2IRwW~0q0s;5DgO= z8YXPQ?i3EE(1CHF)gnw|-ZnY9a5;tHf-f+)Q+O~VgcltVKI-)6PFRJ4Gf=-%M9|Zx zi%6*75f|ky5k+MHXXzxOU80JJ!5E8xOH>uHVIoe%yAV`!iG-j?bdgUk%2*=|K{CCr zi8ms}5UHY;Q>0-Ah;;hlqcSs`0^GGkQO7AVouV#%SdW7Gx@h1O;D|UxBMQJ%>m;&V zqKRlqK{HCrrl7e?v=A*RXysz^#7|~KP_z~8T%x_`K;LxKMW-^rWB{1-&WgLqT5(`cVMRS|>4p0`S&4i9s$gSPY>UxNDun?bPL=m`q}r z3l2_4M?U>1aEj5C9lW(pVyshux0WHsQ)eb%%7{Xjm?$P;_KG6K z<0;L%Xs#3RTH3pYZ;(5zxM=FMo>L*_t&1t8dB2*MwCJ_Ez^`ImOw+}5Bc>3hYmUwqwVV{Cga_e12HM#h*S*`Si%jN+d_qHx( z=whZWX3?}4cj{ucF6QWBE*5a&v_^L;x+R-K2P@E zZg!BrT{pjk&&F$`%?6mC?jN;Co+1;EXmRp-lW5)Kks_!pO>hQ}YZuHkKPoWAe6hd; z<>6D}CVZvb(^dJA{X`6g>*V|I8xuFq_u=D>eIu72^F+zL&D~D9{5vx|gG#*1_@h~elW!YkhFmvz#q{pac!~E6szDhR1}+HHR=#+gp|2;!jmGz!(y!| z*73!rST8o1Vx!n3x7ysdg~QB0^>Z@?TTI@Z%PCL!5@c<=J4uTaTV*f1yFuMqrr0KS zVgya`2w!4~UE)zwJSMi99GIyo9;dV?#QnP1ZHg!1rFNg)-CTPLhEsmByQez#f}sj| zomvieP5FSs?Qc5M6#K-}rr0l@F~zeKl%NC-Lyjhxowu2ylz#T)y-aa{8hem39}*j6 z?si9VUT)t?Qw@UOQ;VmO1pMSY#FTV?xi35=6c273TzEpQwLDXjDGrMxrg#oD!+HB4 z6To?1P06FCI3|wEza8%O+6&@ExndJc0S(gKZpR5zyd+M_3A($w^OOLq=M~Jo)w;X3 zz6(C!8BdbuyZCAi6t9ZcOz}E_;tf;0Dc+*0F!fKHa727Y7w?#GM*N;B-lrTNnBqh6 z5!RLo&%|d<@rgKNicfhXQ+!5w&e1!J{CP-(_jJ1BqCck*{eq`}RXK2K5oGF3aj*DN zE^xXt!^Bq#g5pux^C3@~JnM8<8Crm@@d?zwuLWlBe@*eN_|6pHW61dZlzkWBrt z_(A+=il4+qQ~Zo%@e6_bR|EcgQTo!+s;%~8mvs!Xrq%9!t9PIZ7 zO}>!dZNiu9JX2f|drWavTrDyhL&vE7T)@tA9oa&JA=#j-m^!^ z!QFfieTr~9BVwg?z*k3RG)fd^~<^~8Rto8U67J<5o<08!9SyQPX_SEna!?COcV9Zx@Pk?J?LdG#Bc zy84ZM9nY#ZoAP%ODv=v5W&_RxssMM(({b*I`ebcEk(c0gooq5#gj4}yPdI*2jeZ`1 z7I7ZQK`-R)(x`Yh{G;P0SIxaXNS`bNft2x@WuM^APEbQj5hJ+aN`M%37)?ctU=Nn+-u(z@ 
zTZQ}qcp6~DF!1o~*FkiEkJT9RRG|I`B~j;9%SbPvzJLY;`U7O3#~|BvbmCUogCSB) zrTicbqSW{*vDP3Gy@e<8B!W1qZiRw~c;X->VxN-81!1gAA{VBtS!tzVaFoFmI4uo0 zwg9e3CaSF4m?rZR-AO&D0E;Vl9H4svG~g4J3W+W+B$_AFSSlER+!%#uP0&_U%Usz2 zlaDd`bMj20dxFqjmN`l8UxT7}A0kQwi&{mKSJQ zt|*s!xs5#&ut&l4(y(m9iVV*-1I9^a1dI|kg6u(=Cz&^3;zyb6UzR&6Y%QFP2J}*v zm+eUpIO*vP?!rfUwkP1s_NE6s0dK$vn5ArF00uWl!P$J2do7EDE7AbmoaToT!|t~6 zUCaUd-~mYN-H^*mxhQcR3nTmC$YtYq!xi`SPSo;(gK&z%C^kmLHnSjHD~=uPA6Lr8 zA1&{om2DfF5^AjxYOR@0ty2X>!4mfb!HzLi?wE?7Xgf96Ktt$>zT^{7RD;T!4ZwsF zyjDk46Ytr@Hppr<+*P9TNCimGM!g~U7Q$hc>{7#>5Dax=5=?Np0%$}L8SiRNF^DRm zIrNbNF&c|8!FT1&pjy`gE@jQwb^SQ>fjW|r?{VajrCr5LIKZZCVvPtihhfF5lqLD6 z!k9PehAm4({KfQ(<_-Orh5(8w1VYGCrmtgh=r~?ZM@<3eN$5hr;Ft9?KFT)2f5+r? ztPyfLZO^e8gSE06W}+CR+9}VX5;lv9zSAli)y&>?h5UZDoCC2Eh`X1txocVN$_<28 z0)37*@Ux>DIp6v?)GoYOAO|G7tJS#+9a@O2b)s~Hff9CiHhG^3I7;B*rvj+Q6j0^X zWOw!8Jtgek5(W=5_t8Xun%!T*me5=v*-}`vdf)(CT7pr)Kre%4!cG=XnI60`(~3|g zX8@}E4+&Ug=22k^7)?;hjle*HJu0jW3M(gAeM3`MRRYNM7G%+VH6}D#zUCig_$Bhf zLb@XQ5@^!bGF`*Win3ASx{8u$o3hb11zywy`D9J^#NZaL@yBq3c$Je=dV{|YKE&tT z!@q{FtnYAS{K2DeosNb$QWX*4F(Qsv6=^(H)aP-cHIEl<*mco~R~N&14Ka?Vh}k?< zEabJs5}qdZ@!H}buOp80y5cQfPyB~B5a)P9!Vtv;o+U2xCPAQ6O@R_M(-L{MmdTrI z^?8=ogtyWL^BgUY=W6448*K`2i;I0b?JnM4Tf{qROL!-38SkQP=iRh-crWc^-dp>e z_tC!MeYNj+KkaAUU;B*@(XR2^Y<2liTLV7K)|3yowc#Ud6Zk0GR6g1^myfY6W=Y_U2e4_1pKFMz9lkGk}#h%XZus7s0?5+4r`&53XeLA0QpTpyxGIFt3Bd!X*~65$dfWmW1A#qL9tC-f9MfSTm_;@e}AK z>I*Fo{2NL$wZUMH*QK{wTaY;F@%kWn-36($2BM+syP>hdizwLWi#%svo zP0n_L?NM(H+a7kx`j}3Z&_QI{_rbpoX{UtSck?E^DNe*B`v%?&jn-JQeJ#&M%*Hwa zX3cpEoQe7N!Eh)8mM1`8m$zo$1abQ8fP0}Fo{N>Si%0V|_#~|SZiwvw^fpSJ#iNXy_`A8L%S|t~WS6DJa*oQ#s z6)LRDRo2|ftg<5A3I9n6msz%G3bJY~cUTa3M&&-BD_!x|g@MI8TNTz3<&lOFEWF(+ zFF3!vo6fuAe|*=2xM2U56Wq>9ISCKx$%F8jp%9Tf(0#%}Be97@ty7Gn^-vAX00U`y z`QBk+D0~s7_dPHNvRGj#7MC!x97Bmz!wiVQP6**DLXD`!#G-fYLf8weGJ9;n5Qfzo z*~nLdg<(xZQ>ZZ!8DL-Gz}pKIO4te61V`dW(LvJ3a6(K>Q)rjMn)m})d`mHVmX)^& zN-exECl{h9sJwL%s!bOD1e`h8N$34hcXK`p!m{oul)DT$^P^x~;X?&a4RtCO{ys|BW)*G;qK8`#ux;43A!D(fAZ=A(@qM>q 
z&Tqr)-40Qg9jq#Uq`Y^qb6DP|SUwnVrbfr|W(wkjAp<+CiUxpY5o*AGBmq-rE$f_Q zVd)w0sy;Cgw4H?I7I=gvB%*}v0Pdg~wzD?*V~3^9NAL(=$4vD*>j2wT(L^QP1(>+_ zJ~Z!X$P?^mwISNjkUtA5vxNf_9~Cv0H?<|dJ=D~ed?;cWG&(KS_DBQXQf-em1wcq) zx(qXh*@zTCU68KY?)c!N0N$gU*$e>gQ9!NsE@sQ<5&DsVAGNb>bxPP{q?`HpHr5OA z$5lK(%U+iLSbmlx%D#zJse_a!=r78iZyRxd?LNSsjB>z#WdKG~k*tC!NBKMS9p+)w z1ZbdcD=%OroWNdk68rEe)(G2r4u1_Y3a?|YeiLpb--029Gw|#F4lILxrpAs0E4smH z$Z+g+c1Th-<|7azXO&}k9sr`j=*nO|5;0t_U<9n}4!4YtQh>v4X&KK)6nhRbq%lWw zj@o*#-E^kdHE442&KxZKSf;5xgSI$iwQjkA1t3=!fp6ZSpK-o=0G-cA@=+w{2E({2?xqD zB&6X=7xD;C0==ONFiq5IV2O<8V*z<$3D9{-b8=7l7|?+jv=-VPKCXNWlnY&Kx?Qfo zGAAGZ7jt9F{+o61&_x|2t`N+(qXrTjIff1$$KMMj?8Opxg68hyn1e6vVC2yLCH#C0 z6E4dZWkVEjIeCDc%C^JG@5_Us_*;j)0#E@oPh`>iC$j7a0WKFF1J|+`EZ0Ur$LWFc zWlmjpWbj2S3yr3Hlr73JB0o#dppx>l(!hTiO19BCMAXBkiBm%_Am?_Y{Jt|Ta zjLCazD)uP2mH>FJ2v@l|QyziqpqIG>*Qm zeao*wKo5OA6J4b3Fcnod#Tcqv>znFolkqHTiqtw7v$xdq0DHS)BUW-|qAt@#Jr)kB zjc5UFL`ZI=h{jZ9XJ!$EQpKvKkss?#Q8bX-;pyj||F2c9gE(aR{SAa-gBh==e?jU24;Fyhr z0vl>`Wxy-j(4x|)wYw$U@8c=-v0UE3ous;?+v zxY_L3_WPp`%>hAjj>B@_1VI5)9_!$am47yJle3K0rOw9g1)8ThS+PXW3cCRWOSYbg zX(%VRb%)E3o4S9K-P<|fZK|2OJ%3Q9X1nKxKLj**B};_OiU!bOXa|dr&t|)!geQCK zb5xNno4dO!L59x)yB|A(b$^7*`4gd71KwThum&tjXFi=p$UV*6Riw9tJJG3w$lVX> zU9$H!Pc=FJZBXxvTDa3}k>!oaIqhpKkNpr9A&)L}M^H)7!fxqK8VVf2Mv`*qC7uvC zS5Gi|)zb+5Ht9E3H4!gC169x@p+#8FV&OMKZBt-@c{^;d-L70sDftUiZfWce1aGpH z`wm#R3_ROZRz$&M3Z_snl>#z$OV8;p1{W<9BM)OvvUIC6XhqIpbLlXEx!WWbq+mV; z3zW%Q3hs7cYrltrd!bIq?sKvM@L&Z)w-i7Faw%J8V9S3{dCH6rxyhuI)i4`IMZy0< z5__0}wG^y#!5R(OfupK7DI;(?+oD4ht@mkPGG{XHSagljv#s1qwZzyNc~d79dYFD zZZiUC+bg$~%1Em0-A9WIXR!aMv+X7XMt7R5F1e`$8!z~G$nJDvpZ2-!hWE(r-hOx= znC~5wm|C+q6)JuCig!4X+F;qqnv^%OK)uXNOdMP^J#k{*ti(wL1|wi=?}(5|*Zsop2qXRqk&4a*@$_y^uBJCq06Y1ylzyO9Phl$GbduOq+f z=$9B>^3ZbYDK%hxv90PN^i@leiffZvXFEbVFL0twt0tocp5deSFk;S{zA z+@N`T(rsv+w!1d@A#>UrlrrzigUlzGmEZ^^gqJ1gp#;CY(A6CqjG+F3kPO5yl04N% ztz`{r*TLPDoP34~s$(zzNyXFwB{}rgFPLrjKN=eIKN|`_XgKs3tTDv^wZ#ay@G}Y! 
zs|rXZ0ak6GOwkeI_?@8^IRt8v6R@J^K<#cG>yPbx0J|5dYoUC%4XTlkv0>2F8x9?} z5$rvd2X1aYIFDdJL(a?2#;b9ls~OoD?Se_7;j03a-wiRyQ;#ki9Im56np`Ag-2*zA z0+oaALnwbo(#>n+48KA)PI}kIEF#;j*4%+-T6H)!1ONjqwN@J4yW+O7$Mq+osnmVmi6JY$u4&6#J-{WI50ft#D5hkfJ@Vh>GNppu(J7Q2h8CSKdYtI0* z9>+7*K-qk12&Ft~vYlmu5YP-whF=A!jZ!bE@g~!N@q($D2znv|NQXdQQJL@s(Zz-- z$Z%JNLl(Hdh(BV>w&98xPiBs*9e|g5YI*!Nwjm?Reh?C7>=;WTH!lZSb;*0+Fb^d; zDX9k73*a)~v6(q`v7RcMl~z5J)|@inn50*D#EsZNIcw6-npRFc@(-|-xa}+~V2jdi z{qEMml6&xOJHq^cjjmEQHE)y#Z4d`IEQX_x4+2iGQiszy6WX86q50Vn&FIIrLZfpV z=FN6kYuX82&qr_&Y=WxfI%sr0u3$`BpZx%o+cDg*Tmy~Qp@5x^a_+&b>WLAEMM(sA zKo2UPWpIBE^DQ`n<_Z$?LVz#9d?vt~Fg*nijA&~C-dkG$eJ~pNX;7dcbqzNccvt~J zApvewuHbg5?8S|Zi294gUS@V0t#QaIFn^tx{blH97B z&18cayaGjC}VTU8OE6Idww*J;B`zf^AB?e#B&VntY(p{k3{M{`1Dg^3I9w zq4?;7iSBPLvFa(&y9PoAkPUQM2z9-8V^ZZ???O!4H_c6A(lOKAgDpw+8ts<7BWKNX zL$18^&3W!!R_cJezFsfq-ly79a-VyWweFdTp z%y5Mykv~8^^`3DDWX}7Dh5WrP-{W?>RQ~&XulhD&Q`n@*-nV&TWa3^($$a*Hj9tEQ z!0iRLA99W7Jm{WoiIXqg^v*NGTf&$O{%XkHdvGy8ext&**^wje#g5eMZj%b8)h!-f zw>WR&w4$QI;<~q#t0^7&B4q4%%ax~BM?~IA@TPRhOYRF{DNX<1mA3pXcYeAWi*5I0 z9g}}wc0=;p8Y=fyp#O9Ja=)kgUGVIwcejCTunneU^6bFZKZr?`_52>Y+(QuC{I?pJ zU4OfGST1z;eiZ-b3|!G?c%^S+c#NFy@We^qhcV%0AbsSTdzNKdtM1(APj60OY;!{S z5G2SE9uEosPx5%SCa4BHH+OkrHyCy67LezIrfPs){?*h|MRw0rS#Qhq^a!Y~^nYX* zWV1SYAUtn%rBinZHCFB9d0xF<*l){Jd9ITu2jwSq_O!7S9ac@aoF*sq^N>=+BmF!Z zEyaaDnx36UQ>mI9cbkV?4c>j5XQ}x=gq~#L`|dWS6UTZMS%UmxcTXYzn&^QS#?q#f zJU>|?`K~_)o|Scjp4*Z7K+yA;HMj2C_tWfYxS>w-$lyF&9HVL&DFbT-n3tzZ=W$PdF2RzSDw(G(j)!B`5$Q81ofOrW5UeooZc zB-oF0vdIuXCy&m>E~E~l!pLtkJ@24khMr4~>tfk3C&cUV#N{fM&33Xm^u+zj&*nLm zGZ7b>fbWy(f|bnfqF|v;{_gIPAHCfR^4NM$BYA15 zCqti)HlSviw#@S#zgI5!!KezW4SH4CX}M>=M%S2gANdl(aNWrO3xL6A$P!PLCUn7R zVPf+FijOOt$>2-Rgh@EWmQt{c0^H$Dm<`8^huBIJm+A*Ct0jr|+p5W@9`qzQ>Z&`P z+_wZ$y?}hOJht591Xe5A!(^>D;9SernRL68`(`-f<)IjpLxi$LylYN z`8YNigA1Vq43MB6V#LX}Y9^jxvgfD1^!Ph)u9qDb81BM%V1}rSl&D=7ATVGzj@zWqao&PVN)&Z7GpQ2X?>&tGdJxQa& zfZo|Mo?%7_I6GF9|_W0=_8-n9D z94BT2`Sxm0lOS~`5kH3kmnPhtIy%0*W8;A*g7K%$ZCH!H#D?mu16=~h#~oSrVtgyk 
zT5b@agi=GqqzaIBNYHfWdn*!L6;Z>A1}UFueGzDx5qHE1>$>z4eMir# z#%*Rbs0(F|?a8ftT$x9E__Hr%)k(4i^_6|?3w;d${J;1*v-;7Dz1H~~ywT&5PdwqEqipk9&uL2s(EEZhFaeno zV2Iw+r)~o>j~K@4grVR}3hGi&kAnIXG@zg%6s2{RrL(5;+7?e{X}zr;lixto8gZ;BN5mo<=c8B zI=ZMTlB^jq8b?W4W_x*NGI5B^ctf{zTI)(0Qdq-R@%0$>b%>N5`kG1xFz`K@NVF== z-5C{~TJhPqTUJ`rs#NCf^i-)|k$L>T)B#wlI#NP)j2NOvXH^2(4$&hj z)id#5>Y<^azSIiUL*^cX74;O|vYxbxdMX3Zelq|k|4XGb?nL=bw?-EO03lU`AiRY0 zM*F)nol1hmGKAJZ4nsXem;ng@oKEXLK}$RyrMPflHM@D)T=>t&Z(ah~a)XB7N)kVQZ*&v$|A4V25vAp$(s1(B5gFziRafpbOk2^1@& z>meu^EETF?k!0vEy4=uTBpLdP@Q@)yFljy}7dLb!z7|M=LGVs@duh;noN&?Bd zGxQx%5gywl;KGV_UNQQID~X4 zy=2yJ2;WCBnDraN;Q+-;3XVhgvlJ_#SSiI0Q0ySZ4p9s$j$X3qH-tY&vF9m9hW%V% z)b|Id3i)w^zhJ31K0rEtFX{XwC7+@I8jhRzD=tVYzUG1jy*H?cH%V~uZ3<3P)1lZn zm7j6)cUhQfQpYFVHlzR2ru$l}0E3ndSf#8sFl->ijpI|88lpeR#oR z)A=6~>T20?i!H)l8vmOomB&`?+pkaOUc>wM=#|^K4>{<|?bm*IkN(};wC^ML_0VI+ zjO{!+5w1xJ67!%Iq)6_dy5~(woIZ)Ph(Ki@omV_<3Q|C?A2n+F)VxWf3P^n@wQl`X ziz+{oDC-l8$jyfGY(b><0!zmzubh2Qn)AEoK5ipBm-3B2JQrP~Fcdn!ggIDx>XIje zyH4u-PfQ^lDn3{K^xSWIO6QkL@4xJ6#Q7z8^e@i^*{>)v68^gLhJ{Dls8#Z#zdhp| zKd>J;+ao8O^Ea0Bu6Smf+u086kGUo&H#?M)TPnF{22t)DDZe53f}DGN&A!z6-(+SF z3TFRh--5~{Pyg$-p|EG~ozBVfyH7!&e)Jxm_Y@veq|9N{HX)D$Tu z>5ZDaDQ{+qG?8qIbdhZN!ajN3(@^_CWXRLo{nfI55VcKFM`V~H6J?3I6sw0~M12zi z0Xt05P(XF7F*P*HBt24-#5Y&>^0sg`6WOL{j$*@4dsBLXB!=8A@I6R|h_$5HNs6_i z*ozcvO|e%fmP4`E!EF$^0<5q1D6K7}K^Mmq?L>Q}ItZ|J5TuG~iq4`7W)bzgt73>{ zi*Cp+x{DsB=qY;XqPHpfu*>`9zE6qYO(cH^B& zd)d78!eY#kAP^PgO$s^2b7IvM4;(jbtW3JSN-KH9?ahI<%|;wGQ8<_}9}(lAaPC03 z6LC7IA5m$T)}b^Xo{!= ztX?q-zED;>rqVz2oS-}}m1V)a`)8I@l;!2JESQG>%UgHq&#nx zHdvMi8(%0- z8Sqj5dCK!S(y=3k-qI!cG5q1hS9oBT{O1c2tH6%=&llfenqdW@TZAoQW5#I+kt@hV zV9pS_m?bCrymg||HE^SzWtKRK63GRlL+FIz=HEJ-oe*t@uU zN1S5vFbhVa$S7Q_qX#3cN-2vWqvU~8EIK0+s&UUDU8@Z~THK&r}W82zJ?6P|u3sjbR=LXzM{g+XM318Bot?ScrU( zi#xzykcokn1H83tgT;bObPZ{AHG4NSXnR;HG!QX=)I}kJIj-XBO49S9M)GAA=WYvT zFCBoP!RU>morD)JBFGx`zaN#xq8r#vjX)Y|j6P@8CQTXm3~AB>Tr~1E0ieL$ z1Y{TCHexMnhDY;kM|y22E0wSoS}AK8a3E}ztp{|7nzgPI&=0a40o^UqEXo~7N=(;- 
zCn1m82L6g|Hv?}afVT>BY7OSN#O>!{Fq_wbe6b#cfDNE~Yy|OS6WEHI!MoW42SZyy zP}u-*?*MprD%kbG&8HhMa~oKV4&;o6jVv9dqQ@bK!6DQHSo``L5J`Yq8QJ`Y3?fP-O@1=J4d9KnuC+>qvsYJq#ZSo%?hi4lw%}xY;(sE-VP+q$9(0j2jgI z4%`Iipf%~s7647{?gVr=n^ZurQwH>Ghbtn_e&~{LTDq-y*8f{v`1kE=ZCY95P z(pqJ^18!t#7jPpxWptNi1O*P`Nvyz$0h=06YMrHKfYw3OvBO}0$5PhmXq3HTobSXv z74z?L49OFK({3DPdvM2o3Z&P)AQ9~YooqiyP0xUv`z#oF(D(!PH4%r`EF4~oa8|9r zF|`pUUbo{=dJKos)9g6jya;^e1hAHuu-~7Aw(u$Jw$Ebsd=)G6bu5}UFye2k2?DLy zAPSuWy;(aNF%gp7QHX)s&c^d-%%m_>GmKY33`WgaAD9T+0dmepOojkv5+P3UyeeXl zyy8Y^P9?GLc&wUIb#RT1Q){#V`%q0T;;f%ilgoyVoxtP@lC!bC?0}kGFyV%ONz8T~ zNl;#~X4ef9>~FR@5(HL4Bty&iQ|z!NK09-mbsh}TWS4Avz>Y!g8X$rAZY8XHz>e|i z5wJhadSW8lOO(`n?*pt4Rx4EIkQM za{~|+O<*4cLBjYDo8CvDA$|-h;#n-)PeA+o6l41_aF@?9(q95_Utu)A#wh+*jpLmF zzz7Ua0^S+;Y6wr{x7z-PTPmjPZ^P0_fwRkxLZt@g;1+VOEHJ zcw+b3SX`5k29|x1CD{*w{ z(O)1TvXJ80PYnC%X6(mQV9Tf3bj;~H>~Q@j8BJ z$Xcs!f!bb;b!@T98o&{4E#$k91F*X}Ye4reR86W}G)lf3Syjo4ZYVhr=D$Z}PlKIp zG^L{ad&|nd=>x}2Qx57%Vb~eH*b}3$dnI7G*5rv8o$45w8t^2M!UkfM4db;S9F)c$ z;_2);UYlLOVu{1Nq8UdE9ZFpZWy1#h_}Q3z7%Rk@BZg&*7;V28Uc_kog(wY_4_ja| z+fliXw?sY%n&9HC5YxeD`HQzk%!$3{H=cu77$kRI=DBE$3;eF*ybWT8nh^Dv1ABI_ z&`QTL$N$)$@qb$tw^Gt*y;tEpEj_a?W)g0+p~a5R@gHMrGW?4HOr`!@KCFQKeE=o6 zR+;|$LxZ69RW-61{v{>sfh-$l4xcJ{yf?AxQm86J>)+NOG>~yBPNO87$9n@odRZ|B zmPS)!tb`^ocHNZa0CXJyw5|Y6Uw~p1x;~rdviZ={T#Od4nMw68P$a6(7(@u3p75-|XTx+D~qtq9+|N2LXA839HHpndRlMu;6n zyP?HkV-O7{RPFOUnMWFx5oDrdGn6j{o4v7t zxpAeLI#>((*Y@3u6w^0!M_HPGUEh$JqXM9%TM1{MJRiGdfoi375lpdKscMHNXSkoi z9S;r+>q=N#RR=B=^q&u~_4`@U0k&a3n@M}HWyb`AOyW)rv6iL+@QTL>m~25B@%1+0 zD#LtEoa=?`vz!1gVv)j`6Sz^}=}?OUcgyD!yv@PH`zyiwuBKQF9bUVV@gFE!{l`hh zC}ZSNmFy)}Ure(19j9`Fw6gCL2Y$j~LVqbF{1YX6eHIg0A^ekB$NO`fGMt#~ zJJqeg##n(ozXB$gwtN;-O%CsZgB(nbobVc@_jmEOvrH2%+dsWln_6%ZQESa4h#(#7 z>h(+Cxfn-9sgXUsAIJPJDV01kqE2bnSnmZbYy);213K4hWk#Vlj%UkGc!+>qPAv4s z@E1$(DfD)-+n)g);R|_YinnGk7u!;Apf~*>5;q#$`C=T^GuQ}RDM#YAI0`qU(Lkoh z;Pe`YGkQD_;|ch<2`tDDb>0eCZ&&D<_h9p2WIf0yVBODT3wRJl z@gSYgSFlBV3%d_msrN(mb16U0mIe8j*c5(btDsi(FwFXH7V&Jmz-}srv7MrTJu2>m 
z%H7>;x456}5o=+GZYSF-o@Dz(DfGmSvi;&E_KbLgJuBX3rQ!@bBtB+`#aHZz_<=nq zer3;#%j~EI1V+=@NiB|@(yFtUwG8%(R-e79HDSTmv>xnDtsi?!yPciZhO_@@`RpC7 zh`p;VU>|4?vX8V?>=W%__9-kRoYM}n^V+BEbL}Gg0%mK!vf0?zwp#2PTRQvB)`)#? zo5+5!ZDAK}kF%d`Z?a!)r`aXj@9a-|fL*q?W`Egp*)@9sq&JH=x3A_ly9@%2*oOI? z$*Mu1IY8t(ENF7a_9UcSY`~>*fFeDG+5HtzSy1Y*aXt<)3i5Ml!=UuSZDu+4!&bO1 z(6?bY=u*@dP7KL##!%1+p2dY1eyLC*dAxvU5|nv>1rOrmnK+UTp%MqDiVlNT zXTz{G!$s(Me7ph}wR($TsRdM7LT9WLx8Y;B80i4{eXu-p9NPF3ah-Y*jR^w;GJ#Gn zXD;m^CnS40>`Q#g590E)9Mcn%Q;{#R;iCVz?wrE%cG;JMK6ekct73Z!=)|X(COq1b zP-C`em(f-GF_a=@e$)?4~%q3X8 zZ_e-@QmoCNKK$c%`QS`%SL`BZXL|3n0RfcPk3>hwiF3Vut8o|)0O_O()=(^Nh}D5v zCZqB+oO|G;$WP~b=g|kl=6QS4^Qn2>UTU)v$Fkmw#jYP~m-~K)wNv7;%EV>f6xlUM zJC^h|ifC0TgWk`q9cIOZjaAQs?S4LBNjaXI?{>?_?)Jj?DZ31xj%+EUYKcg(lsum< zr{F;fR#5N|9G|h3E?lL-KJ>8F22O3L$vjLNG7qzLI$N)^4N$BH&x~!Nm*5v}VOuG- zje_ku+u>w8!4zYgsHjJDrM5xBW=nCHK1PwpDR{!kc0;w6qI+D}&sA>l4?PU5!)ylD zVKxKn5HxD409->%V;2H23_a{1P2j^+A9#g_*mINyUZID<7HSSVM!|6kUZA30bg~ol zTon#0?&RO{q|Q$1>}8$3VmS+YYmqm~_9|yFa>aaa%$j?=M?6Wzm2C7`X3sn^mDBo` zK6Q_`A%_#E<>BsjGIgPFG+*pZ$+Bp%w-#S7PlvlBXJ07Omc-rr0P25=v1hXHC^vC-*y~{4vEgJH^GC}E7f_5}$c7|Z z2nsXVkVJ5af;-u4*=4D>0KNWGE|}#`E`4vQcewVy4%411y!Q;x#;U+Zg=vTeA?5>( zh4AeTh?8x39|5k?Z$H8ChTi~ZrQebGjs&wf1gsbP_^K~MMEC*d!2@2>0LAe;SPvQ^4O35Q&)!<;5K5uOZBOu%#oqZ2OB1V}Rir(oOi#Cm0k)aFos zZbv+tIj-Px#)if~NB;^73&qe&@{4Z4DhGuU9HXti`js^*^ahawEaGO!4K0TpXhYHE zpu<`S6*m&a2^1F}TVCE)9Mpt)+hPv_uLbBl)-{X<)G`8>4;-KXlVpr4-IAXoH)Aw0 z$j5-?3Y~fcK8_bP<+FldsKR!k3P~E$hQWyvY#<@t)_Bw5z_36KAbkW(t8BBpY*T@i zrrT(=z=BBS67By|;$%|eobrDZIp&r{I{u}|X=vp1|D(jQ6(vIPOZ81jf&NR8Gg0KM z|D(up%TuvKk2=+HjA8QttO&|M>)2DL%_hJ6rkNNXJrM~{p`7;qD zJ!`P9Vt>MIhusmoo*A*&Z~|Ld-NUTv4WhGPZ6){aU%_F?;;%q0X^f%Gy~wP4Qe}xfl^K}twqI>MryEZ zk4W3bmfbL8o@RgtXcV=HcUduN<;gv;Z#IKfo<2m}012bcHWsdO0@2RY;Ows1|3liF zz(sLn@#9t913gCrGd;|Vfaq`u2m+o_6ahiJQ1QMG1VmI00k0g6MiXOVjK*kbF3laI zNi;Ez#w2Pkb4PP+&dt4>``Bc&yGG;x`>JOE{bl#}|9}1_^mJEO*YT?A)vI?>BE7tm z<;ar5FZ}q5vRV@Ok${<(^tmJONKflHKsrYq)HWh>CHTik*uzoI^9)X0;Jl`X*%bJC 
zfVeU*4n3s?haRaAmP8eWvT@{rN_aRMJ<`|(&fnfXcnh(+F`O5O%8SJzB^S7}3}pW- zkaU%x(`#@tS%p)H0QS2DXO2BMQ5;0R$8jWh0*8XT6jU<>^P&}_M`9WiTfPG9xeTifpMyeL zjynquMK$3`!c#+&2Ho5M?Xze<#;}m>tvP9E0!Rb7alE2>&C| z0k=}20J(Vc&JfuOPJ7D}ntmn-vsxw>|J;HkJw-k^o%k!_B z&%O6Q!ursbK%P#*Mu(rF)X#xdzQD=tO9&zV2CCq5ER3%~3x6YFK=a>%-I2OWn{_F1 zBF!D66oOA8Vic^RE3iBi06$D{(7R!wB@sm(vv9a!LT5;OoVE?xST1F&aV1)03Q}lI zc@uVGHPhS+vcCK229k-s?Hd_wG}sb<04Dme0~QKzk+k!yK}*b~h+PaaGN8+~_=;-z zSoQ^Bq!d_XCIXvFNDgm`a?@ zN7(*?T7Ly>{|VUs2Jzy*K;iud?f4bc$Jq|B9oq)mNo^QqQX6O|uyufZN6-f7Wf->6 z6z~nO{T*yonkpu6uR>F5A50JuD02w$A7$yZwjoWz)g&2vqsiCdNx>!qEn%#86*O(W zWZU(}Zz0xx5q^u~FXr1V8k0HkOMLMqGh?otigHGX{SU>&h@xBNp;uZYzW7Q^tXO$# z%oFnbeOvaf#iHQ0m|-}7Uv*o|&6+$uSAF(!;Y{G?nGvS(TezfXk^MGK@3-FWA+BHR zJu$OQ3&!`Z+Vjb`*xbIgm==sO|4*NF$_N|3W$umn-Xbf!V)-4(qRal60VrVV{+LH{ z{>MRs9BKy#eH_!ABU`nyWd1?KPoQY+n>+JZfm(<~sYg2}nf7EOAKP;Gt7(kAnzsEt z9QyIzBX)FhQd{rsq2% z&irOEgX*9dt5`i7WsRNYjK){9r#b&^c4peE^Nxzyvz<$k`qpgcGOH}$X8+OZZ7?O> zmS|e;$pZXu0qh-3EVtJ>MWW=HdNYzfDFkIqe`Vs+tDQr{>Z_4Scy1B7aL zTa};%V``r>Uc7d|NlcO-?7)M-<51dG*h(94rY?T(Wm$Q+noRv)FG`5yr zt|K$|tLX7+x?8U?Xl}d6?oJ~YC8cZwu{*An9!k7s@JcrprB6cCQ_D0|K=tussg28TZm%K^s>FQ9y_n^DsL=AA%PX#~!5| z2OVH89@2;%Q%s#GWrsC(L}QO@?5L^NYz+!&_UDDW-=%Dz~p(TrkpmVc2fz2g5 zx}H&^*%%!g56tYwvICL4m)Rcp1_{cfl-gq%++!9nk#mdSTgK7 z_TCyD^uW^;Pa!#iG)|XF6;~DRFX-5xj6BtBsq};)m2}1KBhI|;FOcht7Z^A_s(-Mb zTwy#%wSd3yDjaq)c-DB0LY1Fl^Gx;G&W}6$C!J>Vh!KLznGCgbwcfry5KCETkd*CG`oO*W0)#c=@di%RyY4?f@1}ffPBTS1|Xevs&r-e{-+@ z=S(f}(38$2vGqx3!uAyI?R+tdt79kt@%fX^zW%qQ-2 zMfD7hdI8#JwC8s->&!H33Blc&!~V(&5MNIqHo zcFdU)n1Xck=ruqdYVin8xa1YcJ1RwyCjoO$rFXO1>YI%_%u8G+Avd|YC8mHCbWRjZ zB55~Nz*-1vH%`UMo(9?|OjKFf)MzBomRSj+0fmTrk2|~cqA$$-b7qI!QK9x#;Vo!L zM-f$q!IaVV1u7M12zAVE$!pDcoh(-DH0T+&(A@+bmt4x+v zd&1cV;JNjLGofdDV^ca*4`Cj7Ckm}Y`U1TSqFUfweoTCI!g+FJB8fM1K>JA@lHB{) z2zi4OK@q*_xf7nTJLc#G#Y~x__kE<@&>aL?S8&PvMj!E)r=1yrzBm8z6OK0hj=&8Z zPIt*|_qgdV6L*n@NeO|$&xS%Un1gbjqA5e^~+@)55F(@o*? 
zEJOZY_`ZtP-5bXVY7f$|*0rLzlz?&IQ?2PVw8;&&O`;f*ml{pO=Mzu*JtMKNt zY%=V!W4gsSt(~lASbSpl^kf?fXf+2LUv#kdEC$Sn5TDJQwdNdrw8I?4jHLLA06-cyw$FDEx@HP1|rRlDQRx6x-$=oa0aB=)uXD3Mx@CU~hh)3S&o)E+8Fu36!a9hJpvy?QjEXoP`bSUYbQvwiz2@xV*4RXixZ_he({5s=p zXS#UlEvHY+f7{tJ)1>r=Nhp)|HW#qZ)QfHQMx{soB#yr=iN^oD?YuO?ew_|>2`;;t z*fJv~PLN%F@Zbl|*APS`xb_q0G|rQPAARclj7uQ@@i*_EsEASDa{frI`K~PTsYBya>d*D0Jbmc-LIhQ^)da$2@ToXHNPHHR|;)E=`Q- z;)?0li)F*rGfT9Cwb#YUx+1V2^IKVxl_gtQij}2WS(?VWX;83Zkz`Mm3VwT{aJ>jZ z8KqdogI!$bJsCRqOfV9PhRlvBV@B&Bw6esBt}d^tOehwvFS$qf{VtpMrK@XJI@N`> z>vYmbbcera%qot#M_Nec3AvA0?ssL!b~kK?m?gx5b>Thf*6;Vb5-0S=b_OLj6hZ~` zk>=ubF!kX{0eXj^55Mq~iC+t_GO?^^!-0$yzxa2dK1A!j5AGJ7lU#iOym3jcq}cy) zWC?EU>xznqn2l!73ErOXI;aHZVGFCma}Da7&-}noX~0Omfs^_H9}NH^8U|Ff0B80^ z;E-(ur{xBuZ39o`4lMS)=r+{aK)PKCYHBT*HtV3|bQSRA)i~N;gQMpLFl?^lsca)p zhr&P}{1{)tZs0|1GxUJAa%j45_H&dqwEfT z0?73Rb`L)V1p77+><2)uu$$n2XAeU8;34?`-4AZw0Y!mQffY&x(NHRgWk;0GP$)=- zLO});3UZ)O(1)E+Cb6fLS?n2QE~ZvY9fH*@VJ|AJ>}BOL_G)m>VAl)|^R}afu55|E zDtiAqNBmeQCnq#NtQP*J=rSf|j_nL zji6B1kziwJ@Xrq`awrRo;dc0Xc0jRQhs)(C)&d3cE#T(gij)2xaJU}j20H}6J;y!l z?+8F_gD$bb6N1-GaP{UkNJ_8)MRUJ6Ingx^MtX6RTt{3`N#hw1d}p%BJO?HB5}!|U zjpvt$ev@5;phMC!*;Nr$AhitaybuEGB5`W6t6pK}Md1|JWZCx5mK6^Yw@q=86O=Pk zTsd0QCs5P*6jS3fOwrFp-#M_yjoJFn}8O#$^ ztMgo1mq0c4%^I)<=VNx&vUf25KEU2ddO=@cR(^vS_!DO3ZjU9xa2SgM%hCnTvxnUt9KF`{2oLN5U>?8*y%%d`AGXB@ zLGwL?5*`L8aX(hYBVf-QWZQALx)uG{1qSGTusPw<8cG-^@ZWQwFMGY_9ybDmKN3>8QBV#m zM?BBb;53be^=^ew*Sps8iDLbF*Lz66z3aB%o7cEj zn_Ppq4-+32Gp=_PVKDG2#yn!*{I7~$wV+&TEu>$S9n?X9tlF--NG7aKi?9DVf zdt|%Hvc|n2ULS0XL~4AYakr_lR_L_tc@{gsvs~=H7vYKlC-Q|ATVgzK?zPj`YTKeg zT}4kbH&A2%e`BaYA#eX;<0{2r-Ne7tC zNSB}J@-tojL6=_~{8t2HMZVwc{9lN}iZsMwRrwhz_n&n6-OA5e`8m4%10i2kejeXI z`GstA>7r_i1w0HT0-iIJNV?OISFvh}4PJ-HvRYN_bb)WLR7Iyt6lzeSH6=!)SghIn zOBstbTQTsUxUCe3H5)Nl6|dscDEMlhQ1Dg6T@6t5)h2P@hM7@9hYQ{;;-72 zR4YYa?WSP69;9SYEt!<3JH4aOtD4e73k6;+Q}S&5aiu4^t@NUty)}ehg~4n;J1bW% z0dNp{6_){Y$+yE^c95nN*a9eNu$@g%3av^JwXm3S4$+hn4N+LxbY&+h7ojz 
z5p+g4*c=$aDx>VIQYoj0(Hezeolap`E0qd|GLB^0l@x|`x-x;jnn-uRqBdocRhjI7 z87$KjL`R%v3!t{?D%-JrwUxrLHYqbT3dXvWf9YV2s1g@cIU6a;9C|(1&Q>BiE5I?2 zI#NX!xJXcy8oJDDCS1Jps+C^4GXvOFCHl;ynq|%yf$`Ykn_P@m{W3bv; zwXzIu2b4mpbt2+%T9p+vaw`$N(yClWg@MavQ?9TotLOpDurtbPg7F$kzS63!wJPf< z@hWuN??-$cG^k>juEOAFw|BmTPg2N zR%M%=;)_zs4!g2bx!F#kMeP(?lrD(*Y*Thyl{@UpoyuKQ*xi(Jk5vKNY^8D^%?0qz zR$?yT0>!3s1&U4O3L-w+lzmnOs!c1Ehfx2+X8mBLtyB)!l!ui^Xuu!U6lgZRq&#L* z4k=)ztyB)vhexc+G>@J#M_kj z9lP=uEN9%ki|NMyNg3a>D(_Ru2Q&dcq{~N|@>e_ir}8lYS2pbvtMaL)d`8(nw<4TIn>FIAa`tinmUJ!h1ys@1_Z^l*Ay6%=U^sc268qbLwwr9)>*t?55oT^?z4)S zhYVeOvH)_2jZ=-){4)_X&3M5wM&k#>d(+U0Gr~XJn99!tS57xvklO^Wn_)N=YX)re zAs|~IPK-qW``s#c7>LrL!{jSo=Lx*CwBi@z-ih*xQzMPofTo-^!;bPKo~-j!%8^Ew zZgfefi-&vBSc*&fz79njuBi$FlByP+pWsjH2>nrwsHAEn=>DP2&5h0ei7T5(^|YjM zX+ta8B14s`Hbf~^9e}3}=L0AwN{!~s-yl9a->GY?uEwZN8Ko21rpsgyWLx{26rb$Js`6W}@0fR}&TIJsO)b*ZVs*&&CsbdWTB7#S z)lwBA#+WgU;a<)MZ~^wAe1Q(yyOXG>G_H)!Qp*U+aAxpNO&u<_JR9w^0G`pJBF5-t zQ%9&Hb#)XT$=SDGEmueD>KJt_rl_t~(7SQ!cz^;$suR$mh4YLQ_|h0R+~*ZVpT|Ut zN9Gyb;xb_y#-iaupspG&xU2o-Xqoyjk=$O4Dx9QFrpYixSEs4~%ye~zj)3nE==>O6 z?&kOCJjf60@SlRmhv>2g7j-60iCL)hvMM7fag91#Q|IVvr8-YntJG>utLOiTjNCh`jWnYkV}>b) z%R;q5R~wOjs@f=UG*QwLbsE@O?NNCXbhTM+K_?ehF00I4T-i{YTi)1EtE;W}Y(|Zd zmbsK)OI=>3smpbBg}PE#FQtMmQ!m%mD^!Hp4bWvZvhS%evU>ckt83INb#<+}PE)Va z)vML@nu_qe8`Ntx6@hyjNxeZ=H{%mN-{|k$LZ95It6Qn?n~*wUzLC{=o4Q?B z1!flysylRbr+Txl-lERd)mxEc^L%4s{B5d5Q*YPRUFvRKy@OhDr+OE)^==tw8}suX zO}$rF??VsN`_(7Azgh~MOfnZXi)a62XyrjwHJj3A5&{2 zi6?6fJ0Sk3c&XM%NjgY*9;3@4REhP4!3@&KA0~(#LE+I0j3npdJXuqZ>gp3}o~}NL z)cgg;@RFz0W4d}=#e6%VKCP+G=<2id>2p+DEKMwbExGV+@x#YVO?^`*|YC-AgF=n*~PMz?Z3nxYDqrc+;(nEDQeR^ZmN|iu$UqzNVhi)YH29 zhI)!H&YOfOvc&EbBh9L*Z|UmW>O11o2f$jXa2m0}6LrSph+O=2%)sFG<;J4O|G~Zx zYtBKY*LRgMPwa|R973!zPDDp_^h(|moE$KomG@ZO z@)!79{sM)|UoxSkyg(fC7uXxRLgx1=NCzU9>A>ak7wBC60;A*CfWaWXhZh2l1&xGw ze4NFzaqS`QU1Ln+Ioq!^2Il7Cl_{hcE<+CC5kW1eZ8#C&j!J<(BJaf^aL1<$uQXBu zIe29%noR;-I2ni0shkQjB>_fTK?d)``{Lt%cxN)1Cf(ch132Km%3hJ_S%`LSnMk27HNaac;3jeT*Q 
zF$ay!yvj(gpvDrf-izjpgK3kB=d!lul(jXdEYuuyC)6AQt;Q5M8k;~9W+I0YE+Db{ zD&sf`ue{nAB_TnDr$ClInUhLV19PC7>9U&jZPl#jGx$vOS^*<%7Q_sS9Jf?vJMpoyt)fpLY(^0Y3;gh`b;hsAjM0Ne7-xT43$|?6}D8WDmeTaqLgyP z^bN)g*~=!d#Wnymb$lU!(gLea8t+Ij7c&qWYcX2Y6XRIVNybsm7V##&1a0X8K}s`k zK`-3k=CyK?M|_T`O=iARELWBSZZM{rIYVA%ea(DwHc)>S24oe>+NxNND$IgcpkT8& zq)Ct#Gy6H#MdK?qe(8CL5@B4A@Qd`MNw01iUxf+7pcK?a`43CNABF&#R-R=RIg@e# z1PXS9C5nahR}4c`@X>3HzY#EQ-e??^z}Sa{+6-lB0)_*E$8NG4F7uXSg)DP&$9r(Y@o_KiM(i%pn$fiK244R zp_pkQ@I)Y-9NglC<_O#>oMnEE-+u{O0-7q+l2gEF05qEp1Ypy>fVtFw=4C9=vA?sC&{;>-py}jV z8@Ji$Pt)t#Eg_6TxU~`t(1)>Q5NmK1-_H-Ay4hfaK7u=%4(Z|Qa7R;)W<$DJ9gLql z@kcS0EMTpc@PoJm)~q5a0pMp-!gv^ z%OFzDOWMuMcUa)Em0o7C!Gu`jPhqj113HDn0HF9Jv<{T9Y~1#(#zHiX-(>iPh0)%B zn*nA+Y%n`N4lr3{6;eZcDke0_#KBCKIp-#0ILf^LCZlUUm1*wfm$Y^661hLnth&UU zRkEZArp7yBPz(-QvKYS2Na{^{tQ7L|>(QxASe`dvP&UiiYHr`oP`L_`_wvI@+l)P^ z|NJ&1cQ(~e=Gdh$*nW<~76X{9grCH8h=gEjJ%53}DCgXcw$|-v1O1K;bIw6Cy@$qM zIs;a_LZ=H^)yD0{4pfyUj9fXk6y)s+jE9NaYuj-9>^9uaP-%DvU*A^M`p_t+v3c<1 z2z=i*qUM+FBN`EnEiPZEXjY2hW;p`ivI%VM%8z+hD0;AqM33ho0<*XTre0 zQH~5nT1>Y0!hk|1uIYr5Q+@VWy!hoc?K$F{zKC#n|oHTxj1o0M&Cx*xwY4xa9y6bc#ia zw1e!AN7(r;_nN8Xb?V$9X}hECWTc+#%sRxm=y{8gXj9yAM%ev{U}5~2(lXWBf7g(@o1;@!9k8t^bUn-ZSB`Mop-ns z&Qv+YI!CL2KJ4`b8JQq*HFL^uyE0sGUVTYQ98yzny@f!vBN8>jJ z#ET6YLhhnCY-cK_I)@~nGoOm*Im&R}5J|S%D>+$RKMv8y(V{pobNi|V_-$ZI)w%fn zJDiriez7id(k&ZKL`I>#_@h&vICGl;k2b;V+YM`!l=-~%MZ~va&|dHpr8RG1@aetA z^%kjY@vwf^&WCX5q`wsLb2kG~0>9)?;@vOhMw|8<`{SjUwA(+fdggVIW3Tr!l`ZDJ zY$TwP%U?F$>f)C58p-Qcf-eW>{!?L2$a=Y3R_|G2VW`#cp>$ zQ{wkP2=j_Zg-6Mi_c9`urGb?e|TjDqq|)9p!hh)J;a2_;NaX`_ad`^ zUo$El^{A!ZVoJJp_jfx);Xt?TA`|T(?4lb@Ag=iE!2v89pvPZV)04yMK5k9y?Jui6 z(%+3Z<0%uA5^=1|-9tj*SXz%jQw(GDmn}|w0MD$>F>agqX|&rea{i65DNh!HxbGG0 zGtAu_V~*m<%|+s&$r32vPIiwm#g+}edo)oz(+!8$!Id-J&zj=8&3ELj`T`C5BBbBm zL?!N@>()iz2rb5xTK-Lz>iOMqTr|#cBeqNM@ErFVQ_7fncFLaaY`DF{;2V>>5UV4KATen3pIjmYry?iGiFN{d-~o*e#RF0&6)DaX?LW!^ai&F zxi;V6uJ_BbUfOu)hNaN6UJ9e*ITlD9Dc9!1?&yobT)f}C+*G|D{!edI7Dgt^B2JG% zm=}BkJ@Mx;zUQ8CM|pcOh}BekY*MiCC 
zm%2Y>1rZIaRYEwlKhQsN@|V!AsRb1wnvT1kKHT{<5a4OPHEC=+Jfp<}zjulfgC|+E zm|hGDY7PxFAhXn0E)vBHqO--Rqjo*I1Hc9ink=@w3fAo0_uV6dpTFQvHfQ0c8NYlE z{XpmliUYOet2Q)XQDXQ@ZVG=oTugn*{j@0E4ll?Tf>@b2DmeLN_eL(Zx+7hG^0Moz z?pz+7sk5Q1Ooz=(COoKJ4E4>gxo;9F-x|fi%U^d-=JEFf9t4rh?%0_%c)G%tOva>E zMSOe8-D9SkZ3R!~CJ^k~!57&f)t!i#b_1^60bSaqG68jB%F$gG(l~OPV`k<8TTZ@9 z#NL9XjXz1sCW+S5?vZllSH@iS$X*Kf`=2a6N;D%qmPQcvH=2wCj*O|B#feHD+QVjXmS zHtlnm3!;n6F$g80h{3d;GR%7l-8g8FI5El=%ZG@{_uZqMI!n^AqrxRN3=p7DE`GY* zYc3EAl_H8?);o*$o_D7VfNw!GohG)6J^+A8xtfH}F8a3ZnY_}%4>pzw1(b#Dxp3VA z-#9Pa;&Mhagd2xHI2&yGqS*st$rE0mICqOnhn<`Pl^KR{qj`YXvsi8PF9ubS-YVUZ zW*vn=p-O$oXWq4|!qFPFBQNoY3}(Af0j?=Fs_82%udkf6{m21wUmUg;qFXPS>_MO{ zV=lq)Rb!B8M^IuD^>Go|F*nnt#Q`Oor5Z(B(kXd4g zT3SP9BWuZGB$iz@Qhp|_MiMB-QyN)|B(Q5Vwn2mM9A=I*U^$|pp{BWHaV5Nsx4?>o zv#3x|SVWy2T?v)njlsJw`%jV;{0FgR2h>I!&xlj9cyV1y_CI9;R5X=IlYS z`Bis!k?<+Jm^6LvwuqR|-5XV`QgRmIwZe3R!tQMmNzb{mJSe0s^wKuibLd!pLhuG( z=dL?+kl1EuB?_iwMpPnA(mQkzOShVa6w|(Nm%8tg0c|yQFPyHEL-q`|hlEvfj`?7) z0s9jwVIO%xPi>!lX5ZTpM-b@x7e!<)k4;ZX#t`2vm#}&AjeuDm5r!L}lbna4>n;|3 zJtRfqkd!UM(|60YM|4{{a%;1nw6@xL|DUvBwQTw&KPk?%1YWMe&Ep?%-NoXBm zFtaBq;FS-NtxZ6?FYF1uD`|U2Jt@VLWLIIZhjt3pT85-yY&g}5*RSE1R{Ef$Oc(Lr zxCiqrG3^_7*8tT?ylFpX?+7*$!w>_QjzaCUY6pQ!I~u=qM{za@LG*zg1)YZow&q|_ zuJL%HcEmFE9Qa0T0(7m3ZOtbA2MsX+F&TGLpgC?fiP)Z8{OcQc(fIaZ9g5{%24p|1 z!(6x|)QI*9DFq5rvO?IBW`)Y1j`C-K(Mo{c@GS_=Eb+>>?t~6~)M4>q_R-(g$DZJr znSJaTvH-DSzo~@Uo!Q6kfQcVHodtDSP(&Dy+0dD%f0VyQ!i4hAM}C3;gijIzuppjz z2J}Mz3M7(+g#z{p-7S>c70q}*UMHc^DfmsrZyJ8PS!_TUU0M3Y=x)`B%{!-|0n;pE z^_|$uR!6|*@xAZlUiSCz+%rrqzM?nQ^%5h0mV4QjpWPall&8g}7K+{fa6=V8`29cJ zPuOJYKT^^%#euW#Ql$QN)@?WS(^fk!oBKWX%bKqJUjlk~DW{F>jUd$w zn|Qs4tn#BCv9ILF0$w`%`c2}%%vkFAA2VY!OvUr__3!>9a_VEr+OoDjcD70NpPQVK zAzr;amNcrBD`FRzr0|-@Chi^tcnpd(DXXRzVxz=^TVi9ygPUb#M>of=wb+JYWRVL| z>V|ELt-plG9@|M44FeYZM5z0eeW6>+K6*tDa4fMe>;ai}O1NSbr?m6MBC{OkV|CCLqYe5P*p*^2IL&G;Atgh zT30M|B9&s8^$$o@IFIS%jOA8`vlv-ZP>AX6M$FqDJ6}R?+Jw|M#i{KQPBsynoFb{J z>hF59mvI7qIm*;v1Vmh7_T#b1fZLkKV_%Juxt{R!{$W=toVKQlrWu|jar_d+B3@SF 
zI)vQ3>4n%kQe{QEj+E~c1>eWgR2G2PP{CF>cUAFQ%I&4_rD>dO}X-X4ee z`bQhhRMh;P`TmI;))k0>`5rPrUYqYJG7o@r*Q{?6FBf`f+E|J_vlC=lzn(p17Yk>2 z!WydL*bL9x@)SMqs@X?ZoqCGMDb=YOK;q@g;RuMTylKtI^nevVEzg%L1}Zg#w?GEd)VBBu_np zN?M7~>12LEHbeW{~f(0-0rigDUJ#mp&>hQ%|AwF&J zB+JnjdGkDe12mQnRc&1I*ws2)j~q*`_T)udK`#w$Xl+IXdF&dpBuy31&hvCrVLvEU zb1c~7uN9wWdy<3cRh~K(ydx|G+773Qo9BCy0;=@XaRl5WsC#R0xNbYjT+_(0vVlK0 z0{o4^uP5IUa)!@)kW*hDvCk}^G+@aDnTP*+Z`hBtQxF0NO@8_#289JQ8b_A{krc7H5*u^yz@m zr^VRec_`NidR zc`61$SQmI|Jcx-5!DD9*vo#(GU!|ZZxLsViz|$9c<%bt|#+R5@moZm{fPVF%%X{MsuXTH$HsabozTKnufHdNO&Uc`sIaiX)Q1 zkxdpKt@JG9I0awonJV!^^p3Zm{S_P4ukj)Cq=>j!@fw0yVEK!TIx93aP?pbICST0ia>%{f9dB6)4$8Pi7C-FkenkAb>!|fjO2Jpb` zp0y^=>#xhtHcnL-o2qo!yFS_Fi4ZZ?j)gC}%QM~7i{I8-a8kT_4`CeowK6O#Ut%x!k?Qy!~0kNDs1t-tn3&+jH3 zkZ`2#266pK&lr^Q;YrUH6YW~9soe&F12qWSF}OzC?Qxp;=^am$cnbcj!yXh4yzUts zBP-oFAbVJqrPJzKhjIQ%H@$b+sJ~h{L9#5uLGj5E3`kviBpa^r#KYl1xXQnF3Nk_ zgyOe@&MgzSJH1l?#n)oJOM{CL#>zA}Td_uarUm3K{gnuw>`fD^z1{?IJl0FQ0_8Tj zqVHaR>n~4(1bZ5>yS(vcMc)}-cR1%QoxIPQY|77aGT!fwob)%sEOJwZ7n`72igMSom_|+>N2NeQ*WErP#7eFrU<5eDdvO@QLqohrO}7wDU-nAhiH`47+cI|&XGRkA1o*+- zg1Mv6eL10Dywm@i*f+*I7PWyD{8zIFuJ4}sJ|4BjgOmNZ$$5&PDm#gon7G9D`8If> zx3gL0_3ux35XJ&5(r&uJlp`PhLKC}ZdWl7SaHiL4B8P+3(Ub4O0N(t~~Bd2_C<}`yk9(!Fg>&C^_NU z6&tVc8nJ$hx5h~>Ezo%M17(0cL5?Qlh2uuAD-}m>>CaK8V=H(SQg5@aF}JL4G4}my zby!Yq6iM}pCte%hQq^47)Jh?q$?t{aG?>Pe6nm{adFYCos->;qY52$c&8+@qb(Q|A z#`<~$yvz5ef%$Qx4qo6*;4t|$woNi5$OqU?EyRp~D-0c$;=4!zyQ@=UyJc)QjhS2s z@=;l?-plS2Ki%j}%)6iX8@aHjW07WcU<2pCGX+=@8r!GCQtKf^Az=IA#Y1NY5XpMn zBE_o$6efFP#a3^QBbPlQAz1`pyn+ydKq$Vv@Qse`qI;2Q@_J-FR%D zJ+LbimxEk=N8m6#?yt^_#7s$1gxMf{@AiRkI z`0}zm$s7ogq?^Clh#6%>A7(L#PvAs+BNq~kefYo(C@j@)K&n_ltVDq*A#Oq<2$MpP zAVeXVq)Vx60vFMO{4NLIqjkF{6vo^)T~~>2a7Pjm|I+(9LtZStbuS|tz%v-F7g`S;Bbqcmlqh1Ojg|ArwGq8lW>B z12TgR4S%oC=6b@AOJT#yh=lI^?c=@Zjo??m$g&v?$k24u>k|IQyf*RFewp=^{oZv}dAj_f?#DMe zVX=1-b1Nf2ExSQ7uRiK^iX1lxwA+p#ib}8GJx9Fp=04q8lsvH+n5Q|*lqjNPXZYZ2 zdCGfJ6z?s@MOnPQnySWz>dNMoW1Bm;KyF?cJn^#kkwm$du6VllZQ`=8y+cvKiLbpG 
zZkakJ_J^(F=d)grkxG=#iOm5}(MlrBT*Qbtm>yccXq!GdJp6Wk(C+@DU0=p?2@x}7lQ9u}<*7MuSX zVG$!zeN&Plsno%?8Lk6+#OgRPdNnqJyg_3LmFx1fR~aAe6>6C`$pu@Ns>a1j>l?7+ zQbf!&ar!$R3y~hY;$U%+>*!oCZFQ)SO`=jK>%^(mfp4bk#FP*Z_ln3Dz0!QIir>## zqQsBczN<6`H1I(ah|$Lk5X-~tGsW%qX%1oU<{O=504Rj>LMk?L_6=p7eig+F7Q%8gf;5cM=qs(}-4UkvyTMYRmKP*L*kqSG`w9(loS=v!% zoo3h~o@DlEY8ImNn3)`hn2udAD#)UbAw|Vj64Q&A0jd;ZMzc{=Vt<^l2VmFE@6d(f zwk{O2bTpj&dgS4Vo{7#7n_j9MUV<1!cw>?W3F7K>UotG^_N4ndm&YI?pYt$twO1y! zfUub-Vro%OP5Hr~wt5DI>Y>sBrbHB+0|JS_o|LG0N3=t#M1CKpJ4{eh_&Y+JJ((ZB zB#ARHkGnE{AtF5)S=%U@GJIqxeKNy0Ad(LkJl8jbmy6O|-)!4BjOKU@_XM#w*EfMr z73Xt(#SybG&a=hPJl`N>zcSBPCb7s*>#RO;D$hqo(+NF&&pFZ-U{+LPo2%~WdeWUmdBD2s}8MOnm0s3Vy_)mnyOBDJlBJM`gJ>t7UUm?F=WEc5L z`M%(iBHtJjU9W%lROe?f5uZ^^q*_tpb6$LMvkdV)XP!(RPTKOOI9=)^W^|XKKA&c4 zsthRiWy>>k$)0asnFypue<|YiTYVmNNY#At>5n!Kn1|IQNgS-PLK^eIHY}bZjTMtD za|k^Eoq%&hcR(lL8PSX3KndUjSsCn)qgXk4=O2xI)yiO5>S7i2#W*{LhNLwR)zFCp zNGhH@4bcflgSr*7&a0YV(imA?Q^nyUg-`T#PC`nr+#w}py-FI93~98ZuAymZ>x9Dbr=g+Uf5yZ?vA5_to!(ozC)67}7^2X-c+G2!YC>ye)gtf;Csv|-07mRw<{K`OTYUq?&khZS zSUq(;&e@^V96I>um_I$pSnJc-z69@uJ=ZZ!x`5P>-5v1Xx=SUb>R2^?9kX=?#Pg&x zGgHS3pQVFN$)!U`C+VtZ5NElQ8mN8#)<%DI4Z2rP!HnzX`#ZEtns2RGP`R`P27^@k zY@HUhWT2|l)N?obx+He^fbz_d9{V-&cUJ{w>*+1N9^&9XY~2&ETS2+H1MSmXof`tL zs5}bm*}P;ZseO1HVwu}-_8IYEn8Q33!(AO7!KPBe;;M>!$hUCI298W zA;!kpH1UZaF+0A$SLq?PJM5iwnuM4)oHd0ii~sMH(M-(}dEl3N8Hu|d!&lC(0+ zZGvj=C!ev2P~nG7I+#j^q{iGOdZW30#DM`84-`mg51_BEMSZkgk=JuH$uS7No<|>L zFpiGS4zieI3E+rZpyQ$L!w0lOIly36J2W^SX$43fi1a~(KxBhy%hr*ahrNS9k`!vX z727~B7U+$_h#4NmEFf3CM4a!O@WMhYdBX@YH|@D00LGdd zAwkbn3n)PsOM%+TWFySYEHP9y)rTToC^Z1;3_ny24CZ7t6qOYcInQzCr9#ai3G1cO z5SWEILWRNZ14CpM(ZTPUJe3~1==<><8e^5;S22j(`T=BS0W=oW)d+Mo?&8|hJA7>h z7@`9UG5Qr&aN$0@rQ((MF9TIWe@Sxx5Ji)G7xS{{>~%No<@eA9E)dGuJ&5f>`vp?& zr^qhSgJ^^d>tbaOS=qz%azAP9z=P-`>`}Ud&jUCQrMsX;v0U8JbEqA3-aV9vm@ZFH zI2R9l3K0^I(XR3oB^`Ib0nXEOc}AmXF3&OeJ2=T+aKIbRO9)s3hY#!(I~LWeba_o< zuOmJy`AI!(WpBU-0eh25d5f~WO?U6mi0qW1Vh|fm*eztihmz4QoeLp1l_88ws 
z5ovd$mm2%GhIsYtWsRLd>oxW}!klRAEaWK~`$J>ji%)-#?l0!Pu1AT^lYF^W=o{&f zv(%D`O+qPnX%M<98i&a~!Vv2eJ(RP#qidRLYg+vEOBc7+LA}f$t{C;t2g{8JcI*b& z$mor@Z<6opWGlBx5?|wXojbTLoq=jRN{9Y)jLx0hManl`;s4xls`cEEfn%Hy-Jl0L zce7@l)x%dYbb?|BnGOlq8EN<~9UENC^WzCRf-b@a5v~wmAB%45Jc%bm-Ck_I+2
To)$=s+b&xI&O!19UGa!>cKgE~ntLK?I@6J1sNi+Ng^x)|_&p{}h!l}Md z9$0AUyeGYc-7CPUlOd~^I}KaZ*m_?yTqgA6m(cz-OXK}@K7i-z;AG6w`9MC1b~5NS z@d6!IriE005r#r169~bp0fL8Zad0_+^b#tu0F{;Md??_@%K!#Gj1SlOh^=!U^NAOy z`5q8M(AwaV>Aq+ct)t#0gg3m9sw!uZRoM7iu?^*m14lLZC4R@Q$-U7Q&N@)|TYUw= z$7lJPErD`c*}zHU&XnW8Wgbl`@XNv0Fd0F#m?z429;8kJo|@c3C$JVefsMckj2x%W z3Vlu?u0f>;fC(l~EpjgqAFc2uX4N6l z+#tpQVUGum9Y%IggM$E%+Gz%iSRkURd}Dc?m{sLVNN>+K7&~8+EE=kGR4@?j8uS?) zPyV3TRplEOcsVZe2+>N%Syzex4*ncnlJc!_h+B)9ly8fRILTs9?Ef5!Zn1XS~S;@W#Q6mqt0&K5?kFY`o z?!k%>4H?|WVn6|n9%39gZ&UIqsy>EY&e?o4z+3oWb^wZ z>}edYl+?*|&<``P70<+GGYcEl93~oTe0pFZra%iO!3s=(4H%{EI8!~2^V2gJzc;`` z{Dd`g3w)_X!(7P6R`7U)2kZ+op%L(@Hd%%RT*H^cpIU&eh2y?;FeSZ;-vNW(yWv-D zFG{2tu>*B41fQKEGgxuG8}3Dy(MgXXWW{ncN5etv4K!gTY{@o&Jxvi5>@-nAlPCrT zcaU*l8u?(Cx=JpP41N_!JFqPBc?(;EI}0N7Hle#~A>{3g($+yUAQBC&mhSa6GwPK-UA)Yy$1Lxx+e6 zG(npqy8#l2sM%+>*2PHg4A(xioS*2NOy4H#Mw=B*=8qI zR6vp>UZy8>2i)h~Mjr)kw}JlNCF9d+t(xtP9$u0VRx;vn;N8l-vZcP#f?u*C+dO8s?=@gL8D;=O)DI#42d(z6Da#MSc zQ3=O2cEZY@*4Q&5DbBq&DsCd&dx!VyU`w43-h3h_OdLO=WFln5S||IA5%c}6%}Z5;5+tq5 z8zn(9vB4MYbDF(Dt;47CH#+1ED9m(w$h_>F|Ss(Wq{rA#C+m*Ht&9xBBZVTdNj?$8*G((xK%# ztk@!Tj_6}jYD#oQy-5EM7A$n2wuXrY^GrT)iEo|w*L0sxq&EA$7IWv?9b)!1kv8$k z#opLBvESw%6s4DpA6h(d=p3qHs`ylo8Ya|MU)TRXL`x7v%azL3)iZp-PnY?| zaq)}I)>Sm>YLduFjf@Q75XZ6N>D23@vBuM2JkcSk5Wk$pHk3vr$`Ig}k`@v*wctF) zX?#L&$)12Tp74U?M;NaE^TkLcVR%C?h^Q$fon#bPPTzr;nhtu2gQG`rkEtCUN*xbs zhkV4F)XpR}8Cj-CYUh%vNHnRPe)!@N&{yr$4v|UJ50liIg8Ox-l2CA&=(*CD9k4+D z`51F{cN4GJ^%yg{n<;Md6i;QFg3tj#-HE^}c|eN20jYe*+Xi;1tBiC7klO;nt&$oC zWPBMQNgH84kbLZ6<_RgA8Lo5$&GVHb)rsUzqe0=sAv6n~-Qu&AzDWVKpA~6oMD;Ak zO*f*l(oIq#V<#dvX+*|kD(pT`45F1}YbYOp2 zSZVSnM7j{*=7APKv=L0MB4A+Rln=oHy%bMFF_OaprV$`jMuM0a1*%{)sDQE9*DKhz z07!&A815%Q6n~AO{tj^d9h|m5;9x+7>zeLt4)4k4^8ToTMw`r~x8X{GlfY=@`Ju7! 
z@>EG8bI_@sC5fy6s2+ zE!1_Gn~5QKP-7*rAR{|rF|P(#Y4LEJu+!O@uyL%8u1XuXd1E zYAhs{reZZINuVM;U1SIJ$ag8e2ndcK7&A@ZZuJ>j2&sy^NfuAvUaUXq&UPU-f}a#> zlZm&G3RmIu+YQdbNdT9D2)^)RO26G9f)Ptf5j%k}bmuUJ4xIqOZT_!Ez}RqJWU?-9 ztg0ji_}0_Ex+5fyT->k)T3~_QdW@frDfS7COPi}|NU?^q=WO1|?Ob#)pD$(ui`_T- zC?Jpzc07MkBgOnVFi0D>Cw7`B*=mmy4OKBd@l%D%tvgy9%Nv)&q^Yu{X6qf_r-?lo zj<{%mn%IT(4~FOB#N%-O^+}2D=4qnwJv9z97Yc@06)IjsZLs@crj+9>hlmXpBjON? z#3Ab_(};6w-G7wX=*CX`F%>$eVHKDREX3}R`(y#(_P}%wGfTSxi}>&qfku!;83$?( zN!SuKOnESN3E?O#@j@DSbmiGX(PgKv2(x(3PTw5UD&pMM&x&uudGnlTP};c6xM#l-TrUt8|EFfegaB z>UpfYjzi}fsN3mu0Up`ip-o#Em{d2( z!z0ubDGtU?9Sn(Ewto68JadPMLPvPvf=Ci~EcOi*kN(|QA>Kb^@kugIe6B~qDck>7 z231?Jz)mzcWn$IcJ{w;pjxY7~6aIUABTaeO1d@#H-pEt8M`6+byv$c5zO6)#M;EG4 z_Ya9@(naJxWdc!!4k-&EV`-9w5Cvrl*okdU)M18910-=zX+#}nVZvv(V8!ONNLd6x z`+vUeNpE{$pW~pS!e($&XjeihnNrFk;`c^nW|3*1N!8^bcQ4R&VX7{ZG%@f*QOJqk z-d8m2jYr9*-%ONw44q_K} z1xfWbwATF4U-L`)NCgLjq507HMT??A8+kw*bq0-*3fia#2kg0l?iVUO~rl8PLk|6`q!;urNi~b;W z7Lp~yrl1G*;qtcO=%9e*07vgpGg=Z!a>6is7;cV|KVe^U|Lb zczX$FhUDE#DAqH`qcR%Up=1>|j4s3N=qv)gB#=-WmJLud3yZYvtb)>kS|QSQfKZ#H zNl`WlyMa^_7~xYZVbfR>6VJq%jBI3v(#zQzg+!kVTN`ZMB5#v#h0@KGX^NFMK~Lzd zO}=!oF~U*NE`si`d16J`fErB^^u`-}IT{L^hw{XU8+^+_NetNRYY!p38?I}K@327PUBBtR7E-wd zI`gw6jnxrZun^Wj6rR1FB+Nss-AEV#waW_x#a(0C2>_k=L^8TqqE55_PpJwt{?FvX zZKx%zHVa8~h7^o{%0x#e`GE=U(Kq?Z#i`#N zFfzL)(jgvhw&Wum4=EPd+Y1!?61=IRYMXCaz%+wP#7Z@AcIb-vW)LXkFJLFsL zYFs?>lJ#>>q#6wBk^@Lxk_bQua*;;JiKM>N4PZ2m|JuaA(dLnq8e<~HeDs^tJi~|) zu5)CC9M}Qgj$|S2f=I+YBSK4&y)nh0gFAd2(e|;a#+w25EO@LY9Y;Zn!OVA(q+2^e z{wx4VMBpkX5iUqLgCw{S&tD){js<3NhJa@Z!b<_PVMrZ{^;w2-94@OT{iaR;E-9;0 zZ*xKo=}8z3{)=EV$O>%sB7%j9zX>??p_ZkBxOB_0i=Ae8g3K#hQf>}4h8SAE(yGfG zL{r6jG?2#_(2`?;A}ZS2K=HXu_<4bmi&$C!71B6iZz2gZiFZwyc90Htg5)CZ7&3JP z>(mZl4UGwe!-FvmE*=vc>MkCW^tO+oN8UatC9;mvFt&yL_eiDW@jbC4KEt%6zph(9 zjf&seW0N(I;-4T@_=$R|t$|YlNVfdY&|gs53@#;L5K(kveNB!%1sRGe zm$ohdg*maV-XAEdfhUB;i|t6SsBCTlZ3hyxvblC?J=mlzcHFi22lWVB3>4!rqqVuR zZZT{M3MOXS>uaDeRMkQ!C_m++{4;vavNtYmg+`K@+?L*JmK{#E(Sg=6mGx%+fc&8M 
zEPKn+d98J=9ZTsm%Wn2#Lfxgv+*V58S=89Fx>`81@uMF9a&U3Yc86L6#%Zj-NC%+dmIZwio39L^UlrEsN@!3d)MhCk)NB+hxSb zA@r6|XhELc9+;1T$f%>X^~_&f(@@*GAfrP&GBf>`+0l3TuWr7-7|$Nm>3?7RvH(A`h`8pv^Q1H&uE{Q z7!Itx09FMicLQLK>5lmUMl#KnrL@ZC)hupYZl5-%cP$c)s8)z>XTRwLD1Uyt_BsVv%tXw>A@zNT!B{`+6tQi03wyAP) zO>J{cO+)YG%z@N^Kymd#=#k0QU$?BrUk^(mu&|qJF(I(TX)!j})h=lDLr78WZ>g)U zv18h1Acw4|Z??aW`G2NaUTLHJroIy2H!W^#rC_O42rQ85;p6^N^s1^6N50D18vi_C zl|YG|vSpAwq#(HoZCp_hn)9i{O0xYc+n&aZ9X}eKo<6j6PRZEGL&p~n8=7s7)r!TH z^9q_Op$(UTt`93oL!~IFeet5>gto$HDLFGVRQ}}b0GhLSab;5rYWL%XS>j46F}aME z8Q3?}hd*h5a$EIv^#xTKdD%Vl^4bdsrTN=OFZ>P_7MNoNL=^}XLI4Z(p#zFS!-02) z5ZECjifDY;A3<4{Z{2kfp9sR?yd< zizT$84OmkkqZvD80)`1FeQ{0ooN)04{`!Eqf2D@<_`8SdHj5i*mm}I%60L{}%gC0J z#De4nm>V4?R#leiD>M-w_r((9b<{HmuPG#_OAaqGjs3mp!t&T|Jpv! ztL)zQ%$#ZGoGJHSQ4~zwh(1C}UTu%2bz_=gd$p{q+8R{N+?TP0ADAIkl_Xh}3ZJ76 z@k0{_hE)`;j!uk*3ro((En8W%BIeY~RxDM%z;|p;^L2G3;V4Ciz!UpG9ZK>eKT^sT zKO4CT#T+pQi{JUr+W6{6E3#pRraj}6Sp=k3Bo>uyVs*@mTB>^a$WO#*#8H=!h)183 zhUw#RD8*wB#2BcD3;!?7kk%iMdL^V8F@==wy|E1CBFd39{RE8@=iqOKW@fWlTbNB0 zFtk*{7U-hhjn zga{TH=5F!F$WYv|X&D)sBN`kgiOD!5(y#`;z+pj?I(2Zeb8F~bdJav&t!ldcvw}IM ztAZDpYovW{>Xf3(F(K^!X}#I6Msrq)$qD^Yh;uTQ4fj=~yB zo1BZwKNUr)L8K9*Nc7k&gj9dS0J2+rzh;zy`V)qZOwqQ9Jy3D{H8FvLlrRxXQFs}y zuSQ-Hdwl%BNPHJKnvw2dEbgrMc{sMq#{)lM87Tk%NaXidfuGUE9B2I}B0n__<9*@7 zm`#ez{sWPpmj8!=pPnK=i~fhL`cDUc1wv68iv4Ix6G`EFb%P;dz@YwVJ*$U?vN*66 zc2#qK?~VRG-iRZr3-d^2Qxn7=7$o-S*^hr=U7X&{B*u^4kc3A|k_ zV&K@MSqhB?oXp{9?4E`e2B`z}1N-ZTVEBnfO^hZwWQy~Zl-~LjBhEq?&4mZJY#KH& zJ|!h-SQy41VTMRmKSKl-heqANFs%&X5ebPyd-h-x60}2xjvWp9h;re7nwDBbmKMv< zm^$#EOvAEv%<24TGNKN!al40wyO@VVc|H#^_6`#T*Cs(U3_{djY5G$lF}Z}OON~U` zi6`Ar*Z}@5ymYzG$v%igQJh6kY?i^M2Ey2oK_F#}1!iR;6`Dga!D9HTC^m6OOdFJj zZA|T}RX-Viy4z1}I&KcJSFlu=d;*(f@~~Oxn}|_^L2Xs?Ol*1?Y2a=!HG$OsP{7=D zGM_ic$7STEGRq0gQZaPHnQ?X1n4+$%sz2ytEY8wO-7z8cbL?zUs=kzrMJiZ;zm-aA z0;OUU$#7K*BqhrBx02&rp%^RRFrFILCWVO!sqAZnJ~cu7!6c0R*f(C5p_+tnFRaYm z=hPt2V*VZ>*GX~iq`Q!lCTBz4lbmAM(CW=vH)%pMEcQRUKo|K;EZLyWOz(iP1$`S?bx{$dtwbDRV3}=YJ!mUJH>5U%jBB 
z{8?xJ+a)n#hxvF>`1mMT6t>#HtLcNXs6{Ej`Dj^M)Z=9_>d^^PN>rMEG~WJpiT<=e z<`HU9C?mzrhW@O_{aG8S<@$j2(UP@_!LxczwU%OqE2giFp#@GZ$$X0`0u?%`XE&NI zuz3PofT7{VraKWds*~sI>Bte}Dh^13GqN6#1KCX_j-iPbR%%5wT6tCUBb`y1N)ch-3&=Kp#Vj)I#_2R3g zn^%_-aE-S1un~X8rr7_^sOZqZbNpG^a-nTwmufy9LF=ZF3pPAI(%YU=s;XK?%5kxTMTTxr}pe+9F&C1 ze|#McR|8$xoFI|r^+`!)cVC>){v(F1QWbNK#oakju_#D(f4 zG-2eOMCs5oqz$WH4$VG#Oq%`mwCF&nasiG1Ugl~~vG9nL=;$Up z&zo}x9o-%NP)X+KIyABl`p0{o=;(Os;wmYg?&p(cd?*1v#tgRt(D29qtM1R3+#}{+ z-NzI|(7Y>YxrO-`?vGp;I#K5T{#p0;@9=)c|HMY-KjHn1|BK};%`NHCg#CAyvkLBG z8vk-5^UvMizr*{nl?B=$EwD!Rcii7!dq351@d>Tj|J9Cd-Kt25ib;r?1O z3s480ylMe7NBIBugT}vhgsS^x`O6M5mYuX<=gJ4-TtaeuA2>a}t=D~6d~$gMZdCCl zYNP%W@)Q4r(<|UX{>0~LH5}XgPdy#AjuCgROLGtWk>-hQu5Ki$ds?!aL-&CjTdXYP zNs@b1Hw;9Bo!XOj^8S_sG#2!LYL~Haz*<>8(1v>+;Q;@kGJob8(f(`uKmWitN_}7; z5)B0%BOcvB2Zg;rOasY37Icpv@s4BlRibF+b{QADl6ZjU$RnKRojDS2IPL;jxt&{*cgrhpS{eHh*`fXw-IWjG@X`}-f)LH)Z>D)(7{9tr$HZypthV58tyS!a+ zFd5lHv)HnNdC1|Ok2p1hQ8=yaQ@rz^+5IuH?VrEg{rQM)78^lShkv#Q#FvTL5dU9q zyx@}82M4f>ADqDYf1RwVr#28PE>3~ygb7YAXjX5oJDkT^oQA4RJ3q#P=ka)bF_$XV z4FC4dkJty7Hhz-3Q`LXCkE1GO#1ks2R_JxKJs_4hl>)a8sCkR40?1c=0BtAWCbwAq zCRWGf?zr{AgE5SOdCb*c4QcQfKJ#y;4FSE8!mW6G>x4dr9UpC_ITQ;izBphZWj-FE z<;8Xk*hv$MlUbOfw0gjHOEg7EQ`P>^bl37AotCcUI!TLWF=blz9H3XTXCBe4v0i;Z z89TORxj%y8qq;_^u8&lwZDGBE9SLEl*=X@ZuU^F~xO~E!gVs;zD45t{p|r#vi@L0W zfg#N}aB|KLpWt!Z#4q|Mr{ScUf*(2}rZzF22_vTYTv!N?Cgc-g7+7hML{-RH8W^-? 
z3K^j50ah0yNij^{#B zKcy{uq-?i~?tgrx!^7-4IMGjO%@|+lPbmezHw34hgm{!{d+x49!&Bn8aM; z)f*;c6J&{Fxmx;L`it~0?JP66A2JCtd-)m0PC2W~$T{r*;VB~h@-tdA#AkER^EvPs zz4?=EO1&V`4}{4niVxEUJmx>en^Ua_bL=S&;{~yy<^ZKAQ)iDs8J~x$ z5TaBX{-14fQ6PSpqJzu6*1oWOfKY!*4Toc=kV}wLt{bHB9l$fd+5?G^QRG%B_P+wMkQanC3M$?|Tj@ zX{hYY9)tDz?wlyai&Z7lS!i*2H?H* z2zoM}`pGtVk79{~aj@bE`U%#KPUxygqo@Z{-15yI9n4c&C*ZKe6^t> zHV%RArBgrIrWM(o>Y0rX$<$}zgL;igqE(A7e>*q`mmdjot5)2i2{}WDA!-dJe_mvk zEk2SMA&=y_yEtFJ9eZ8Tt=OuEhls9M-S1a#xI6?~>8m}QH2u7F5OjHh>Qo)7M6jkT zR`q@x=sy%m#>*C@l)fV8Vn5(rI@p&8^dt@SlWm%iPyEjya+ny`Z`)kInuk?D=%DiO z|7_FaCq(5Y^+lV80dQg%X4tL_L=9IA;N@-;1{J;#8Ai-+9b2fdl@&p5uhA7aKfeq< z2$ZUlJwsslapk9UG;o9p5Heh;0q@kb_J8_(oQosg<%Fm!X>_m+rPY$PVAU`W&u}zE z1bPUB`pGtZ+L>6NLDwhPIc|P~Z?W=75^8Qe8^7aGX)7y&tmc^+Bvcu+Dtz7V%(Vun z9YOA_hTTLBgPmV0MVKlZ{aoVksf+aFMZ8VO{K0;`Poug)&D}PnvhQtt;nE0EthRC6A1p^ILAK3s0tSp!Nc6EZR#i9qC-}xTCW2322Xkn~;_}ymgpag!S9M2@%RN^u53qeXA+NbZ4&pQ3f zgcGx1j}iM|i5L9(+Nap3ywXK_CmOXVzs-Pe$wA2JRei7V(cZm8rBl)sH{J~C=}M4I z4FjAQr6?a*tYw`uJ2%HlCCDZ=l}Uub)WWvTa6cGts%|t&)Whf#A@B@~LXgb_zK002 z35O(;f}8Vb5i1O7yHtn8C&>M=eMwF|s<-urOD2(Yhf#j5@Tr~jv?Uh(W-^yjOI+Kef^qq2>9L>7~MO*bwB7aPS-n>EQikd{$Dm)3)xAs|xiw zzobqx)P>MM(x=uHO{fSQyo^H8V?-gA4KHk;hIL1fBcEs2EQFRJzbMsr%X6Q8@)iRJ z0+vYoWSjPZX)G#M@|vwLtQjK;QtZW?^aH35!Jv`JHagy+DewE6f-8k6qchsC9_b$@|&VPT;7T?!7PNe@l{b1o2s5v1-QfKQE z#?Q!kVh{QU*cRHLw4mYbm+FVS^n+(GW1M1O&D>{QVe2?-Edt$8s(!LfA-@LPRf`;RT=DGtTmJCOmO9)dF$N$mYy26R^iUJ-E zc0TZSyr_6DheyvM(d-apeUd5>pU*d%!IsD1Jpp_7wVL#60OUVJGgqjpoij9@AhPyY z)u4ZXFFr#AQ9{c4L<|>-e&ykcw0R*gfrS%AdKN0;QvuWuHQ3|MuyBeJ2)8GS zS~{ChvE^6TVg$LCN_OGYUH*2SP<4_>+BbJl{yH?ygfKN_{6E|DvIpmT#N(?VdQKMn zA-?BE3`Z_O9{l;eIX}O9=3!^|&^2335=iC5iLTyV?bt(+{z(>z0QPvnI#} zJ2r*$M{M=-27I3g4^ZobnrJzc1wVGQu>&WF8 z%#}dTs!%`Krn$gUR$8(f8$Wz03v`*RC`_9x?Y=DskyB8c3Uj$O(~jYLs*f*n08EQe0gZkXXJk{v=b^)+<9vZT~lU1jPll35* zpO-=NsY(f^%!Ijb>chyXsLWgTs}9yoRl?Y%$ojwCg3ENN#&HrrgEdXzQT7|f*SRA$ z7NM4;;+Q`;HSUsKX~>!;I&}4-ZH6sCMG10DSMwp~>+ji>g|&CEm+GgBdRTaBTH)nb 
z0fZ(}Cas=oLZ86G>55wj`2f4G>L2V^ALrKyxmWZKvWImG?G%Xiwl55~)UbctH|=_b zVkHyg#tiKSYiB7Hq4s(aOeg-)?0z{6TM06CaW76iI73jCui`gOMjJzrsb%_eYSh<) z>b$$e10DJlf?PtcG)~R$W$y>m6YL!zYL2MOA#=ZJoQsZv&_b$~jKN$eJ>DS*N@s{_ z+~)D~<2clX&_Lp!9Lo7M``QP|G#gB?^%-W__`*=j*vpbQbo- zWs5YDXMvQfuE;!E7^iRqIpx%4B-DYs#0|5?KEY-r$gG!Zh*clzdg%Cy(6Q&izg