From 0dc3d20d58986a323eaeaef7bcdee51e9f642826 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Wed, 16 Nov 2011 09:33:13 +0100 Subject: [PATCH 01/44] Corrected bug causing PhaseByTransmission to crash in case of new Genotype.Type --- .../walkers/phasing/PhaseByTransmission.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java index 6394e0e24..847165e3e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java @@ -151,16 +151,16 @@ public class PhaseByTransmission extends RodWalker, HashMa alleles.add(VAR); alleles.add(VAR); } - else if(genotype == Genotype.Type.NO_CALL){ - alleles.add(NO_CALL); - alleles.add(NO_CALL); - } else{ return null; } return alleles; } + private boolean isPhasable(Genotype.Type genotype){ + return genotype == Genotype.Type.HOM_REF || genotype == Genotype.Type.HET || genotype == Genotype.Type.HOM_VAR; + } + //Create a new Genotype based on information from a single individual //Homozygous genotypes will be set as phased, heterozygous won't be private void phaseSingleIndividualAlleles(Genotype.Type genotype, FamilyMember familyMember){ @@ -271,21 +271,21 @@ public class PhaseByTransmission extends RodWalker, HashMa public TrioPhase(Genotype.Type mother, Genotype.Type father, Genotype.Type child){ //Take care of cases where one or more family members are no call - if(child == Genotype.Type.NO_CALL || child == Genotype.Type.UNAVAILABLE){ + if(!isPhasable(child)){ phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER); phaseSingleIndividualAlleles(father, FamilyMember.FATHER); phaseSingleIndividualAlleles(child, FamilyMember.CHILD); } - else if(mother == Genotype.Type.NO_CALL || mother == Genotype.Type.UNAVAILABLE){ + else if(!isPhasable(mother)){ phaseSingleIndividualAlleles(mother, FamilyMember.MOTHER); - if(father == Genotype.Type.NO_CALL || father == Genotype.Type.UNAVAILABLE){ + if(!isPhasable(father)){ phaseSingleIndividualAlleles(father, FamilyMember.FATHER); phaseSingleIndividualAlleles(child, FamilyMember.CHILD); } else phasePairAlleles(father, child, FamilyMember.FATHER); } - else if(father == Genotype.Type.NO_CALL || father == Genotype.Type.UNAVAILABLE){ + else if(!isPhasable(father)){ phasePairAlleles(mother, child, FamilyMember.MOTHER); phaseSingleIndividualAlleles(father, FamilyMember.FATHER); } @@ -327,7 +327,7 @@ public class PhaseByTransmission extends RodWalker, HashMa //Note that only cases where a null/missing/unavailable genotype was passed in the first place can lead to a null/missing/unavailable //genotype so it is safe to return the original genotype in this case. //In addition, if the phasing confidence is 0, then return the unphased, original genotypes. - if(phredScoreTransmission ==0 || genotype == null || !phasedGenotype.isAvailable() || phasedGenotype.isNoCall()) + if(phredScoreTransmission ==0 || genotype == null || !isPhasable(genotype.getType())) return genotype; //Add the transmission probability From 5c2595701c0c534add4a4a9dbda1bee22ffff7d7 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Mon, 28 Nov 2011 17:10:33 +0100 Subject: [PATCH 02/44] Added a function to get families only for a given list of samples. --- .../sting/gatk/samples/SampleDB.java | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java index ee0873c6e..9f00257d1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java @@ -142,17 +142,29 @@ public class SampleDB { * @return */ public final Map> getFamilies() { + return getFamilies(null); + } + + /** + * Returns a map from family ID -> set of family members for all samples in sampleIds with + * non-null family ids + * + * @param sampleIds - all samples to include. If null is passed then all samples are returned. + * @return + */ + public final Map> getFamilies(Collection sampleIds) { final Map> families = new TreeMap>(); for ( final Sample sample : samples.values() ) { - final String famID = sample.getFamilyID(); - if ( famID != null ) { - if ( ! families.containsKey(famID) ) - families.put(famID, new TreeSet()); - families.get(famID).add(sample); + if(sampleIds != null && sampleIds.contains(sample.getID())){ + final String famID = sample.getFamilyID(); + if ( famID != null ) { + if ( ! families.containsKey(famID) ) + families.put(famID, new TreeSet()); + families.get(famID).add(sample); + } } } - return families; } From e877db8f420defd0ae2059e11481d01a686e922e Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Mon, 28 Nov 2011 17:11:30 +0100 Subject: [PATCH 03/44] Changed visibility of getSampleDB from protected to public as the sampleDB needs to be accessible from Annotators and Evaluators too. --- .../java/src/org/broadinstitute/sting/gatk/walkers/Walker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index 792fef9c3..6264808f4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -88,7 +88,7 @@ public abstract class Walker { return getToolkit().getMasterSequenceDictionary(); } - protected SampleDB getSampleDB() { + public SampleDB getSampleDB() { return getToolkit().getSampleDB(); } From 795c99d693409878dc64fad5232bb4574e7fa31d Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Mon, 28 Nov 2011 17:13:14 +0100 Subject: [PATCH 04/44] Adapted MendelianViolation to the new ped family representation. Adapted all classes using MendelianViolation too. MendelianViolationEvaluator was added a number of useful metrics on allele transmission and MVs --- .../walkers/annotator/MVLikelihoodRatio.java | 50 +- .../MendelianViolationEvaluator.java | 221 ++++---- .../walkers/variantutils/SelectVariants.java | 90 ++-- .../sting/utils/MendelianViolation.java | 476 +++++++++++++----- 4 files changed, 561 insertions(+), 276 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java index bd0d4e3fb..b9e6a5b2b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java @@ -3,22 +3,18 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.samples.Sample; +import org.broadinstitute.sting.gatk.samples.SampleDB; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.MendelianViolation; -import org.broadinstitute.sting.utils.codecs.vcf.VCFFilterHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** * Created by IntelliJ IDEA. @@ -30,23 +26,26 @@ import java.util.Map; public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation { private MendelianViolation mendelianViolation = null; + private String motherId; + private String fatherId; + private String childId; public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( mendelianViolation == null ) { - if ( walker instanceof VariantAnnotator && ((VariantAnnotator) walker).familyStr != null) { - mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).familyStr, ((VariantAnnotator)walker).minGenotypeQualityP ); + if (checkAndSetSamples(((VariantAnnotator) walker).getSampleDB())) { + mendelianViolation = new MendelianViolation(((VariantAnnotator)walker).minGenotypeQualityP ); } else { - throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid Family String file (-family) on the command line."); + throw new UserException("Mendelian violation annotation can only be used from the Variant Annotator, and must be provided a valid PED file (-ped) from the command line containing only 1 trio."); } } Map toRet = new HashMap(1); - boolean hasAppropriateGenotypes = vc.hasGenotype(mendelianViolation.getSampleChild()) && vc.getGenotype(mendelianViolation.getSampleChild()).hasLikelihoods() && - vc.hasGenotype(mendelianViolation.getSampleDad()) && vc.getGenotype(mendelianViolation.getSampleDad()).hasLikelihoods() && - vc.hasGenotype(mendelianViolation.getSampleMom()) && vc.getGenotype(mendelianViolation.getSampleMom()).hasLikelihoods(); + boolean hasAppropriateGenotypes = vc.hasGenotype(motherId) && vc.getGenotype(motherId).hasLikelihoods() && + vc.hasGenotype(fatherId) && vc.getGenotype(fatherId).hasLikelihoods() && + vc.hasGenotype(childId) && vc.getGenotype(childId).hasLikelihoods(); if ( hasAppropriateGenotypes ) - toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc)); + toRet.put("MVLR",mendelianViolation.violationLikelihoodRatio(vc,motherId,fatherId,childId)); return toRet; } @@ -55,4 +54,27 @@ public class MVLikelihoodRatio extends InfoFieldAnnotation implements Experiment public List getKeyNames() { return Arrays.asList("MVLR"); } public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("MVLR", 1, VCFHeaderLineType.Float, "Mendelian violation likelihood ratio: L[MV] - L[No MV]")); } + + private boolean checkAndSetSamples(SampleDB db){ + Set families = db.getFamilyIDs(); + if(families.size() != 1) + return false; + + Set family = db.getFamily(families.iterator().next()); + if(family.size() != 3) + return false; + + Iterator sampleIter = family.iterator(); + Sample sample; + for(sample = sampleIter.next();sampleIter.hasNext();sample=sampleIter.next()){ + if(sample.getParents().size()==2){ + motherId = sample.getMaternalID(); + fatherId = sample.getPaternalID(); + childId = sample.getID(); + return true; + } + } + return false; + } + } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index 0cadf6c0d..363f5665f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; +import org.broadinstitute.sting.gatk.samples.Sample; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -7,9 +9,11 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.MendelianViolation; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Map; +import java.util.Set; /** * Mendelian violation detection and counting @@ -40,12 +44,25 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator") public class MendelianViolationEvaluator extends VariantEvaluator { - @DataPoint(description = "Number of mendelian variants found") + @DataPoint(description = "Number of variants found with at least one family having genotypes") long nVariants; + @DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall") + long nSkipped; + @DataPoint(description="Number of variants x families called (no missing genotype or lowqual)") + long nFamCalled; + @DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.") + long nVarFamCalled; + @DataPoint(description="Number of variants x families discarded as low quality") + long nLowQual; + @DataPoint(description="Number of variants x families discarded as no call") + long nNoCall; + @DataPoint(description="Number of loci with mendelian violations") + long nLociViolations; @DataPoint(description = "Number of mendelian violations found") long nViolations; - @DataPoint(description = "number of child hom ref calls where the parent was hom variant") + + /*@DataPoint(description = "number of child hom ref calls where the parent was hom variant") long KidHomRef_ParentHomVar; @DataPoint(description = "number of child het calls where the parent was hom ref") long KidHet_ParentsHomRef; @@ -53,11 +70,65 @@ public class MendelianViolationEvaluator extends VariantEvaluator { long KidHet_ParentsHomVar; @DataPoint(description = "number of child hom variant calls where the parent was hom ref") long KidHomVar_ParentHomRef; + */ + + @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR") + long mvRefRef_Var; + @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET") + long mvRefRef_Het; + @DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR") + long mvRefHet_Var; + @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR") + long mvRefVar_Var; + @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF") + long mvRefVar_Ref; + @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF") + long mvVarHet_Ref; + @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF") + long mvVarVar_Ref; + @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET") + long mvVarVar_Het; + + + /*@DataPoint(description ="Number of inherited var alleles from het parents") + long nInheritedVar; + @DataPoint(description ="Number of inherited ref alleles from het parents") + long nInheritedRef;*/ + + @DataPoint(description="Number of HomRef/HomRef/HomRef trios") + long HomRefHomRef_HomRef; + @DataPoint(description="Number of Het/Het/Het trios") + long HetHet_Het; + @DataPoint(description="Number of Het/Het/HomRef trios") + long HetHet_HomRef; + @DataPoint(description="Number of Het/Het/HomVar trios") + long HetHet_HomVar; + @DataPoint(description="Number of HomVar/HomVar/HomVar trios") + long HomVarHomVar_HomVar; + @DataPoint(description="Number of HomRef/HomVar/Het trios") + long HomRefHomVAR_Het; + @DataPoint(description="Number of ref alleles inherited from het/het parents") + long HetHet_inheritedRef; + @DataPoint(description="Number of var alleles inherited from het/het parents") + long HetHet_inheritedVar; + @DataPoint(description="Number of ref alleles inherited from homRef/het parents") + long HomRefHet_inheritedRef; + @DataPoint(description="Number of var alleles inherited from homRef/het parents") + long HomRefHet_inheritedVar; + @DataPoint(description="Number of ref alleles inherited from homVar/het parents") + long HomVarHet_inheritedRef; + @DataPoint(description="Number of var alleles inherited from homVar/het parents") + long HomVarHet_inheritedVar; MendelianViolation mv; + PrintStream mvFile; + Map> families; public void initialize(VariantEvalWalker walker) { - mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold()); + //Changed by Laurent Francioli - 2011-06-07 + //mv = new MendelianViolation(walker.getFamilyStructure(), walker.getMendelianViolationQualThreshold()); + mv = new MendelianViolation(walker.getMendelianViolationQualThreshold(),false); + families = walker.getSampleDB().getFamilies(); } public boolean enabled() { @@ -75,110 +146,48 @@ public class MendelianViolationEvaluator extends VariantEvaluator { public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci - if (mv.setAlleles(vc)) { + + if(mv.countViolations(families,vc)>0){ + nLociViolations++; + nViolations += mv.getViolationsCount(); + mvRefRef_Var += mv.getParentsRefRefChildVar(); + mvRefRef_Het += mv.getParentsRefRefChildHet(); + mvRefHet_Var += mv.getParentsRefHetChildVar(); + mvRefVar_Var += mv.getParentsRefVarChildVar(); + mvRefVar_Ref += mv.getParentsRefVarChildRef(); + mvVarHet_Ref += mv.getParentsVarHetChildRef(); + mvVarVar_Ref += mv.getParentsVarVarChildRef(); + mvVarVar_Het += mv.getParentsVarVarChildHet(); + + } + HomRefHomRef_HomRef += mv.getRefRefRef(); + HetHet_Het += mv.getHetHetHet(); + HetHet_HomRef += mv.getHetHetHomRef(); + HetHet_HomVar += mv.getHetHetHomVar(); + HomVarHomVar_HomVar += mv.getVarVarVar(); + HomRefHomVAR_Het += mv.getRefVarHet(); + HetHet_inheritedRef += mv.getParentsHetHetInheritedRef(); + HetHet_inheritedVar += mv.getParentsHetHetInheritedVar(); + HomRefHet_inheritedRef += mv.getParentsRefHetInheritedRef(); + HomRefHet_inheritedVar += mv.getParentsRefHetInheritedVar(); + HomVarHet_inheritedRef += mv.getParentsVarHetInheritedRef(); + HomVarHet_inheritedVar += mv.getParentsVarHetInheritedVar(); + + if(mv.getFamilyCalledCount()>0){ nVariants++; - - Genotype momG = vc.getGenotype(mv.getSampleMom()); - Genotype dadG = vc.getGenotype(mv.getSampleDad()); - Genotype childG = vc.getGenotype(mv.getSampleChild()); - - if (mv.isViolation()) { - nViolations++; - - String label; - if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) { - label = "KidHomRef_ParentHomVar"; - KidHomRef_ParentHomVar++; - } else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) { - label = "KidHet_ParentsHomRef"; - KidHet_ParentsHomRef++; - } else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) { - label = "KidHet_ParentsHomVar"; - KidHet_ParentsHomVar++; - } else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) { - label = "KidHomVar_ParentHomRef"; - KidHomVar_ParentHomRef++; - } else { - throw new ReviewedStingException("BUG: unexpected child genotype class " + childG); - } - - return "MendelViolation=" + label; - } + nFamCalled += mv.getFamilyCalledCount(); + nLowQual += mv.getFamilyLowQualsCount(); + nNoCall += mv.getFamilyNoCallCount(); + nVarFamCalled += mv.getVarFamilyCalledCount(); } - } - - return null; // we don't capture any intersting sites - } - - -/* - private double getQThreshold() { - //return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred - return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred - //return 0.0; - } - - TrioStructure trio; - double mendelianViolationQualThreshold; - - private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); - - public static class TrioStructure { - public String mom, dad, child; - } - - public static TrioStructure parseTrioDescription(String family) { - Matcher m = FAMILY_PATTERN.matcher(family); - if (m.matches()) { - TrioStructure trio = new TrioStructure(); - //System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE); - trio.mom = m.group(1); - trio.dad = m.group(2); - trio.child = m.group(3); - return trio; - } else { - throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child"); - } - } - - public void initialize(VariantEvalWalker walker) { - trio = parseTrioDescription(walker.getFamilyStructure()); - mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold(); - } - - private boolean includeGenotype(Genotype g) { - return g.getLog10PError() > getQThreshold() && g.isCalled(); - } - - public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) { - return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles()); - } - - public static boolean isViolation(VariantContext vc, TrioStructure trio ) { - return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) ); - } - - public static boolean isViolation(VariantContext vc, List momA, List dadA, List childA) { - //VariantContext momVC = vc.subContextFromGenotypes(momG); - //VariantContext dadVC = vc.subContextFromGenotypes(dadG); - int i = 0; - Genotype childG = new Genotype("kidG", childA); - for (Allele momAllele : momA) { - for (Allele dadAllele : dadA) { - if (momAllele.isCalled() && dadAllele.isCalled()) { - Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele)); - if (childG.sameGenotype(possibleChild)) { - return false; - } - } + else{ + nSkipped++; } + + + return null; } - return true; + return null; // we don't capture any interesting sites } - - -*/ - - } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index b0016ff4b..fc01dae9f 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -26,9 +26,9 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.sting.gatk.samples.Sample; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.variantcontext.*; @@ -41,7 +41,6 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.utils.SampleUtils; import java.io.File; -import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.*; @@ -282,6 +281,9 @@ public class SelectVariants extends RodWalker { @Argument(fullName="select_random_fraction", shortName="fraction", doc="Selects a fraction (a number between 0 and 1) of the total variants at random from the variant track", required=false) private double fractionRandom = 0; + @Argument(fullName="remove_fraction_genotypes", shortName="fractionGenotypes", doc="Selects a fraction (a number between 0 and 1) of the total genotypes at random from the variant track and sets them to nocall", required=false) + private double fractionGenotypes = 0; + /** * This argument select particular kinds of variants out of a list. If left empty, there is no type selection and all variant types are considered for other selection criteria. * When specified one or more times, a particular type of variant is selected. @@ -325,7 +327,7 @@ public class SelectVariants extends RodWalker { private boolean DISCORDANCE_ONLY = false; private boolean CONCORDANCE_ONLY = false; - private Set mvSet = new HashSet(); + private MendelianViolation mv; /* variables used by the SELECT RANDOM modules */ @@ -344,6 +346,8 @@ public class SelectVariants extends RodWalker { private PrintStream outMVFileStream = null; + //Random number generator for the genotypes to remove + private Random randomGenotypes = new Random(); /** * Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher @@ -380,8 +384,6 @@ public class SelectVariants extends RodWalker { for ( String sample : samples ) logger.info("Including sample '" + sample + "'"); - - // if user specified types to include, add these, otherwise, add all possible variant context types to list of vc types to include if (TYPES_TO_INCLUDE.isEmpty()) { @@ -421,29 +423,7 @@ public class SelectVariants extends RodWalker { if (CONCORDANCE_ONLY) logger.info("Selecting only variants concordant with the track: " + concordanceTrack.getName()); if (MENDELIAN_VIOLATIONS) { - if ( FAMILY_STRUCTURE_FILE != null) { - try { - for ( final String line : new XReadLines( FAMILY_STRUCTURE_FILE ) ) { - MendelianViolation mv = new MendelianViolation(line, MENDELIAN_VIOLATION_QUAL_THRESHOLD); - if (samples.contains(mv.getSampleChild()) && samples.contains(mv.getSampleDad()) && samples.contains(mv.getSampleMom())) - mvSet.add(mv); - } - } catch ( FileNotFoundException e ) { - throw new UserException.CouldNotReadInputFile(FAMILY_STRUCTURE_FILE, e); - } - if (outMVFile != null) - try { - outMVFileStream = new PrintStream(outMVFile); - } - catch (FileNotFoundException e) { - throw new UserException.CouldNotCreateOutputFile(outMVFile, "Can't open output file", e); } - } - else - mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); - } - else if (!FAMILY_STRUCTURE.isEmpty()) { - mvSet.add(new MendelianViolation(FAMILY_STRUCTURE, MENDELIAN_VIOLATION_QUAL_THRESHOLD)); - MENDELIAN_VIOLATIONS = true; + mv = new MendelianViolation(MENDELIAN_VIOLATION_QUAL_THRESHOLD,false,true); } SELECT_RANDOM_NUMBER = numRandom > 0; @@ -479,26 +459,26 @@ public class SelectVariants extends RodWalker { } for (VariantContext vc : vcs) { - if (MENDELIAN_VIOLATIONS) { - boolean foundMV = false; - for (MendelianViolation mv : mvSet) { - if (mv.isViolation(vc)) { - foundMV = true; - //System.out.println(vc.toString()); - if (outMVFile != null) - outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + + if (MENDELIAN_VIOLATIONS && mv.countViolations(this.getSampleDB().getFamilies(samples),vc) < 1) + break; + + if (outMVFile != null){ + for( String familyId : mv.getViolationFamilies()){ + for(Sample sample : this.getSampleDB().getFamily(familyId)){ + if(sample.getParents().size() > 0){ + outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " + "childG=%s childGL=%s\n",vc.getChr(), vc.getStart(), vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)), - mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(), - vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), - vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(), - vc.getGenotype(mv.getSampleChild()).toBriefString(),vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsString() ); + sample.getMaternalID(), sample.getPaternalID(), sample.getID(), + vc.getGenotype(sample.getMaternalID()).toBriefString(), vc.getGenotype(sample.getMaternalID()).getLikelihoods().getAsString(), + vc.getGenotype(sample.getPaternalID()).toBriefString(), vc.getGenotype(sample.getPaternalID()).getLikelihoods().getAsString(), + vc.getGenotype(sample.getID()).toBriefString(),vc.getGenotype(sample.getID()).getLikelihoods().getAsString() ); + + } } } - - if (!foundMV) - break; } + if (DISCORDANCE_ONLY) { Collection compVCs = tracker.getValues(discordanceTrack, context.getLocation()); if (!isDiscordant(vc, compVCs)) @@ -657,9 +637,31 @@ public class SelectVariants extends RodWalker { final VariantContext sub = vc.subContextFromSamples(samples, vc.getAlleles()); VariantContextBuilder builder = new VariantContextBuilder(sub); + GenotypesContext newGC = sub.getGenotypes(); + // if we have fewer alternate alleles in the selected VC than in the original VC, we need to strip out the GL/PLs (because they are no longer accurate) if ( vc.getAlleles().size() != sub.getAlleles().size() ) - builder.genotypes(VariantContextUtils.stripPLs(vc.getGenotypes())); + newGC = VariantContextUtils.stripPLs(sub.getGenotypes()); + + //Remove a fraction of the genotypes if needed + if(fractionGenotypes>0){ + ArrayList genotypes = new ArrayList(); + for ( Genotype genotype : newGC ) { + //Set genotype to no call if it falls in the fraction. + if(fractionGenotypes>0 && randomGenotypes.nextDouble() alleles = new ArrayList(2); + alleles.add(Allele.create((byte)'.')); + alleles.add(Allele.create((byte)'.')); + genotypes.add(new Genotype(genotype.getSampleName(),alleles, Genotype.NO_LOG10_PERROR,genotype.getFilters(),new HashMap(),false)); + } + else{ + genotypes.add(genotype); + } + } + newGC = GenotypesContext.create(genotypes); + } + + builder.genotypes(newGC); int depth = 0; for (String sample : sub.getSampleNames()) { diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index cf45dab79..e140575c0 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,147 +1,399 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.samples.Sample; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.util.Collection; -import java.util.List; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * User: carneiro + * User: carneiro / lfran * Date: 3/9/11 * Time: 12:38 PM + * + * Class for the identification and tracking of mendelian violation. It can be used in 2 distinct ways: + * - Either using an instance of the MendelianViolation class to track mendelian violations for each of the families while + * walking over the variants + * - Or using the static methods to directly get information about mendelian violation in a family at a given locus + * */ public class MendelianViolation { - String sampleMom; - String sampleDad; - String sampleChild; + //List of families with violations + private List violationFamilies; - List allelesMom; - List allelesDad; - List allelesChild; + //Call information + private int nocall = 0; + private int familyCalled = 0; + private int varFamilyCalled = 0; + private int lowQual = 0; - double minGenotypeQuality; + private boolean allCalledOnly = true; + + //Stores occurrences of inheritance + private EnumMap>> inheritance; + + private int violations_total=0; + + private double minGenotypeQuality; + + private boolean abortOnSampleNotFound; + + //Number of families with genotype information for all members + public int getFamilyCalledCount(){ + return familyCalled; + } + + //Number of families with genotype information for all members + public int getVarFamilyCalledCount(){ + return varFamilyCalled; + } + + //Number of families missing genotypes for one or more of their members + public int getFamilyNoCallCount(){ + return nocall; + } + + //Number of families with genotypes below the set quality threshold + public int getFamilyLowQualsCount(){ + return lowQual; + } + + public int getViolationsCount(){ + return violations_total; + } + + //Count of alt alleles inherited from het parents (no violation) + public int getParentHetInheritedVar(){ + return getParentsHetHetInheritedVar() + getParentsRefHetInheritedVar() + getParentsVarHetInheritedVar(); + } + + //Count of ref alleles inherited from het parents (no violation) + public int getParentHetInheritedRef(){ + return getParentsHetHetInheritedRef() + getParentsRefHetInheritedRef() + getParentsVarHetInheritedRef(); + } + + //Count of HomRef/HomRef/HomRef trios + public int getRefRefRef(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF); + } + + //Count of HomVar/HomVar/HomVar trios + public int getVarVarVar(){ + return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR); + } + + //Count of HomRef/HomVar/Het trios + public int getRefVarHet(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET) + + inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET); + } + + //Count of Het/Het/Het trios + public int getHetHetHet(){ + return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET); + } + + //Count of Het/Het/HomRef trios + public int getHetHetHomRef(){ + return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF); + } + + //Count of Het/Het/HomVar trios + public int getHetHetHomVar(){ + return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR); + } + + //Count of ref alleles inherited from Het/Het parents (no violation) + public int getParentsHetHetInheritedRef(){ + return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET) + + 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF); + //return parentsHetHet_childRef; + } + + //Count of var alleles inherited from Het/Het parents (no violation) + public int getParentsHetHetInheritedVar(){ + return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HET) + + 2*inheritance.get(Genotype.Type.HET).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR); + //return parentsHetHet_childVar; + } + + //Count of ref alleles inherited from HomRef/Het parents (no violation) + public int getParentsRefHetInheritedRef(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF) + + inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF); + //return parentsHomRefHet_childRef; + } + + //Count of var alleles inherited from HomRef/Het parents (no violation) + public int getParentsRefHetInheritedVar(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HET) + + inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET); + //return parentsHomRefHet_childVar; + } + + //Count of ref alleles inherited from HomVar/Het parents (no violation) + public int getParentsVarHetInheritedRef(){ + return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HET) + + inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET); + //return parentsHomVarHet_childRef; + } + + //Count of var alleles inherited from HomVar/Het parents (no violation) + public int getParentsVarHetInheritedVar(){ + return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR) + + inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR); + //return parentsHomVarHet_childVar; + } + + //Count of violations of the type HOM_REF/HOM_REF -> HOM_VAR + public int getParentsRefRefChildVar(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR); + } + + //Count of violations of the type HOM_REF/HOM_REF -> HET + public int getParentsRefRefChildHet(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF).get(Genotype.Type.HET); + } + + //Count of violations of the type HOM_REF/HET -> HOM_VAR + public int getParentsRefHetChildVar(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR) + + inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR); + } + + //Count of violations of the type HOM_REF/HOM_VAR -> HOM_VAR + public int getParentsRefVarChildVar(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR) + + inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR); + } + + //Count of violations of the type HOM_REF/HOM_VAR -> HOM_REF + public int getParentsRefVarChildRef(){ + return inheritance.get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF) + + inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF).get(Genotype.Type.HOM_REF); + } + + //Count of violations of the type HOM_VAR/HET -> HOM_REF + public int getParentsVarHetChildRef(){ + return inheritance.get(Genotype.Type.HET).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF) + + inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET).get(Genotype.Type.HOM_REF); + } + + //Count of violations of the type HOM_VAR/HOM_VAR -> HOM_REF + public int getParentsVarVarChildRef(){ + return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_REF); + } + + //Count of violations of the type HOM_VAR/HOM_VAR -> HET + public int getParentsVarVarChildHet(){ + return inheritance.get(Genotype.Type.HOM_VAR).get(Genotype.Type.HOM_VAR).get(Genotype.Type.HET); + } + + + //Count of violations of the type HOM_VAR/? -> HOM_REF + public int getParentVarChildRef(){ + return getParentsRefVarChildRef() + getParentsVarHetChildRef() +getParentsVarVarChildRef(); + } + + //Count of violations of the type HOM_REF/? -> HOM_VAR + public int getParentRefChildVar(){ + return getParentsRefVarChildVar() + getParentsRefHetChildVar() +getParentsRefRefChildVar(); + } + + //Returns a String containing all trios where a Mendelian violation was observed. + //The String is formatted "mom1+dad1=child1,mom2+dad2=child2,..." + public String getViolationFamiliesString(){ + if(violationFamilies.isEmpty()) + return ""; + + Iterator it = violationFamilies.iterator(); + String violationFams = it.next(); + while(it.hasNext()){ + violationFams += ","+it.next(); + } + return violationFams; + } + + public List getViolationFamilies(){ + return violationFamilies; + } static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 }; static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 }; - private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); - - public String getSampleMom() { - return sampleMom; - } - public String getSampleDad() { - return sampleDad; - } - public String getSampleChild() { - return sampleChild; - } public double getMinGenotypeQuality() { return minGenotypeQuality; } - /** - * - * @param sampleMomP - sample name of mom - * @param sampleDadP - sample name of dad - * @param sampleChildP - sample name of child - */ - public MendelianViolation (String sampleMomP, String sampleDadP, String sampleChildP) { - sampleMom = sampleMomP; - sampleDad = sampleDadP; - sampleChild = sampleChildP; - } - - /** - * - * @param family - the sample names string "mom+dad=child" + /** + * Constructor * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation - */ - public MendelianViolation(String family, double minGenotypeQualityP) { - minGenotypeQuality = minGenotypeQualityP; - - Matcher m = FAMILY_PATTERN.matcher(family); - if (m.matches()) { - sampleMom = m.group(1); - sampleDad = m.group(2); - sampleChild = m.group(3); - } - else - throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child"); - } - - /** - * An alternative to the more general constructor if you want to get the Sample information from the engine yourself. - * @param sample - the sample object extracted from the sample metadata YAML file given to the engine. - * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation - */ - public MendelianViolation(Sample sample, double minGenotypeQualityP) { - sampleMom = sample.getMother().getID(); - sampleDad = sample.getFather().getID(); - sampleChild = sample.getID(); - minGenotypeQuality = minGenotypeQualityP; - } - - /** - * This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to - * isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site - * before evaluating it for mendelian violation, you can call setAlleles and then isViolation(). - * @param vc - the variant context to extract the genotypes and alleles for mom, dad and child. - * @return false if couldn't find the genotypes or context has empty alleles. True otherwise. - */ - public boolean setAlleles (VariantContext vc) - { - Genotype gMom = vc.getGenotypes(sampleMom).get(sampleMom); - Genotype gDad = vc.getGenotypes(sampleDad).get(sampleDad); - Genotype gChild = vc.getGenotypes(sampleChild).get(sampleChild); - - if (gMom == null || gDad == null || gChild == null) - throw new IllegalArgumentException(String.format("Variant %s:%d didn't contain genotypes for all family members: mom=%s dad=%s child=%s", vc.getChr(), vc.getStart(), sampleMom, sampleDad, sampleChild)); - - if (gMom.isNoCall() || gDad.isNoCall() || gChild.isNoCall() || - gMom.getPhredScaledQual() < minGenotypeQuality || - gDad.getPhredScaledQual() < minGenotypeQuality || - gChild.getPhredScaledQual() < minGenotypeQuality ) { - - return false; - } - - allelesMom = gMom.getAlleles(); - allelesDad = gDad.getAlleles(); - allelesChild = gChild.getAlleles(); - return !allelesMom.isEmpty() && !allelesDad.isEmpty() && !allelesChild.isEmpty(); - } - - - /** * + */ + public MendelianViolation(double minGenotypeQualityP) { + this(minGenotypeQualityP,true); + } + + /** + * Constructor + * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation + * @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored. + */ + public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound) { + minGenotypeQuality = minGenotypeQualityP; + this.abortOnSampleNotFound = abortOnSampleNotFound; + violationFamilies = new ArrayList(); + createInheritanceMap(); + } + + /** + * Constructor + * @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to asses mendelian violation + * @param abortOnSampleNotFound - Whether to stop execution if a family is passed but no relevant genotypes are found. If false, then the family is ignored. + * @param completeTriosOnly - whether only complete trios are considered or parent/child pairs are too. + */ + public MendelianViolation(double minGenotypeQualityP, boolean abortOnSampleNotFound, boolean completeTriosOnly) { + minGenotypeQuality = minGenotypeQualityP; + this.abortOnSampleNotFound = abortOnSampleNotFound; + violationFamilies = new ArrayList(); + createInheritanceMap(); + allCalledOnly = completeTriosOnly; + } + + /** + * @param families the families to be checked for Mendelian violations * @param vc the variant context to extract the genotypes and alleles for mom, dad and child. - * @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation. - * - */ - public boolean isViolation(VariantContext vc) - { - return setAlleles(vc) && isViolation(); - } - - /** * @return whether or not there is a mendelian violation at the site. */ - public boolean isViolation() { - if (allelesMom.contains(allelesChild.get(0)) && allelesDad.contains(allelesChild.get(1)) || - allelesMom.contains(allelesChild.get(1)) && allelesDad.contains(allelesChild.get(0))) - return false; - return true; + public int countViolations(Map> families, VariantContext vc){ + + //Reset counts + nocall = 0; + lowQual = 0; + familyCalled = 0; + varFamilyCalled = 0; + violations_total=0; + violationFamilies.clear(); + clearInheritanceMap(); + + for(Set family : families.values()){ + Iterator sampleIterator = family.iterator(); + Sample sample; + while(sampleIterator.hasNext()){ + sample = sampleIterator.next(); + if(sample.getParents().size() > 0) + updateViolations(sample.getFamilyID(),sample.getMaternalID(), sample.getPaternalID(), sample.getID() ,vc); + } + } + return violations_total; + } + + public boolean isViolation(Sample mother, Sample father, Sample child, VariantContext vc){ + + //Reset counts + nocall = 0; + lowQual = 0; + familyCalled = 0; + varFamilyCalled = 0; + violations_total=0; + violationFamilies.clear(); + clearInheritanceMap(); + updateViolations(mother.getFamilyID(),mother.getID(),father.getID(),child.getID(),vc); + return violations_total>0; + } + + + private void updateViolations(String familyId, String motherId, String fatherId, String childId, VariantContext vc){ + + int count; + Genotype gMom = vc.getGenotype(motherId); + Genotype gDad = vc.getGenotype(fatherId); + Genotype gChild = vc.getGenotype(childId); + + if (gMom == null || gDad == null || gChild == null){ + if(abortOnSampleNotFound) + throw new IllegalArgumentException(String.format("Variant %s:%d: Missing genotypes for family %s: mom=%s dad=%s family=%s", vc.getChr(), vc.getStart(), familyId, motherId, fatherId, childId)); + else + return; + } + //Count No calls + if(allCalledOnly && (!gMom.isCalled() || !gDad.isCalled() || !gChild.isCalled())){ + nocall++; + } + else if (!gMom.isCalled() && !gDad.isCalled() || !gChild.isCalled()){ + nocall++; + } + //Count lowQual. Note that if min quality is set to 0, even values with no quality associated are returned + else if (minGenotypeQuality>0 && (gMom.getPhredScaledQual() < minGenotypeQuality || + gDad.getPhredScaledQual() < minGenotypeQuality || + gChild.getPhredScaledQual() < minGenotypeQuality )) { + lowQual++; + } + else{ + //Count all families per loci called + familyCalled++; + //If the family is all homref, not too interesting + if(!(gMom.isHomRef() && gDad.isHomRef() && gChild.isHomRef())) + { + varFamilyCalled++; + if(isViolation(gMom, gDad, gChild)){ + violationFamilies.add(familyId); + violations_total++; + } + } + count = inheritance.get(gMom.getType()).get(gDad.getType()).get(gChild.getType()); + inheritance.get(gMom.getType()).get(gDad.getType()).put(gChild.getType(),count+1); + + } + } + + private boolean isViolation(Genotype gMom, Genotype gDad, Genotype gChild) { + //1 parent is no "call + if(!gMom.isCalled()){ + return (gDad.isHomRef() && gChild.isHomVar()) || (gDad.isHomVar() && gChild.isHomRef()); + } + else if(!gDad.isCalled()){ + return (gMom.isHomRef() && gChild.isHomVar()) || (gMom.isHomVar() && gChild.isHomRef()); + } + //Both parents have genotype information + return !(gMom.getAlleles().contains(gChild.getAlleles().get(0)) && gDad.getAlleles().contains(gChild.getAlleles().get(1)) || + gMom.getAlleles().contains(gChild.getAlleles().get(1)) && gDad.getAlleles().contains(gChild.getAlleles().get(0))); + } + + private void createInheritanceMap(){ + + inheritance = new EnumMap>>(Genotype.Type.class); + for(Genotype.Type mType : Genotype.Type.values()){ + inheritance.put(mType, new EnumMap>(Genotype.Type.class)); + for(Genotype.Type dType : Genotype.Type.values()){ + inheritance.get(mType).put(dType, new EnumMap(Genotype.Type.class)); + for(Genotype.Type cType : Genotype.Type.values()){ + inheritance.get(mType).get(dType).put(cType, 0); + } + } + } + + } + + private void clearInheritanceMap(){ + for(Genotype.Type mType : Genotype.Type.values()){ + for(Genotype.Type dType : Genotype.Type.values()){ + for(Genotype.Type cType : Genotype.Type.values()){ + inheritance.get(mType).get(dType).put(cType, 0); + } + } + } } /** * @return the likelihood ratio for a mendelian violation */ - public double violationLikelihoodRatio(VariantContext vc) { + public double violationLikelihoodRatio(VariantContext vc, String motherId, String fatherId, String childId) { double[] logLikAssignments = new double[27]; // the matrix to set up is // MOM DAD CHILD @@ -152,9 +404,9 @@ public class MendelianViolation { // AA AB | AB // |- BB // etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs - double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector(); - double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector(); - double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector(); + double[] momGL = vc.getGenotype(motherId).getLikelihoods().getAsVector(); + double[] dadGL = vc.getGenotype(fatherId).getLikelihoods().getAsVector(); + double[] childGL = vc.getGenotype(childId).getLikelihoods().getAsVector(); int offset = 0; for ( int oMom = 0; oMom < 3; oMom++ ) { for ( int oDad = 0; oDad < 3; oDad++ ) { From a09c01fcec2f703dfee9af8bfc07103007c723a0 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Mon, 28 Nov 2011 17:18:11 +0100 Subject: [PATCH 05/44] Removed walker argument FamilyStructure as this is now supported by the engine (ped file) --- .../gatk/walkers/varianteval/VariantEvalWalker.java | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 10d4651b7..04bbdc169 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -161,13 +161,7 @@ public class VariantEvalWalker extends RodWalker implements Tr @Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false) protected double MIN_PHASE_QUALITY = 10.0; - /** - * This argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined. - */ - @Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations", required=false) - protected String FAMILY_STRUCTURE; - - @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) + @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation. Default is 50.", required=false) protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false) @@ -529,8 +523,6 @@ public class VariantEvalWalker extends RodWalker implements Tr public double getMinPhaseQuality() { return MIN_PHASE_QUALITY; } - public String getFamilyStructure() { return FAMILY_STRUCTURE; } - public double getMendelianViolationQualThreshold() { return MENDELIAN_VIOLATION_QUAL_THRESHOLD; } public TreeSet getStratificationObjects() { return stratificationObjects; } From ab67011791ee5f25f30b9e192eb10f10e5a8e6c6 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Tue, 29 Nov 2011 11:18:15 +0100 Subject: [PATCH 06/44] Corrected bug introduced in the last update and causing no families to be returned by getFamilies in case the samples were not specified --- .../src/org/broadinstitute/sting/gatk/samples/SampleDB.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java index 9f00257d1..929ad41d1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java @@ -156,7 +156,7 @@ public class SampleDB { final Map> families = new TreeMap>(); for ( final Sample sample : samples.values() ) { - if(sampleIds != null && sampleIds.contains(sample.getID())){ + if(sampleIds == null || sampleIds.contains(sample.getID())){ final String famID = sample.getFamilyID(); if ( famID != null ) { if ( ! families.containsKey(famID) ) From 7d58db626e28283d472fe29d71643ce739eceace Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Wed, 30 Nov 2011 10:09:20 +0100 Subject: [PATCH 07/44] Added MendelianViolationEvaluator integration test --- .../varianteval/VariantEvalIntegrationTest.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 403ecce78..13876ff83 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -291,6 +291,17 @@ public class VariantEvalIntegrationTest extends WalkerTest { executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); } + @Test + public void testVEMendelianViolationEvaluator() { + String vcfFile = "/MendelianViolationEval.vcf"; + String pedFile = "/MendelianViolationEval.ped"; + + WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s", + 1, + Arrays.asList("85a8fc01a1f50839667bfcd04155f735")); + executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec); + } + @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; From f49dc5c067ed97820a1a509a17afea58142ffc77 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Wed, 30 Nov 2011 14:43:37 +0100 Subject: [PATCH 08/44] Added functionality to get all children that have both parents (useful when trios are needed) --- .../sting/gatk/samples/SampleDB.java | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java index 929ad41d1..1ed8dd7a3 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java @@ -168,6 +168,47 @@ public class SampleDB { return families; } + + /** + * Returns the set of all children that have both of their parents. + * Note that if a family is composed of more than 1 child, each child is + * returned. + * @return - all the children that have both of their parents + */ + public final Set getChildrenWithParents(){ + return getChildrenWithParents(false); + } + + /** + * Returns the set of all children that have both of their parents. + * Note that if triosOnly = false, a family is composed of more than 1 child, each child is + * returned. + * + * This method can be used wherever trios are needed + * + * @param triosOnly - if set to true, only strict trios are returned + * @return - all the children that have both of their parents + */ + public final Set getChildrenWithParents(boolean triosOnly) { + + Map> families = getFamilies(); + final Set childrenWithParents = new HashSet(); + Iterator sampleIterator; + + for ( Set familyMembers: families.values() ) { + if(triosOnly && familyMembers.size() != 3) + continue; + + sampleIterator = familyMembers.iterator(); + for(Sample sample = sampleIterator.next(); sampleIterator.hasNext(); sample = sampleIterator.next()){ + if(sample.getParents().size() == 2 && familyMembers.containsAll(sample.getParents())) + childrenWithParents.add(sample); + } + + } + return childrenWithParents; + } + /** * Return all samples with a given family ID * @param familyId From 9574be0394d9e61255548c63a95348c52fc26b68 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Wed, 30 Nov 2011 14:44:15 +0100 Subject: [PATCH 09/44] Updated MendelianViolationEvaluator integration test --- .../gatk/walkers/varianteval/VariantEvalIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 13876ff83..e2a5d89a7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -296,7 +296,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { String vcfFile = "/MendelianViolationEval.vcf"; String pedFile = "/MendelianViolationEval.ped"; - WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s", + WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST", 1, Arrays.asList("85a8fc01a1f50839667bfcd04155f735")); executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec); From 1cb5e9e149161e88890a74fb4d1f523c7313b139 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Wed, 30 Nov 2011 14:45:04 +0100 Subject: [PATCH 10/44] Removed outdated (and unused) -familyStr commandline argument --- .../sting/gatk/walkers/annotator/VariantAnnotator.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index 143f2eb2e..94902e828 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -167,9 +167,6 @@ public class VariantAnnotator extends RodWalker implements Ann @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false) protected boolean indelsOnly = false; - @Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation") - public String familyStr = null; - @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio") public double minGenotypeQualityP = 0.0; From 20bffe0430a6fe79b8e9cc558bd3a55e25c91f37 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Wed, 30 Nov 2011 14:46:38 +0100 Subject: [PATCH 11/44] Adapted for the new version of MendelianViolation --- .../TransmissionDisequilibriumTest.java | 57 ++++++++----------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java index 3de179365..6cc8923e8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java @@ -12,10 +12,8 @@ import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.exceptions.UserException; -import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.variantcontext.VariantContext; -import java.io.FileNotFoundException; import java.util.*; /** @@ -26,42 +24,33 @@ import java.util.*; public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation { - private Set fullMVSet = null; + private Set trios = null; private final static int REF = 0; private final static int HET = 1; private final static int HOM = 2; public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { - if ( fullMVSet == null ) { - fullMVSet = new HashSet(); - + if ( trios == null ) { if ( walker instanceof VariantAnnotator ) { - final Map> families = ((VariantAnnotator) walker).getSampleDB().getFamilies(); - for( final Set family : families.values() ) { - for( final Sample sample : family ) { - if( sample.getParents().size() == 2 && family.containsAll(sample.getParents()) ) { // only works with trios for now - fullMVSet.add( new MendelianViolation(sample, 0.0) ); - } - } - } + trios = ((VariantAnnotator) walker).getSampleDB().getChildrenWithParents(); } else { throw new UserException("Transmission disequilibrium test annotation can only be used from the Variant Annotator and requires a valid ped file be passed in."); } } final Map toRet = new HashMap(1); - final HashSet mvsToTest = new HashSet(); + final HashSet triosToTest = new HashSet(); - for( final MendelianViolation mv : fullMVSet ) { - final boolean hasAppropriateGenotypes = vc.hasGenotype(mv.getSampleChild()) && vc.getGenotype(mv.getSampleChild()).hasLikelihoods() && - vc.hasGenotype(mv.getSampleDad()) && vc.getGenotype(mv.getSampleDad()).hasLikelihoods() && - vc.hasGenotype(mv.getSampleMom()) && vc.getGenotype(mv.getSampleMom()).hasLikelihoods(); + for( final Sample child : trios) { + final boolean hasAppropriateGenotypes = vc.hasGenotype(child.getID()) && vc.getGenotype(child.getID()).hasLikelihoods() && + vc.hasGenotype(child.getPaternalID()) && vc.getGenotype(child.getPaternalID()).hasLikelihoods() && + vc.hasGenotype(child.getMaternalID()) && vc.getGenotype(child.getMaternalID()).hasLikelihoods(); if ( hasAppropriateGenotypes ) { - mvsToTest.add(mv); + triosToTest.add(child); } } - toRet.put("TDT", calculateTDT( vc, mvsToTest )); + toRet.put("TDT", calculateTDT( vc, triosToTest )); return toRet; } @@ -72,27 +61,27 @@ public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implemen public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("TDT", 1, VCFHeaderLineType.Float, "Test statistic from Wittkowski transmission disequilibrium test.")); } // Following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT - private double calculateTDT( final VariantContext vc, final Set mvsToTest ) { + private double calculateTDT( final VariantContext vc, final Set triosToTest ) { - final double nABGivenABandBB = calculateNChildren(vc, mvsToTest, HET, HET, HOM); - final double nBBGivenABandBB = calculateNChildren(vc, mvsToTest, HOM, HET, HOM); - final double nAAGivenABandAB = calculateNChildren(vc, mvsToTest, REF, HET, HET); - final double nBBGivenABandAB = calculateNChildren(vc, mvsToTest, HOM, HET, HET); - final double nAAGivenAAandAB = calculateNChildren(vc, mvsToTest, REF, REF, HET); - final double nABGivenAAandAB = calculateNChildren(vc, mvsToTest, HET, REF, HET); + final double nABGivenABandBB = calculateNChildren(vc, triosToTest, HET, HET, HOM); + final double nBBGivenABandBB = calculateNChildren(vc, triosToTest, HOM, HET, HOM); + final double nAAGivenABandAB = calculateNChildren(vc, triosToTest, REF, HET, HET); + final double nBBGivenABandAB = calculateNChildren(vc, triosToTest, HOM, HET, HET); + final double nAAGivenAAandAB = calculateNChildren(vc, triosToTest, REF, REF, HET); + final double nABGivenAAandAB = calculateNChildren(vc, triosToTest, HET, REF, HET); final double numer = (nABGivenABandBB - nBBGivenABandBB) + 2.0 * (nAAGivenABandAB - nBBGivenABandAB) + (nAAGivenAAandAB - nABGivenAAandAB); final double denom = (nABGivenABandBB + nBBGivenABandBB) + 4.0 * (nAAGivenABandAB + nBBGivenABandAB) + (nAAGivenAAandAB + nABGivenAAandAB); return (numer * numer) / denom; } - private double calculateNChildren( final VariantContext vc, final Set mvsToTest, final int childIdx, final int momIdx, final int dadIdx ) { - final double likelihoodVector[] = new double[mvsToTest.size() * 2]; + private double calculateNChildren( final VariantContext vc, final Set triosToTest, final int childIdx, final int momIdx, final int dadIdx ) { + final double likelihoodVector[] = new double[triosToTest.size() * 2]; int iii = 0; - for( final MendelianViolation mv : mvsToTest ) { - final double[] momGL = vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsVector(); - final double[] dadGL = vc.getGenotype(mv.getSampleDad()).getLikelihoods().getAsVector(); - final double[] childGL = vc.getGenotype(mv.getSampleChild()).getLikelihoods().getAsVector(); + for( final Sample child : triosToTest ) { + final double[] momGL = vc.getGenotype(child.getMaternalID()).getLikelihoods().getAsVector(); + final double[] dadGL = vc.getGenotype(child.getPaternalID()).getLikelihoods().getAsVector(); + final double[] childGL = vc.getGenotype(child.getID()).getLikelihoods().getAsVector(); likelihoodVector[iii++] = momGL[momIdx] + dadGL[dadIdx] + childGL[childIdx]; likelihoodVector[iii++] = momGL[dadIdx] + dadGL[momIdx] + childGL[childIdx]; } From 1d5d200790866c635df9fd29fc6597c9f1d13d66 Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Wed, 30 Nov 2011 15:30:30 +0100 Subject: [PATCH 12/44] Cleaned up unused import statements --- .../org/broadinstitute/sting/utils/MendelianViolation.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java index e140575c0..b9c209e69 100755 --- a/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java +++ b/public/java/src/org/broadinstitute/sting/utils/MendelianViolation.java @@ -1,15 +1,10 @@ package org.broadinstitute.sting.utils; import org.broadinstitute.sting.gatk.samples.Sample; -import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** * User: carneiro / lfran From 71f793b71bdead2353e911d8e55640936f6da9f1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 2 Dec 2011 14:13:14 -0500 Subject: [PATCH 13/44] First partially working version of the multi-allelic version of the Exact AF calculation --- .../genotyper/ExactAFCalculationModel.java | 256 +++++++++++++++++- .../genotyper/UnifiedArgumentCollection.java | 5 + 2 files changed, 257 insertions(+), 4 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 5d0b6f0a7..ae7c2f5c1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; @@ -44,8 +45,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. private final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + private final boolean USE_MULTI_ALLELIC_CALCULATION; + + protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { super(UAC, N, logger, verboseWriter); + USE_MULTI_ALLELIC_CALCULATION = UAC.MULTI_ALLELIC; } public void getLog10PNonRef(GenotypesContext GLs, List alleles, @@ -60,9 +65,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { for (int k=1; k < numAlleles; k++) { // multi-allelic approximation, part 1: Ideally // for each alt allele compute marginal (suboptimal) posteriors - - // compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of - // as a row-wise upper triangular matrix of likelihoods. - // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC. + // compute indices for AA,AB,BB for current allele - genotype log10Likelihoods are a linear vector that can be thought of + // as a row-wise upper triangular matrix of log10Likelihoods. + // So, for example, with 2 alt alleles, log10Likelihoods have AA,AB,AC,BB,BC,CC. // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD final int idxAA = 0; @@ -74,7 +79,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final int idxBB = idxDiag; idxDiag += incr--; - final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); + final int lastK = USE_MULTI_ALLELIC_CALCULATION ? + linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, false) : + linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); if (numAlleles > 2) { posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone(); @@ -221,6 +228,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return lastK; } + final static double approximateLog10SumLog10(double[] vals) { + if ( vals.length < 2 ) + throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10"); + + double approx = approximateLog10SumLog10(vals[0], vals[1]); + for ( int i = 2; i < vals.length; i++ ) + approx = approximateLog10SumLog10(approx, vals[i]); + return approx; + } + final static double approximateLog10SumLog10(double a, double b, double c) { //return softMax(new double[]{a, b, c}); return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c); @@ -256,6 +273,237 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } + // ------------------------------------------------------------------------------------- + // + // Multi-allelic implementation. + // + // ------------------------------------------------------------------------------------- + + private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first + private static final int AC_ZERO_INDEX = 0; // ExactACset index for k=0 over all k + + // This class represents a column in the Exact AC calculation matrix + private static final class ExactACset { + final int[] ACcounts; + final double[] log10Likelihoods; + final HashMap ACsetIndexToPLIndex = new HashMap(); + final ArrayList dependentACsetsToDelete = new ArrayList(); + + private int index = -1; + + public ExactACset(int size, int[] ACcounts) { + this.ACcounts = ACcounts; + log10Likelihoods = new double[size]; + } + + public int getIndex() { + if ( index == -1 ) + index = generateIndex(ACcounts, log10Likelihoods.length); + return index; + } + + public static int generateIndex(int[] ACcounts, int multiplier) { + int index = 0; + for ( int i = 0; i < ACcounts.length; i++ ) + index += Math.pow(multiplier, i) * ACcounts[i]; + return index; + } + + public int getACsum() { + int sum = 0; + for ( int count : ACcounts ) + sum += count; + return sum; + } + } + + public int linearExactMultiAllelic(GenotypesContext GLs, + int numAlternateAlleles, + double[] log10AlleleFrequencyPriors, + double[] log10AlleleFrequencyPosteriors, + boolean preserveData) { + + final ArrayList genotypeLikelihoods = getGLs(GLs); + final int numSamples = genotypeLikelihoods.size()-1; + final int numChr = 2*numSamples; + + // queue of AC conformations to process + final Queue ACqueue = new LinkedList(); + + // mapping of ExactACset indexes to the objects + final HashMap indexesToACset = new HashMap(numChr+1); + + // add AC=0 to the queue + int[] zeroCounts = new int[numAlternateAlleles]; + ExactACset zeroSet = new ExactACset(numSamples+1, zeroCounts); + ACqueue.add(zeroSet); + indexesToACset.put(0, zeroSet); + + // keep processing while we have AC conformations that need to be calculated + double maxLog10L = Double.NEGATIVE_INFINITY; + while ( !ACqueue.isEmpty() ) { + // compute log10Likelihoods + final ExactACset set = ACqueue.remove(); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPosteriors, log10AlleleFrequencyPriors); + + // adjust max likelihood seen if needed + maxLog10L = Math.max(maxLog10L, log10LofKs); + } + + // TODO -- finish me + + return 0; + } + + private static double calculateAlleleCountConformation(final ExactACset set, + final ArrayList genotypeLikelihoods, + final double maxLog10L, + final int numChr, + final boolean preserveData, + final Queue ACqueue, + final HashMap indexesToACset, + double[] log10AlleleFrequencyPriors, + double[] log10AlleleFrequencyPosteriors) { + + // compute the log10Likelihoods + computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPosteriors, log10AlleleFrequencyPriors); + + // clean up memory + if ( !preserveData ) { + for ( int index : set.dependentACsetsToDelete ) + indexesToACset.put(index, null); + } + + final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; + + // can we abort early because the log10Likelihoods are so small? + if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( DEBUG ) System.out.printf(" *** breaking early ks=%d log10L=%.2f maxLog10L=%.2f%n", set.index, log10LofK, maxLog10L); + return log10LofK; + } + + // iterate over higher frequencies if possible + int ACwiggle = numChr - set.getACsum(); + if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N + return log10LofK; + + ExactACset lastSet = null; + int numAltAlleles = set.ACcounts.length; + + // genotype log10Likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. + // So e.g. with 2 alt alleles the log10Likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. + + // do it for the k+1 case + int PLindex = 0; + for ( int allele = 0; allele < numAltAlleles; allele++ ) { + int[] ACcountsClone = set.ACcounts.clone(); + ACcountsClone[allele]++; + lastSet = updateACset(ACcountsClone, numChr, set.getIndex(), ++PLindex, ACqueue, indexesToACset); + } + + // do it for the k+2 case if it makes sense; note that the 2 alleles may be the same or different + if ( ACwiggle > 1 ) { + for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { + for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) { + int[] ACcountsClone = set.ACcounts.clone(); + ACcountsClone[allele_i]++; + ACcountsClone[allele_j]++; + lastSet = updateACset(ACcountsClone, numChr,set.getIndex(), ++PLindex , ACqueue, indexesToACset); + } + } + } + + if ( lastSet == null ) + throw new ReviewedStingException("No new AC sets were added or updated but the AC still hasn't reached 2N"); + lastSet.dependentACsetsToDelete.add(set.index); + + return log10LofK; + } + + private static ExactACset updateACset(int[] ACcounts, + int numChr, + final int callingSetIndex, + final int PLsetIndex, + final Queue ACqueue, + final HashMap indexesToACset) { + final int index = ExactACset.generateIndex(ACcounts, numChr+1); + if ( !indexesToACset.containsKey(index) ) { + ExactACset set = new ExactACset(numChr/2 +1, ACcounts); + indexesToACset.put(index, set); + ACqueue.add(set); + } + + // add the given dependency to the set + ExactACset set = indexesToACset.get(index); + set.ACsetIndexToPLIndex.put(callingSetIndex, PLsetIndex); + return set; + } + + private static void computeLofK(ExactACset set, + ArrayList genotypeLikelihoods, + final HashMap indexesToACset, + double[] log10AlleleFrequencyPriors, + double[] log10AlleleFrequencyPosteriors) { + + set.log10Likelihoods[0] = 0.0; // the zero case + int totalK = set.getACsum(); + + // special case for k = 0 over all k + if ( set.getIndex() == AC_ZERO_INDEX ) { + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) + set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; + } + // k > 0 for at least one k + else { + // all possible likelihoods for a given cell from which to choose the max + final int numPaths = set.ACsetIndexToPLIndex.size() + 1; + final double[] log10ConformationLikelihoods = new double[numPaths]; + + for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { + final double[] gl = genotypeLikelihoods.get(j); + final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; + + for ( int i = 0; i < numPaths; i++ ) + log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY; + + // deal with the AA case first + if ( totalK < 2*j-1 ) + log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; + + // deal with the other possible conformations now + if ( totalK < 2*j ) { + int conformationIndex = 1; + for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) + log10ConformationLikelihoods[conformationIndex++] = + determineCoefficient(mapping.getValue(), j, totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()]; + } + + double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods); + + // finally, update the L(j,k) value + set.log10Likelihoods[j] = log10Max - logDenominator; + } + } + + // update the posteriors vector + final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; + + // TODO -- this needs to be fixed; hard-coding in the biallelic case + log10AlleleFrequencyPosteriors[totalK] = log10LofK + log10AlleleFrequencyPriors[totalK]; + } + + private static double determineCoefficient(int PLindex, int j, int totalK) { + + // TODO -- the math here needs to be fixed and checked; hard-coding in the biallelic case + //AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. + + double coeff; + if ( PLindex == 1 ) + coeff = MathUtils.log10Cache[2*totalK] + MathUtils.log10Cache[2*j-totalK]; + else + coeff = MathUtils.log10Cache[totalK] + MathUtils.log10Cache[totalK-1]; + return coeff; + } /** * Can be overridden by concrete subclasses diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 62218416d..d7101da6b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -153,6 +153,10 @@ public class UnifiedArgumentCollection { @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false) public boolean IGNORE_SNP_ALLELES = false; + @Hidden + @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow multiple alleles in discovery", required = false) + public boolean MULTI_ALLELIC = false; + // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! public UnifiedArgumentCollection clone() { @@ -180,6 +184,7 @@ public class UnifiedArgumentCollection { // todo- arguments to remove uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES; uac.BANDED_INDEL_COMPUTATION = BANDED_INDEL_COMPUTATION; + uac.MULTI_ALLELIC = MULTI_ALLELIC; return uac; } From 29662be3d771718e41828d188132f679c0dc01d1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sat, 3 Dec 2011 23:12:04 -0500 Subject: [PATCH 14/44] Fixed bug where k=2N case wasn't properly being computed. Added optimization for BB genotype case not in old model. At this point, integration tests pass except for 1 case where QUALs differ by 0.01 (this is okay because I occasionally need to compute extra cells in the matrix which affects the approximations) and 2 cases where multi-allelic indels are being genotyped (some work still needs to be done to support them). --- .../genotyper/ExactAFCalculationModel.java | 15 +++++++-------- .../UnifiedGenotyperIntegrationTest.java | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index ae7c2f5c1..26c777296 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -317,11 +317,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - public int linearExactMultiAllelic(GenotypesContext GLs, - int numAlternateAlleles, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, - boolean preserveData) { + static public int linearExactMultiAllelic(GenotypesContext GLs, + int numAlternateAlleles, + double[] log10AlleleFrequencyPriors, + double[] log10AlleleFrequencyPosteriors, + boolean preserveData) { final ArrayList genotypeLikelihoods = getGLs(GLs); final int numSamples = genotypeLikelihoods.size()-1; @@ -350,8 +350,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { maxLog10L = Math.max(maxLog10L, log10LofKs); } - // TODO -- finish me - + // TODO -- why do we need to return anything here? return 0; } @@ -471,7 +470,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { log10ConformationLikelihoods[0] = MathUtils.log10Cache[2*j-totalK] + MathUtils.log10Cache[2*j-totalK-1] + set.log10Likelihoods[j-1] + gl[HOM_REF_INDEX]; // deal with the other possible conformations now - if ( totalK < 2*j ) { + if ( totalK <= 2*j ) { // skip impossible conformations int conformationIndex = 1; for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) log10ConformationLikelihoods[conformationIndex++] = diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 34e1ad30e..11e086db8 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -29,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("286f0de92e4ce57986ba861390c6019d")); + Arrays.asList("b70732a2f63f8409b61e41fa53eaae3e")); executeTest("test MultiSample Pilot1", spec); } From eab2b76c9b9baba726eb86a1b6992e6576ca77fe Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sat, 3 Dec 2011 23:54:42 -0500 Subject: [PATCH 15/44] Added loads of comments for future reference --- .../genotyper/ExactAFCalculationModel.java | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 26c777296..c7d91f524 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -284,11 +284,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // This class represents a column in the Exact AC calculation matrix private static final class ExactACset { + + // the counts of the various alternate alleles which this column represents final int[] ACcounts; + + // the column of the matrix final double[] log10Likelihoods; + + // mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column; + // for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB. final HashMap ACsetIndexToPLIndex = new HashMap(); + + // to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them final ArrayList dependentACsetsToDelete = new ArrayList(); + // index used to represent this set in the global hashmap: (numSamples^0 * allele_1) + (numSamples^1 * allele_2) + (numSamples^2 * allele_3) + ... private int index = -1; public ExactACset(int size, int[] ACcounts) { @@ -309,6 +319,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return index; } + // sum of all the non-reference alleles public int getACsum() { int sum = 0; for ( int count : ACcounts ) @@ -361,8 +372,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final boolean preserveData, final Queue ACqueue, final HashMap indexesToACset, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors) { + final double[] log10AlleleFrequencyPriors, + final double[] log10AlleleFrequencyPosteriors) { // compute the log10Likelihoods computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPosteriors, log10AlleleFrequencyPriors); @@ -383,16 +394,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // iterate over higher frequencies if possible int ACwiggle = numChr - set.getACsum(); - if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N + if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies return log10LofK; - ExactACset lastSet = null; + ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing int numAltAlleles = set.ACcounts.length; - // genotype log10Likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. - // So e.g. with 2 alt alleles the log10Likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. + // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. + // so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. - // do it for the k+1 case + // add conformations for the k+1 case int PLindex = 0; for ( int allele = 0; allele < numAltAlleles; allele++ ) { int[] ACcountsClone = set.ACcounts.clone(); @@ -400,7 +411,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { lastSet = updateACset(ACcountsClone, numChr, set.getIndex(), ++PLindex, ACqueue, indexesToACset); } - // do it for the k+2 case if it makes sense; note that the 2 alleles may be the same or different + // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different if ( ACwiggle > 1 ) { for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) { @@ -419,6 +430,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return log10LofK; } + // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and + // also adds it as a dependency to the given callingSetIndex. private static ExactACset updateACset(int[] ACcounts, int numChr, final int callingSetIndex, @@ -462,6 +475,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double[] gl = genotypeLikelihoods.get(j); final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; + // initialize for ( int i = 0; i < numPaths; i++ ) log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY; From a7cb941417598afd75cce4e3e6ead5db8f50e574 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 4 Dec 2011 13:02:53 -0500 Subject: [PATCH 16/44] The posteriors vector is now 2 dimensional so that it supports multiple alleles (although the UG is still hard-coded to use only array[0] for now); the exact model now collapses probabilities for all conformations over a given AC into the posteriors array (in the appropriate dimension). Fixed a bug where the priors and posteriors were being passed in swapped. --- .../AlleleFrequencyCalculationModel.java | 6 +- .../genotyper/ExactAFCalculationModel.java | 105 +++++++----------- .../genotyper/UnifiedGenotyperEngine.java | 47 ++++---- 3 files changed, 67 insertions(+), 91 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index a8ce98945..c2f950ef5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -52,7 +52,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { protected enum GenotypeType { AA, AB, BB } - protected static final double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; + protected static final double VALUE_NOT_CALCULATED = Double.NEGATIVE_INFINITY; protected AlleleFrequencyCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { this.N = N; @@ -69,7 +69,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { */ protected abstract void getLog10PNonRef(GenotypesContext GLs, List Alleles, double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors); + double[][] log10AlleleFrequencyPosteriors); /** * Can be overridden by concrete subclasses @@ -80,6 +80,6 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { * @return calls */ protected abstract GenotypesContext assignGenotypes(VariantContext vc, - double[] log10AlleleFrequencyPosteriors, + double[][] log10AlleleFrequencyPosteriors, int AFofMaxLikelihood); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index c7d91f524..0fa311303 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -41,9 +41,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // private final static boolean DEBUG = false; private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 - private final boolean SIMPLE_GREEDY_GENOTYPER = false; private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - private final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + + private static final boolean SIMPLE_GREEDY_GENOTYPER = false; + private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private final boolean USE_MULTI_ALLELIC_CALCULATION; @@ -55,48 +56,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public void getLog10PNonRef(GenotypesContext GLs, List alleles, double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors) { + double[][] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); - final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null; - final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null; - int idxDiag = numAlleles; - int incr = numAlleles - 1; - for (int k=1; k < numAlleles; k++) { - // multi-allelic approximation, part 1: Ideally - // for each alt allele compute marginal (suboptimal) posteriors - - // compute indices for AA,AB,BB for current allele - genotype log10Likelihoods are a linear vector that can be thought of - // as a row-wise upper triangular matrix of log10Likelihoods. - // So, for example, with 2 alt alleles, log10Likelihoods have AA,AB,AC,BB,BC,CC. - // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD - - final int idxAA = 0; - final int idxAB = k; - // yy is always element on the diagonal. - // 2 alleles: BBelement 2 - // 3 alleles: BB element 3. CC element 5 - // 4 alleles: - final int idxBB = idxDiag; - idxDiag += incr--; - - final int lastK = USE_MULTI_ALLELIC_CALCULATION ? - linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, false) : - linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB); - - if (numAlleles > 2) { - posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone(); - bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors); - } - } - - if (numAlleles > 2) { - // multiallelic approximation, part 2: - // report posteriors for allele that has highest estimated AC - int mostLikelyAlleleIdx = MathUtils.maxElementIndex(bestAFguess); - for (int k=0; k < log10AlleleFrequencyPosteriors.length-1; k++) - log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]); - - } + if ( USE_MULTI_ALLELIC_CALCULATION ) + linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, false); + else + linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); } private static final ArrayList getGLs(GenotypesContext GLs) { @@ -161,7 +127,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public int linearExact(GenotypesContext GLs, double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) { + double[][] log10AlleleFrequencyPosteriors) { final ArrayList genotypeLikelihoods = getGLs(GLs); final int numSamples = genotypeLikelihoods.size()-1; final int numChr = 2*numSamples; @@ -178,7 +144,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( k == 0 ) { // special case for k = 0 for ( int j=1; j <= numSamples; j++ ) { - kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA]; + kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0]; } } else { // k > 0 final double[] kMinus1 = logY.getkMinus1(); @@ -191,14 +157,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { double aa = Double.NEGATIVE_INFINITY; double ab = Double.NEGATIVE_INFINITY; if (k < 2*j-1) - aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[idxAA]; + aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0]; if (k < 2*j) - ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[idxAB]; + ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1]; double log10Max; if (k > 1) { - final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[idxBB]; + final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2]; log10Max = approximateLog10SumLog10(aa, ab, bb); } else { // we know we aren't considering the BB case, so we can use an optimized log10 function @@ -212,7 +178,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // update the posteriors vector final double log10LofK = kMinus0[numSamples]; - log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k]; + log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k]; // can we abort early? lastK = k; @@ -239,7 +205,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } final static double approximateLog10SumLog10(double a, double b, double c) { - //return softMax(new double[]{a, b, c}); return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c); } @@ -328,11 +293,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - static public int linearExactMultiAllelic(GenotypesContext GLs, - int numAlternateAlleles, - double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors, - boolean preserveData) { + public static void linearExactMultiAllelic(GenotypesContext GLs, + int numAlternateAlleles, + double[] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyPosteriors, + boolean preserveData) { final ArrayList genotypeLikelihoods = getGLs(GLs); final int numSamples = genotypeLikelihoods.size()-1; @@ -355,14 +320,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPosteriors, log10AlleleFrequencyPriors); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); // adjust max likelihood seen if needed maxLog10L = Math.max(maxLog10L, log10LofKs); } - - // TODO -- why do we need to return anything here? - return 0; } private static double calculateAlleleCountConformation(final ExactACset set, @@ -373,10 +335,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final Queue ACqueue, final HashMap indexesToACset, final double[] log10AlleleFrequencyPriors, - final double[] log10AlleleFrequencyPosteriors) { + final double[][] log10AlleleFrequencyPosteriors) { // compute the log10Likelihoods - computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPosteriors, log10AlleleFrequencyPriors); + computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); // clean up memory if ( !preserveData ) { @@ -455,7 +417,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { ArrayList genotypeLikelihoods, final HashMap indexesToACset, double[] log10AlleleFrequencyPriors, - double[] log10AlleleFrequencyPosteriors) { + double[][] log10AlleleFrequencyPosteriors) { set.log10Likelihoods[0] = 0.0; // the zero case int totalK = set.getACsum(); @@ -501,8 +463,21 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // update the posteriors vector final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; - // TODO -- this needs to be fixed; hard-coding in the biallelic case - log10AlleleFrequencyPosteriors[totalK] = log10LofK + log10AlleleFrequencyPriors[totalK]; + // determine the power of theta to use + int nonRefAlleles = 0; + for ( int i = 0; i < set.ACcounts.length; i++ ) { + if ( set.ACcounts[i] > 0 ) + nonRefAlleles++; + } + if ( nonRefAlleles == 0 ) // for k=0 we still want to use a power of 1 + nonRefAlleles++; + + // update the posteriors vector which is a collapsed view of each of the various ACs + for ( int i = 0; i < set.ACcounts.length; i++ ) { + // TODO -- double check the math and then cache these values for efficiency + double prior = Math.pow(log10AlleleFrequencyPriors[totalK], nonRefAlleles); + log10AlleleFrequencyPosteriors[i][set.ACcounts[i]] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][set.ACcounts[i]], log10LofK + prior); + } } private static double determineCoefficient(int PLindex, int j, int totalK) { @@ -521,18 +496,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { /** * Can be overridden by concrete subclasses * @param vc variant context with genotype likelihoods - * @param log10AlleleFrequencyPosteriors allele frequency results * @param AFofMaxLikelihood allele frequency of max likelihood * * @return calls */ public GenotypesContext assignGenotypes(VariantContext vc, - double[] log10AlleleFrequencyPosteriors, + double[][] log10AlleleFrequencyPosteriors, int AFofMaxLikelihood) { if ( !vc.isVariant() ) throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart()); - GenotypesContext GLs = vc.getGenotypes(); double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1]; int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index c861af1a2..148313f43 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -77,7 +77,7 @@ public class UnifiedGenotyperEngine { private final double[] log10AlleleFrequencyPriorsIndels; // the allele frequency likelihoods (allocated once as an optimization) - private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); + private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); // the priors object private final GenotypePriors genotypePriorsSNPs; @@ -295,7 +295,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - log10AlleleFrequencyPosteriors.set(new double[N+1]); + log10AlleleFrequencyPosteriors.set(new double[1][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } @@ -310,10 +310,10 @@ public class UnifiedGenotyperEngine { afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); // find the most likely frequency - int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); + int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]); // calculate p(f>0) - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()); + double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[0]); double sum = 0.0; for (int i = 1; i <= N; i++) sum += normalizedPosteriors[i]; @@ -323,15 +323,15 @@ public class UnifiedGenotyperEngine { if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; + phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0]; } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { sum = 0.0; for (int i = 1; i <= N; i++) { - if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) break; - sum += log10AlleleFrequencyPosteriors.get()[i]; + sum += log10AlleleFrequencyPosteriors.get()[0][i]; } phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); } @@ -367,7 +367,7 @@ public class UnifiedGenotyperEngine { clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); + double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod @@ -375,8 +375,8 @@ public class UnifiedGenotyperEngine { clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); + double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; + double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); //if ( DEBUG_SLOD ) System.out.println("forwardLog10PofNull=" + forwardLog10PofNull + ", forwardLog10PofF=" + forwardLog10PofF); // the reverse lod @@ -384,8 +384,8 @@ public class UnifiedGenotyperEngine { clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); - double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; - double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get(), 1); + double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; + double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); //if ( DEBUG_SLOD ) System.out.println("reverseLog10PofNull=" + reverseLog10PofNull + ", reverseLog10PofF=" + reverseLog10PofF); double forwardLod = forwardLog10PofF + reverseLog10PofNull - overallLog10PofF; @@ -440,7 +440,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - log10AlleleFrequencyPosteriors.set(new double[N+1]); + log10AlleleFrequencyPosteriors.set(new double[1][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } @@ -453,10 +453,10 @@ public class UnifiedGenotyperEngine { afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); // find the most likely frequency - int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()); + int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]); // calculate p(f>0) - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()); + double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[0]); double sum = 0.0; for (int i = 1; i <= N; i++) sum += normalizedPosteriors[i]; @@ -466,15 +466,15 @@ public class UnifiedGenotyperEngine { if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0]; + phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0]; } else { phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); if ( Double.isInfinite(phredScaledConfidence) ) { sum = 0.0; for (int i = 1; i <= N; i++) { - if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) + if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) break; - sum += log10AlleleFrequencyPosteriors.get()[i]; + sum += log10AlleleFrequencyPosteriors.get()[0][i]; } phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); } @@ -604,9 +604,12 @@ public class UnifiedGenotyperEngine { return stratifiedContexts; } - protected static void clearAFarray(double[] AFs) { - for ( int i = 0; i < AFs.length; i++ ) - AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; + protected static void clearAFarray(double[][] AFs) { + for ( int i = 0; i < AFs.length; i++ ) { + for ( int j = 0; j < AFs[i].length; j++ ) { + AFs[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; + } + } } private final static double[] binomialProbabilityDepthCache = new double[10000]; @@ -676,7 +679,7 @@ public class UnifiedGenotyperEngine { AFline.append(i + "/" + N + "\t"); AFline.append(String.format("%.2f\t", ((float)i)/N)); AFline.append(String.format("%.8f\t", getAlleleFrequencyPriors(model)[i])); - if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) + if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED) AFline.append("0.00000000\t"); else AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i])); From 7fac4afab3adc5d11670ca0f1a1dcb63afd05f92 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 5 Dec 2011 15:57:25 -0500 Subject: [PATCH 17/44] Fixed priors (now initialized upon engine startup in a multi-dimensional array) and cell coefficients (properly handles the generalized closed form representation for multiple alleles). --- .../AlleleFrequencyCalculationModel.java | 2 +- .../genotyper/ExactAFCalculationModel.java | 98 ++++++++++++------- .../genotyper/UnifiedGenotyperEngine.java | 51 +++++----- 3 files changed, 89 insertions(+), 62 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index c2f950ef5..2808e6968 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -68,7 +68,7 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results */ protected abstract void getLog10PNonRef(GenotypesContext GLs, List Alleles, - double[] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyPriors, double[][] log10AlleleFrequencyPosteriors); /** diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 0fa311303..d801885c6 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -55,14 +55,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } public void getLog10PNonRef(GenotypesContext GLs, List alleles, - double[] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyPriors, double[][] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); if ( USE_MULTI_ALLELIC_CALCULATION ) linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, false); else - linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); + linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyPosteriors); } private static final ArrayList getGLs(GenotypesContext GLs) { @@ -266,7 +266,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // index used to represent this set in the global hashmap: (numSamples^0 * allele_1) + (numSamples^1 * allele_2) + (numSamples^2 * allele_3) + ... private int index = -1; - public ExactACset(int size, int[] ACcounts) { + public ExactACset(final int size, final int[] ACcounts) { this.ACcounts = ACcounts; log10Likelihoods = new double[size]; } @@ -277,7 +277,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return index; } - public static int generateIndex(int[] ACcounts, int multiplier) { + public static int generateIndex(final int[] ACcounts, final int multiplier) { int index = 0; for ( int i = 0; i < ACcounts.length; i++ ) index += Math.pow(multiplier, i) * ACcounts[i]; @@ -293,11 +293,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - public static void linearExactMultiAllelic(GenotypesContext GLs, - int numAlternateAlleles, - double[] log10AlleleFrequencyPriors, - double[][] log10AlleleFrequencyPosteriors, - boolean preserveData) { + public static void linearExactMultiAllelic(final GenotypesContext GLs, + final int numAlternateAlleles, + final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyPosteriors, + final boolean preserveData) { final ArrayList genotypeLikelihoods = getGLs(GLs); final int numSamples = genotypeLikelihoods.size()-1; @@ -334,7 +334,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final boolean preserveData, final Queue ACqueue, final HashMap indexesToACset, - final double[] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPosteriors) { // compute the log10Likelihoods @@ -355,12 +355,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } // iterate over higher frequencies if possible - int ACwiggle = numChr - set.getACsum(); + final int ACwiggle = numChr - set.getACsum(); if ( ACwiggle == 0 ) // all alternate alleles already sum to 2N so we cannot possibly go to higher frequencies return log10LofK; ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing - int numAltAlleles = set.ACcounts.length; + final int numAltAlleles = set.ACcounts.length; // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. // so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. @@ -368,7 +368,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // add conformations for the k+1 case int PLindex = 0; for ( int allele = 0; allele < numAltAlleles; allele++ ) { - int[] ACcountsClone = set.ACcounts.clone(); + final int[] ACcountsClone = set.ACcounts.clone(); ACcountsClone[allele]++; lastSet = updateACset(ACcountsClone, numChr, set.getIndex(), ++PLindex, ACqueue, indexesToACset); } @@ -377,7 +377,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( ACwiggle > 1 ) { for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) { - int[] ACcountsClone = set.ACcounts.clone(); + final int[] ACcountsClone = set.ACcounts.clone(); ACcountsClone[allele_i]++; ACcountsClone[allele_j]++; lastSet = updateACset(ACcountsClone, numChr,set.getIndex(), ++PLindex , ACqueue, indexesToACset); @@ -394,8 +394,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and // also adds it as a dependency to the given callingSetIndex. - private static ExactACset updateACset(int[] ACcounts, - int numChr, + private static ExactACset updateACset(final int[] ACcounts, + final int numChr, final int callingSetIndex, final int PLsetIndex, final Queue ACqueue, @@ -408,19 +408,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } // add the given dependency to the set - ExactACset set = indexesToACset.get(index); + final ExactACset set = indexesToACset.get(index); set.ACsetIndexToPLIndex.put(callingSetIndex, PLsetIndex); return set; } - private static void computeLofK(ExactACset set, - ArrayList genotypeLikelihoods, + private static void computeLofK(final ExactACset set, + final ArrayList genotypeLikelihoods, final HashMap indexesToACset, - double[] log10AlleleFrequencyPriors, - double[][] log10AlleleFrequencyPosteriors) { + final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyPosteriors) { set.log10Likelihoods[0] = 0.0; // the zero case - int totalK = set.getACsum(); + final int totalK = set.getACsum(); // special case for k = 0 over all k if ( set.getIndex() == AC_ZERO_INDEX ) { @@ -450,10 +450,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { int conformationIndex = 1; for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) log10ConformationLikelihoods[conformationIndex++] = - determineCoefficient(mapping.getValue(), j, totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()]; + determineCoefficient(mapping.getValue(), j, set.ACcounts, totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()]; } - double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods); + final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods); // finally, update the L(j,k) value set.log10Likelihoods[j] = log10Max - logDenominator; @@ -469,27 +469,53 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( set.ACcounts[i] > 0 ) nonRefAlleles++; } - if ( nonRefAlleles == 0 ) // for k=0 we still want to use a power of 1 - nonRefAlleles++; // update the posteriors vector which is a collapsed view of each of the various ACs for ( int i = 0; i < set.ACcounts.length; i++ ) { - // TODO -- double check the math and then cache these values for efficiency - double prior = Math.pow(log10AlleleFrequencyPriors[totalK], nonRefAlleles); + // for k=0 we still want to use theta + final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][set.ACcounts[i]]; log10AlleleFrequencyPosteriors[i][set.ACcounts[i]] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][set.ACcounts[i]], log10LofK + prior); } } - private static double determineCoefficient(int PLindex, int j, int totalK) { + private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) { - // TODO -- the math here needs to be fixed and checked; hard-coding in the biallelic case - //AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. + // the closed form representation generalized for multiple alleles is as follows: + // AA: (2j - totalK) * (2j - totalK - 1) + // AB: 2k_b * (2j - totalK) + // AC: 2k_c * (2j - totalK) + // BB: k_b * (k_b - 1) + // BC: 2 * k_b * k_c + // CC: k_c * (k_c - 1) + + final int numAltAlleles = ACcounts.length; + + // the AX het case + if ( PLindex <= numAltAlleles ) + return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK]; + + int subtractor = numAltAlleles+1; + int subtractions = 0; + do { + PLindex -= subtractor; + subtractor--; + subtractions++; + } + while ( PLindex >= subtractor ); + + final int k_i = ACcounts[subtractions-1]; + + // the hom var case (e.g. BB, CC, DD) + final double coeff; + if ( PLindex == 0 ) { + coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1]; + } + // the het non-ref case (e.g. BC, BD, CD) + else { + final int k_j = ACcounts[subtractions+PLindex-1]; + coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j]; + } - double coeff; - if ( PLindex == 1 ) - coeff = MathUtils.log10Cache[2*totalK] + MathUtils.log10Cache[2*j-totalK]; - else - coeff = MathUtils.log10Cache[totalK] + MathUtils.log10Cache[totalK-1]; return coeff; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 148313f43..f13bbdcd4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -73,12 +73,15 @@ public class UnifiedGenotyperEngine { private ThreadLocal afcm = new ThreadLocal(); // because the allele frequency priors are constant for a given i, we cache the results to avoid having to recompute everything - private final double[] log10AlleleFrequencyPriorsSNPs; - private final double[] log10AlleleFrequencyPriorsIndels; + private final double[][] log10AlleleFrequencyPriorsSNPs; + private final double[][] log10AlleleFrequencyPriorsIndels; // the allele frequency likelihoods (allocated once as an optimization) private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); + // the maximum number of alternate alleles for genotyping supported by the genotyper; we fix this here so that the AF priors and posteriors can be initialized at startup + private static final int MAX_NUMBER_OF_ALTERNATE_ALLELES = 5; + // the priors object private final GenotypePriors genotypePriorsSNPs; private final GenotypePriors genotypePriorsIndels; @@ -122,10 +125,10 @@ public class UnifiedGenotyperEngine { this.annotationEngine = engine; N = 2 * this.samples.size(); - log10AlleleFrequencyPriorsSNPs = new double[N+1]; - log10AlleleFrequencyPriorsIndels = new double[N+1]; - computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, GenotypeLikelihoodsCalculationModel.Model.SNP); - computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, GenotypeLikelihoodsCalculationModel.Model.INDEL); + log10AlleleFrequencyPriorsSNPs = new double[MAX_NUMBER_OF_ALTERNATE_ALLELES][N+1]; + log10AlleleFrequencyPriorsIndels = new double[MAX_NUMBER_OF_ALTERNATE_ALLELES][N+1]; + computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); + computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY); genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP); genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL); @@ -295,7 +298,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - log10AlleleFrequencyPosteriors.set(new double[1][N+1]); + log10AlleleFrequencyPosteriors.set(new double[MAX_NUMBER_OF_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } @@ -440,7 +443,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - log10AlleleFrequencyPosteriors.set(new double[1][N+1]); + log10AlleleFrequencyPosteriors.set(new double[MAX_NUMBER_OF_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } @@ -747,27 +750,25 @@ public class UnifiedGenotyperEngine { return null; } - protected void computeAlleleFrequencyPriors(int N, final double[] priors, final GenotypeLikelihoodsCalculationModel.Model model) { - // calculate the allele frequency priors for 1-N - double sum = 0.0; - double heterozygosity; + protected static void computeAlleleFrequencyPriors(final int N, final double[][] priors, final double theta) { - if (model == GenotypeLikelihoodsCalculationModel.Model.INDEL) - heterozygosity = UAC.INDEL_HETEROZYGOSITY; - else - heterozygosity = UAC.heterozygosity; - - for (int i = 1; i <= N; i++) { - double value = heterozygosity / (double)i; - priors[i] = Math.log10(value); - sum += value; + // the dimension here is the number of alternate alleles; with e.g. 2 alternate alleles the prior will be theta^2 / i + for (int alleles = 1; alleles <= priors.length; alleles++) { + double sum = 0.0; + + // for each i + for (int i = 1; i <= N; i++) { + double value = Math.pow(theta, alleles) / (double)i; + priors[alleles-1][i] = Math.log10(value); + sum += value; + } + + // null frequency for AF=0 is (1 - sum(all other frequencies)) + priors[alleles-1][0] = Math.log10(1.0 - sum); } - - // null frequency for AF=0 is (1 - sum(all other frequencies)) - priors[0] = Math.log10(1.0 - sum); } - protected double[] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { + protected double[][] getAlleleFrequencyPriors( final GenotypeLikelihoodsCalculationModel.Model model ) { switch( model ) { case SNP: return log10AlleleFrequencyPriorsSNPs; From 7a0f6feda45fbf6f2b7ce4c080e241e39b126782 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 5 Dec 2011 16:18:52 -0500 Subject: [PATCH 18/44] Make sure that too many alternate alleles aren't being passed to the genotyper (10 for now) and exit with a UserError if there are. --- .../walkers/genotyper/UnifiedGenotyperEngine.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index f13bbdcd4..beba865dd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -38,6 +38,7 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -80,7 +81,7 @@ public class UnifiedGenotyperEngine { private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); // the maximum number of alternate alleles for genotyping supported by the genotyper; we fix this here so that the AF priors and posteriors can be initialized at startup - private static final int MAX_NUMBER_OF_ALTERNATE_ALLELES = 5; + private static final int MAX_NUMBER_OF_ALTERNATE_ALLELES = 10; // the priors object private final GenotypePriors genotypePriorsSNPs; @@ -302,6 +303,10 @@ public class UnifiedGenotyperEngine { afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } + // don't try to genotype too many alternate alleles + if ( vc.getAlternateAlleles().size() > MAX_NUMBER_OF_ALTERNATE_ALLELES ) + throw new UserException("the Unified Genotyper is currently not equipped to genotype more than " + MAX_NUMBER_OF_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles"); + // estimate our confidence in a reference call and return if ( vc.getNSamples() == 0 ) return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ? @@ -447,6 +452,10 @@ public class UnifiedGenotyperEngine { afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } + // don't try to genotype too many alternate alleles + if ( vc.getAlternateAlleles().size() > MAX_NUMBER_OF_ALTERNATE_ALLELES ) + throw new UserException("the Unified Genotyper is currently not equipped to genotype more than " + MAX_NUMBER_OF_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles"); + // estimate our confidence in a reference call and return if ( vc.getNSamples() == 0 ) return null; From 79d18dc0784bc570b197b31ae5c7b31f86e40985 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 6 Dec 2011 16:17:18 -0500 Subject: [PATCH 19/44] Fixing indexing bug on the ACsets. Added unit tests for the Exact model code. --- .../genotyper/ExactAFCalculationModel.java | 12 ++- .../ExactAFCalculationModelUnitTest.java | 102 ++++++++++++++++++ .../UnifiedGenotyperIntegrationTest.java | 4 +- 3 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index d801885c6..91299c902 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -263,7 +263,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them final ArrayList dependentACsetsToDelete = new ArrayList(); - // index used to represent this set in the global hashmap: (numSamples^0 * allele_1) + (numSamples^1 * allele_2) + (numSamples^2 * allele_3) + ... + // index used to represent this set in the global hashmap: (numChr^0 * allele_1) + (numChr^1 * allele_2) + (numChr^2 * allele_3) + ... private int index = -1; public ExactACset(final int size, final int[] ACcounts) { @@ -273,7 +273,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public int getIndex() { if ( index == -1 ) - index = generateIndex(ACcounts, log10Likelihoods.length); + index = generateIndex(ACcounts, 2 * log10Likelihoods.length - 1); return index; } @@ -350,7 +350,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // can we abort early because the log10Likelihoods are so small? if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { - if ( DEBUG ) System.out.printf(" *** breaking early ks=%d log10L=%.2f maxLog10L=%.2f%n", set.index, log10LofK, maxLog10L); + if ( DEBUG ) + System.out.printf(" *** breaking early ks=%d log10L=%.2f maxLog10L=%.2f%n", set.index, log10LofK, maxLog10L); + + // no reason to keep this data around because nothing depends on it + if ( !preserveData ) + indexesToACset.put(set.getIndex(), null); + return log10LofK; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java new file mode 100644 index 000000000..00cfff4b3 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -0,0 +1,102 @@ +package org.broadinstitute.sting.gatk.walkers.genotyper; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.Genotype; +import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + + +public class ExactAFCalculationModelUnitTest extends BaseTest { + + static double[] AA1, AB1, BB1; + static double[] AA2, AB2, AC2, BB2, BC2, CC2; + static final int numSamples = 3; + static double[][] priors = new double[2][2*numSamples+1]; // flat priors + + @BeforeSuite + public void before() { + AA1 = new double[]{0.0, -20.0, -20.0}; + AB1 = new double[]{-20.0, 0.0, -20.0}; + BB1 = new double[]{-20.0, -20.0, 0.0}; + AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0}; + AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0}; + AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0}; + BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0}; + BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; + CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0}; + } + + private class GetGLsTest extends TestDataProvider { + GenotypesContext GLs; + int numAltAlleles; + String name; + + private GetGLsTest(String name, int numAltAlleles, Genotype... arg) { + super(GetGLsTest.class, name); + GLs = GenotypesContext.create(arg); + this.name = name; + this.numAltAlleles = numAltAlleles; + } + + public String toString() { + return String.format("%s input=%s", super.toString(), GLs); + } + } + + private static Genotype createGenotype(String name, double[] gls) { + return new Genotype(name, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL), Genotype.NO_LOG10_PERROR, gls); + } + + @DataProvider(name = "getGLs") + public Object[][] createGLsData() { + + // bi-allelic case + new GetGLsTest("B0", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AA3", AA1)); + new GetGLsTest("B1", 1, createGenotype("AA1", AA1), createGenotype("AA2", AA1), createGenotype("AB", AB1)); + new GetGLsTest("B2", 1, createGenotype("AA1", AA1), createGenotype("BB", BB1), createGenotype("AA2", AA1)); + new GetGLsTest("B3a", 1, createGenotype("AB", AB1), createGenotype("AA", AA1), createGenotype("BB", BB1)); + new GetGLsTest("B3b", 1, createGenotype("AB1", AB1), createGenotype("AB2", AB1), createGenotype("AB3", AB1)); + new GetGLsTest("B4", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("AA", AA1)); + new GetGLsTest("B5", 1, createGenotype("BB1", BB1), createGenotype("AB", AB1), createGenotype("BB2", BB1)); + new GetGLsTest("B6", 1, createGenotype("BB1", BB1), createGenotype("BB2", BB1), createGenotype("BB3", BB1)); + + // tri-allelic case + new GetGLsTest("B1C0", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AB", AB2)); + new GetGLsTest("B0C1", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("AC", AC2)); + new GetGLsTest("B1C1a", 2, createGenotype("AA", AA2), createGenotype("AB", AB2), createGenotype("AC", AC2)); + new GetGLsTest("B1C1b", 2, createGenotype("AA1", AA2), createGenotype("AA2", AA2), createGenotype("BC", BC2)); + new GetGLsTest("B2C1", 2, createGenotype("AB1", AB2), createGenotype("AB2", AB2), createGenotype("AC", AC2)); + new GetGLsTest("B3C2a", 2, createGenotype("AB", AB2), createGenotype("BC1", BC2), createGenotype("BC2", BC2)); + new GetGLsTest("B3C2b", 2, createGenotype("AB", AB2), createGenotype("BB", BB2), createGenotype("CC", CC2)); + + return GetGLsTest.getTests(GetGLsTest.class); + } + + + @Test(dataProvider = "getGLs") + public void testGLs(GetGLsTest cfg) { + + final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1]; + for ( int i = 0; i < 2; i++ ) { + for ( int j = 0; j < 2*numSamples+1; j++ ) { + log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; + } + } + + ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyPosteriors, false); + + int nameIndex = 1; + for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { + int expectedAlleleCount = Integer.valueOf(cfg.name.substring(nameIndex, nameIndex+1)); + int calculatedAlleleCount = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors[allele]); + Assert.assertEquals(calculatedAlleleCount, expectedAlleleCount); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 11e086db8..3275fc797 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -284,7 +284,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithIndelAllelesPassedIn3() { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + + baseCommandIndels + " --multiallelic --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, Arrays.asList("f93f8a35b47bcf96594ada55e2312c73")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); @@ -293,7 +293,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testWithIndelAllelesPassedIn4() { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( - baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + + baseCommandIndelsb37 + " --multiallelic --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, Arrays.asList("9be28cb208d8b0314d2bc2696e2fd8d4")); executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4); From 7750bafb12ada511e9c764668d013f09d2b92893 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 8 Dec 2011 13:50:50 -0500 Subject: [PATCH 20/44] Fixed bug where last dependent set index wasn't properly being transferred for sites with many alleles. Adding debugging output. --- .../genotyper/ExactAFCalculationModel.java | 51 +++++++++++++++---- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 91299c902..9c3b499b4 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -36,10 +36,10 @@ import java.io.PrintStream; import java.util.*; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { - // - // code for testing purposes - // + private final static boolean DEBUG = false; + //private final static boolean DEBUG = true; + private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. @@ -291,6 +291,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { sum += count; return sum; } + + public boolean equals(Object obj) { + return (obj instanceof ExactACset) ? getIndex() == ((ExactACset)obj).getIndex() : false; + } } public static void linearExactMultiAllelic(final GenotypesContext GLs, @@ -337,13 +341,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPosteriors) { + if ( DEBUG ) + System.out.printf(" *** computing LofK for set=%d%n", set.getIndex()); + // compute the log10Likelihoods computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); // clean up memory if ( !preserveData ) { - for ( int index : set.dependentACsetsToDelete ) + for ( int index : set.dependentACsetsToDelete ) { indexesToACset.put(index, null); + if ( DEBUG ) + System.out.printf(" *** removing used set=%d after seeing final dependent set=%d%n", index, set.getIndex()); + } } final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; @@ -351,7 +361,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // can we abort early because the log10Likelihoods are so small? if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { if ( DEBUG ) - System.out.printf(" *** breaking early ks=%d log10L=%.2f maxLog10L=%.2f%n", set.index, log10LofK, maxLog10L); + System.out.printf(" *** breaking early set=%d log10L=%.2f maxLog10L=%.2f%n", set.getIndex(), log10LofK, maxLog10L); // no reason to keep this data around because nothing depends on it if ( !preserveData ) @@ -386,20 +396,27 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final int[] ACcountsClone = set.ACcounts.clone(); ACcountsClone[allele_i]++; ACcountsClone[allele_j]++; - lastSet = updateACset(ACcountsClone, numChr,set.getIndex(), ++PLindex , ACqueue, indexesToACset); + lastSet = updateACset(ACcountsClone, numChr, set.getIndex(), ++PLindex , ACqueue, indexesToACset); } } } - if ( lastSet == null ) - throw new ReviewedStingException("No new AC sets were added or updated but the AC still hasn't reached 2N"); - lastSet.dependentACsetsToDelete.add(set.index); + // if the last dependent set was not at the back of the queue (i.e. not just added), then we need to iterate + // over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early) + if ( !preserveData && lastSet == null ) { + if ( DEBUG ) + System.out.printf(" *** iterating over dependent sets for set=%d%n", set.getIndex()); + lastSet = determineLastDependentSetInQueue(set.getIndex(), ACqueue); + } + if ( lastSet != null ) + lastSet.dependentACsetsToDelete.add(set.index); return log10LofK; } // adds the ExactACset represented by the ACcounts to the ACqueue if not already there (creating it if needed) and // also adds it as a dependency to the given callingSetIndex. + // returns the ExactACset if that set was not already in the queue and null otherwise. private static ExactACset updateACset(final int[] ACcounts, final int numChr, final int callingSetIndex, @@ -407,15 +424,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final Queue ACqueue, final HashMap indexesToACset) { final int index = ExactACset.generateIndex(ACcounts, numChr+1); + boolean wasInQueue = true; if ( !indexesToACset.containsKey(index) ) { ExactACset set = new ExactACset(numChr/2 +1, ACcounts); indexesToACset.put(index, set); ACqueue.add(set); + wasInQueue = false; } // add the given dependency to the set final ExactACset set = indexesToACset.get(index); set.ACsetIndexToPLIndex.put(callingSetIndex, PLsetIndex); + return wasInQueue ? null : set; + } + + private static ExactACset determineLastDependentSetInQueue(final int callingSetIndex, final Queue ACqueue) { + ExactACset set = null; + for ( ExactACset queued : ACqueue ) { + if ( queued.dependentACsetsToDelete.contains(callingSetIndex) ) + set = queued; + } return set; } @@ -454,9 +482,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // deal with the other possible conformations now if ( totalK <= 2*j ) { // skip impossible conformations int conformationIndex = 1; - for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) + for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { + if ( DEBUG ) + System.out.printf(" *** evaluating set=%d which depends on set=%d%n", set.getIndex(), mapping.getKey()); log10ConformationLikelihoods[conformationIndex++] = determineCoefficient(mapping.getValue(), j, set.ACcounts, totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()]; + } } final double log10Max = approximateLog10SumLog10(log10ConformationLikelihoods); From 4aebe99445c00c617be2f1f6a6551769a398fdf7 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 8 Dec 2011 15:31:02 -0500 Subject: [PATCH 21/44] Need to use longs for the set index (because we can run out of ints when there are too many alternate alleles). Integration tests now use the multiallelic implementation. --- .../genotyper/ExactAFCalculationModel.java | 33 +++++++++---------- .../UnifiedGenotyperIntegrationTest.java | 12 +++---- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 9c3b499b4..1381f48ec 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -38,7 +38,6 @@ import java.util.*; public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private final static boolean DEBUG = false; - //private final static boolean DEBUG = true; private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. @@ -258,27 +257,27 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column; // for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB. - final HashMap ACsetIndexToPLIndex = new HashMap(); + final HashMap ACsetIndexToPLIndex = new HashMap(); // to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them - final ArrayList dependentACsetsToDelete = new ArrayList(); + final ArrayList dependentACsetsToDelete = new ArrayList(); // index used to represent this set in the global hashmap: (numChr^0 * allele_1) + (numChr^1 * allele_2) + (numChr^2 * allele_3) + ... - private int index = -1; + private long index = -1; public ExactACset(final int size, final int[] ACcounts) { this.ACcounts = ACcounts; log10Likelihoods = new double[size]; } - public int getIndex() { + public long getIndex() { if ( index == -1 ) index = generateIndex(ACcounts, 2 * log10Likelihoods.length - 1); return index; } - public static int generateIndex(final int[] ACcounts, final int multiplier) { - int index = 0; + public static long generateIndex(final int[] ACcounts, final int multiplier) { + long index = 0L; for ( int i = 0; i < ACcounts.length; i++ ) index += Math.pow(multiplier, i) * ACcounts[i]; return index; @@ -311,13 +310,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final Queue ACqueue = new LinkedList(); // mapping of ExactACset indexes to the objects - final HashMap indexesToACset = new HashMap(numChr+1); + final HashMap indexesToACset = new HashMap(numChr+1); // add AC=0 to the queue int[] zeroCounts = new int[numAlternateAlleles]; ExactACset zeroSet = new ExactACset(numSamples+1, zeroCounts); ACqueue.add(zeroSet); - indexesToACset.put(0, zeroSet); + indexesToACset.put(0L, zeroSet); // keep processing while we have AC conformations that need to be calculated double maxLog10L = Double.NEGATIVE_INFINITY; @@ -337,7 +336,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final int numChr, final boolean preserveData, final Queue ACqueue, - final HashMap indexesToACset, + final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPosteriors) { @@ -349,7 +348,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // clean up memory if ( !preserveData ) { - for ( int index : set.dependentACsetsToDelete ) { + for ( long index : set.dependentACsetsToDelete ) { indexesToACset.put(index, null); if ( DEBUG ) System.out.printf(" *** removing used set=%d after seeing final dependent set=%d%n", index, set.getIndex()); @@ -419,11 +418,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // returns the ExactACset if that set was not already in the queue and null otherwise. private static ExactACset updateACset(final int[] ACcounts, final int numChr, - final int callingSetIndex, + final long callingSetIndex, final int PLsetIndex, final Queue ACqueue, - final HashMap indexesToACset) { - final int index = ExactACset.generateIndex(ACcounts, numChr+1); + final HashMap indexesToACset) { + final long index = ExactACset.generateIndex(ACcounts, numChr+1); boolean wasInQueue = true; if ( !indexesToACset.containsKey(index) ) { ExactACset set = new ExactACset(numChr/2 +1, ACcounts); @@ -438,7 +437,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return wasInQueue ? null : set; } - private static ExactACset determineLastDependentSetInQueue(final int callingSetIndex, final Queue ACqueue) { + private static ExactACset determineLastDependentSetInQueue(final long callingSetIndex, final Queue ACqueue) { ExactACset set = null; for ( ExactACset queued : ACqueue ) { if ( queued.dependentACsetsToDelete.contains(callingSetIndex) ) @@ -449,7 +448,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static void computeLofK(final ExactACset set, final ArrayList genotypeLikelihoods, - final HashMap indexesToACset, + final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPosteriors) { @@ -482,7 +481,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // deal with the other possible conformations now if ( totalK <= 2*j ) { // skip impossible conformations int conformationIndex = 1; - for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { + for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { if ( DEBUG ) System.out.printf(" *** evaluating set=%d which depends on set=%d%n", set.getIndex(), mapping.getKey()); log10ConformationLikelihoods[conformationIndex++] = diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 3275fc797..ccc585bc7 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -16,9 +16,9 @@ import java.util.Map; public class UnifiedGenotyperIntegrationTest extends WalkerTest { - private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129; - private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; + private final static String baseCommand = "-T UnifiedGenotyper --multiallelic -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129; + private final static String baseCommandIndels = "-T UnifiedGenotyper --multiallelic -R " + b36KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper --multiallelic -R " + b37KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; // -------------------------------------------------------------------------------------------------------------- // @@ -284,16 +284,16 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithIndelAllelesPassedIn3() { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - baseCommandIndels + " --multiallelic --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + + baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("f93f8a35b47bcf96594ada55e2312c73")); + Arrays.asList("1d4a6a1b840ca6a130516ab9f2d99869")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @Test public void testWithIndelAllelesPassedIn4() { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( - baseCommandIndelsb37 + " --multiallelic --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + + baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, Arrays.asList("9be28cb208d8b0314d2bc2696e2fd8d4")); executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4); From 3e7714629f00955cce5a4f3acfe7f24d25d6b1ae Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 8 Dec 2011 23:50:54 -0500 Subject: [PATCH 22/44] Scrapped the whole idea of an int/long as an index into the ACset: with lots of alternate alleles we run into overflow issues. Instead, simply use the ACcounts array as the hash key since it is unique for each AC conformation. To do this, it needed to be wrapped inside an object so hashcode() would work. --- .../genotyper/ExactAFCalculationModel.java | 132 ++++++++++-------- .../genotyper/UnifiedArgumentCollection.java | 9 ++ .../genotyper/UnifiedGenotyperEngine.java | 8 +- 3 files changed, 89 insertions(+), 60 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 1381f48ec..f9d30e0d1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -244,55 +244,77 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // ------------------------------------------------------------------------------------- private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first - private static final int AC_ZERO_INDEX = 0; // ExactACset index for k=0 over all k + + // a wrapper around the int array so that we can make it hashable + private static final class ExactACcounts { + + private final int[] counts; + private int hashcode = -1; + + public ExactACcounts(final int[] counts) { + this.counts = counts; + } + + public int[] getCounts() { + return counts; + } + + @Override + public boolean equals(Object obj) { + return (obj instanceof ExactACcounts) ? Arrays.equals(counts, ((ExactACcounts)obj).counts) : false; + } + + @Override + public int hashCode() { + if ( hashcode == -1 ) + hashcode = Arrays.hashCode(counts); + return hashcode; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append(counts[0]); + for ( int i = 1; i < counts.length; i++ ) { + sb.append("/"); + sb.append(counts[i]); + } + return sb.toString(); + } + } // This class represents a column in the Exact AC calculation matrix private static final class ExactACset { // the counts of the various alternate alleles which this column represents - final int[] ACcounts; + final ExactACcounts ACcounts; // the column of the matrix final double[] log10Likelihoods; // mapping of column index for those columns upon which this one depends to the index into the PLs which is used as the transition to this column; // for example, in the biallelic case, the transition from k=0 to k=1 would be AB while the transition to k=2 would be BB. - final HashMap ACsetIndexToPLIndex = new HashMap(); + final HashMap ACsetIndexToPLIndex = new HashMap(); // to minimize memory consumption, we know we can delete any sets in this list because no further sets will depend on them - final ArrayList dependentACsetsToDelete = new ArrayList(); + final ArrayList dependentACsetsToDelete = new ArrayList(); - // index used to represent this set in the global hashmap: (numChr^0 * allele_1) + (numChr^1 * allele_2) + (numChr^2 * allele_3) + ... - private long index = -1; - public ExactACset(final int size, final int[] ACcounts) { + public ExactACset(final int size, final ExactACcounts ACcounts) { this.ACcounts = ACcounts; log10Likelihoods = new double[size]; } - public long getIndex() { - if ( index == -1 ) - index = generateIndex(ACcounts, 2 * log10Likelihoods.length - 1); - return index; - } - - public static long generateIndex(final int[] ACcounts, final int multiplier) { - long index = 0L; - for ( int i = 0; i < ACcounts.length; i++ ) - index += Math.pow(multiplier, i) * ACcounts[i]; - return index; - } - // sum of all the non-reference alleles public int getACsum() { int sum = 0; - for ( int count : ACcounts ) + for ( int count : ACcounts.getCounts() ) sum += count; return sum; } public boolean equals(Object obj) { - return (obj instanceof ExactACset) ? getIndex() == ((ExactACset)obj).getIndex() : false; + return (obj instanceof ExactACset) ? ACcounts.equals(((ExactACset)obj).ACcounts) : false; } } @@ -310,13 +332,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final Queue ACqueue = new LinkedList(); // mapping of ExactACset indexes to the objects - final HashMap indexesToACset = new HashMap(numChr+1); + final HashMap indexesToACset = new HashMap(numChr+1); // add AC=0 to the queue int[] zeroCounts = new int[numAlternateAlleles]; - ExactACset zeroSet = new ExactACset(numSamples+1, zeroCounts); + ExactACset zeroSet = new ExactACset(numSamples+1, new ExactACcounts(zeroCounts)); ACqueue.add(zeroSet); - indexesToACset.put(0L, zeroSet); + indexesToACset.put(zeroSet.ACcounts, zeroSet); // keep processing while we have AC conformations that need to be calculated double maxLog10L = Double.NEGATIVE_INFINITY; @@ -336,22 +358,22 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final int numChr, final boolean preserveData, final Queue ACqueue, - final HashMap indexesToACset, + final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPosteriors) { if ( DEBUG ) - System.out.printf(" *** computing LofK for set=%d%n", set.getIndex()); + System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // compute the log10Likelihoods computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); // clean up memory if ( !preserveData ) { - for ( long index : set.dependentACsetsToDelete ) { + for ( ExactACcounts index : set.dependentACsetsToDelete ) { indexesToACset.put(index, null); if ( DEBUG ) - System.out.printf(" *** removing used set=%d after seeing final dependent set=%d%n", index, set.getIndex()); + System.out.printf(" *** removing used set=%s after seeing final dependent set=%s%n", index, set.ACcounts); } } @@ -360,11 +382,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // can we abort early because the log10Likelihoods are so small? if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { if ( DEBUG ) - System.out.printf(" *** breaking early set=%d log10L=%.2f maxLog10L=%.2f%n", set.getIndex(), log10LofK, maxLog10L); + System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L); // no reason to keep this data around because nothing depends on it if ( !preserveData ) - indexesToACset.put(set.getIndex(), null); + indexesToACset.put(set.ACcounts, null); return log10LofK; } @@ -375,7 +397,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return log10LofK; ExactACset lastSet = null; // keep track of the last set placed in the queue so that we can tell it to clean us up when done processing - final int numAltAlleles = set.ACcounts.length; + final int numAltAlleles = set.ACcounts.getCounts().length; // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. // so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. @@ -383,19 +405,19 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // add conformations for the k+1 case int PLindex = 0; for ( int allele = 0; allele < numAltAlleles; allele++ ) { - final int[] ACcountsClone = set.ACcounts.clone(); + final int[] ACcountsClone = set.ACcounts.getCounts().clone(); ACcountsClone[allele]++; - lastSet = updateACset(ACcountsClone, numChr, set.getIndex(), ++PLindex, ACqueue, indexesToACset); + lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset); } // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different if ( ACwiggle > 1 ) { for ( int allele_i = 0; allele_i < numAltAlleles; allele_i++ ) { for ( int allele_j = allele_i; allele_j < numAltAlleles; allele_j++ ) { - final int[] ACcountsClone = set.ACcounts.clone(); + final int[] ACcountsClone = set.ACcounts.getCounts().clone(); ACcountsClone[allele_i]++; ACcountsClone[allele_j]++; - lastSet = updateACset(ACcountsClone, numChr, set.getIndex(), ++PLindex , ACqueue, indexesToACset); + lastSet = updateACset(ACcountsClone, numChr, set, ++PLindex , ACqueue, indexesToACset); } } } @@ -404,11 +426,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // over all the dependent sets to find the last one in the queue (otherwise it will be cleaned up too early) if ( !preserveData && lastSet == null ) { if ( DEBUG ) - System.out.printf(" *** iterating over dependent sets for set=%d%n", set.getIndex()); - lastSet = determineLastDependentSetInQueue(set.getIndex(), ACqueue); + System.out.printf(" *** iterating over dependent sets for set=%s%n", set.ACcounts); + lastSet = determineLastDependentSetInQueue(set.ACcounts, ACqueue); } if ( lastSet != null ) - lastSet.dependentACsetsToDelete.add(set.index); + lastSet.dependentACsetsToDelete.add(set.ACcounts); return log10LofK; } @@ -418,14 +440,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // returns the ExactACset if that set was not already in the queue and null otherwise. private static ExactACset updateACset(final int[] ACcounts, final int numChr, - final long callingSetIndex, + final ExactACset callingSet, final int PLsetIndex, final Queue ACqueue, - final HashMap indexesToACset) { - final long index = ExactACset.generateIndex(ACcounts, numChr+1); + final HashMap indexesToACset) { + final ExactACcounts index = new ExactACcounts(ACcounts); boolean wasInQueue = true; if ( !indexesToACset.containsKey(index) ) { - ExactACset set = new ExactACset(numChr/2 +1, ACcounts); + ExactACset set = new ExactACset(numChr/2 +1, index); indexesToACset.put(index, set); ACqueue.add(set); wasInQueue = false; @@ -433,11 +455,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // add the given dependency to the set final ExactACset set = indexesToACset.get(index); - set.ACsetIndexToPLIndex.put(callingSetIndex, PLsetIndex); + set.ACsetIndexToPLIndex.put(callingSet.ACcounts, PLsetIndex); return wasInQueue ? null : set; } - private static ExactACset determineLastDependentSetInQueue(final long callingSetIndex, final Queue ACqueue) { + private static ExactACset determineLastDependentSetInQueue(final ExactACcounts callingSetIndex, final Queue ACqueue) { ExactACset set = null; for ( ExactACset queued : ACqueue ) { if ( queued.dependentACsetsToDelete.contains(callingSetIndex) ) @@ -448,7 +470,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private static void computeLofK(final ExactACset set, final ArrayList genotypeLikelihoods, - final HashMap indexesToACset, + final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, final double[][] log10AlleleFrequencyPosteriors) { @@ -456,7 +478,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final int totalK = set.getACsum(); // special case for k = 0 over all k - if ( set.getIndex() == AC_ZERO_INDEX ) { + if ( totalK == 0 ) { for ( int j = 1; j < set.log10Likelihoods.length; j++ ) set.log10Likelihoods[j] = set.log10Likelihoods[j-1] + genotypeLikelihoods.get(j)[HOM_REF_INDEX]; } @@ -481,11 +503,11 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // deal with the other possible conformations now if ( totalK <= 2*j ) { // skip impossible conformations int conformationIndex = 1; - for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { + for ( Map.Entry mapping : set.ACsetIndexToPLIndex.entrySet() ) { if ( DEBUG ) - System.out.printf(" *** evaluating set=%d which depends on set=%d%n", set.getIndex(), mapping.getKey()); + System.out.printf(" *** evaluating set=%s which depends on set=%s%n", set.ACcounts, mapping.getKey()); log10ConformationLikelihoods[conformationIndex++] = - determineCoefficient(mapping.getValue(), j, set.ACcounts, totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()]; + determineCoefficient(mapping.getValue(), j, set.ACcounts.getCounts(), totalK) + indexesToACset.get(mapping.getKey()).log10Likelihoods[j-1] + gl[mapping.getValue()]; } } @@ -501,16 +523,16 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // determine the power of theta to use int nonRefAlleles = 0; - for ( int i = 0; i < set.ACcounts.length; i++ ) { - if ( set.ACcounts[i] > 0 ) + for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { + if ( set.ACcounts.getCounts()[i] > 0 ) nonRefAlleles++; } // update the posteriors vector which is a collapsed view of each of the various ACs - for ( int i = 0; i < set.ACcounts.length; i++ ) { + for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { // for k=0 we still want to use theta - final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][set.ACcounts[i]]; - log10AlleleFrequencyPosteriors[i][set.ACcounts[i]] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][set.ACcounts[i]], log10LofK + prior); + final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][set.ACcounts.getCounts()[i]]; + log10AlleleFrequencyPosteriors[i][set.ACcounts.getCounts()[i]] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][set.ACcounts.getCounts()[i]], log10LofK + prior); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index d7101da6b..bfa87122c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -157,6 +157,14 @@ public class UnifiedArgumentCollection { @Argument(fullName = "multiallelic", shortName = "multiallelic", doc = "Allow multiple alleles in discovery", required = false) public boolean MULTI_ALLELIC = false; + /** + * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), + * then this site will be skipped and a warning printed. + */ + @Hidden + @Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) + public int MAX_ALTERNATE_ALLELES = 5; + // Developers must remember to add any newly added arguments to the list here as well otherwise they won't get changed from their default value! public UnifiedArgumentCollection clone() { @@ -180,6 +188,7 @@ public class UnifiedArgumentCollection { uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO; uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE; uac.alleles = alleles; + uac.MAX_ALTERNATE_ALLELES = MAX_ALTERNATE_ALLELES; // todo- arguments to remove uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index beba865dd..6a79061fc 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -80,9 +80,6 @@ public class UnifiedGenotyperEngine { // the allele frequency likelihoods (allocated once as an optimization) private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); - // the maximum number of alternate alleles for genotyping supported by the genotyper; we fix this here so that the AF priors and posteriors can be initialized at startup - private static final int MAX_NUMBER_OF_ALTERNATE_ALLELES = 10; - // the priors object private final GenotypePriors genotypePriorsSNPs; private final GenotypePriors genotypePriorsIndels; @@ -126,8 +123,8 @@ public class UnifiedGenotyperEngine { this.annotationEngine = engine; N = 2 * this.samples.size(); - log10AlleleFrequencyPriorsSNPs = new double[MAX_NUMBER_OF_ALTERNATE_ALLELES][N+1]; - log10AlleleFrequencyPriorsIndels = new double[MAX_NUMBER_OF_ALTERNATE_ALLELES][N+1]; + log10AlleleFrequencyPriorsSNPs = new double[UAC.MAX_ALTERNATE_ALLELES][N+1]; + log10AlleleFrequencyPriorsIndels = new double[UAC.MAX_ALTERNATE_ALLELES][N+1]; computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsSNPs, UAC.heterozygosity); computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY); genotypePriorsSNPs = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.SNP); @@ -155,6 +152,7 @@ public class UnifiedGenotyperEngine { return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext) : null); } + VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); if ( vc == null ) From 8777288a9f62a610e3efaf1ebfdd434b0bf79520 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 9 Dec 2011 00:00:20 -0500 Subject: [PATCH 23/44] Don't throw a UserException if too many alt alleles are trying to be genotyped. Instead, I've added an argument that allows the user to set the max number of alt alleles to genotype and the UG warns and skips any sites with more than that number. --- .../genotyper/UnifiedArgumentCollection.java | 2 +- .../genotyper/UnifiedGenotyperEngine.java | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index bfa87122c..53600b145 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -159,7 +159,7 @@ public class UnifiedArgumentCollection { /** * If there are more than this number of alternate alleles presented to the genotyper (either through discovery or GENOTYPE_GIVEN ALLELES), - * then this site will be skipped and a warning printed. + * then this site will be skipped and a warning printed. Note that genotyping sites with many alternate alleles is both CPU and memory intensive. */ @Hidden @Argument(fullName = "max_alternate_alleles", shortName = "maxAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 6a79061fc..6e61790ed 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -38,7 +38,6 @@ import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; @@ -153,8 +152,6 @@ public class UnifiedGenotyperEngine { } VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); - VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model); - if ( vc == null ) return null; @@ -297,13 +294,15 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - log10AlleleFrequencyPosteriors.set(new double[MAX_NUMBER_OF_ALTERNATE_ALLELES][N+1]); + log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } // don't try to genotype too many alternate alleles - if ( vc.getAlternateAlleles().size() > MAX_NUMBER_OF_ALTERNATE_ALLELES ) - throw new UserException("the Unified Genotyper is currently not equipped to genotype more than " + MAX_NUMBER_OF_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles"); + if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) { + logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument"); + return null; + } // estimate our confidence in a reference call and return if ( vc.getNSamples() == 0 ) @@ -446,13 +445,15 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { - log10AlleleFrequencyPosteriors.set(new double[MAX_NUMBER_OF_ALTERNATE_ALLELES][N+1]); + log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } // don't try to genotype too many alternate alleles - if ( vc.getAlternateAlleles().size() > MAX_NUMBER_OF_ALTERNATE_ALLELES ) - throw new UserException("the Unified Genotyper is currently not equipped to genotype more than " + MAX_NUMBER_OF_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles"); + if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) { + logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument"); + return null; + } // estimate our confidence in a reference call and return if ( vc.getNSamples() == 0 ) From 2fe50c64da60ab27bf119f2975b364e3e629d18d Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 9 Dec 2011 00:47:01 -0500 Subject: [PATCH 24/44] Updating md5s --- .../walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index ccc585bc7..a91b6c15d 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -7,7 +7,6 @@ import org.testng.annotations.Test; import java.util.Arrays; import java.util.HashMap; -import java.util.List; import java.util.Map; // ********************************************************************************** // @@ -29,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("b70732a2f63f8409b61e41fa53eaae3e")); + Arrays.asList("f6ef10dee80f9ccd7d245a28787ca887")); executeTest("test MultiSample Pilot1", spec); } @@ -295,8 +294,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("9be28cb208d8b0314d2bc2696e2fd8d4")); - executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4); + Arrays.asList("6ee2f3c6b5422f0a2ad0669639e293cb")); + executeTest("test MultiSample Phase1 indels with complicated records", spec4); } @Test From aa4a8c5303e41d6a9b9cfe28c3a8321dabb86ff3 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 9 Dec 2011 02:25:06 -0500 Subject: [PATCH 25/44] No dynamic programming solution for assignning genotypes; just done greedily now. Fixed QualByDepth to skip no-call genotypes. No-calls are no longer given annotations (attributes). --- .../gatk/walkers/annotator/QualByDepth.java | 2 +- .../genotyper/ExactAFCalculationModel.java | 99 ++----------------- .../UnifiedGenotyperIntegrationTest.java | 20 ++-- 3 files changed, 20 insertions(+), 101 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java index d555463bc..6638fc7a8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java @@ -38,7 +38,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati for ( final Genotype genotype : genotypes ) { // we care only about variant calls with likelihoods - if ( genotype.isHomRef() ) + if ( !genotype.isHet() && !genotype.isHomVar() ) continue; AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index f9d30e0d1..22017a1ee 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -42,7 +42,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - private static final boolean SIMPLE_GREEDY_GENOTYPER = false; private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private final boolean USE_MULTI_ALLELIC_CALCULATION; @@ -592,10 +591,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { GenotypesContext GLs = vc.getGenotypes(); double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1]; - int[][] tracebackArray = new int[GLs.size()+1][AFofMaxLikelihood+1]; ArrayList sampleIndices = new ArrayList(); - int sampleIdx = 0; // todo - optimize initialization for (int k=0; k <= AFofMaxLikelihood; k++) @@ -604,83 +601,29 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { pathMetricArray[0][0] = 0.0; - // todo = can't deal with optimal dynamic programming solution with multiallelic records - if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) { - sampleIndices.addAll(GLs.getSampleNamesOrderedByName()); - sampleIdx = GLs.size(); - } - else { - - for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) { - if ( !genotype.hasLikelihoods() ) - continue; - - double[] likelihoods = genotype.getLikelihoods().getAsVector(); - - if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) { - //System.out.print(sample.getKey()+":"); - //for (int k=0; k < likelihoods.length; k++) - // System.out.format("%4.2f ",likelihoods[k]); - //System.out.println(); - // all likelihoods are essentially the same: skip this sample and will later on force no call. - //sampleIdx++; - continue; - } - - sampleIndices.add(genotype.getSampleName()); - - for (int k=0; k <= AFofMaxLikelihood; k++) { - - double bestMetric = pathMetricArray[sampleIdx][k] + likelihoods[0]; - int bestIndex = k; - - if (k>0) { - double m2 = pathMetricArray[sampleIdx][k-1] + likelihoods[1]; - if (m2 > bestMetric) { - bestMetric = m2; - bestIndex = k-1; - } - } - - if (k>1) { - double m2 = pathMetricArray[sampleIdx][k-2] + likelihoods[2]; - if (m2 > bestMetric) { - bestMetric = m2; - bestIndex = k-2; - } - } - - pathMetricArray[sampleIdx+1][k] = bestMetric; - tracebackArray[sampleIdx+1][k] = bestIndex; - } - sampleIdx++; - } - } + sampleIndices.addAll(GLs.getSampleNamesOrderedByName()); GenotypesContext calls = GenotypesContext.create(); - int startIdx = AFofMaxLikelihood; - for (int k = sampleIdx; k > 0; k--) { + for (int k = GLs.size(); k > 0; k--) { int bestGTguess; String sample = sampleIndices.get(k-1); Genotype g = GLs.get(sample); if ( !g.hasLikelihoods() ) continue; - // if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now, - // and will add no-call genotype to GL's in a second pass + ArrayList myAlleles = new ArrayList(); double[] likelihoods = g.getLikelihoods().getAsVector(); - if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) { - bestGTguess = Utils.findIndexOfMaxEntry(likelihoods); - } - else { - int newIdx = tracebackArray[k][startIdx];; - bestGTguess = startIdx - newIdx; - startIdx = newIdx; + // if there is no mass on the likelihoods, then just no-call the sample + if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) { + calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false)); + continue; } + bestGTguess = Utils.findIndexOfMaxEntry(likelihoods); + // likelihoods are stored row-wise in lower triangular matrix. IE // for 2 alleles they have ordering AA,AB,BB // for 3 alleles they are ordered AA,AB,BB,AC,BC,CC @@ -709,33 +652,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } final double qual = GenotypeLikelihoods.getQualFromLikelihoods(bestGTguess, likelihoods); - //System.out.println(myAlleles.toString()); calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); } - for ( final Genotype genotype : GLs.iterateInSampleNameOrder() ) { - if ( !genotype.hasLikelihoods() ) - continue; - - final Genotype g = GLs.get(genotype.getSampleName()); - final double[] likelihoods = genotype.getLikelihoods().getAsVector(); - - if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL) - continue; // regular likelihoods - - final double qual = Genotype.NO_LOG10_PERROR; - calls.replace(new Genotype(g.getSampleName(), NO_CALL_ALLELES, qual, null, g.getAttributes(), false)); - } - return calls; } - - private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) { - int j = logYMatrix.length - 1; - System.out.printf("-----------------------------------%n"); - for (int k=0; k <= numChr; k++) { - double posterior = logYMatrix[j][k] + log10AlleleFrequencyPriors[k]; - System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior); - } - } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index a91b6c15d..c04b0085c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -28,7 +28,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("f6ef10dee80f9ccd7d245a28787ca887")); + Arrays.asList("a2d3839c4ebb390b0012d495e4e53b3a")); executeTest("test MultiSample Pilot1", spec); } @@ -44,7 +44,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testWithAllelesPassedIn2() { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1, - Arrays.asList("d0593483e85a7d815f4c5ee6db284d2a")); + Arrays.asList("43e7a17d95b1a0cf72e669657794d802")); executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2); } @@ -52,7 +52,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("3ccce5d909f8f128e496f6841836e5f7")); + Arrays.asList("ae29b9c9aacce8046dc780430540cd62")); executeTest("test SingleSample Pilot2", spec); } @@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - private final static String COMPRESSED_OUTPUT_MD5 = "890143b366050e78d6c6ba6b2c6b6864"; + private final static String COMPRESSED_OUTPUT_MD5 = "fda341de80b3f6fd42a83352b18b1d65"; @Test public void testCompressedOutput() { @@ -83,7 +83,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // Note that we need to turn off any randomization for this to work, so no downsampling and no annotations - String md5 = "95614280c565ad90f8c000376fef822c"; + String md5 = "32a34362dff51d8b73a3335048516d82"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1, @@ -164,8 +164,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testHeterozyosity() { HashMap e = new HashMap(); - e.put( 0.01, "46243ecc2b9dc716f48ea280c9bb7e72" ); - e.put( 1.0 / 1850, "6b2a59dbc76984db6d4d6d6b5ee5d62c" ); + e.put( 0.01, "2cb2544739e01f6c08fd820112914317" ); + e.put( 1.0 / 1850, "730b2b83a4b1f6d46fc3b5cd7d90756c" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -275,7 +275,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "indelAllelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1, - Arrays.asList("6e182a58472ea17c8b0eb01f80562fbd")); + Arrays.asList("45633d905136c86e9d3f90ce613255e5")); executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec2); } @@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("1d4a6a1b840ca6a130516ab9f2d99869")); + Arrays.asList("75e49dff01763aff2984dc86a72eb229")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("6ee2f3c6b5422f0a2ad0669639e293cb")); + Arrays.asList("8209a308d95659c6da7dab8733c736f9")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); } From 5a0617080483b4fb4076bdc915962a06ace255ea Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Fri, 9 Dec 2011 14:51:34 +0100 Subject: [PATCH 26/44] Corrected bug causing getChildrenWithParents() to not take the last family member into consideration. --- .../src/org/broadinstitute/sting/gatk/samples/SampleDB.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java index 1ed8dd7a3..a6f6b3481 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java +++ b/public/java/src/org/broadinstitute/sting/gatk/samples/SampleDB.java @@ -200,7 +200,9 @@ public class SampleDB { continue; sampleIterator = familyMembers.iterator(); - for(Sample sample = sampleIterator.next(); sampleIterator.hasNext(); sample = sampleIterator.next()){ + Sample sample; + while(sampleIterator.hasNext()){ + sample = sampleIterator.next(); if(sample.getParents().size() == 2 && familyMembers.containsAll(sample.getParents())) childrenWithParents.add(sample); } From 72fbfba97d3875253f3dfbd8221c7f2e442fc35b Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Fri, 9 Dec 2011 15:57:07 +0100 Subject: [PATCH 27/44] Added UnitTests for getFamilies() and getChildrenWithParents() --- .../sting/gatk/samples/SampleDBUnitTest.java | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java index d498ee61a..7f21da4f4 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/samples/SampleDBUnitTest.java @@ -27,11 +27,42 @@ public class SampleDBUnitTest extends BaseTest { new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED), new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED))); + private static final Set testPEDFamilyF2 = new HashSet(Arrays.asList( + new Sample("s2", "fam2", "d2", "m2", Gender.FEMALE, Affection.AFFECTED), + new Sample("d2", "fam2", null, null, Gender.MALE, Affection.UNKNOWN), + new Sample("m2", "fam2", null, null, Gender.FEMALE, Affection.UNKNOWN) + )); + + private static final Set testPEDFamilyF3 = new HashSet(Arrays.asList( + new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED), + new Sample("d1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN), + new Sample("m1", "fam3", null, null, Gender.FEMALE, Affection.UNKNOWN) + )); + private static final Set testSAMSamples = new HashSet(Arrays.asList( new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN), new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN), new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN))); + private static final HashMap> testGetFamilies = new HashMap>(); + static { + testGetFamilies.put("fam1", testPEDSamples); + testGetFamilies.put("fam2", testPEDFamilyF2); + testGetFamilies.put("fam3", testPEDFamilyF3); + } + + private static final Set testKidsWithParentsFamilies2 = new HashSet(Arrays.asList( + new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED), + new Sample("kid3", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED), + new Sample("kid2", "fam5", "dad2", "mom2", Gender.MALE, Affection.AFFECTED))); + + private static final HashSet testGetPartialFamiliesIds = new HashSet(Arrays.asList("kid","s1")); + private static final HashMap> testGetPartialFamilies = new HashMap>(); + static { + testGetPartialFamilies.put("fam1", new HashSet(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED)))); + testGetPartialFamilies.put("fam3", new HashSet(Arrays.asList(new Sample("s1", "fam3", "d1", "m1", Gender.FEMALE, Affection.AFFECTED)))); + } + private static final String testPEDString = String.format("%s%n%s%n%s", "fam1 kid dad mom 1 2", @@ -46,6 +77,18 @@ public class SampleDBUnitTest extends BaseTest { "fam3 s1 d1 m1 2 2", "fam2 s2 d2 m2 2 2"); + private static final String testPEDMultipleFamilies2 = + String.format("%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s%n%s", + "fam1 kid dad mom 1 2", + "fam1 dad 0 0 1 1", + "fam1 mom 0 0 2 2", + "fam4 kid4 dad4 0 1 2", + "fam4 dad4 0 0 1 1", + "fam5 kid2 dad2 mom2 1 2", + "fam5 kid3 dad2 mom2 1 2", + "fam5 dad2 0 0 1 1", + "fam5 mom2 0 0 2 2"); + private static final String testPEDStringInconsistentGender = "fam1 kid 0 0 2 2"; @@ -138,6 +181,25 @@ public class SampleDBUnitTest extends BaseTest { Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet); } + @Test() + public void getFamilies(){ + builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies)); + SampleDB db = builder.getFinalSampleDB(); + Assert.assertEquals(db.getFamilies(),testGetFamilies); + Assert.assertEquals(db.getFamilies(null),testGetFamilies); + Assert.assertEquals(db.getFamilies(testGetPartialFamiliesIds),testGetPartialFamilies); + } + + @Test() + public void testGetChildrenWithParents() + { + builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies2)); + SampleDB db = builder.getFinalSampleDB(); + Assert.assertEquals(db.getChildrenWithParents(), testKidsWithParentsFamilies2); + Assert.assertEquals(db.getChildrenWithParents(false), testKidsWithParentsFamilies2); + Assert.assertEquals(db.getChildrenWithParents(true), new HashSet(Arrays.asList(new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED)))); + } + @Test() public void loadFamilyIDs() { builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies)); From 442ceb6ad9b381108a3420a4158118e38339b474 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 9 Dec 2011 10:16:44 -0500 Subject: [PATCH 28/44] The Exact model now computes both the likelihoods and posteriors (in separate arrays); likelihoods are used for assigning genotypes, not the posteriors. --- .../AlleleFrequencyCalculationModel.java | 16 +++++----- .../genotyper/ExactAFCalculationModel.java | 31 ++++++++++++------- .../genotyper/UnifiedGenotyperEngine.java | 22 ++++++++----- .../ExactAFCalculationModelUnitTest.java | 4 ++- 4 files changed, 47 insertions(+), 26 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 2808e6968..01e696237 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -62,24 +62,26 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { /** * Must be overridden by concrete subclasses - * @param GLs genotype likelihoods - * @param Alleles Alleles corresponding to GLs - * @param log10AlleleFrequencyPriors priors - * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results + * @param GLs genotype likelihoods + * @param Alleles Alleles corresponding to GLs + * @param log10AlleleFrequencyPriors priors + * @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results + * @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results */ protected abstract void getLog10PNonRef(GenotypesContext GLs, List Alleles, double[][] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyLikelihoods, double[][] log10AlleleFrequencyPosteriors); /** * Can be overridden by concrete subclasses * @param vc variant context with genotype likelihoods - * @param log10AlleleFrequencyPosteriors allele frequency results + * @param log10AlleleFrequencyLikelihoods allele frequency results * @param AFofMaxLikelihood allele frequency of max likelihood * * @return calls */ protected abstract GenotypesContext assignGenotypes(VariantContext vc, - double[][] log10AlleleFrequencyPosteriors, - int AFofMaxLikelihood); + double[][] log10AlleleFrequencyLikelihoods, + int AFofMaxLikelihood); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 22017a1ee..f4af579e3 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -52,15 +52,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { USE_MULTI_ALLELIC_CALCULATION = UAC.MULTI_ALLELIC; } - public void getLog10PNonRef(GenotypesContext GLs, List alleles, - double[][] log10AlleleFrequencyPriors, - double[][] log10AlleleFrequencyPosteriors) { + public void getLog10PNonRef(final GenotypesContext GLs, + final List alleles, + final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, + final double[][] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); if ( USE_MULTI_ALLELIC_CALCULATION ) - linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, false); + linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); else - linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyPosteriors); + linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); } private static final ArrayList getGLs(GenotypesContext GLs) { @@ -125,6 +127,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public int linearExact(GenotypesContext GLs, double[] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyLikelihoods, double[][] log10AlleleFrequencyPosteriors) { final ArrayList genotypeLikelihoods = getGLs(GLs); final int numSamples = genotypeLikelihoods.size()-1; @@ -176,6 +179,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // update the posteriors vector final double log10LofK = kMinus0[numSamples]; + log10AlleleFrequencyLikelihoods[0][k] = log10LofK; log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k]; // can we abort early? @@ -320,6 +324,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { public static void linearExactMultiAllelic(final GenotypesContext GLs, final int numAlternateAlleles, final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, final double[][] log10AlleleFrequencyPosteriors, final boolean preserveData) { @@ -344,7 +349,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { while ( !ACqueue.isEmpty() ) { // compute log10Likelihoods final ExactACset set = ACqueue.remove(); - final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); + final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); // adjust max likelihood seen if needed maxLog10L = Math.max(maxLog10L, log10LofKs); @@ -359,13 +364,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final Queue ACqueue, final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, final double[][] log10AlleleFrequencyPosteriors) { if ( DEBUG ) System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts); // compute the log10Likelihoods - computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors); + computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); // clean up memory if ( !preserveData ) { @@ -471,6 +477,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final ArrayList genotypeLikelihoods, final HashMap indexesToACset, final double[][] log10AlleleFrequencyPriors, + final double[][] log10AlleleFrequencyLikelihoods, final double[][] log10AlleleFrequencyPosteriors) { set.log10Likelihoods[0] = 0.0; // the zero case @@ -517,7 +524,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } } - // update the posteriors vector final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1]; // determine the power of theta to use @@ -527,11 +533,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { nonRefAlleles++; } - // update the posteriors vector which is a collapsed view of each of the various ACs + // update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) { + int AC = set.ACcounts.getCounts()[i]; + log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK); + // for k=0 we still want to use theta - final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][set.ACcounts.getCounts()[i]]; - log10AlleleFrequencyPosteriors[i][set.ACcounts.getCounts()[i]] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][set.ACcounts.getCounts()[i]], log10LofK + prior); + final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC]; + log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 6e61790ed..606a0544c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -77,6 +77,7 @@ public class UnifiedGenotyperEngine { private final double[][] log10AlleleFrequencyPriorsIndels; // the allele frequency likelihoods (allocated once as an optimization) + private ThreadLocal log10AlleleFrequencyLikelihoods = new ThreadLocal(); private ThreadLocal log10AlleleFrequencyPosteriors = new ThreadLocal(); // the priors object @@ -294,6 +295,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { + log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } @@ -311,8 +313,9 @@ public class UnifiedGenotyperEngine { generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); // find the most likely frequency int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]); @@ -350,7 +353,7 @@ public class UnifiedGenotyperEngine { } // create the genotypes - GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); + GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess); // print out stats if we have a writer if ( verboseWriter != null ) @@ -369,16 +372,18 @@ public class UnifiedGenotyperEngine { // the overall lod VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0]; double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); //if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF); // the forward lod VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); @@ -386,8 +391,9 @@ public class UnifiedGenotyperEngine { // the reverse lod VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model); + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); //normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true); double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0]; double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1); @@ -445,6 +451,7 @@ public class UnifiedGenotyperEngine { // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { + log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); } @@ -460,8 +467,9 @@ public class UnifiedGenotyperEngine { return null; // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) + clearAFarray(log10AlleleFrequencyLikelihoods.get()); clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get()); + afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); // find the most likely frequency int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]); @@ -499,7 +507,7 @@ public class UnifiedGenotyperEngine { } // create the genotypes - GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess); + GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess); // *** note that calculating strand bias involves overwriting data structures, so we do that last HashMap attributes = new HashMap(); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index 00cfff4b3..9640a8963 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -83,14 +83,16 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { @Test(dataProvider = "getGLs") public void testGLs(GetGLsTest cfg) { + final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1]; final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1]; for ( int i = 0; i < 2; i++ ) { for ( int j = 0; j < 2*numSamples+1; j++ ) { + log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; } } - ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyPosteriors, false); + ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); int nameIndex = 1; for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) { From 64dad13e2da3294d7f8132fce40bd9da24529375 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 9 Dec 2011 11:09:40 -0500 Subject: [PATCH 29/44] Don't carry around an extra copy of the code for the Haplotype Caller --- .../genotyper/ExactAFCalculationModel.java | 3 +- .../genotyper/UnifiedGenotyperEngine.java | 135 ++++-------------- 2 files changed, 31 insertions(+), 107 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index f4af579e3..dccc2c02c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -588,12 +588,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { /** * Can be overridden by concrete subclasses * @param vc variant context with genotype likelihoods + * @param log10AlleleFrequencyLikelihoods likelihoods * @param AFofMaxLikelihood allele frequency of max likelihood * * @return calls */ public GenotypesContext assignGenotypes(VariantContext vc, - double[][] log10AlleleFrequencyPosteriors, + double[][] log10AlleleFrequencyLikelihoods, int AFofMaxLikelihood) { if ( !vc.isVariant() ) throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart()); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 606a0544c..4821b3eb8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -97,6 +97,7 @@ public class UnifiedGenotyperEngine { // the standard filter to use for calls below the confidence threshold but above the emit threshold private static final Set filter = new HashSet(1); + private final GenomeLocParser genomeLocParser; private final boolean BAQEnabledOnCMDLine; @@ -114,6 +115,7 @@ public class UnifiedGenotyperEngine { @Requires({"toolkit != null", "UAC != null", "logger != null", "samples != null && samples.size() > 0"}) public UnifiedGenotyperEngine(GenomeAnalysisEngine toolkit, UnifiedArgumentCollection UAC, Logger logger, PrintStream verboseWriter, VariantAnnotatorEngine engine, Set samples) { this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF; + genomeLocParser = toolkit.getGenomeLocParser(); this.samples = new TreeSet(samples); // note that, because we cap the base quality by the mapping quality, minMQ cannot be less than minBQ this.UAC = UAC.clone(); @@ -290,8 +292,13 @@ public class UnifiedGenotyperEngine { return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make(); } - // private method called by both UnifiedGenotyper and UGCallVariants entry points into the engine - private VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { + public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { + return calculateGenotypes(null, null, null, null, vc, model); + } + + public VariantCallContext calculateGenotypes(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext, Map stratifiedContexts, VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { + + boolean limitedContext = tracker == null || refContext == null || rawContext == null || stratifiedContexts == null; // initialize the data for this thread if that hasn't been done yet if ( afcm.get() == null ) { @@ -307,10 +314,13 @@ public class UnifiedGenotyperEngine { } // estimate our confidence in a reference call and return - if ( vc.getNSamples() == 0 ) + if ( vc.getNSamples() == 0 ) { + if ( limitedContext ) + return null; return (UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES ? estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), false, 1.0) : generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext)); + } // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) clearAFarray(log10AlleleFrequencyLikelihoods.get()); @@ -349,25 +359,31 @@ public class UnifiedGenotyperEngine { if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { // technically, at this point our confidence in a reference call isn't accurately estimated // because it didn't take into account samples with no data, so let's get a better estimate - return estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF); + return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF); + } + + // strip out the alternate allele(s) if we're making a ref call + Set myAlleles = new HashSet(vc.getAlleles()); + if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { + myAlleles = new HashSet(1); + myAlleles.add(vc.getReference()); } // create the genotypes GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess); // print out stats if we have a writer - if ( verboseWriter != null ) + if ( verboseWriter != null && !limitedContext ) printVerboseData(refContext.getLocus().toString(), vc, PofF, phredScaledConfidence, normalizedPosteriors, model); // *** note that calculating strand bias involves overwriting data structures, so we do that last HashMap attributes = new HashMap(); // if the site was downsampled, record that fact - if ( rawContext.hasPileupBeenDownsampled() ) + if ( !limitedContext && rawContext.hasPileupBeenDownsampled() ) attributes.put(VCFConstants.DOWNSAMPLED_KEY, true); - - if ( UAC.COMPUTE_SLOD && bestAFguess != 0 ) { + if ( UAC.COMPUTE_SLOD && !limitedContext && bestAFguess != 0 ) { //final boolean DEBUG_SLOD = false; // the overall lod @@ -412,26 +428,18 @@ public class UnifiedGenotyperEngine { attributes.put("SB", strandScore); } - GenomeLoc loc = refContext.getLocus(); + GenomeLoc loc = genomeLocParser.createGenomeLoc(vc); - int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc); - - Set myAlleles = new HashSet(vc.getAlleles()); - // strip out the alternate allele if it's a ref call - if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { - myAlleles = new HashSet(1); - myAlleles.add(vc.getReference()); - } - - VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles); + VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles); builder.genotypes(genotypes); builder.log10PError(phredScaledConfidence/-10.0); if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); builder.attributes(attributes); - builder.referenceBaseForIndel(refContext.getBase()); + if ( !limitedContext ) + builder.referenceBaseForIndel(refContext.getBase()); VariantContext vcCall = builder.make(); - if ( annotationEngine != null ) { + if ( annotationEngine != null && !limitedContext ) { // Note: we want to use the *unfiltered* and *unBAQed* context for the annotations ReadBackedPileup pileup = null; if (rawContext.hasExtendedEventPileup()) @@ -446,91 +454,6 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } - // A barebones entry point to the exact model when there is no tracker or stratified contexts available -- only GLs - public VariantCallContext calculateGenotypes(final VariantContext vc, final GenomeLoc loc, final GenotypeLikelihoodsCalculationModel.Model model) { - - // initialize the data for this thread if that hasn't been done yet - if ( afcm.get() == null ) { - log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); - log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]); - afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC)); - } - - // don't try to genotype too many alternate alleles - if ( vc.getAlternateAlleles().size() > UAC.MAX_ALTERNATE_ALLELES ) { - logger.warn("the Unified Genotyper is currently set to genotype at most " + UAC.MAX_ALTERNATE_ALLELES + " alternate alleles in a given context, but the context at " + vc.getChr() + ":" + vc.getStart() + " has " + vc.getAlternateAlleles().size() + " alternate alleles; see the --max_alternate_alleles argument"); - return null; - } - - // estimate our confidence in a reference call and return - if ( vc.getNSamples() == 0 ) - return null; - - // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position) - clearAFarray(log10AlleleFrequencyLikelihoods.get()); - clearAFarray(log10AlleleFrequencyPosteriors.get()); - afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); - - // find the most likely frequency - int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]); - - // calculate p(f>0) - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[0]); - double sum = 0.0; - for (int i = 1; i <= N; i++) - sum += normalizedPosteriors[i]; - double PofF = Math.min(sum, 1.0); // deal with precision errors - - double phredScaledConfidence; - if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); - if ( Double.isInfinite(phredScaledConfidence) ) - phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0]; - } else { - phredScaledConfidence = QualityUtils.phredScaleErrorRate(PofF); - if ( Double.isInfinite(phredScaledConfidence) ) { - sum = 0.0; - for (int i = 1; i <= N; i++) { - if ( log10AlleleFrequencyPosteriors.get()[0][i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED ) - break; - sum += log10AlleleFrequencyPosteriors.get()[0][i]; - } - phredScaledConfidence = (MathUtils.compareDoubles(sum, 0.0) == 0 ? 0 : -10.0 * sum); - } - } - - // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero - if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { - // technically, at this point our confidence in a reference call isn't accurately estimated - // because it didn't take into account samples with no data, so let's get a better estimate - return null; - } - - // create the genotypes - GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess); - - // *** note that calculating strand bias involves overwriting data structures, so we do that last - HashMap attributes = new HashMap(); - - int endLoc = calculateEndPos(vc.getAlleles(), vc.getReference(), loc); - - Set myAlleles = new HashSet(vc.getAlleles()); - // strip out the alternate allele if it's a ref call - if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { - myAlleles = new HashSet(1); - myAlleles.add(vc.getReference()); - } - - VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, myAlleles); - builder.genotypes(genotypes); - builder.log10PError(phredScaledConfidence/-10.0); - if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); - builder.attributes(attributes); - builder.referenceBaseForIndel(vc.getReferenceBaseForIndel()); - - return new VariantCallContext(builder.make(), confidentlyCalled(phredScaledConfidence, PofF)); - } - private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { // TODO - temp fix until we can deal with extended events properly // for indels, stop location is one more than ref allele length From 364f1a030b5e55538e53c293e3015dbbb4d8b081 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 9 Dec 2011 14:25:28 -0500 Subject: [PATCH 30/44] Plumbing added so that the UG engine can handle multiple alleles and they can successfully be genotyped. Alleles that aren't likely are not allowed to be used when assigning genotypes, but otherwise the greedy PL-based approach is what is used. Moved assign genotypes code to UG engine since it has nothing to do with the Exact model. Still have some TODOs in here before I can push this out to everyone. --- .../AlleleFrequencyCalculationModel.java | 12 -- .../genotyper/ExactAFCalculationModel.java | 90 +--------- .../genotyper/UnifiedGenotyperEngine.java | 164 +++++++++++++++--- 3 files changed, 145 insertions(+), 121 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index 01e696237..7d3e7047d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -72,16 +72,4 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { double[][] log10AlleleFrequencyPriors, double[][] log10AlleleFrequencyLikelihoods, double[][] log10AlleleFrequencyPosteriors); - - /** - * Can be overridden by concrete subclasses - * @param vc variant context with genotype likelihoods - * @param log10AlleleFrequencyLikelihoods allele frequency results - * @param AFofMaxLikelihood allele frequency of max likelihood - * - * @return calls - */ - protected abstract GenotypesContext assignGenotypes(VariantContext vc, - double[][] log10AlleleFrequencyLikelihoods, - int AFofMaxLikelihood); } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index dccc2c02c..8fbc9f178 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -27,9 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; import java.io.PrintStream; @@ -40,9 +38,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private final static boolean DEBUG = false; private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 - private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. - - private static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); private final boolean USE_MULTI_ALLELIC_CALCULATION; @@ -73,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { if ( sample.hasLikelihoods() ) { double[] gls = sample.getLikelihoods().getAsVector(); - if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL) + if ( MathUtils.sum(gls) < UnifiedGenotyperEngine.SUM_GL_THRESH_NOCALL ) genotypeLikelihoods.add(gls); } } @@ -584,87 +579,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return coeff; } - - /** - * Can be overridden by concrete subclasses - * @param vc variant context with genotype likelihoods - * @param log10AlleleFrequencyLikelihoods likelihoods - * @param AFofMaxLikelihood allele frequency of max likelihood - * - * @return calls - */ - public GenotypesContext assignGenotypes(VariantContext vc, - double[][] log10AlleleFrequencyLikelihoods, - int AFofMaxLikelihood) { - if ( !vc.isVariant() ) - throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart()); - - GenotypesContext GLs = vc.getGenotypes(); - double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1]; - - ArrayList sampleIndices = new ArrayList(); - - // todo - optimize initialization - for (int k=0; k <= AFofMaxLikelihood; k++) - for (int j=0; j <= GLs.size(); j++) - pathMetricArray[j][k] = -1e30; - - pathMetricArray[0][0] = 0.0; - - sampleIndices.addAll(GLs.getSampleNamesOrderedByName()); - - GenotypesContext calls = GenotypesContext.create(); - - for (int k = GLs.size(); k > 0; k--) { - int bestGTguess; - String sample = sampleIndices.get(k-1); - Genotype g = GLs.get(sample); - if ( !g.hasLikelihoods() ) - continue; - - ArrayList myAlleles = new ArrayList(); - - double[] likelihoods = g.getLikelihoods().getAsVector(); - - // if there is no mass on the likelihoods, then just no-call the sample - if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) { - calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false)); - continue; - } - - bestGTguess = Utils.findIndexOfMaxEntry(likelihoods); - - // likelihoods are stored row-wise in lower triangular matrix. IE - // for 2 alleles they have ordering AA,AB,BB - // for 3 alleles they are ordered AA,AB,BB,AC,BC,CC - // Get now alleles corresponding to best index - int kk=0; - boolean done = false; - for (int j=0; j < vc.getNAlleles(); j++) { - for (int i=0; i <= j; i++){ - if (kk++ == bestGTguess) { - if (i==0) - myAlleles.add(vc.getReference()); - else - myAlleles.add(vc.getAlternateAllele(i-1)); - - if (j==0) - myAlleles.add(vc.getReference()); - else - myAlleles.add(vc.getAlternateAllele(j-1)); - done = true; - break; - } - - } - if (done) - break; - } - - final double qual = GenotypeLikelihoods.getQualFromLikelihoods(bestGTguess, likelihoods); - calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); - } - - return calls; - } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 4821b3eb8..918f83514 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -59,6 +59,9 @@ public class UnifiedGenotyperEngine { EMIT_ALL_SITES } + protected static final List NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); + protected static final double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call. + // the unified argument collection private final UnifiedArgumentCollection UAC; public UnifiedArgumentCollection getUAC() { return UAC; } @@ -327,18 +330,21 @@ public class UnifiedGenotyperEngine { clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); - // find the most likely frequency - int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]); + // TODO -- this is not the right thing mathematically to do! In a case of B=1,C=0 the likelihoods would get added to both AC=0 and AC=1 + double[] collapsedPosteriors = collapseAFarrays(log10AlleleFrequencyPosteriors.get(), vc.getAlternateAlleles().size()); + + // is the most likely frequency conformation AC=0 for all alternate alleles? + boolean bestGuessIsRef = MathUtils.maxElementIndex(collapsedPosteriors) == 0; // calculate p(f>0) - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[0]); + double[] normalizedPosteriors = MathUtils.normalizeFromLog10(collapsedPosteriors); double sum = 0.0; for (int i = 1; i <= N; i++) sum += normalizedPosteriors[i]; double PofF = Math.min(sum, 1.0); // deal with precision errors double phredScaledConfidence; - if ( bestAFguess != 0 || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + if ( !bestGuessIsRef || UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { phredScaledConfidence = QualityUtils.phredScaleErrorRate(normalizedPosteriors[0]); if ( Double.isInfinite(phredScaledConfidence) ) phredScaledConfidence = -10.0 * log10AlleleFrequencyPosteriors.get()[0][0]; @@ -356,21 +362,46 @@ public class UnifiedGenotyperEngine { } // return a null call if we don't pass the confidence cutoff or the most likely allele frequency is zero - if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestAFguess) ) { + if ( UAC.OutputMode != OUTPUT_MODE.EMIT_ALL_SITES && !passesEmitThreshold(phredScaledConfidence, bestGuessIsRef) ) { // technically, at this point our confidence in a reference call isn't accurately estimated // because it didn't take into account samples with no data, so let's get a better estimate return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF); } - // strip out the alternate allele(s) if we're making a ref call - Set myAlleles = new HashSet(vc.getAlleles()); - if ( bestAFguess == 0 && UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { - myAlleles = new HashSet(1); + // strip out any alleles that aren't going to be used + Set myAlleles; + boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()]; + if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { + myAlleles = new HashSet(vc.getAlleles().size()); myAlleles.add(vc.getReference()); + + // if we're making a reference call then we keep just the ref allele, otherwise we need to determine which ones are okay + if ( !bestGuessIsRef ) { + for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { + if ( MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]) != 0 ) { + myAlleles.add(vc.getAlternateAllele(i)); + altAllelesToUse[i] = true; + } + } + } + } else { + // use all of the alleles if we are given them by the user + myAlleles = new HashSet(vc.getAlleles()); + for ( int i = 0; i < altAllelesToUse.length; i++ ) + altAllelesToUse[i] = true; } + // start constructing the resulting VC + GenomeLoc loc = genomeLocParser.createGenomeLoc(vc); + VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles); + builder.log10PError(phredScaledConfidence/-10.0); + if ( ! passesCallThreshold(phredScaledConfidence) ) + builder.filters(filter); + if ( !limitedContext ) + builder.referenceBaseForIndel(refContext.getBase()); + // create the genotypes - GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess); + GenotypesContext genotypes = assignGenotypes(vc, altAllelesToUse); // print out stats if we have a writer if ( verboseWriter != null && !limitedContext ) @@ -383,7 +414,7 @@ public class UnifiedGenotyperEngine { if ( !limitedContext && rawContext.hasPileupBeenDownsampled() ) attributes.put(VCFConstants.DOWNSAMPLED_KEY, true); - if ( UAC.COMPUTE_SLOD && !limitedContext && bestAFguess != 0 ) { + if ( UAC.COMPUTE_SLOD && !limitedContext && !bestGuessIsRef ) { //final boolean DEBUG_SLOD = false; // the overall lod @@ -428,15 +459,9 @@ public class UnifiedGenotyperEngine { attributes.put("SB", strandScore); } - GenomeLoc loc = genomeLocParser.createGenomeLoc(vc); - - VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), myAlleles); + // finish constructing the resulting VC builder.genotypes(genotypes); - builder.log10PError(phredScaledConfidence/-10.0); - if ( ! passesCallThreshold(phredScaledConfidence) ) builder.filters(filter); builder.attributes(attributes); - if ( !limitedContext ) - builder.referenceBaseForIndel(refContext.getBase()); VariantContext vcCall = builder.make(); if ( annotationEngine != null && !limitedContext ) { @@ -454,6 +479,21 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } + private static double[] collapseAFarrays(double[][] original, int numDimensions) { + int size = original[0].length; + double[] newArray = new double[size]; + for ( int i = 0; i < size; i++) + newArray[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; + + for ( int i = 0; i < numDimensions; i++ ) { + for ( int j = 0; j < size; j++ ) { + newArray[j] = ExactAFCalculationModel.approximateLog10SumLog10(newArray[j], original[i][j]); + } + } + + return newArray; + } + private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { // TODO - temp fix until we can deal with extended events properly // for indels, stop location is one more than ref allele length @@ -634,8 +674,8 @@ public class UnifiedGenotyperEngine { verboseWriter.println(); } - protected boolean passesEmitThreshold(double conf, int bestAFguess) { - return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || bestAFguess != 0) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); + protected boolean passesEmitThreshold(double conf, boolean bestGuessIsRef) { + return (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_CONFIDENT_SITES || !bestGuessIsRef) && conf >= Math.min(UAC.STANDARD_CONFIDENCE_FOR_CALLING, UAC.STANDARD_CONFIDENCE_FOR_EMITTING); } protected boolean passesCallThreshold(double conf) { @@ -780,4 +820,88 @@ public class UnifiedGenotyperEngine { return vc; } + + /** + * @param vc variant context with genotype likelihoods + * @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use + * + * @return genotypes + */ + public GenotypesContext assignGenotypes(VariantContext vc, + boolean[] allelesToUse) { + + final GenotypesContext GLs = vc.getGenotypes(); + + final List sampleIndices = GLs.getSampleNamesOrderedByName(); + + final GenotypesContext calls = GenotypesContext.create(); + + for ( int k = GLs.size() - 1; k >= 0; k-- ) { + final String sample = sampleIndices.get(k); + final Genotype g = GLs.get(sample); + if ( !g.hasLikelihoods() ) + continue; + + final double[] likelihoods = g.getLikelihoods().getAsVector(); + + // if there is no mass on the likelihoods, then just no-call the sample + if ( MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL ) { + calls.add(new Genotype(g.getSampleName(), NO_CALL_ALLELES, Genotype.NO_LOG10_PERROR, null, null, false)); + continue; + } + + // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. + // so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. + + final int numAltAlleles = allelesToUse.length; + + // start with the assumption that the ideal genotype is homozygous reference + Allele maxAllele1 = vc.getReference(), maxAllele2 = vc.getReference(); + double maxLikelihoodSeen = likelihoods[0]; + int indexOfMax = 0; + + // keep track of some state + Allele firstAllele = vc.getReference(); + int subtractor = numAltAlleles + 1; + int subtractionsMade = 0; + + for ( int i = 1, PLindex = 1; i < likelihoods.length; i++, PLindex++ ) { + if ( PLindex == subtractor ) { + firstAllele = vc.getAlternateAllele(subtractionsMade); + PLindex -= subtractor; + subtractor--; + subtractionsMade++; + + // we can skip this allele if it's not usable + if ( !allelesToUse[subtractionsMade-1] ) { + i += subtractor - 1; + PLindex += subtractor - 1; + continue; + } + } + + // we don't care about the entry if we've already seen better + if ( likelihoods[i] <= maxLikelihoodSeen ) + continue; + + // if it's usable then update the alleles + int alleleIndex = subtractionsMade + PLindex - 1; + if ( allelesToUse[alleleIndex] ) { + maxAllele1 = firstAllele; + maxAllele2 = vc.getAlternateAllele(alleleIndex); + maxLikelihoodSeen = likelihoods[i]; + indexOfMax = i; + } + } + + ArrayList myAlleles = new ArrayList(); + myAlleles.add(maxAllele1); + myAlleles.add(maxAllele2); + + final double qual = GenotypeLikelihoods.getQualFromLikelihoods(indexOfMax, likelihoods); + calls.add(new Genotype(sample, myAlleles, qual, null, g.getAttributes(), false)); + } + + return calls; + } } From 044f211a30b427ab62f175095d6e604321b94223 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sat, 10 Dec 2011 23:57:14 -0500 Subject: [PATCH 31/44] Don't collapse likelihoods over all alt alleles - that's just not right. For now, the QUAL is calculated for just the most likely of the alt alleles; I need to think about the right way to handle this properly. --- .../genotyper/UnifiedGenotyperEngine.java | 72 +++++++++---------- .../UnifiedGenotyperIntegrationTest.java | 4 +- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 918f83514..3a86743de 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -330,14 +330,38 @@ public class UnifiedGenotyperEngine { clearAFarray(log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get()); - // TODO -- this is not the right thing mathematically to do! In a case of B=1,C=0 the likelihoods would get added to both AC=0 and AC=1 - double[] collapsedPosteriors = collapseAFarrays(log10AlleleFrequencyPosteriors.get(), vc.getAlternateAlleles().size()); - // is the most likely frequency conformation AC=0 for all alternate alleles? - boolean bestGuessIsRef = MathUtils.maxElementIndex(collapsedPosteriors) == 0; + boolean bestGuessIsRef = true; + + // which alternate allele has the highest MLE AC? + int indexOfHighestAlt = -1; + int alleleCountOfHighestAlt = -1; + + // determine which alternate alleles have AF>0 + boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()]; + for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { + int indexOfBestAC = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]); + + // if the most likely AC is not 0, then this is a good alternate allele to use + if ( indexOfBestAC != 0 ) { + altAllelesToUse[i] = true; + bestGuessIsRef = false; + } + // if in GENOTYPE_GIVEN_ALLELES mode, we still want to allow the use of a poor allele + else if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { + altAllelesToUse[i] = true; + } + + // keep track of the "best" alternate allele to use + if ( indexOfBestAC > alleleCountOfHighestAlt) { + alleleCountOfHighestAlt = indexOfBestAC; + indexOfHighestAlt = i; + } + } // calculate p(f>0) - double[] normalizedPosteriors = MathUtils.normalizeFromLog10(collapsedPosteriors); + // TODO -- right now we just calculate it for the alt allele with highest AF, but the likelihoods need to be combined correctly over all AFs + double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get()[indexOfHighestAlt]); double sum = 0.0; for (int i = 1; i <= N; i++) sum += normalizedPosteriors[i]; @@ -368,27 +392,18 @@ public class UnifiedGenotyperEngine { return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getGenotypePriors(model).getHeterozygosity(), true, 1.0 - PofF); } - // strip out any alleles that aren't going to be used - Set myAlleles; - boolean[] altAllelesToUse = new boolean[vc.getAlternateAlleles().size()]; + // strip out any alleles that aren't going to be used in the VariantContext + List myAlleles; if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY ) { - myAlleles = new HashSet(vc.getAlleles().size()); + myAlleles = new ArrayList(vc.getAlleles().size()); myAlleles.add(vc.getReference()); - - // if we're making a reference call then we keep just the ref allele, otherwise we need to determine which ones are okay - if ( !bestGuessIsRef ) { - for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { - if ( MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[i]) != 0 ) { - myAlleles.add(vc.getAlternateAllele(i)); - altAllelesToUse[i] = true; - } - } + for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) { + if ( altAllelesToUse[i] ) + myAlleles.add(vc.getAlternateAllele(i)); } } else { // use all of the alleles if we are given them by the user - myAlleles = new HashSet(vc.getAlleles()); - for ( int i = 0; i < altAllelesToUse.length; i++ ) - altAllelesToUse[i] = true; + myAlleles = vc.getAlleles(); } // start constructing the resulting VC @@ -479,21 +494,6 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } - private static double[] collapseAFarrays(double[][] original, int numDimensions) { - int size = original[0].length; - double[] newArray = new double[size]; - for ( int i = 0; i < size; i++) - newArray[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED; - - for ( int i = 0; i < numDimensions; i++ ) { - for ( int j = 0; j < size; j++ ) { - newArray[j] = ExactAFCalculationModel.approximateLog10SumLog10(newArray[j], original[i][j]); - } - } - - return newArray; - } - private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { // TODO - temp fix until we can deal with extended events properly // for indels, stop location is one more than ref allele length diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index c04b0085c..605d6051c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -285,7 +285,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2.20101123.indels.sites.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,080,000", 1, - Arrays.asList("75e49dff01763aff2984dc86a72eb229")); + Arrays.asList("98a4d1e1e0a363ba37518563ac6cbead")); executeTest("test MultiSample Pilot2 indels with complicated records", spec3); } @@ -294,7 +294,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec( baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation + "phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1, - Arrays.asList("8209a308d95659c6da7dab8733c736f9")); + Arrays.asList("915e7a3e7cbfd995dbc41fdd382d0d51")); executeTest("test MultiSample Phase1 indels with complicated records", spec4); } From 7c4b9338ad9cdcab164081b497d8aa86e2a6242a Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Sun, 11 Dec 2011 00:23:33 -0500 Subject: [PATCH 32/44] The old bi-allelic implementation of the Exact model has been completely deprecated - you can only use the multi-allelic implementation now. --- .../genotyper/ExactAFCalculationModel.java | 249 +++++++++--------- .../UnifiedGenotyperIntegrationTest.java | 6 +- 2 files changed, 127 insertions(+), 128 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 8fbc9f178..77a940dcf 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -39,12 +39,9 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6 - private final boolean USE_MULTI_ALLELIC_CALCULATION; - protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) { super(UAC, N, logger, verboseWriter); - USE_MULTI_ALLELIC_CALCULATION = UAC.MULTI_ALLELIC; } public void getLog10PNonRef(final GenotypesContext GLs, @@ -54,10 +51,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double[][] log10AlleleFrequencyPosteriors) { final int numAlleles = alleles.size(); - if ( USE_MULTI_ALLELIC_CALCULATION ) - linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); - else - linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); + //linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors); + linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false); } private static final ArrayList getGLs(GenotypesContext GLs) { @@ -77,120 +72,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } - // ------------------------------------------------------------------------------------- - // - // Linearized, ~O(N), implementation. - // - // ------------------------------------------------------------------------------------- - - /** - * A simple data structure that holds the current, prev, and prev->prev likelihoods vectors - * for the exact model calculation - */ - private final static class ExactACCache { - double[] kMinus2, kMinus1, kMinus0; - - private final static double[] create(int n) { - return new double[n]; - } - - public ExactACCache(int n) { - kMinus2 = create(n); - kMinus1 = create(n); - kMinus0 = create(n); - } - - final public void rotate() { - double[] tmp = kMinus2; - kMinus2 = kMinus1; - kMinus1 = kMinus0; - kMinus0 = tmp; - } - - final public double[] getkMinus2() { - return kMinus2; - } - - final public double[] getkMinus1() { - return kMinus1; - } - - final public double[] getkMinus0() { - return kMinus0; - } - } - - public int linearExact(GenotypesContext GLs, - double[] log10AlleleFrequencyPriors, - double[][] log10AlleleFrequencyLikelihoods, - double[][] log10AlleleFrequencyPosteriors) { - final ArrayList genotypeLikelihoods = getGLs(GLs); - final int numSamples = genotypeLikelihoods.size()-1; - final int numChr = 2*numSamples; - - final ExactACCache logY = new ExactACCache(numSamples+1); - logY.getkMinus0()[0] = 0.0; // the zero case - - double maxLog10L = Double.NEGATIVE_INFINITY; - boolean done = false; - int lastK = -1; - - for (int k=0; k <= numChr && ! done; k++ ) { - final double[] kMinus0 = logY.getkMinus0(); - - if ( k == 0 ) { // special case for k = 0 - for ( int j=1; j <= numSamples; j++ ) { - kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0]; - } - } else { // k > 0 - final double[] kMinus1 = logY.getkMinus1(); - final double[] kMinus2 = logY.getkMinus2(); - - for ( int j=1; j <= numSamples; j++ ) { - final double[] gl = genotypeLikelihoods.get(j); - final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; - - double aa = Double.NEGATIVE_INFINITY; - double ab = Double.NEGATIVE_INFINITY; - if (k < 2*j-1) - aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0]; - - if (k < 2*j) - ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1]; - - double log10Max; - if (k > 1) { - final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2]; - log10Max = approximateLog10SumLog10(aa, ab, bb); - } else { - // we know we aren't considering the BB case, so we can use an optimized log10 function - log10Max = approximateLog10SumLog10(aa, ab); - } - - // finally, update the L(j,k) value - kMinus0[j] = log10Max - logDenominator; - } - } - - // update the posteriors vector - final double log10LofK = kMinus0[numSamples]; - log10AlleleFrequencyLikelihoods[0][k] = log10LofK; - log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k]; - - // can we abort early? - lastK = k; - maxLog10L = Math.max(maxLog10L, log10LofK); - if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { - if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L); - done = true; - } - - logY.rotate(); - } - - return lastK; - } - final static double approximateLog10SumLog10(double[] vals) { if ( vals.length < 2 ) throw new ReviewedStingException("Passing array with fewer than 2 values when computing approximateLog10SumLog10"); @@ -201,10 +82,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return approx; } - final static double approximateLog10SumLog10(double a, double b, double c) { - return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c); - } - final static double approximateLog10SumLog10(double small, double big) { // make sure small is really the smaller value if ( small > big ) { @@ -579,4 +456,126 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return coeff; } + + + // ------------------------------------------------------------------------------------- + // + // Deprecated bi-allelic ~O(N) implementation. Kept here for posterity. + // + // ------------------------------------------------------------------------------------- + + /** + * A simple data structure that holds the current, prev, and prev->prev likelihoods vectors + * for the exact model calculation + */ +/* + private final static class ExactACCache { + double[] kMinus2, kMinus1, kMinus0; + + private final static double[] create(int n) { + return new double[n]; + } + + public ExactACCache(int n) { + kMinus2 = create(n); + kMinus1 = create(n); + kMinus0 = create(n); + } + + final public void rotate() { + double[] tmp = kMinus2; + kMinus2 = kMinus1; + kMinus1 = kMinus0; + kMinus0 = tmp; + } + + final public double[] getkMinus2() { + return kMinus2; + } + + final public double[] getkMinus1() { + return kMinus1; + } + + final public double[] getkMinus0() { + return kMinus0; + } + } + + public int linearExact(GenotypesContext GLs, + double[] log10AlleleFrequencyPriors, + double[][] log10AlleleFrequencyLikelihoods, + double[][] log10AlleleFrequencyPosteriors) { + final ArrayList genotypeLikelihoods = getGLs(GLs); + final int numSamples = genotypeLikelihoods.size()-1; + final int numChr = 2*numSamples; + + final ExactACCache logY = new ExactACCache(numSamples+1); + logY.getkMinus0()[0] = 0.0; // the zero case + + double maxLog10L = Double.NEGATIVE_INFINITY; + boolean done = false; + int lastK = -1; + + for (int k=0; k <= numChr && ! done; k++ ) { + final double[] kMinus0 = logY.getkMinus0(); + + if ( k == 0 ) { // special case for k = 0 + for ( int j=1; j <= numSamples; j++ ) { + kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[0]; + } + } else { // k > 0 + final double[] kMinus1 = logY.getkMinus1(); + final double[] kMinus2 = logY.getkMinus2(); + + for ( int j=1; j <= numSamples; j++ ) { + final double[] gl = genotypeLikelihoods.get(j); + final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1]; + + double aa = Double.NEGATIVE_INFINITY; + double ab = Double.NEGATIVE_INFINITY; + if (k < 2*j-1) + aa = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + kMinus0[j-1] + gl[0]; + + if (k < 2*j) + ab = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ kMinus1[j-1] + gl[1]; + + double log10Max; + if (k > 1) { + final double bb = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + kMinus2[j-1] + gl[2]; + log10Max = approximateLog10SumLog10(aa, ab, bb); + } else { + // we know we aren't considering the BB case, so we can use an optimized log10 function + log10Max = approximateLog10SumLog10(aa, ab); + } + + // finally, update the L(j,k) value + kMinus0[j] = log10Max - logDenominator; + } + } + + // update the posteriors vector + final double log10LofK = kMinus0[numSamples]; + log10AlleleFrequencyLikelihoods[0][k] = log10LofK; + log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k]; + + // can we abort early? + lastK = k; + maxLog10L = Math.max(maxLog10L, log10LofK); + if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) { + if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L); + done = true; + } + + logY.rotate(); + } + + return lastK; + } + + final static double approximateLog10SumLog10(double a, double b, double c) { + return approximateLog10SumLog10(approximateLog10SumLog10(a, b), c); + } +*/ + } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 605d6051c..4ae00431c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -15,9 +15,9 @@ import java.util.Map; public class UnifiedGenotyperIntegrationTest extends WalkerTest { - private final static String baseCommand = "-T UnifiedGenotyper --multiallelic -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129; - private final static String baseCommandIndels = "-T UnifiedGenotyper --multiallelic -R " + b36KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129; - private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper --multiallelic -R " + b37KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; + private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm BOTH --dbsnp " + b36dbSNP129; + private final static String baseCommandIndels = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b36dbSNP129; + private final static String baseCommandIndelsb37 = "-T UnifiedGenotyper -R " + b37KGReference + " -NO_HEADER -glm INDEL -mbq 20 --dbsnp " + b37dbSNP132; // -------------------------------------------------------------------------------------------------------------- // From cca8a18608806fb44b8373af551e9847260dedf0 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Sun, 11 Dec 2011 13:16:23 -0500 Subject: [PATCH 33/44] PPP pipeline test * added a pipeline test to the Pacbio Processing Pipeline. * updated exampleBAM with more complete RG information so we can use it in a wider variety of pipeline tests * added exampleDBSNP.vcf file with only chromosome 1 in the range of the exampleFASTA.fasta reference for pipeline tests --- .../qscripts/PacbioProcessingPipeline.scala | 1 - .../PacbioProcessingPipelineTest.scala | 45 +++ public/testdata/exampleBAM.bam | Bin 3601 -> 3635 bytes public/testdata/exampleBAM.bam.bai | Bin 136 -> 232 bytes public/testdata/exampleDBSNP.vcf | 282 ++++++++++++++++++ 5 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala create mode 100644 public/testdata/exampleDBSNP.vcf diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala index 6f5dae2f8..1d3fb2622 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala @@ -177,7 +177,6 @@ class PacbioProcessingPipeline extends QScript { } case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { - this.resources = R this.recal_file = inRecalFile this.output_dir = outPath this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala new file mode 100644 index 000000000..355420a93 --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -0,0 +1,45 @@ +package org.broadinstitute.sting.queue.pipeline + +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.testng.annotations.Test +import org.broadinstitute.sting.BaseTest + +class PacbioProcessingPipelineTest { + @Test + def testBAM { + val testOut = "exampleBAM.recal.bam" + val spec = new PipelineTestSpec + spec.name = "pacbioProcessingPipeline" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -i " + BaseTest.testDir + "exampleBAM.bam", + " -blasr ", + " -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString + spec.fileMD5s += testOut -> "91a88b51d00cec40596d6061aa0c9938" + PipelineTest.executeTest(spec) + } +} diff --git a/public/testdata/exampleBAM.bam b/public/testdata/exampleBAM.bam index a6ebb6fd1e485f0036c63fa78457a6f1cc4ffd41..319dd1a72de427c1d7a3b343368c124735b41542 100644 GIT binary patch delta 3627 zcmV+`4%G3H9J3sMABzYC000000RIL6LPG)o77neM+mGbfUB|1Ty6)B0=lIw@zMO0N z!7tNY)%PwGw0115H_U8?Z3Tr0wDipMz)Ex5?pYur0oho=3KB2yh!Uhkit>O^gv2vX z$rBHG$YYXGBorj1`~`^RcU)E7)3Y6@LhEVQR2{oL_3`(AclrK2NqleZGr!(wyp^Th z-+6aB*&cV_{=)Rx^JnMX(=SXPK7W4k=!0ixFVA~#o&I+B^xf&hKf0K7@4|^=vvuRs zY4-NyxckNTraPDCAMRY7KR&-WfA;YFawmKK^!fXjXD>cDe|Gu)+2!T=(+5w!GCAJ) zEECwq_gj`q%v_V!K=A00f{dvN&RuzPxc zYAzz@7wNtA-rL=G(&_o1o;`i>qeCNdn56>|H#1 z|HawG%dhM_J^Rq?^6A+Nc_)>3F6CI>Ig^uvG5mclci|?=`cm^N?2Yf=|9l*O=}SAG zo7|Zk?7qJ{9-rJ9Pwwm<%-`-{Y%>*-uCm4UT&W~dA4o- z$xlIlkq3{@KJ9Nffs20lX@4dP5v)6FsuNlO@y+Zuq3Wy48m1u-@jOVX$&5N`)hvw|4x9-W% z;w@LSny3{rg(VJT*pi{7Y@PX%%f47=l5a{6?gI(eDRb7nzK}uJ83`3qZ^VQ^eu!5_ zY^*IoRD90~lK0nVE$A~lmO$5D)hD`JdLwgrn}7zUPSJMEAO zrZ`h7^gYjYLQaL^VY}UBo0CBU6@MG8!Df5dZjI1@wpK>`X0$;3_7&nUVeR}@6%UW5 zNBjHJ$=+mlj+i1^VU!|;$`U0QsVYya3L&b<7Rn(=W0Iw$#numi>>rjAn z@G*q<+LLuGL-;BXwmwV4L319wgIyuU0)&)QDVWPJs=!6q|&nzh?H5TW3Zq#h0+W2MkxpDQ5% zmP?{MA0Tz;XypVB50*y!=^W>4zuRbt#^-O8o4EAK5@9Gtq{}SYS?{zR z86t@sd(;j`ZJ_DeHtHeYY4t~KiY=%qc!Q4m#Fm|oPaE_+zsOhI*wF6q^V}GDRTm6zLHFMoKZy?<`A7FrN)d8tQ)!vn!`s3V&>y9g zT)B?VAq9ch=6{SkhF1|E4yhpc|Fn!wWm?8>-O%t5*8b{tZ(|4#4x#ZMFT%9Ic~ur9 zMH$X%j-b_peq{l|AzfAkl}TPv1wz!y0nLclMO8@)Uf@TA6;L1(1v+IA4GA+RNWpoi zT){Y#4in0E+8(x7Mm)SiycV&u0^*x22*o8Llqj4R1b@^lDS^`J2^Tw-?eZ>Twu3Qs zxYM$1K^Vh+%Y%YyeguVP8NeIB#FQ#K;B}W#tS61b3;GxoUm5YiKP?ciyW)3X?eg|G zJ(=zwn#O;)X#7N1x~M9^%%iBN3Q|@Ow~%P4Dsu=^QeoKDVyjKKlms~HPjh9>4^VCd zqHdSwfPZ1;V1GEE!zu+yeK^IFDL4tYF!am>FRKlL*@T+=H)H9{wt|YU<~YrNfqZgq z=HdX>{_$4Cy(0+JW9S*XyW@q7U_f0CTYy9}%OxL)3L&smh+P7~nI49GI3DxnHinPjd z!vWI;RTWZEjuV8SQdt2R(BW=C=)jI z1(yzFs4YB10x#$z##ir)zqlg2R`JKn6Mz1%V3kp~N-8yg^zxwUyQa(tZ7J%xp2;~vrj)GGSAzj5NFq2k&JL-It8cMBRJ{K2+-+J{)4Sxh{e|dYk*`J>5j;F`R zdkaDpB?xAxDn=^96|IV*fJy_t99GJtD8Lt#+847@VFo~MUg~{bKW@HL#1xqNOn)tA zV6z2nOt=)QPf|a6^+^rCwLIYs5SV>U&rn$uDP1OYbA*0Ls0@h*&IL*;R2KnLzW_64 z%!~((9+ZLDG@U*$`3OZO+=R3JT0T%-NymuGP+eP>&%;H__{M*jmT~t+SNuM#{mrd} zClIAaN4wL>!NR^(Ne^*!9byM6#-U+ zN(??cp<6nFqKdMu0)`Bj{|P3x?fBH8)NwIsTO*fJ-$#~3XPbi@mlzI@dVgO1;(0z~ zE3DM-VRJBn*HL3>#8-2i@BOElq;6*7&tdJWw<7MErhg1W@px~taN!6kw51D8^PE(9 z9u*h{Cyn#mxPh2z8m9@Jn^FxGm4v3SgiOIu&qEMx1QD#jgu)ig6qR+$H%|zlz%t=; zID?0Mm&3Jk|=p3i-BZv-W0t`E-2s8qbXJ zTF2=sj8ND!y_H01mSq*pikcMA4WYnB8Hq6}3tAx^4SIvtI_?c{uhZ@Ho9mX>>orUXV01|>!iY;zib zV|86_VbDy_pc#0aa*jP`IG`{Y(vBB`aii24(oV$YXvt<^iDp%Jx; z*G2UQ%M-qak%BvgX)4YVrBY~?x`Os$s;!RTtGafR3g*}&k6C!&N#&4mXk)kOx4nTg z7&6*wc~ELM2nyPM023u142THbAoQef^R~A!+zdh9r3rsBr}?`N*BZQWQ^Vi5SZi$j zs*(5rCgPLDBY#GcLL*EmA}Eejieg3b5+xKBWr-=2p{;~qnwt{I>T$@}7V}u#JmWW` zW8E(65lPIaCuJrsn5{%THHuj&hCg;VfK8P{aT+CR?!Q$1dS%t1VAku3+qGA#Cx1d&KjElPsGrN3XF28&pa1Ie zfQuOY=RaG-=(U2KudX%z;C8^{>G9s-bbo&lqcP-Ogs~Zy6iUk+IwYiDNoWioBS^=p z$g8p=%ojHQZBaimG;kqM2w5YCHJ+9;H@a}FAmwZXR&avHZ8c-oD-#-;H?z=CFG*Ns zB>~?}Gh9khRnP=MAuTJKrZfW&r@_(TqAc8bIM$}R9vS39(3uOJ9j(o_FeY8keqpA( xJDd20tUoPH_&z;YhoiJ;@%Rq6OJl#VQgvaEV%^Rzkf%s6yhp zSM60VdeO_aN0m^Kkop%O4!_4UlWg{+kUO>aR8$Z)Ius zcix#!w#MCipP#-se{t45`TX?p`T50@_g|d8I_tf4@>|`Lcczd3=wi~n4;PNj(v451 z*}ch8_qX4jZeN~#uzhj%^z7p7#pAQf?d<&d`FoeAFW*0Varxfq<>lG)N6)@AIokf? z@h>~I_4M?A^3`eg!T$94?9tqdyH9rych3%YcaIOA>_6Ilbnxh)d-8B< zZX)M5>Am&tz3$uT^z2VhpTB%|mc$RX9-Th#-cQ~fIeGBLNbjwO^*6_ce!O+KHQ7Aw ze)j&1=JNdMt3NrtINN^t{^Qe&C)slJ?2qC1ncRWbB(KYDulX+PlvZu-Heebb%# zraQl4@&EBU;Z<%uJO9JpN~6(Oh1EED;~&1d()ej(wtOE}*?1eTG#)qZ&&U1?)<6IF zE6zCf3h>398C^?ZQS^7v(dmhzy2t*CD?3{87U|#Vw5YaV9}Fw z0waG~tJRX7cB{2c+bw6^;2q#c|`^dtbci?TwQMZ;t%*H5~aX2JvsN5Wl|! zqQY7w8etvd`8KY3k=EwaoE`O6JsDcO<%(7lwL+$_#DNT3GL)39GGB7p7pqM2b?L!- zAmJ)y&Z^fJGUz%Zp+f47m=MSh@zRKmm4zo2-@ZaLV`ljz;=y=2nT(Ht^QIM%nwc%f zQH-O!P_e-)VvJC>0fUo00~vqo!d70#ZMme`4Ew$Iz@Z2?ouEJJSZl4pdVAPzjnII$ zmPY(~v_Smk72+>o?Ce%69vn^&_x7ff-O0`zF-5e(C`AgDB}y<-Ri0KALR66rl!KGT zBuh!h1EYdR)v2U3lLILEsgk# zE5ws4#IG-b7?n{)FfJUA#?rPJ!bJHVu~ERd&k1qe&~6RA5v|Q7Y~~CDPYSWdD1(2zA{Eew;IbZOBkZ~X z^Eh}czq^}$G^hEuXOI(X!s|$U@Eq1#3HJeSvUgw->i!}W+CbB_ZPY`))9R1f6kCu}@ERTUi7h)F z$=CYbR=3^iy3KzMS+iOp?vKKy84b^j@j4U@_njG49MJ+}mBvI9nowOKO(PW-G)f~~ zkVv5A49hihWJ?P}g50#Z}oIJI^H?P$?+ zEfHEG7zy5A+Edp)TzKlW9X|tOU%lN^cc+uXgX!Mnc#(f?5=b*7Dq{jpsR{_Pq$D~b zG>&mXO2CYjj+B5fnq@|5QjE!-6oSuPGm4<&NaaJAY6gok7jWh)$%MxlRg_^4KC1#1 zs+K(vOA~(PityTsYj-D%OM)VmC1p}#tV^0IU136EO)8RcWdVjmx~vE)lf0q|3{hJSXhytFs!Ce0 zfsY0$pukKN=#)V;B+OhO1?Qo11>;OQOeo)Jd)R+o8u8!?@!E);B@o|aK`1T}p+w=l zAYjds5=gC{aIs_AF7GmCI~Y@kJ1xr=gfZ;5JV>bKBPbNh0CoTqQ>yHM*HuQbo-_(C z=wq<>(uj}#d4YIc6~6^zm$&=r@pSLN6#j!n;U~J%MO6W29z{h}kg@{5g+N19nS+~> z3d4V@7E3L{r6j;nAI&XmK0vt^h`L;w6NZ_C{NaEOqZCZ)!zG?v8u43Kh}UKD(GrL? z0mwGcSCBztgmMr9Ra8YF2y(3vDiEw-2*{Wcf(q)q;(U$Io4rVg+JNxL(r*Z4L@R$aO|-&kNXk zK-L}GrY(0p@ImodLEyP;X-31NhBt$;xeVj&eC2C=C{bi}I9hLZ*1Ln2Yq_m~C0u_L zxLyd25^dU{S9@m=cHPz}w4lV>px6eS2bRCiJAv>h3oItqnPoKI(pBR}KQL9}{tY|+ z9LBzRd%`)MPK>LLudGO`EH@m`T~Jja73DagIfN%fWQ4Mk7EzJS;!tr@(Pt3|J0X+8 zW3?c6jKi`p6tld;ctDx3sV}&6AVPm_;UN-uK_4-`d|mwc72&lNe{^@k{}HS*)~%9C z4IsQc$oj5HGeTR6dah?861rc3JT`j)dKROgR6__?F$zqlRlklp9;JpD5lkB)X1gepo9^iEZbRE8^B6-5D=26Q=$lu1#5E+~JsFJ`I2 zG=SV}>UEw!Ztf{!5=?!i7BjHffHoprisd`0AAa;n8-C;Ngf~E-_cb*`Wl^MbnbgG* z>LsBvBpxUiNU4xr1a$oZ#F!B?9u#`84EUz0^nr;-NHXC%T~C%*JO+O+Jv`i*PWBh_tx9s}p0X?{a9Kh{tY9s2s)_;(nHN+; zAE{~G>DS>0-WULiQNec9v2>28u2?< zh}Tu|Ys(-;2pS5Ns|c_nRASKK3Ej{UBvq7c6wqWq|4%TnZO5k$rH+4#N!uE^l=?oh zEIL~pt8HA)}JeB$kjVXzFWiu@bo?x12tD~=8zc)3WWsF;Mt*l>;d(%ABtKMgw#jA@J$D~>T(Z!)b%=o66S@93>{f3hUL}G z_kMq+A%^eV$g2MdV}EhGuY!Vux-r>b)c#VF1ZzT}2aX7-3QXf7i?giE%&eTES&Yk+ zKGI5>rS+8wtTQm$SZU zE?dmBxS8WOtz%s->lR7OyC-EjF6ga9-8G6?Duy3B96=pjqLupbYcSloX+i^iH$W)r z8K|4MN)W{eGAV^-qrer4q17*FisLj&)Leh5dV7B*)gWQkA^kigYU|?VaVm~1U1`s<{5=el7F&~KXgo@iRLqu1zLufCkIE??B2O{nb6^HTt vfqW3~fr`WUFnhcvF)%OzA^kigYU|?VcVm~1U1`yj_5=el7ARmbGgo?{CLqvPoA+#4%9LB%P0}=Ozio^JZ QKt2fgK*eEvggp!l00^fCD*ylh diff --git a/public/testdata/exampleDBSNP.vcf b/public/testdata/exampleDBSNP.vcf new file mode 100644 index 000000000..9e7e96f51 --- /dev/null +++ b/public/testdata/exampleDBSNP.vcf @@ -0,0 +1,282 @@ +##fileformat=VCFv4.1 +##FILTER= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO=5% minor allele frequency in 1+ populations"> +##INFO=5% minor allele frequency in each and all populations"> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO=SubSNP->Batch.link_out"> +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##LeftAlignVariants="analysis_type=LeftAlignVariants input_file=[] read_buffer_size=null phone_home=STANDARD read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta rodBind=[] nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null useOriginalQualities=false defaultBaseQualities=-1 validation_strictness=SILENT unsafe=null num_threads=1 read_group_black_list=null pedigree=[] pedigreeString=[] pedigreeValidationType=STRICT allow_intervals_with_unindexed_bam=false disable_experimental_low_memory_sharding=false logging_level=INFO log_to_file=null help=false variant=(RodBinding name=variant source=00-All.vcf) out=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub NO_HEADER=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub sites_only=org.broadinstitute.sting.gatk.io.stubs.VCFWriterStub filter_mismatching_base_and_quals=false" +##contig= +##phasing=partial +##reference=GRCh37.3 +##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta +##source=dbSNP +##variationPropertyDocumentationUrl=ftp://ftp.ncbi.nlm.nih.gov/snp/specs/dbSNP_BitField_latest.pdf +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 10144 rs144773400 TA T . PASS ASP;RSPOS=10145;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10228 rs143255646 TA T . PASS ASP;RSPOS=10229;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10234 rs145599635 C T . PASS ASP;RSPOS=10234;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 10248 rs148908337 A T . PASS ASP;RSPOS=10248;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 10254 rs140194106 TA T . PASS ASP;RSPOS=10255;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10291 rs145427775 C T . PASS ASP;RSPOS=10291;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 10327 rs112750067 T C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10327;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132 +chr1 10329 rs150969722 AC A . PASS ASP;RSPOS=10330;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10351 rs145072688 CTA C,CA . PASS ASP;RSPOS=10352;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10382 rs147093981 AAC A,AC . PASS ASP;RSPOS=10383;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10433 rs56289060 A AC . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10433;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 10439 rs112766696 AC A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10440;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=132 +chr1 10439 rs138941843 AC A . PASS ASP;RSPOS=10440;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=134 +chr1 10440 rs112155239 C A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10440;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132 +chr1 10492 rs55998931 C T . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.0617001828153565;RSPOS=10492;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040000000100;WGT=0;dbSNPBuildID=129 +chr1 10519 rs62636508 G C . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10519;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129 +chr1 10583 rs58108140 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GMAF=0.270566727605119;KGPilot123;RSPOS=10583;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=129 +chr1 10611 rs189107123 C G . PASS KGPilot123;RSPOS=10611;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 10828 rs10218492 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10828;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119 +chr1 10904 rs10218493 G A . PASS ASP;GENEINFO=LOC100652771:100652771;GNO;RSPOS=10904;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=119 +chr1 10927 rs10218527 A G . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10927;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=119 +chr1 10938 rs28853987 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=10938;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125 +chr1 11014 rs28484712 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11014;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125 +chr1 11022 rs28775022 G A . PASS ASP;GENEINFO=LOC100652771:100652771;RSPOS=11022;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=125 +chr1 11081 rs10218495 G T . PASS CFL;GENEINFO=LOC100652771:100652771;GNO;RSPOS=11081;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=119 +chr1 11863 rs187669455 C A . PASS RSPOS=11863;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135 +chr1 13302 rs180734498 C T . PASS KGPilot123;RSPOS=13302;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 13327 rs144762171 G C . PASS ASP;KGPilot123;RSPOS=13327;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 13684 rs71260404 C T . PASS GENEINFO=LOC100652771:100652771;GNO;RSPOS=13684;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130 +chr1 13980 rs151276478 T C . PASS ASP;KGPilot123;RSPOS=13980;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 14889 rs142444908 G A . PASS ASP;RSPOS=14889;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 14907 rs79585140 A G . PASS GNO;RSPOS=14907;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131 +chr1 14930 rs75454623 A G . PASS GNO;RSPOS=14930;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131 +chr1 14976 rs71252251 G A . PASS ASP;GNO;RSPOS=14976;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130 +chr1 15061 rs71268703 T TG . PASS ASP;GNO;RSPOS=15061;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130 +chr1 15118 rs71252250 A G . PASS ASP;GNO;RSPOS=15118;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=130 +chr1 15211 rs144718396 T G . PASS ASP;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 15211 rs78601809 T G . PASS ASP;GNO;RSPOS=15211;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131 +chr1 16257 rs78588380 G C . PASS ASP;GNO;RSPOS=16257;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131 +chr1 16378 rs148220436 T C . PASS ASP;RSPOS=16378;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 16495 rs141130360 G C . PASS ASP;RSPOS=16495;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 16497 rs150723783 A G . PASS ASP;RSPOS=16497;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 17519 rs192890528 G T . PASS RSPOS=17519;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=135 +chr1 19226 rs138930629 T A . PASS ASP;RSPOS=19226;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 20141 rs56336884 G A . PASS HD;RSPOS=20141;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000400000100;WGT=0;dbSNPBuildID=129 +chr1 20144 rs143346096 G A . PASS ASP;RSPOS=20144;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 20206 rs71262675 C T . PASS GNO;RSPOS=20206;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130 +chr1 20245 rs71262674 G A . PASS GMAF=0.256398537477148;GNO;RSPOS=20245;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130 +chr1 20304 rs71262673 G C . PASS GMAF=0.338208409506399;GNO;RSPOS=20304;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000000100000100;WGT=0;dbSNPBuildID=130 +chr1 26999 rs147506580 A G . PASS ASP;RSPOS=26999;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 29436 rs2462493 G A . PASS GNO;RSPOS=29436;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=100 +chr1 30923 rs140337953 G T . PASS ASP;KGPilot123;RSPOS=30923;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 33487 rs77459554 C T . PASS ASP;GNO;RSPOS=33487;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131 +chr1 33495 rs75468675 C T . PASS ASP;GNO;RSPOS=33495;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131 +chr1 33505 rs75627161 T C . PASS ASP;GNO;RSPOS=33505;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131 +chr1 33508 rs75609629 A T . PASS ASP;GNO;RSPOS=33508;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040100000100;WGT=0;dbSNPBuildID=131 +chr1 33521 rs76098219 T A . PASS GNO;RSPOS=33521;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040100000100;WGT=0;dbSNPBuildID=131 +chr1 33593 rs557585 G A . PASS RSPOS=33593;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=83 +chr1 33648 rs62028204 G T . PASS RSPOS=33648;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129 +chr1 33656 rs113821789 T C . PASS RSPOS=33656;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=132 +chr1 51476 rs187298206 T C . PASS KGPilot123;RSPOS=51476;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 51479 rs116400033 T A . PASS ASP;G5;G5A;GMAF=0.113802559414991;KGPilot123;RSPOS=51479;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070010000100;WGT=0;dbSNPBuildID=132 +chr1 51803 rs62637812 T C . PASS GMAF=0.468921389396709;RSPOS=51803;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040000000100;WGT=0;dbSNPBuildID=129 +chr1 51898 rs76402894 C A . PASS GMAF=0.0731261425959781;GNO;RSPOS=51898;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131 +chr1 51914 rs190452223 T G . PASS KGPilot123;RSPOS=51914;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 51928 rs78732933 G A . PASS GNO;RSPOS=51928;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=131 +chr1 51935 rs181754315 C T . PASS KGPilot123;RSPOS=51935;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 51954 rs185832753 G C . PASS KGPilot123;RSPOS=51954;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 52058 rs62637813 G C . PASS GMAF=0.0342778793418647;KGPilot123;RSPOS=52058;SAO=0;SSR=1;VC=SNV;VLD;VP=050000000000040010000140;WGT=0;dbSNPBuildID=129 +chr1 52144 rs190291950 T A . PASS KGPilot123;RSPOS=52144;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 52238 rs150021059 T G . PASS ASP;KGPilot123;RSPOS=52238;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54353 rs140052487 C A . PASS ASP;KGPilot123;RSPOS=54353;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54421 rs146477069 A G . PASS ASP;KGPilot123;RSPOS=54421;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54490 rs141149254 G A . PASS ASP;KGPilot123;RSPOS=54490;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54676 rs2462492 C T . PASS ASP;GMAF=0.191956124314442;GNO;HD;KGPilot123;RSPOS=54676;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040510000100;WGT=0;dbSNPBuildID=100 +chr1 54753 rs143174675 T G . PASS ASP;KGPilot123;RSPOS=54753;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 54788 rs59861892 CC C,CCT . PASS ASP;RSPOS=54789;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 54795 rs58014817 T A . PASS ASP;RSPOS=54795;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=129 +chr1 55164 rs3091274 C A . PASS G5;G5A;GMAF=0.145338208409506;GNO;KGPilot123;RSPOS=55164;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030110000100;WGT=0;dbSNPBuildID=103 +chr1 55299 rs10399749 C T . PASS G5;G5A;GMAF=0.278793418647166;GNO;KGPilot123;PH2;RSPOS=55299;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000000030112000100;WGT=0;dbSNPBuildID=119 +chr1 55302 rs3091273 C T . PASS RSPOS=55302;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103 +chr1 55313 rs182462964 A T . PASS KGPilot123;RSPOS=55313;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55322 rs3107974 C T . PASS RSPOS=55322;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=103 +chr1 55326 rs3107975 T C . PASS GNO;HD;KGPilot123;RSPOS=55326;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=103 +chr1 55330 rs185215913 G A . PASS KGPilot123;RSPOS=55330;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55367 rs190850374 G A . PASS KGPilot123;RSPOS=55367;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55388 rs182711216 C T . PASS KGPilot123;RSPOS=55388;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55394 rs2949420 T A . PASS GNO;KGPilot123;PH2;RSPOS=55394;SAO=0;SSR=0;VC=SNV;VP=050000000000000112000100;WGT=0;dbSNPBuildID=101 +chr1 55416 rs193242050 G A . PASS KGPilot123;RSPOS=55416;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55427 rs183189405 T C . PASS KGPilot123;RSPOS=55427;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55545 rs28396308 C T . PASS GNO;RSPOS=55545;SAO=0;SSR=0;VC=SNV;VP=050000000000000100000100;WGT=0;dbSNPBuildID=125 +chr1 55816 rs187434873 G A . PASS KGPilot123;RSPOS=55816;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55850 rs191890754 C G . PASS KGPilot123;RSPOS=55850;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 55852 rs184233019 G C . PASS KGPilot123;RSPOS=55852;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 56644 rs143342222 A C . PASS ASP;KGPilot123;RSPOS=56644;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 57952 rs189727433 A C . PASS KGPilot123;RSPOS=57952;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 58771 rs140128481 T C . PASS ASP;RSPOS=58771;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 58814 rs114420996 G A . PASS ASP;G5;GMAF=0.0982632541133455;KGPilot123;RSPOS=58814;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132 +chr1 59040 rs149755937 T C . PASS ASP;KGPilot123;RSPOS=59040;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 60718 rs78395614 G A . PASS CFL;GNO;RSPOS=60718;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 60726 rs192328835 C A . PASS KGPilot123;RSPOS=60726;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 60791 rs76199781 A G . PASS CFL;GNO;RSPOS=60791;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 61442 rs74970982 A G . PASS CFL;GMAF=0.076782449725777;GNO;KGPilot123;RSPOS=61442;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131 +chr1 61462 rs56992750 T A . PASS CFL;G5;G5A;GMAF=0.0383912248628885;GNO;KGPilot123;RSPOS=61462;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008030110000100;WGT=0;dbSNPBuildID=129 +chr1 61480 rs75526266 G C . PASS CFL;GNO;RSPOS=61480;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 61499 rs75719746 G A . PASS CFL;GNO;RSPOS=61499;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 61743 rs184286948 G C . PASS KGPilot123;RSPOS=61743;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 61920 rs62637820 G A . PASS CFL;GMAF=0.0255941499085923;RSPOS=61920;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=129 +chr1 61987 rs76735897 A G . PASS CFL;GMAF=0.292961608775137;GNO;KGPilot123;RSPOS=61987;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131 +chr1 61989 rs77573425 G C . PASS CFL;GMAF=0.309414990859232;GNO;KGPilot123;RSPOS=61989;SAO=0;SSR=0;VC=SNV;VP=050000000008000110000100;WGT=0;dbSNPBuildID=131 +chr1 61993 rs190553843 C T . PASS KGPilot123;RSPOS=61993;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 62156 rs181864839 C T . PASS KGPilot123;RSPOS=62156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 62157 rs10399597 G A . PASS CFL;GMAF=0.00228519195612431;KGPilot123;RSPOS=62157;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=119 +chr1 62162 rs140556834 G A . PASS ASP;KGPilot123;RSPOS=62162;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 62203 rs28402963 T C . PASS CFL;KGPilot123;RSPOS=62203;SAO=0;SSR=0;VC=SNV;VP=050000000008000010000100;WGT=0;dbSNPBuildID=125 +chr1 62271 rs28599927 A G . PASS CFL;GMAF=0.138482632541133;RSPOS=62271;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=125 +chr1 63268 rs75478250 T C . PASS CFL;GNO;RSPOS=63268;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=131 +chr1 63276 rs185977555 G A . PASS KGPilot123;RSPOS=63276;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 63297 rs188886746 G A . PASS KGPilot123;RSPOS=63297;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 63671 rs116440577 G A . PASS ASP;G5;GMAF=0.170018281535649;KGPilot123;RSPOS=63671;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004050010000100;WGT=0;dbSNPBuildID=132 +chr1 63737 rs77426996 TACT T,TCTA . PASS CFL;RSPOS=63738;SAO=0;SSR=0;VC=DIV;VP=050000000008000000000200;WGT=0;dbSNPBuildID=131 +chr1 64649 rs181431124 A C . PASS KGPilot123;RSPOS=64649;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 66008 rs2691286 C G . PASS CFL;GNO;RSPOS=66008;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=100 +chr1 66162 rs62639105 A T . PASS CFL;GMAF=0.320383912248629;GNO;KGPilot123;RSPOS=66162;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000110000100;WGT=0;dbSNPBuildID=129 +chr1 66176 rs28552463 T A . PASS CFL;GMAF=0.0484460694698355;KGPilot123;RSPOS=66176;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=125 +chr1 66219 rs181028663 A T . PASS KGPilot123;RSPOS=66219;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 66238 rs113961546 T A . PASS CFL;GNO;RSPOS=66238;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=132 +chr1 66314 rs28534012 T A . PASS CFL;RSPOS=66314;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125 +chr1 66331 rs186063952 A C . PASS KGPilot123;RSPOS=66331;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 66334 rs28464214 T A . PASS CFL;RSPOS=66334;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=125 +chr1 66442 rs192044252 T A . PASS KGPilot123;RSPOS=66442;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 66457 rs13328655 T A . PASS CFL;GMAF=0.0795246800731261;KGPilot123;RSPOS=66457;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=121 +chr1 66503 rs112350669 T A . PASS CFL;RSPOS=66503;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=132 +chr1 66507 rs12401368 T A . PASS CFL;GMAF=0.479890310786106;KGPilot123;RSPOS=66507;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040010000100;WGT=0;dbSNPBuildID=120 +chr1 66651 rs2257270 A T . PASS CFL;GNO;RSPOS=66651;SAO=0;SSR=0;VC=SNV;VP=050000000008000100000100;WGT=0;dbSNPBuildID=100 +chr1 67179 rs149952626 C G . PASS ASP;KGPilot123;RSPOS=67179;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 67181 rs77662731 A G . PASS ASP;G5;G5A;GENEINFO=OR4F5:79501;GMAF=0.0470749542961609;GNO;KGPilot123;RSPOS=67181;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004070110000100;WGT=0;dbSNPBuildID=131 +chr1 67223 rs78676975 C A . PASS ASP;GENEINFO=OR4F5:79501;GNO;RSPOS=67223;SAO=0;SSR=0;VC=SNV;VP=050000000004000100000100;WGT=0;dbSNPBuildID=131 +chr1 69428 rs140739101 T G . PASS ASP;RSPOS=69428;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134 +chr1 69453 rs142004627 G A . PASS ASP;RSPOS=69453;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 69476 rs148502021 T C . PASS ASP;RSPOS=69476;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134 +chr1 69496 rs150690004 G A . PASS ASP;RSPOS=69496;S3D;SAO=0;SSR=0;VC=SNV;VLD;VP=050200000004040000000100;WGT=0;dbSNPBuildID=134 +chr1 69511 rs75062661 A G . PASS GENEINFO=OR4F5:79501;GMAF=0.193784277879342;GNO;KGPilot123;RSPOS=69511;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000110000100;WGT=0;dbSNPBuildID=131 +chr1 69534 rs190717287 T C . PASS KGPilot123;RSPOS=69534;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 69552 rs55874132 G C . PASS GENEINFO=OR4F5:79501;HD;RSPOS=69552;S3D;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050300000000040400000100;WGT=0;dbSNPBuildID=129 +chr1 69590 rs141776804 T A . PASS ASP;RSPOS=69590;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 69594 rs144967600 T C . PASS ASP;RSPOS=69594;S3D;SAO=0;SSR=0;VC=SNV;VP=050200000004000000000100;WGT=0;dbSNPBuildID=134 +chr1 72148 rs182862337 C T . PASS KGPilot123;RSPOS=72148;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 73841 rs143773730 C T . PASS ASP;KGPilot123;RSPOS=73841;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 74651 rs62641291 G A . PASS RSPOS=74651;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=129 +chr1 74681 rs13328683 G T . PASS CFL;GMAF=0.286106032906764;RSPOS=74681;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000008040000000100;WGT=0;dbSNPBuildID=121 +chr1 74709 rs62641292 T A . PASS CFL;RSPOS=74709;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=129 +chr1 74771 rs13328675 A G . PASS CFL;RSPOS=74771;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121 +chr1 74790 rs13328700 C G . PASS CFL;RSPOS=74790;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121 +chr1 74792 rs13328684 G A . PASS CFL;RSPOS=74792;SAO=0;SSR=0;VC=SNV;VP=050000000008000000000100;WGT=0;dbSNPBuildID=121 +chr1 77462 rs188023513 G A . PASS KGPilot123;RSPOS=77462;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 77470 rs192898053 T C . PASS KGPilot123;RSPOS=77470;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 77874 rs184538873 G A . PASS KGPilot123;RSPOS=77874;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 77961 rs78385339 G A . PASS GMAF=0.125685557586837;KGPilot123;RSPOS=77961;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000000040010000100;WGT=0;dbSNPBuildID=131 +chr1 79033 rs62641298 A G . PASS GMAF=0.438299817184644;GNO;HD;KGPilot123;RSPOS=79033;SAO=0;SSR=0;VC=SNV;VP=050000000000000510000100;WGT=0;dbSNPBuildID=129 +chr1 79050 rs62641299 G T . PASS GMAF=0.224405850091408;GNO;KGPilot123;RSPOS=79050;SAO=0;SSR=0;VC=SNV;VP=050000000000000110000100;WGT=0;dbSNPBuildID=129 +chr1 79137 rs143777184 A T . PASS ASP;KGPilot123;RSPOS=79137;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 79417 rs184768190 C T . PASS KGPilot123;RSPOS=79417;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 79418 rs2691296 G C . PASS GMAF=0.0178244972577697;RSPOS=79418;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=100 +chr1 79538 rs2691295 C T . PASS RSPOS=79538;RV;SAO=0;SSR=0;VC=SNV;VP=050000000000000000000100;WGT=0;dbSNPBuildID=100 +chr1 79772 rs147215883 C G . PASS ASP;KGPilot123;RSPOS=79772;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 79872 rs189224661 T G . PASS KGPilot123;RSPOS=79872;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 80323 rs3942603 G C . PASS CFL;GNO;RSPOS=80323;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000008000100000100;WGT=0;dbSNPBuildID=108 +chr1 80386 rs3878915 C A . PASS GMAF=0.0118829981718464;RSPOS=80386;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000000040000000100;WGT=0;dbSNPBuildID=108 +chr1 80454 rs144226842 G C . PASS ASP;KGPilot123;RSPOS=80454;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 81836 rs2259560 A T . PASS ASP;GNO;RSPOS=81836;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=100 +chr1 81949 rs181567186 T C . PASS KGPilot123;RSPOS=81949;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 81962 rs4030308 T TAA . PASS ASP;RSPOS=81962;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108 +chr1 82102 rs4030307 C T . PASS ASP;RSPOS=82102;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108 +chr1 82103 rs2020400 T C . PASS ASP;RSPOS=82103;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92 +chr1 82126 rs1815133 C T . PASS ASP;RSPOS=82126;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=92 +chr1 82133 rs4030306 CA C,CAAAAAAAAAAAAAAA . PASS ASP;RSPOS=82136;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108 +chr1 82154 rs4477212 A G . PASS ASP;HD;RSPOS=82154;SAO=0;SSR=0;VC=SNV;VP=050000000004000400000100;WGT=0;dbSNPBuildID=111 +chr1 82162 rs1815132 C A . PASS ASP;GMAF=0.0351919561243144;GNO;RSPOS=82162;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=92 +chr1 82163 rs139113303 G A . PASS ASP;KGPilot123;RSPOS=82163;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 82196 rs112844054 A T . PASS ASP;RSPOS=82196;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=132 +chr1 82249 rs1851945 A G . PASS ASP;GMAF=0.0452468007312614;KGPilot123;RSPOS=82249;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=92 +chr1 82282 rs3871775 G A . PASS ASP;RSPOS=82282;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108 +chr1 82303 rs3871776 T C . PASS ASP;RSPOS=82303;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000000000100;WGT=0;dbSNPBuildID=108 +chr1 82316 rs4030305 A C . PASS ASP;GNO;RSPOS=82316;RV;SAO=0;SLO;SSR=0;VC=SNV;VP=050100000004000100000100;WGT=0;dbSNPBuildID=108 +chr1 82609 rs149189449 C G . PASS ASP;KGPilot123;RSPOS=82609;SAO=0;SSR=0;VC=SNV;VP=050000000004000010000100;WGT=0;dbSNPBuildID=134 +chr1 82676 rs185237834 T G . PASS KGPilot123;RSPOS=82676;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 82734 rs4030331 T C . PASS ASP;GMAF=0.261882998171846;KGPilot123;RSPOS=82734;RV;SAO=0;SLO;SSR=0;VC=SNV;VLD;VP=050100000004040010000100;WGT=0;dbSNPBuildID=108 +chr1 82957 rs189774606 C T . PASS KGPilot123;RSPOS=82957;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 83084 rs181193408 T A . PASS KGPilot123;RSPOS=83084;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 83088 rs186081601 G C . PASS KGPilot123;RSPOS=83088;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 83107 rs4405097 G C . PASS ASP;RSPOS=83107;SAO=0;SSR=0;VC=SNV;VP=050000000004000000000100;WGT=0;dbSNPBuildID=111 +chr1 83119 rs4030324 AA A,ATAAC . PASS ASP;RSPOS=83120;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000000000200;WGT=0;dbSNPBuildID=108 +chr1 83771 rs189906733 T G . PASS KGPilot123;RSPOS=83771;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 83786 rs58520670 T TA . PASS ASP;RSPOS=83794;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83815 rs58857344 GAGAA G . PASS ASP;RSPOS=83827;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83826 rs71281475 AAAGA A,AAA . PASS ASP;GNO;RSPOS=83827;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130 +chr1 83855 rs59596480 GAA G . PASS ASP;RSPOS=83857;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83872 rs59556914 AA A,AAGA . PASS ASP;RSPOS=83873;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83884 rs59586754 GAAA G . PASS ASP;RSPOS=83885;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83897 rs61330047 GAA G . PASS ASP;RSPOS=83899;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83901 rs58254183 GAAAGAA G . PASS ASP;RSPOS=83903;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83921 rs61338823 GAA G . PASS ASP;RSPOS=83923;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83930 rs71281474 AG A,AGA . PASS ASP;GNO;RSPOS=83931;RV;SAO=0;SLO;SSR=0;VC=DIV;VP=050100000004000100000200;WGT=0;dbSNPBuildID=130 +chr1 83934 rs59235392 AG A,AGAAA . PASS ASP;RSPOS=83935;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 83977 rs180759811 A G . PASS KGPilot123;RSPOS=83977;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84002 rs28850140 G A . PASS ASP;GMAF=0.138939670932358;KGPilot123;RSPOS=84002;SAO=0;SSR=0;VC=SNV;VLD;VP=050000000004040010000100;WGT=0;dbSNPBuildID=125 +chr1 84010 rs186443818 G A . PASS KGPilot123;RSPOS=84010;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84018 rs61352176 GAA G . PASS ASP;RSPOS=84020;SAO=0;SSR=0;VC=DIV;VP=050000000004000000000200;WGT=0;dbSNPBuildID=129 +chr1 84079 rs190867312 T C . PASS KGPilot123;RSPOS=84079;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84139 rs183605470 A T . PASS KGPilot123;RSPOS=84139;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84156 rs188652299 A C . PASS KGPilot123;RSPOS=84156;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84244 rs191297051 A C . PASS KGPilot123;RSPOS=84244;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84295 rs183209871 G A . PASS KGPilot123;RSPOS=84295;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84346 rs187855973 T C . PASS KGPilot123;RSPOS=84346;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84453 rs191379015 C G . PASS KGPilot123;RSPOS=84453;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 +chr1 84705 rs183470350 T G . PASS KGPilot123;RSPOS=84705;SAO=0;SSR=0;VC=SNV;VP=050000000000000010000100;WGT=0;dbSNPBuildID=135 From ed91461c49ccf0660fd6d6a484487db483be0ab8 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 12 Dec 2011 00:24:51 -0500 Subject: [PATCH 34/44] Data Processing Pipeline Test * Added standard pipeline test for the DPP * Added a full BWA pipeline test for the DPP * Included the extra files for the reference needed by BWA (to be used by DPP and PPP tests) --- .../pipeline/DataProcessingPipelineTest.scala | 67 ++++++++++++++++++ public/testdata/exampleFASTA.fasta.amb | 1 + public/testdata/exampleFASTA.fasta.ann | 3 + public/testdata/exampleFASTA.fasta.bwt | Bin 0 -> 37548 bytes public/testdata/exampleFASTA.fasta.pac | Bin 0 -> 25002 bytes public/testdata/exampleFASTA.fasta.rbwt | Bin 0 -> 37548 bytes public/testdata/exampleFASTA.fasta.rpac | Bin 0 -> 25002 bytes public/testdata/exampleFASTA.fasta.rsa | Bin 0 -> 12528 bytes public/testdata/exampleFASTA.fasta.sa | Bin 0 -> 12528 bytes 9 files changed, 71 insertions(+) create mode 100644 public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala create mode 100644 public/testdata/exampleFASTA.fasta.amb create mode 100644 public/testdata/exampleFASTA.fasta.ann create mode 100644 public/testdata/exampleFASTA.fasta.bwt create mode 100644 public/testdata/exampleFASTA.fasta.pac create mode 100644 public/testdata/exampleFASTA.fasta.rbwt create mode 100644 public/testdata/exampleFASTA.fasta.rpac create mode 100644 public/testdata/exampleFASTA.fasta.rsa create mode 100644 public/testdata/exampleFASTA.fasta.sa diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala new file mode 100644 index 000000000..483a0b60e --- /dev/null +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -0,0 +1,67 @@ +package org.broadinstitute.sting.queue.pipeline + +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +import org.testng.annotations.Test +import org.broadinstitute.sting.BaseTest + +class DataProcessingPipelineTest { + @Test + def testSimpleBAM { + val projectName = "test1" + val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam" + val spec = new PipelineTestSpec + spec.name = "DataProcessingPipeline" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -i " + BaseTest.testDir + "exampleBAM.bam", + " -D " + BaseTest.testDir + "exampleDBSNP.vcf", + " -nv ", + " -p " + projectName).mkString + spec.fileMD5s += testOut -> "69ba216bcf1e2dd9b6bd631ef99efda9" + PipelineTest.executeTest(spec) + } + + @Test + def testBWAPEBAM { + val projectName = "test2" + val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam" + val spec = new PipelineTestSpec + spec.name = "DataProcessingPipeline" + spec.args = Array( + " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala", + " -R " + BaseTest.testDir + "exampleFASTA.fasta", + " -i " + BaseTest.testDir + "exampleBAM.bam", + " -D " + BaseTest.testDir + "exampleDBSNP.vcf", + " -nv ", + " -bwa /home/unix/carneiro/bin/bwa", + " -bwape ", + " -p " + projectName).mkString + spec.fileMD5s += testOut -> "3134cbeae1561ff8e6b559241f9ed7f5" + PipelineTest.executeTest(spec) + } + +} diff --git a/public/testdata/exampleFASTA.fasta.amb b/public/testdata/exampleFASTA.fasta.amb new file mode 100644 index 000000000..986e6d603 --- /dev/null +++ b/public/testdata/exampleFASTA.fasta.amb @@ -0,0 +1 @@ +100000 1 0 diff --git a/public/testdata/exampleFASTA.fasta.ann b/public/testdata/exampleFASTA.fasta.ann new file mode 100644 index 000000000..642ddb6d7 --- /dev/null +++ b/public/testdata/exampleFASTA.fasta.ann @@ -0,0 +1,3 @@ +100000 1 11 +0 chr1 (null) +0 100000 0 diff --git a/public/testdata/exampleFASTA.fasta.bwt b/public/testdata/exampleFASTA.fasta.bwt new file mode 100644 index 0000000000000000000000000000000000000000..fe74222804293ab93b9d56642f6cbee8df87d462 GIT binary patch literal 37548 zcmZ78cR1Dm|3C288TPSvW+)ONgd)e@na7rql~l-RkWE8TM&q5*E-j^@LC30Ohfsu~ zU3QdlzIVUt^VjcnUH9vGU3ELBbB@>R`FK2^=cNT15}_SO5T9Nnk|d%ei$whYhaib4 zI6%}AgaBbq5M+Wv5Sn8IL6IIL(*#n+h%I;kH^PC?5K?#wuL!4RfPRwzK@6QBzoN+P zOVb=I2_~sUu2NW8e+fnwk^kRVGw_gKb8?FELepMpIbQ;2kcW}2amHlT$8fcC*~*{H zTj6vNeZ&G0dh6GeBCtFiuOn64GmI>h{^s|N;3fekEBr@dhfy>IZ@~d>DNC=-n~zG> zdogW%Cz(cz)}Q7Ts;MhBA6?|g)0)_(Pj@oWf{%dPjASFkL_yt?qMzJ+UhUJ_`&V8&QHBa#?pc?~uD^nP1=ES`jU>fNY2PwXd|p`49f% zcO82wdIbJ5Tt4y?A(Ye&g_hGb70I+Vas}5hyH`2)EUXl%HZ!B2dF~2!*27nUl_DJo zr*&z1<6;0o)8Zs<+_|z>s9utOTcGyv>KEyoSb5{L8Tb~sCS(-RPZ%`&fFOL+Ok=N!zsA%KD*JC`ZSbn^2H| zy{&xD+BRMzA9yY}{LBe8jNcn4jH!y7)9huwgf=?;s2~LBMb00o%3PGO_6p8Hy~FU! z;dl@mB<<*rLuPe3;}k(v3vN|wXWCigc^b1PQlFEX^0RRMMJ2or+$tm#sh89=sLjiC zJrggUz(J+*&BEKm86!zZ?aN;&EveF!L2}7KvP6(f-odGz zw`?zF@Lj7oH}#=&KoKWO5L^KAAL2>h;%u|-z~zUXgqAf){sO6$IwhmZZdEz#urj)N zis1x*5H0~JLo^9zN2yH*%<{(;RvhS4v?CogPf1Y_pk-X(T%};GFMkBS7%m^FMJ|+< zmR>_VQpdtNd@Lq7?(qdn1QD6{6;#&l;gG$28~zd8OQaLod`rCZw$a)T2fpXulzBBI zuaKv1>Sn(@+flmwtM=5Df=>7jxUa}Kl9sSm(?y74DxWhkHI^nMFWt6gwSk-*mtabM zm+y%4F%BwzR&XqY8=uwB1A&3V4H*)&Wnw?i=cR2aJWiXD7;>*Q@D7#=CrNl~!VAMG zAX3Olkyre?|7-J-%D1>QC$bo8;UwEnuWoXb74*#atFKmehu;XNgXkk7sVaPe;<2`w zK4xlsnZcr@fFT`+T7pAIMlouFX5x?ozXi@5aX@a-X_?xtk_SGyh1 z?iJ2xOA*w3a|GTKP6<=wzDRI(w6Xr|_|G z8pv zqZnUeo1%(tJE3Tun&r}PitED4fe?yOdddV>IsDQ!`4WEVT5Bc0`p7q2E>3wRNYvJg z$ksWX7IAE8qcSUwRBH>B-u(t|1=kP13uE0ed&y2)v9eCxR9erpoZOh~uDYlT&OGK_ zlXm8n*;zuI1hE5d6#f{-*$N7Ezxw|1-TYcN`g+Js3yK+@oo_8?NKTLMtzot(7{X`4 zaUldNTn7OP@pl!*hYBB-1zxbWG) z#Uq?c*N*H;u3?)4*AATiN)|j;dfs--Y1o`D-!s)WBrvCQ=$;$=AJ}3<627MH-M8i( z?KY?HIcCl&ZDfg&iC=k6W{fN~jt7o>9_Fd=5^&}4Iv8K872IR02;99cU+b*4Tqc+8 zt^^&jF80&fQQ8$KMP16QakTM4yU(3kkd>E0Y98NkJMGDX-+(tujKe2cX9GBjfma1>+@ zA`xJh)oZ&+Wh6~x*~){CY^5eEDGVs%Sa~yN9fxO09WV{jJA+D|m3uz}LY3L83YD=LO~6>T2O6`mE_g zdZZqcyMs1)Cp&mtHECt^_df7+_#RkZj7Qe4Ehn#`uzOK8aP&;78zH_-Y*1Q? z-1kbg{0C)}Uo!&!BAg>~3E?g?lqYojMA+t=^yiyc{If(Yvz0o?rbBw$M^Df+y$b&b z&J%f!(8%<8`%rSTaMUt^O9ZioNqa($4~ z*ncG1C;e6l6#lFK4i!kcl+w^7Zm7hIc>yj1`G>q>%39_9B>Tuwt2+5728H&pk_YlA zNBKWmRPSV7r5$yGpMpD$C}3_(~;-+@?wSZYcWId9 z{j$M8H@3L{(#V_sJ=88fbVHC2Jt3SRl7iHPccr5f4Wk}NVWzcR=1(WtA4$DiaWdpV zD>Gu9Io%q5H=G($fbbJes?|f%Jo2wz7^-uRUgxV5d^4*>`;|VyXJ}OGn6e-K0Nm1h zeT391s#a%e5Au}ha4t{0t>H^&U#-Qy&XAzd6&z}$NfJ-t|AX^J+7OD?VixnIW*Hls z?Q6n1dY1sd+=^u5Q%|g4auwDe)j#k9{vlj6@*BA#kWOb8So^7OSLcLcbpY4>M@%XY zJtjcoquOtl!LbP$eu8L(JB09IZkHx)Y@Fv*+at`(Cy~aiFWz*c85&;Zyqi3iqZ1=? zY}^_CBWy9E2|w7YUBvO`Zom!2iSwLV^Hz7*HyHh3B}Q3kC+V;TmUqO#|ASqstDE8H zU!0R*R&7afykgw+DqCfW>KgT4ZkygBbK~UH@+}+a=irxKM+5vJjL+T~tEF~Wj;4DE zQ~2vhM#mB@pLS>uURL{QK&=Z>fBhO>dFge)-@(XB5#YB7{Nbs?$)vCN$VNJF#MxzU z^je>srhD2cUH-xg@YZls@Gmi1FB;R>sgf^GPFqvH}^TekEV)Q$|1 zL;T~0JW&ikHbKWj9!Z{V&0FKVb;1~=@J%T@+%HAL*TG_WNi2iEy<(??qFbWmlyE7Z zfj0e-z;fw!vb!7GZg&wcs`;so|KLYqm)^q~_;E+)S+B=Yjye|CJA8uZ%0^1Ui*~C* zf@7MccDD6e1U`Y6g7b&B$9O_Sa$|~$azC%Ly!aOnv-xzV-t>m;Qd?CzF6N(FLpB?N z-w3xCJ`7|1vFbPa>=)fvQNsgujoyFuNZTaDHX|`(q^EFK&&@i6Ly#as;PT;@YH^Wz zsi)gBvVV|y$O|?`xKn;h!NP7KIntNHZ|TC8Ps~-|kHOuAFT$92I#Q)~)3*;-;-f;% z3)ob*#_xIM*Q6)Plfcj7Fdf=w0bc>v249YG?WKCY_$|eyy2J4;Ul0@m)H++M>X`shxEx@{d9fyeHgJAG8`{mr=II7k;b6Jx&_SADR+* zjKFtdXWNGPzg1li5E$6q@)v$ToH4vDM!HrX(?roLhF!)+g6cXJe`B*(#Xb5PQ&S1P zroxNL>B>UrVc@*r127JGIg7UMeHt?~`_C=VhV3dxuvh|r&`fC8R?_R+2M%?az&FF~ zh2M?w9>0!6YQBWWHs+N#wGu?pytQlpoz5>Y;xD?q&**oVhr&<5<--?Zw938ZA#|T4 z&Lfa2o@p&m#$0{Z+hIC>`2#BdHYx8O`1C25cwi-!&AHq#XL>TKL z_$K&T__sb=-8oO*-D#%zaF-#&oN;xB*FG`-6>jclO-YIzCT803x$qR zI&8a4YI%a!Sh1`h_OZlYE|c+aj_`OO;T+9?uYmspztr0lvQKL%I*8^pWSz{;jDD|s zaycW??(ww?9Kq2i^yf~x+@tXc4B*W``%t9(C8}SSrkwOZwzk- zuMh9Ru+1|u2|g(yI^KPJrE2E%&F~D#q90e~+#X&{v9}FN8k{p9wG6dO>0LIfg5{P{x=JG3%r|*lZTW+bk4lwKiSE zs8EjqKM22cE|kD48=jFEU*EgI(Lt=eYuZ64JVnPgylrdf$&&Ief#zFId*C@xv+LoX z!b|5Jb90RCGFfNNV7PByePx}e$P`=A8%NUEp>uOmq0uMd*TeV0zkzT5pu%(|Zid{- zv~X%qI6TdH)F73_c6CIUOt9NK@{YWK-weNWPJf5Djkjz_SZ&z&Z`6X;R4rc9+v(wU zj#@l9ohDa3bBe+E1b+~o4VEA8gRo#QoSP)u-@5D9*VpZSyQ0!OxYx@0Vtp{(KYjbw zII<|_b9ga$CHM)q+{H|XLnf6$=P#Qa*(GVbJot6A^Q61v%hX3gZ`n*0;eW$x!7ufS zD{p$WnGpG2PHx|J?9+|9!M?uc=f|k1xE}${?E^Ayv6k>+sPVS&-tZ?veXU*B6-Y;x z@;WKG?Ks0OL3oXHkDS-uGibHnZD)2Eycv85d^CK6h<4xyb*~jisf_$nn%csXkDEE$ znAIBYgG$#Xo^PJWg%5(?4}T0^+nO5sS6P@AH!E)P&5f%guBR}?{B8`x+3ly@nTO?P z%Heb2m-_A-@Dysi$C1IosF!U5PJz0Y6bxI;n8IZw?`U;RpW~aKC=SElhQAJ95C3m; zkW)C;VGo5dL3RI zJ%k*ddpUfjE{7QPZg|s0ZP$}!TeGTHzH4GX`@P92VSnU?6-sBTTjBNKmwvW}@Z9v! zymx1{g53T~$vh*Hu~PWkF+R%P0LoTvOOC0$T=#$Q-te2@H^Hytac@3g%AKrS_lYF* zSF9)?pk|0ui_`uO^E}bQ7kX{AIQAXjli)+(m)AdE8(vCHRab~!bv}80{NotP%adfT zZD&v4q?Q`zeRYCA41Wzi34Zv%tV#jL{nvjY^$$EL39;k|S1uUxcCbhpYMgEV)Q6DT3tbx+x;jb@ zVk;kPJoJ`fyDBfR^lE6t{a$!o^eE~}7AfLl?C@NE{89mhIb+frG*tLG_*jC7?aEkN zs#$nuZ_PaX7I+8PPQ)eZdWfJaqu9{0XVmzIbF3H>2+M1!^?oI;;YLeo>w&H zK@S^Zdj(HJ5Ag}!2A&zmN_*R*yW3HR?jp9zgX-myc9JpVJji?5hOKOxH1ZpM1N>k3 z&G2O-qM`wQek&h-mX{B<6m}uSiHdsMWn&K&S1Ji39=eD~62wk;A=qsQ?N}azObRUR zDD0xH&m##GS_pFHS-Xgca$B0%m{!KQfFsbzN|Ve`0-_l zm+a)y(mEkUE1S~s?eI_GonQ|jBI_M0ehHqoZZv3Z`pUECZES~Wy2*u){dOO;V!9&K zuOESb55M%>R}oSBt#J`sZMptZUou7t_g^>^aY!{l?VP8ta`v&mg?9q3z_X+0I1T#* zSsABWBj6?|XlrsW+R3Z)$5}A}H^w?&g9xS0WnOnOQeVRxz~6^$KrBodCT@k7FXe>Z zyC9M#&bx)NUHG`rMV*TQ&m8F10&x@YVeoCR-3Z;P{%wMZJpH#PcOcEj-9m(e#3g=U zD@B!uZH<~u);sxS*#CiFTDN^cyv0t0jBr~r=BCWQu?KliMVXVcxHfK!bgt1arHX!4 zH-&!$&x>p3AH17Mc1YNK<}Z`>1oyz2!lVA5!qOfqA09N9ZTKlgoX8G@AA(nb7ekm= zuBQeD2Op8F<=pNmyxpXb>X+e8X_)*7F4lNI+a7vE^oPNWGch1sMy_0 zMBZ(4V40VU(DhgF+u#qvnj?WBh9@?M#5P}){Sz7cehe4|?Cx$*dK z_~YNBtkL| zwY0dhbvMm*Mz%C-<@r$FIfkG|yv%S>Uc~V;=U=Xb{|-L^yB|sW8t~+F1cPzCu+;Y2 zHM;GJN1+UzZKe_5Wv_Duq?DE#!}FpiSc7Xa3tqqEpoDB;CgarWnPYGEP*?mIA>Xb% zD*rSzwR2@o@z<09cnx@W`2XN--G?{?@@qT=3<@ucT6jx@m>dkz<$17u56MF&ZI6U# z8hj{x6#UYAW)!UK%F?azGe6{3Y(&fSI8W|;|4}%l@UpXeXz^J;(X;Sp;Sa+ejeVzHO|0a{$UVfWo7iG?@7Z$@{c?y0ZKB<$Efaqrs6r& z;q~DuxP~S0m@W&$uQ$X)xdKb|BF!^QvV({gd!c{Bon7*e6GPVO4ZFbm!OO#I!QX9X z@#k`+h@Ce&&L+w>+r(Ziq}H)pOjl%lm@~aCM*lNZ2I43X9nX^v5+#m+YAv**V!_n zXsNE;bylVW{xy6Ud?GwaDM4cSDY}NWXAlQ%n>pbnkr|@Jd6wDfWW)26M`bfr2K^p< z8hj3X2%}KyO-ZXsH`EKq1LV-sEOqx z-+FeosN}^_#c??&H+Tv31UB$?@DnKW_amN0um}ea=!FQg2S_%E58F@9q1T&IL z{1E&q_z?JTcx_+16k->JnMYPh(a18T=saDdYjxL9`Q`hv4~RaFs)sj%-wU4sU$FK| z6Gc($u#Qs{8Q)(~`0+hf_R2Lwr?zVwY2}%+FPJg%GjIu^31ZxwGNb< zj_qwUHBI?4d5UhCQhHQ^tD(9a`g8d6@NMw(34->zN7q^K@(g#@?Me&lr&No4VDWoT zpUK+op!w9o1wI@89{euaJpYBCdC4JTH?whG%8UDW3)Oi}^ONPPbogTU z7x2rF9INhu`$GF@BC+eu$&Ly(oh5>lC<8&WF=etTz4dg*1Ndj~y|C+$H1>9#^ns@z zO_KiV3q6k#y=#+0{lL>**S=0bJW}?tb07Qw{3xtDGA&B#c=gSy&Cg>~&iNu{?~pv@ z^df(~^Y4#5+GEVtMT$Ju;qYvjqxZqrvc~3{lIJNy{PV6qH0KBYO0KOZ`)boZ&>BdS zPg@hz;rYb|QU{Sn*HP#dffIUTJ~YwkaLSk9rE;G-=^ zErkz&J&ZU7(KFsUh;R*0j=5`&6SGx=v>~g9YYz}lGuhmXxsMsaUxiPAy^LtbJafMI zrq^eW@})~PlC_m_Q`fn#Zra^nB*?p|+O)XV7rqI;5VjV1X+9EAFRatn<@MFER$+x> zhNgjNjmnFbGVbY>RDQ;j@Hn~G_M-|RCRCJ&yQa7r+AqH#4j zO^9df-h&rH&$P79vAE<56bV;C^MimcZL7%&8MOzV1ufldh0X^Um;2Z@+M9oXH-T@3 z6+&IgVaor$CSf}4)9pUusKIm!X`m-mjN_@BZAqAH$gaYa*{3!ft_-RY02|2cr zL&Wv@hSaqmUUO7Y36T`hHA-3ryLn>^`qkmH;W=O}5c7hzSiSXe!rA#2in#-EL`%J0 zE!Ew9+Ee#E`6^%LWoP(%@KUfrNKLS2YvVjyw=sYE`de9D%l@vt%;36H6ZIzDna-w} zWwaN*4PF~I9kF+Kn5{o7xOcmg`07ZRLt~19yYmNl&UdbNk8@QYxOwU{{2zEn*z-%z zuRYbQY(5<-R945VS(eJf(sLGk7FGM6TF>-a)N@U)hZjZ9v>EmlvW4RlrBO>pleV4j zlj-|5BU1%#3Uzg~EoJh4HnWzNvjDFQ9|QXxA^pr&Jw;qNbWZZFrVKT&<*MSLzyp_z zwNgb?#2Zhw_p2ykKLNfFmOve=7jr+-?6B42Vt{p-o=`9K9i5pGv&zSUNca_I^ZRqO z9sCaXJMa?lXBf3@Sy2iP*Xf8B8s}FT-F37RXRZ}H@&5R_@iW$;89U(f;Tz!B!KZ9* zl0SL;^o3oHfpkQ>J z8uQ4y^NK&-Ske;IPDX7$^?tFl0=^rb4K@;~4`l2=-n}dMJbSc!$Z>lflN5pYZQh!u z$C!WOikkwC48W7o1IfasA|lr%m#h1eeCN5#o6oCINxAoQ)@Oy0XmUix!}_3T6@DeG z>)_YJo9qh?E!eUmmtYz5Yu@R#71`jk_%8P&%n z2?xG=B-&a7fBwNS|I+BW13%)AM`Q|f8jF~~3!}%XfscSsaNE`5Nze3T@^f%nHdND% zTVjv7-8Y)fW#cgHZqT6afY*m_g+C0>nryxLv(<57aUUoDxYpQ|LvC`(yjt_`4K)6k zRBEJX0lY2z0DLWcTest%g+IXpqv=K=v>>_mDQO}5UK#T@&+JI*0<8KKPvCdLFZFQW z;HOsR$55tB^L2$ouDqpF`Lh&X68`%#oa1zCQd+InXO6%Zz*AtUsACSG{3I zeXM1Ls3m8sPUlAI8$I|gcrAD{`0?~Hjk>m%tD0%s$reLC8?2{C2R|nqEkE`0Z$O?w z^-W*+ad=z!t?(xN^yobO)zeD<1n*`V#5{8(+CS?WoE5ttkmEd+U(1yZFN~fn5Iz&0 zZNi?LHkq8vWHTAqC2GVl_Zr(~rfw?IU9Cg-<kPU6J<5BWn5B(w4yPo?O}p=AG{y@ZunMsR&u=U+#=g(2{TDXxVeg~DSfYG2eC6a zWWkSRJxUgxARV9B(3X9VGP(CpApO`Y{ z9QX_HYN$(pOx_$W)Em%y{~$(XrHpISmjdCjIo-9|0yKvXT3%E5M);+k^#S}+-467P z(lj|nHQu+6^-_-2VKn3MMO$!sdgkNoS?{Zv3_VZykMMQyY49B*uP7gy=NddJ?>e+o zKHXvMB_7ch1g7d)lo!3e9GmyTFWrFK34ag%lXjccnD0RUi^XQoUe+ADlOQoE-oQFp0xtDW$Zr;=6lUk0sE!FdC}wC`vIuU<(?*%I+rI)`hJ!KtH? zDpr=T!Q>+GR^`3n@h96n|2V>D!rQ|Kz^6q8bp3b4uQh`&#%S^W%&^R1A*NIb&9YJY z*%%QOqnHSP7JdtSI(+b#<(gFGPA+Dx!HQY`XNm$_!XjDK6vJ4()~V1$oq?qUT?uT`CQXi@E_q1!8gGR{m}LxxpwJf z+B@w}d9YWUeN6gHhsv@V(cb0fkFcat$KW~8vz~|l3ZLXZUa&UXMrYr~y^q{B3)ZSA}KNH(k$&E?vI7P4F@B zf8nj*2h@k2F-RgL#um2@-zf@1ZmjB5vwwS^e3NgGD!W|Ba@)zhhpW{`<{3DNA z4L=2cfai}@`-hL`+LRYq!kS6hL;$Y@wi$?x+p4q3P7jwE!>@*4x)w!HXU1~H^$SIR z4Nz2k$^{pw7thIQ{aL@_y_Rb@ZBrf{A|?PIPyxax7}%`6Jhq9Pj{*JcSo-4 z9G~#*qm;rQhhN&4O@tq%Oq^C<-e*~Dmi3Nv!-Mj!L^VMxw!*)qX3j;{_LHH_@DJgK z;ZMP*SZ`_0=C5ZAlnuGWNHsZDJy2!z-MhxSQ8y}XH%%G;zZyRIKOxx$v;0-8o{d3|#_I-3gC>sgH&&PWHC{2QD_{OI0)7kp z(%j$*-?zv*_sU9{*M@Y4<*~rpIaKM!5O>XVm~8xac4))VgK6+t@Nw`X_kGvn+!bN1l+r1s@4HLTV;o5Yl)QyyD(%;#m+Kh@1)6O_uz3^;V7Ve;0_I$)8-%q|hf^y9iUKBk#7tVco_@`^q zJZOBkW0Uugj-{RO_2O9iroEhzR(#@GIbr;m!Wjb7uBsxOdDr&Yr#9 z)OUi-;nLxe_(BCw_hyeSi%*&G{_uwI0q|!mxXu>+ay%^ktNrkQjLdBI?A;NY&FC$* zfz?~HEgx??4}SpOAAUEyeSS%0mK$$jIDc%&{xf_2P7i;0+phLDY{P(@Cx{c z@X7G`@NUhU8>G)55O10--EwO`C#AC z%p7xN`j$*O$9e1dhZNWZ$7Pf*yPz$9?#bjdJQ+RuBltJ)r@L;5w&%`XUbrlm$>uS^ zl-iPQm*IM1M0&!uKw8y;C8&<`621d|9KL?rTjRBU8`)M4mX^91(XD$5J$gCbv=s%f zi#KJ9lhAG#c{ie4_tKhAm;^Z@qRqt!x7o1yP;NRk0WP1{8ji>@ay3t2cNaf z>+Nd(VYbQX#oSqnqJL8tOEblcajRdTTX#O`6ns6rK70hc&Ztb=j=4qu(9&Cr^UE2= zPC~=W4moA;+jC!2N#TAnSP9<`?+1SnKF3sW_3o*JhF&wjBdhnAC%hG$WiQfK7)=%S z__tv7Xuc1gjGjIY{uX?F_q5sF0FU9B+Pm*Jct^!pDE=mDn-ZJe(BcKy8`|vI*WeyJ z_)Pc?`1HIlG%B6bgn6HXSwE_+<4fZ(Nt33fXq%=r7Co4}LWehiKM6ky&u1+x9L+s- z_<(;}glC1Z;21fDGTcL4co2AJK_WJVEgIeh{sBBE>W0S}8j1JsW2@Rn^!nhp_t_6d z)iN@An5(6^AF>~A9@>5yJ{GGWWH6+QDL)1UBeGU8q{51R^ ze9;9qea693&C1iCJw*2E5%ul&gxr6X8LPhA%BmThJm>%~gEh%A%q8>ik5_3s4m^y1 zuNrstaS(&^O!i2R--*c7)~P_Fb9QHc(1PJj;n%~}zaHmi?OT zp2!Gm8|bxcj(Qlsd8Uz(cdE}e+iGgdH+WI3FWo$QvVBw{q@cI|x8sQC4l6!BzFB$SZQthJYiJ$W**W9@e+!-tZwbFF<*bjF ztp(fYIZ9)*ZIEqp=>0(UQxs|6{=fcy+k3mi;CtbH;e+8{G-wwW9zQyj%$<1b?K1`6 z7a5HqJwH5#=N&bBWDk$#WWmqC?}0xAe~+`O_mx&beaHAw+{DZEJItsFDfuKVBo{fso-3jJln#-@H0T}T#Jk(!g4oY#B>uMdA6 zz8l`GLo7)mWwL;!G5LLdpV6j$jN~p2KUPOc^sSMwiDQw2@ILTW@EoWcRr5_74stH` z3_kSsTSubZ>2);QGt8*ehPh=`IYbxvM{E8pPg%c3arO5=JNQBP zr8U?c_?N0LW);&WZ}*m5%FNMt^@^aCoNW%Tq5Mt`T9oNE(2j*4hqr-$1Ajx0{a)9F z9h&5-iC>FPwQGDe#W%i*?PqN(78D!|eSh{iJPm7@aQJ`lMIRLVD;~Sd0OLhD8wv7v4zHRxp za~j?p{v^Bxy#IcWJ@>Z!crw>;spmxA*-;iNV&fNE;c1Q!S@Daxc{|y)u}1;_6n-ZdeC1PL~&{H@q4!JThEW}%UO{DGcxdL@ICMc;eUQ~?j9+*y0Cn( zb13%py9)cwXZm&P$`1SWcTE&Th&|SWzYhNgz7)Qpq{!c_$2MQPR4P@xpvcF4a@%&^ z(^-pNji2jNy}T}Og71Wv##~qfU+Qu$gZk@w#FfSa7i_MrQ{A87xn=#%H&=>p`4K$( zBXq;yC*d97d*R2lMg$D{V?AzLkmtse1okujci^_W|99+xx5S!<0sj-;ciPfnX4j2{ z^i9IN)$Ovsp3{Wu7|+sq*m^d<%MYmvISC&L9|13fdEL*zpL65Xz@6``QLm%Bs)CCX zZ+jTWkdu<9a#u+znT(gg=fG#c)0gUoqjc2xlj;7x7z442oU60GvyvYjU0+Rl?PcUf z$esSx2!9LyJbW0u(x z1fN5dxHPzCQA4QSoN@Pc#0|3B!t}58s(&(`ylJL=JrcmKgBl0_75*x`vU~I%<)6_9 zpKrMEFumR}+pVKYL(a=#!}ZGx8h-quk7VElu_mEnE^L7pWm|7kYUa;~I?~1acHYaO zW9;kCosSKjStjE<*0>rcu7@{(mspyc;fDsxO=SjYg?Ct5+AQC|8CctSK%w+^WhJ|rwela+@<{V$Xrz2nkk1I z8a<|%MKKP^vYgDnnn2TNZH4cGUs@C1haX+17nZ-jk~bk#Ah}ylzM!g50 zb$k~BcG~=epM`%0KM4QHp)AeikNfK1H+SkeeX!!Q=hR9Z7B8DNYN8qqd|24Wr;EKb ztV!D8c`&cv?kRlFDx3SWE;9KO>*sU6DOS%t$#$dr8sa-;Xi+M4s_<*zzrh>B%l0bO z=_r~!BNd5DMcgaW2)y)}RYY|Uo3`;#QnYx%uz~l1XUAOb4qvk?)_9+Y?C$}Z^d;$o zD)*|nOzB-4YFxxGmtMJ1x-4x6d_24wd=z{})3;07J2}j?b$3U1JSk?X` zki$K~g`8)d3x5{g7XBbSV?F1xUl%^NuQ=s2$T(uuziE%`2addmtMb+z1CrM3Ki-C~ zgO7y206$ZGel$Y*e!YG-O}_6hmV&89g_`rp?T-sR_C!T*3i0?&kxm7D+O zd4=)F%gfu3gU%4I@QzIo6+a(l6}8n%W&h2c{qPD{lU##es;eat8#rE6l%5PZ{4qnh zbFglV-+TO2g>Uoxx5_s*{X2U9!PDWJ;s3(#qP!?NJIQ;U&5JZCe%|#?5LHZ2Rix(z z-8tnV>*|FH`E}^Q;D5slp$?cZHH}6e^E8qUR-23m-JSm>Kq#=7|N0caNM-UzF0=b%gFYO<@CGVZwH2vmTt9xMd--Pb3oO<}o;SaYf>9 zlH6yX{bZ8mOleEkUS9r{Q>ra%B5}<4 zJr|EA8*3kRez$?99sC{m$M6pDF-=Wn#ad+_a^HM8&D^EY?bh7X&DKN<*fcNP;XT@7Kf`~4KMg-H zqfs-XmbhTtud7dM9qsvX_*tj@xkR1tLFeJOZ~jpq!?WP$;G5w;kB?L}`LTY99eMYy zAeT!#OmfTg?%u1N7Z0xgTI=g))&{=p<^*R}urW3$k=(eZ9wJ43d1pDMJ1XG2ea9R32lS7+n^)ktoC%#7tu_IE-|UvKa2 zGaW-_%vz6jR_kfWV0bn7EAX%33vWH|m#a2*Ub%*{&u#^4V9vQc_+e}Z|Arm^T8pM# zg7(2X!9R!p13&&r&i_Hew>gu_xNkNSZmx-!PhU3XXVrY3IhQ}X5IT7TJ{GIlT8=cmgCd#lx-zb*iO-t8eetH%2Hpog9zGP_d#jrC?W3j2j6cfZAqBVCv%2md z5{W#1Ml!GC8E4i+awvQ<{5kkU_)jCFr@EchM!VFeHia*=XejG4$7gC?NbHYy_}6L? z_6On5!k5Eefe+U-SQ%aQxi{kb4(jDlxzz5TFCV`e-q|8lyT~f+>i=E>Uj^R{-w0pz z_8TeU$N<-0f0qUEWma*GqD^Z48t$UD&rO{)GIvs+!GD1tgZ~Au6j`8sB7~!8yo-mT ze80nd_w0J9@2>xS7i|9A^WgXU+dc4H=+Ony3-Do{j{Y0MIKG%2vVVTSJ)qRPSn6ln zNqJApO8<3c?Gc`FZKLpd@EY)|;pd&7Ze5{NAj{AdW}Lou>dTed&ly`9t`sNOM3PrN z*_On<9`}*JyThBpH6u;_b8%IH*6*Zs@4dde=x2};8+tx00x zH^WzET?uizRl37^vrWO}{%wW&rFP#xGmJ~Wm85DOo=Xnbgns~k8GdQ+vGP9kmxfw$ zwPktw(#P+`-{d0yBP_EW7hCXYnUqd)9}_hK2G1V-<5lRdDK2PL$Ue7 z)7`3&YugsjbYwjr({TucUx4p|e+hqi+xMAHujx-ZUHvEKzTiJ_m_F>|@@@B6@j5}L zLZ3W~-SDdDiGRV5!*6i^F6Nlc`$xu3-~74O0zPau!Ve2T$2jv5y$sNTz1MJlXURA+|!z;k+!*h%&mC1Ax zV_Tki8fjFj8OcR|72?}AlxUbTHsA8w!sG{h3cN0S0Q_R%4=cVi<5H`XV-JYm6dHT8 z{_=pYSxKJM(bn`v@6j`~4Y*$vo&lc$uejk`v|*Q!XF1Q%hs1LtvyHetwbq#VXokOX zQ_-Ah)G8{D?-0W#s<7&wmPAdM95Q zSBx(#J5ggNKGos;X!yaNeRq$;o4~(^cY$C2zU{?pKj*^O#jttli-y*dyH1h=IHoxc z_rLYb-;yZ*5Iz`w96k=dhiduuQPi(3*^Aw*w%blZ^leib+n&#tQ!DrSbx|U%I^i?n zIng_ugRfej7poQEby~8agVuTCQ_bTb-Qf(sIo~aM;@E*qS)78u2CoQT4KG8?D+oOq zXSX#Jc{VmSynP^X)UhLSd`Ay|lOgry%#@5Fo)dl@{3v|N{YlD>b2GAXk6NeBteE@Zmf!{<&%ts4jGKL71F z*5uj(Z}>@gKX`Zefx$du{`LP#76;yr&AIUxzVv^;DeT0U{nlysk!n6A0 z`C4OrRZfEYljb5#pWv&Y%$76L4Ljji!e4~{1b>D5m5Mx5-%*n}J1IYVSm7R-Q!Qt; z+8Wk1CgoKMVJ~ci+5%q(PheiWP%f`XDLrLUZl}JGE?MeALsJtUcG@|gXdof?+ssbhq$PJ*sw%HiKeLVsbWG~ zjr>vj&c%&67gG*Q>`G^`ZN%^A!rQ{9!l%3&le%`4dt-XpCv9S1>H63D^hV|)Ej>M5 zd-Zkh1L}J4>)>PH&%=+6smve!*&@$*kMis}4<-F~OViK8%%9x<*V4U!Gu{9H1D}mr zjFEJ@E24vNGp5MKNIEK&Q{9oy=L1PPI4o2`t;5~7?oPygmm49v%gwUc6fveG9f+)M zLN+F+%|8F9-}UdhKJB_(kE=f0K6`&&&)56CH*sGg5#LSC-2eamRrp$XjO`4Eqas#5 z{gPW}Q5vN(=|GlYrj~hPVi7e>H;r{4o`rv30RIiXib)YmS~<23MFw04XO5ok{ghYE znGW94%U6^5dF+%1cz5`*d2(HhqcZM_l+wHrC$g|3`BOegr#3fBAr~d}Q8>~Z8pl$P z!oeH?ybfv~CVU>l%F>$7;51p}%UN`{v{o4>CyvtA_tu}&vEm(UDJPUNs5nrX+XvgA{_ZN(|f%J_>6abk<}JfW#;z8ic9d=b1g{D@^jemgVCmS)+eVv>Zq z)V?LcP%HJ{4pz)Uo__qBqwu@oo8Z0SPf)fFtE<}5+W7g3>>XqqCh?nDE!y|@sFoHK z0nz#={2};J_@nRxQqA-c4VT;Wq`>OEG-|4}KJh{#hsd+Rvz73NxGJ$@K@o>;E%xPB|jnwZ6ZslY3*$V z4C@kdB`KdgwJ42LH)1f3v4cAZe+&K_d=mUxv!tG?Qi>)!j&Bxsjnh`W*qw3ICf|~< z_-h5JkS&_O1g|5!E^4<&@X{mLl8Y}&iHj#cWoNiq3dLe!3njCvRp{1XlPK)o3jYA! z68;Um%7V{js?Ed}QdNBX9J^UsvIVx}+SYHkF zC&qoa*YF$hn3+{9Yi1mLU{&kF;r@AF{ID&PW0QZ5$mFMIrR7&HL%zZUjt_Ye;1EKDal`5q^C8bHil*Gz2+T4~U?5B>|h1b(bnL#9%dZW;2vue9@aw@W0KhK7nKSV*Ru z$(eU7p6I0x{SN;fz6bs~ye!%Hl%7G4kl6CdQ;uCoO0&2{n(n$vGYYqsLf0*N0pAO6 zfZB}@Z$s!ws9c`yC7HGEg|4+1@CD@sWKXGxs zi&R(TcIM>w{XIqIX6LQPFGHOHPadl!;XiR&ogc-~C0wDIs8A`a%;lRK_H`)qG<4cc zgRWLF8{Pum6rPOiOcka}s&a(s9o=1(#-|-FCASKnIVh88afwf?bgRwf2jS<#uY$LL z&tO;-W|D5plm-;-+7^M-rmctQaVaDz=-;ho?U;g7>}DRQglF7yP7`6Z3|-)AcF;URpp{@O~MwIFnEGD8Cbz6qOR0HK)Ul&0&Z-yC3ow!TZ8%p*9?V7g(h9wX)*t zDgPR#TQga0MY6n;N^Cj1v?AGr<5 zMw4YN&auib&E!i465osM3cBcK)6~h#!@dblcd|( zvG82@T6iXW=4k1piM|vz&0_LHdBo|}KMwqL&x+Y@=>Fco?cJS3*L3*X@IT-K;LDP= zsuj6ud8gQnE=i7{vMHTynx)HSoGp7?c(CAaKMDL;&vznf$&+JQ)nB)D{)M*qQMPVI zmXJb7^%Qm@i|U@t2?MQkv&wE2d;z>8d@6h_mq*a^)wA0TvdEr$ez&UNq?~6`p9@98 zHjYSRS$RMxT59yupH=M=fG`jTCHx^Tw+|7<$L;Tzz2@C%ULva)isN-Wxo z_P^Qh&reIlHZ%qYcV}m5tb)uuZcYDtK{9+NycC`dKV#?t$FJq{?Ug2M1(DoQM32wP ztd;ZRHkK(i7JXgH9QZ-_KKKLh1uXLdjYJm9QP|}>o5`x<>W1BfeaC6t7G{N)Y}h>? z;MMSks6B7Pr#ssW3G{@FKG6)(Ah(1}6R73YwS7WCC&zeV_F%rwDtuNj?)(a01J82H zPH$5Q7<@6CZ6MF-oFAVZ`=4fa5v4XsRmf#f7Ql~#_k$mWFV%I(R4w|R9MiGdj&OG1 z^>jYX<;(MP>vHnUR5D$@7d#C<0p1$f?TJA0wDk_Vvg>pvqphgQjFLaKHrt$QTVHMY-kzz~ zD;iSCd!C%`DIJ)=Nv+CF5x2*_hPQ+_MeW%!c0Q#Y98RTkWt(?0GhZmua~2XZidOAV zVH!c5QQIS;tj6<%XTeWF)*Dg~JbTS(65YIBPtB@Kln(Mk+CIxf<|H?shFV}{1@8dA z34S&FRO^d72D)<(=(Y?aq|47OpAT=VTU?yg z$-FeeOV7@-d=Ss?5^?3!wck5RUs+YhQ{6Q1yWu79b?~J$&D6fcT63PIq0G?MrO(Wq z#_!3Y>6h?q2m=RQGy63d`{A+q5a}m8yQAB7C^=8)%pD!1h-(QObjjrw*?D?$?3*sk zVurvUgr}p%nuqL3v0hu8#A$SSJs18Vd24`qaR~l4d>i~^WL3E&iQdesCR(Rk zifu(#b8QM|7ci|{VxGOv$4z0_Qz6mTvwc+|u$_^~;* zu^Q+OJef4d*_ufol)si<$h59%KH&F~`l_|wcM?2cTP zw2QpIG;+UBn`oL!uP=E!|QqGOS8rcC8fy*dY28{i#I0o zu6{W(t<9unnE1SZ<-}R*aG!=ByJx(C?;s>ur2OjtlAT5LkXF}}!c)$C_Hff3d7JW; zaHyKO44#U*&=y`BS(RtjM3IeSk>p%1v5G!gNj@{DeDa=r>hFa!IEjfVou}a^!>@v$ z3vX4xai$A|&YlOEY)-`|7JVze@0>h~a-xG3)m70xBLg0x@BgyJX81RZB)J2jCp0H( z6TJe#+AGGg(!LD0M2qm~9(&;kTLS+pd=&ouWcYK=Q3ln;c3yAhkqmaY9z-Pcefv3q!)0I|d>~d+H!g4-CZ-_{?OFB<2;8|6Q#Cep(zpTe~ z3_tcBH^V!Y`NG+ls?1>SK!taLNi{l^oR!hf> zLE9)MLz1JiXgQi=E%yl&a^+0R$wPeBkji?_2F!WEp2hJWd|TU}ZIXEP6LCtHR9wVg z+@Ve<73Nwwcgp3QX1*%d3w|u;Ji;***;cKtCC6G4Oh$pmS|H$z*E2uDwB+$!)r_b` zttzG2VfZN6avU4rDNGKTo!*hA*Tqn(yUYjW5_x=2A(JPrV&~MAKIyv&9}C-t<2<~W zrta@g*P~6Sl<+7%tU`jYImmFO6QnP%?6Rxr#7p?|u>Cj$@Hy0xjC8_`Z`{ff+{l(& znz3v9y6O3v(xPj|c5XReyWlUvPDfVgfftRaI=W@bJf1a0T`T1k6s5B{$qvqxM_g8p zn4Tk;=!^Y);9cP@kZl(!80t(;Pi>-z?`-{^gO zCB1;8ROi5_!I#1hz!M5;GTVA!;X%b#)&5CTri@!4%}@&ZEzPv;ViUPmAK}O5$eZ9z zkyVNOO7ZC=qR%@;+CjQRD7hBW7$Wx9dm;h-?FEWDUds=kLwF_pM)<4lX==FBt>Nv-Om6byCA?BI*wjgT!8;WZ3}xlM>+QK|A+F;i`N`jWPWC@0It zUAt{Tb>rl-Ho=cwJJ#?;@G>RoXgtyRP?Ew9?-XWxr|9vA#E+f{I0sqE`SaALqv31e z$L2YIjGce%6Y9ktiCC?dOM1mCR9{+qD%UN=Qm@NtK+Ote3hu)ip*3LSGS$jRt4Vzp8)R*f4GFqsWmrPDC^+RxE3jHxyc41 zI!_{g|7d#g}PN?_U(V|4budWZbVsQ0shAPzsR1uA4EMqI|gbu&@ z0B?Tnx2%w<_TKA+DSEAScW_m_wzaq976vo(}&Dvd(h&@>*+$haV*D%(m)+tgP&N?{Z{0v3+k?m(mi5 z>Z+maZ1`#L-teLDCd#fCwv%$LXa)A^d};Cwi@2V&V(W8H7Z=}Rv+h$Rb@21yPr_e@ zFUd&P%@dX+{;F{hNt_v;Vs!~YB=+%I|F?y?gHyg>5S{^l3tj;4aCDSHC{@fTsq~eP z9kcc-k)kFDm9%#FKbIA}Zna4O)^))@g4aPd?B)}^3!Y_rdfL{VnsBQR|BP4#P`c*X)4F*BGDmKI7NNpxp;1pISUJt(({sVj> z{AiUMt=rAU%7)1zhEf(5N>QffQVXwA#2O`E*;E(mr~hgW@i2@m91-C z-*3%Tv^Wq-N0pek$#_CTA$QJRdSe zAn9;XzJ=ckKL_3t*=_H5sqlono6%#DBdxCPoG5WnF#G8$EoCOV(o$gAObUd7=fH1* zr+Dh(S@w<6h0>~&l3qDo(b0-lt33B_xsozsIkW)&ApB#Vy_qQ&BF0>iE%G3rQ<0xz+B6 zc@!yKT`HneXNkn9Jy_#3J?p>(HbeB+j! z8J8!{+=%fOz8_u+ugx!GWCYBZPg!%~NO8`U*m#DK@tlb^G_pD2z_?n;-T?muo`&q$ z1F!eHvV$s$eb&cpc^s7A&(t3|M?tsW+P$QcYoD^=bv*n__&M;#$X1ghw7X|0WO-*u zZ*x33dJ7CMWPBa1;g1-Lmp&J}x0K|4EQrUje@rUOIb;y@;Y;5T?!9>vV55!|?bVJKmzO-`n~cr;eG*M1s?#3W_)qY0@M1k0cLLrv(-1!iF_1&!uc?_xG|J0n3wLZN zPR}ad3a^8)q#ynv{QE)u7Y56n+*uUin<$cpL{cY~()*iZ^>yqHocA#*y8>?puZFLO zUv4V!sf>+viJf1}OEu??WM+^H9A}x5Xfp(!;{)4$p29oAPev9eA-g%&PWjXlIvR%G zR1;Vi!cnhqpX0BuWRgN%JwoSt>f3*X4}_lyZwY_g%Tw{%{jwcXbt{qh*RA*5WL~iM z&}7FqLXiS>0?BAI=GNdH;Mc+@BsA((wiuLKGbz_;b;_EuaaxM0e|isnEsTyl))*4% z2A=}&0UrV1YbRawy3hXn(V0L0SN&N(%rKa=&zMCc9r-wu=0B}q>TdWvct7~DY%906 z?DG&)PCAX;5t!s_9=};U&o$wC|Ixd?6|Jp%a(0}DuZ9nWFNOCzafI^a(Qonw>Ddk1 zYX1)7ALl*4czwL!ztGO(kDW2sGT?{cFTgj!ht&US%3K+#)uJ!k6&fiN^eX}yV(!l@ zYl{6JwXp72`xo$IYgTT+k4M(iYJVe~w7V%hQvcP=EA+t6X5kOjN4x^_gLWDezPhY+ zvJ-wO{NL~k;N@o(&E%t(yXv`xHR0vPI{T&zr^iINhhGkea4YBapP3kh@0Y=s!mox` zDB@kz5j9&THO#J0>U`-1sa?g)P#{w;hM{4j4JWmjORkQzI0 zXq;7S{;$&dy{o0yT&tGGRgWxwa&(TsejRWnI*#ja!3^bjiOw^#Aj9*;Utx zqj~T$coXzCKERL8tyEL@_e&SfT`3)=6)o6Lbgm1ZVJ7gt@FKL;VTJS^yb9h9o`h_8 zNWObp@;g>SNTTLY!k(CX^>Oxdi~mffjQ+Cr`L{J{j~;kaT>Gow7r-|U-#4sk?|7v# z+8%kN=c0G?_GcQS%>QC`ElP=Rd`dY=4Mu*1A6qx!1OF-@bxL0=5yJkJ?XtPdl=b@5 zl<}HCQ@`35?&JH)Rh0|ix4@6}ilX5skRQ!AJSac0`iSn#bHTaC$u+l%Zi`dxyi>OJ zOnaj*Uk!f(J{A5R{2l!n3%@M2uc=^DxbCjw6x<1wl!khTiCga}`>70);ygSLJ_o)O zerzq`GM}$%wY#2H)M8TF8SRvAy2SK;J^gM!&5`z11fLFH4BrM%QVXZd7kauHY@zm@ zArx|=h5M53EG3!s5$YUWM(Y544tzDdF|wqPMjoP7cRSO*dzuay&`5M8#Z1&2nkcm4 zmYW$oNORhP`xpEdct>~!o7;PQtqm#VRiTAQ?A>a>=%Ee$8f9Xv(f{$ZVZ3lZdq64o3(ob`JI%rrEn@9SD>M`D z^6K0~1|b;3;a%Z};mg;&x>2q;Gv3~iMo~Qe$gyyKetjdwy~ zyw=-gx206+yqT_nui-P`6W|l!x2BYa?f5P+ZP9X;8)yih6r`)8#u8a?LO|Jw|smdQGVA_C*7*M$HfxUh*Ix(g?yI|$2jC}iaG!&>MpkWE)=ccn?@GOuSx@s`(kpnq!sC+G zV6Ov_;PyWJVx^D=Zw;@24}f3l<8|cz+tj*?ddXp4GZhWKy;o9eF>Ok(OlKX59SK%vXDJbg%b>ZRiicFM#L6pS)9M{Iz3M{?xM# z(I1uRQzr*#h4=(+KfcT3oY%iX^|1x;rSP8c@8ECFvGctiDQOCkhj{({bTGkxP3o7Q z+xGcgv%K(2%eUj3UEzD+gW-qZdv2_8N+FdEM(?U?o+kb0U2Ef~xqDVShj?6VoMg0a z&TtU?bUfE1@V_9dPIAke^*uhOWepKfAFObT{Uy^x5E<5Zqks42K>weoM2F$M;LpOZ zfaj4Y%}@RqeA&B2x@%F`jncj=_(SlM!(G2FpR317 zsd(|Cp?=VGROK7I{Dot9;|b}gZ?6tN1O9*TneeIbCMCakecl_GH{rV?Hvg-1$(6>s zipZ3^pX>X7(>IGHq6F~&!as+vhId%f;kD~$VE1$9^~h zUUASD_!f9<%1=_j+Ytf5)$bMsJe+bmd$dw2+T0?QQjT07YIoY;%wF#E?+`oz-wtn% z?DxADn~LG5VioPQt21Oz=-3^K1JO9MG>Wp53)<2zrlDjb3EOACz^ODCOQ zbM={s*)V>*mKXQ3k+jt4#UcNhN4g_9(#7yzuvGLWPr>&$>Ugf~?`qVEwW3d27$SS) zA7&F>=G7<&8$DLx_~}{zd>H&B_`l#^UkLo^rw+EDY$?AT?B%iP=eN&6I??8K_)k$F;w4^i{%-m@@r_e@@b#a0J0oix&|&@Cx{4@UxITCu399O?{67 zeOd;y257nFOWoe79;>WT1`0 z$;T=nFk;uY$gtqpLU~Oy{5sf!IQGCt|7{gf9Af`=|0$Dzt{R)HhFvz%jXF+SqGLWP zBT^R*bi>EMCgDhdk92n(-L!tY3F1VEyK?x`u#?G6p>6;Frd1dd*H&x!PT28(b7Z7H z;B(Sc4cEVK+dyQg+_O-uO7Z}&q!>%87DItTt8Eaufn@8A!|6d&V^ z7`Zn1uMSjgXZkk~;cr*Jb6Y1~7t-@fU5hIVp5%!>BRmCJ@<(WZtMTV6_iF8l7Q>ul zo^_8;?)mLqiI-`Va#3tZWmN$D9Qa0fJNU<=9Gdd^^TsLd%0<;1p9N{BmKk04eH*0C zHkF+o3K={KzZAY5eg}MYNZ>$5gVZ%7?O#R8i?2%2^5O*szmXbedHMIO^O)&(1^yuX zDEvA2{pXBn*MqOeUB4URWB+=~_IVvaojFAtZf+c}GiA!C@3dU_TkyZ&xC>7kwomrj z928>bs@xP(;WDWG0lU_GqSUP$tyV0xUoCkHUktkd$18Y^kJQgUs_~H@L+M|~cJr+{ z)VB5pGs5EiKOs)UU9&xMcp0oSjsf^u8vo4u@6qKA-A*1vbLx-}hqSIjyDW8l*W~Jz zuRgJicjEO#-*y$e1+wbK-BpUP`oE9cuWgz6+U*m`|G=Kvu|C_rX&>ItA!xgXZQo$0mS8hw#4<7-a4F5m)Pbq!f`diQZ7TvkAM)liY z&zeIvH~ateH#rjU@%4MtrVUZ>58(fVFNW`K&&n!P-e_u3C`Ov*6`NVm~h% zG^n(RWVD6`VgPsbp*Bt0SfywEpdj!$4oV!@;RT2r(N zcJ3&;1-}SB4n7zD#jwq-g*H($UghuHAo;g@QN|C+!2Ehsd-vD+gFnVU`Su*%AO0r% zJNU(_YrP`!sO07MiTm8Un}2mkcbyV%BK2J9x+8bF<(A#8@Zs=J;D_PU7KU^C*PlzM ziaBW58bzyLb*KKj_G<6X?mseowm8o^qqiGk47@Q>hS@QdL~Di$OzP2BX@=)*2?nODBF@c)CagI~i4O~}nVs3=&JxyWO;;Lc3kwx?i;m@)g$!jQ&_b<{Msg<3D&j)m^p4^Kni{44xCc;C~DwX}7X`WKqa zv)HXwnYVPjBY*fTy=LW{>rXa$(9i!4zX*N^-VOfi{>}5T@7$!6`*H7I87=z#-bbU; z=F^tk&=Lj={j+ECmIC-)@Y9yzz6~Gz@gA}MpMU_FYr^60Ci@!~Y}op8@?@ib7hKel zXm1M%t%1J|KlT~e3%|ZY8Q-0wYcl!Fnj6X(%El{3Q&>}<`ul&0oLQ7RlIGC^&xhXt zp8$Wga=VE~%JMT z1T=A{F8^|8UgFoeem@h`5$|R$hxdcO310)h?a-kKs{$9?(BUpJu{dWq?fc@&apNQn z+g#sLW7n^1@;MC8h0lYR!MAINUJENzZ$9l6`Et{@Q_0?5Z#E2U`jC`mdVBete&6fo z;d9}u;I)F0nFoU|Hbu~k|NkD&U3?$D3BDEH0)GGZ zH>)1rUzxjnRYgWv@9@@XzNdoE(hDz6yKBR4xUqRwFJ*HqK{UUBB|>lZtg8;m6j%O^4qBALd#Alu<`{-T1t!B!6D#i-Y5o z4qrpwIMtc{w%y-{e17>nE?aA%eA)7${rCeii&Z_*6eX_FqTmrkAXmyx-_lcIm?8QU5~}{5sZFRnx1b4Q>&)9RJ^%6Eqm$Yj3htz z9Q>+AtK65zr|JJTY1^h$cw_YOE8)j&!FzW@@p!ZM{p)M)%<@0>#iDWk$BhB3R}Iq> zHyrkDn0ahx5&Tm4ukdE@aolxNVsD)LLmGX`-KNmg~z7(E_ek3|QN0U82Ie2D$+&UHGC!@RUI`Qw~#fnO9*z$^0cn|oe@SS7l zAK-QIO7qJQ_c_nbR13TfH8EPc9)$3#??h^}1FwPq0)99AD|lK6`r&>Xe+FJzleH|q zVI9?JTAi(SgRqk9dSK*_2PY>rUc1;1e-&N^KL_4;x4t^uR9wF^){*MB(7DEUxo5fm z@23gFIZd&r^xel99>7=*-vhr2zTaPUh(%r>?^*7bA)0nt6QGP$l-HepR!!2bMQr2QY9{p{QW0(_I;x5E3wm%(#NvnCqpytwkf&GuM|~?*~i~aMqgl)-qe=LM+y})vN^s}Yw zHhlYOm3s9hd=~sI_yPF!j&U72TeHHlE$QVnX@=}$ud882U)*JO$=r(vs0VQ#@N#&@n4k18fA6fz zCMz9Jrp@uD{=Q|@KKgGx*^9>HM}13kvesI|kHFJcAqT+6X-?=iOnM%faFuy=op85) z(rI#ynxt>KA!PSYkML@%4e-;^_a7S*55V7f8>nsNVH@_0+O&91x9Rd34mZ-7^G>#o z3XZvsccsfC;ho_*@K@o<`f4>>(mWXZd2sL|<4_9cvpRCH<@C;VW%BK_64nsE!~4Q} z!`Hx%h6mZ-(WGUMN=@xFGmovFp?cC^=JPhb-q@6L(>_!D6#gLm7I@M&)EuIn?5u$| zlH>L-9v!WX3zaW*j#^vMzb5a?52;XRg}5319DD@4C45Vwv5(B>$ilugqpn&*HAb(b z3iq0d&{o}AW#wv)F){8Co+Er5{8;bk_Oe|}AO30kH{ze&PrLN<>`$wQp8I}HpmQAF zn5A%NR`3Pzf50chkM7}U?pW_hGbq1f^vP*a{eaCiZ{FzJ`esW ze8tRvdMPbm*7R4UjSeY7AG}m+Vwg{i7un4E-Q}+9Ug9A9clej^-{51zSr)GL&8;yu zp&yp~TCZBZ&x6=Yx^Z_NgGmY1lxeQQYhzyF1H938yw@jba+b{5p0>bv@n7R(-k$cQ z`X1Nj#s$AN^rf~oKIFq&z$@VG;jy!gL&mo?i(@ltv_wSLlxtB;M^g1ti|7|g%?>A? zwKT(f!t1QYy&9e-ofzp#4GPt{p;#N&bcQ01kf?>8LMb{)6iyk3H1#ktEIbW90X~v0 z?xqQin-(eR*sU{$WZl{l?PocsG=agr5dq08iAoy-IlI8oRJw!K-(4 z^w>=yezFf%nM(I`_e;WSx5jo)q{RlH?O zbp$cT=~v+I!7qlNx&w3Z)IoydbF9(HNJE4?tMz>wdPk{l$ySE$@bz7plCYt9v%CC^&(8vZlhwPirQOn|AVs*3^egm>K9F9n%%x+rMUqV&os| zn&zcfuPuOI1%D5ItQXPkbYAk)R%!Sy(uT{>?4h^E_v=h(Zkx z+oq9}-@TWcHh4Lm5G?!_`+D*EBMp-mMK~nE=fS^)4~IWVZ=AB|50l1lRTDkx>503` zoA36gg!$A+TPmh`M8x{vhi`^&g-?e!J@j1ZGpA*dk<;H_)RC(zVnsEl_4ijqocZkE zQx^Hdu^66=c?%`{Yxw0mq+z?hrp)Budr`w=_`OQG{)b~(@8zx0H@h9}ND}vN@N?jG zyixzai+-ph`&;aH-DS>xBuP_px`K4%A+e2$A0DL055t=VNuj76;7#D??!q-UI5=gl zyrv~JV)k%jRInjC^=#~@p0=mc)?XDZmm8vgfj{1=ff~R7hBN~1^{Ej1j!$EB7RuM2noL;0S2src0vvmH_T{ln=qNU07j$$^?x zo1T~V!Y_k=41W{;lZWeoGT3n8@jn9=Jf}r3ukf++*^(N2s-^sGV11zS_X%N`zknCQ zzkrWTcOzE^4;!1zezIMhMskcQyXZJ{@$f%kH{>P92WAJ^z+Zv?2>%^EJXAt$DR3%lxCe{J)pW(-^L0=qRH?IEqo^_2wqns5ZAI+L<YG;uh1Pw?0>V!)3j8v>7c{wyiFKgxMA?C5( z!CJjK^~?DrqtW`6>YpUxrV1YXGx%(H`aT?vW#%d=QS%?gkf;~LQx;bQFG@FmX>duN zvi619D^?MFCHzlHLa4uTw*UqJ85(z<0sF zgWnCm{8T(^)T2&ea#XVM=d2}vZgb!8{gRTUH9%ZU@%8wjAmH^duhRve3{Ui~o6C%x z8sSi{hvN#nS2u%1^r=bkXW)N@ zUjg6B*Zl75)Y0&EM{kvs@?YzM|1A}K>OC^+#x9lsBVYa=Q00 zLsyw6?J0}9I*VFAXvEM~d`?ggdpi8u{2E>ezXkpZJTLY~h{QF@XMEWVjofGPfx+4k z9d=sNBsW1{xSH?NVZ*>9TDT$?=`eWUuqSYO+F zDbEMvkNT7MZOerBgMSHM3NJJxZjQ6@8k7lbh%~3}{F&jxVX5PqmrRR*e;$iBlHS1| zfPW9)4)00tImoIJ#1FbqjPCdikPn&m?)5YbGbY$AKEae0A2s}W`2XOi97KIWs~%zs zZb~H|`|pHA5bjsYz6|S?ZBr^$3Zhy~M43jQc7`8-AIlo88|=QVQ#O|ooP$jgftb(` sP0d70ftqk721jvIW5SQE!6o1`;0gKQU^5Z>e?QO1&q+R*E5*>>)0hkCYCHe{Vu=v^ZVm>KKJw7d(S=hKA-2g z=iYP9_uf~D=CA&Ll3q%}|D^vBu=qdDWz#QAy_Em$O*aqx@04i%U)}zfe_4(HzZd+^ zfS*52vTwP5{l>CIAu6->`bm-5hsK9t_kR`s_k_=Tmu@U?6{0?~y8aX8GyYH4zyGr0 zdt1|c-~aVNU)E>#fBUK9yyM6IjYiU6KluIz=ckUs zzU#feuF#q7{^L(yR@gp3C0+64*|+`UqlB%EZ<}voujMfRShwkPJ)!Qq)Mjf+t)w2k z*Yex=jGb@Wttr)#dh5x)onK$yYQEP-t$4)V3hH;Q_<+vfgzx5O+(^h_=|0_Qnrzd( zR)DY5^hDT-!M>%8$AT*W4nG7x)GsxggFlsKdWK*Ksd+U+EQG6yyRF@HdF7m&(C9Cp zDBb0>>_qQtRq%!Mbxnrv$5Os`Mj7$jQt_FT5j`P$x3sGK#+1LAR`PFI&hNsxl2x{3 z`Q%Wz)st0itN2>1c&9~!?xyst*X?C{RrIH4hwInX^ip~a)Cgs}-~;XBIjG^_TZP%qcdJyy%a2Jwv}JM2|-l zwe@{fo!WY{4I3d1uFjjAnXK1qby#d} z+eYSYVfelkDA|5l@qrGa%z| z+bxultWYNwu*~B5vqWYF_h2*Pgl6Q(7UwtDg;+DbKTHsxEnnl2?@@i1OVPw|KrNR!}NfNr!nc%aS<8*WWtP>Y1FH3~Xza zMqF(nL4Eg^0^7KZv1;`sF2&8EdOK}4eu7Da*v0$CI>m49wjNl-hbYBGR7Q9XD}@n{ zOs;8CdL^{(RSmL?K5b4)N}7p1Ewc(T&3+vdf(h6(tU9ipi?-B`-~L@sce$G>WedKipy3+be2w4E9(it{g>oJS!;o?J}nR`Oe|O z0XncHX!+4@hv9**eZsqnh_Aj8Cv6_qWpw@!*4WgzZ1O{yqxj=$CN17MS%miCI03Ci zg?;C{I*h@1`)m9xCL7O4x-|Q?|2euiI z?{usb&CUAU?zCcvLg=ej^y&r2(0)b4n>qWre~4CTA??=Q`9EZyHpa5TL;kbHiPpJM z9{Lp4rr#1)xK@$Fct9>NBw@ba9d#g?O;kIDew*P~QTf@dS%1bLk`m?dEGevI?zzh% zFf~`0;aX>QmUFxIJmNoc{l(ow1-olm{04{E>Dm~xhQ>I13dJe?KD+Y=dbn*qQy#RVFxUF5oH$N}j>Ts&Ev!3bwUp{wi!BCAS(l-tasQAUol%ifvsn@!e*VQa z8&lL`|4!i}J)4BKTE?Bmj|+EiIRzDh>X^;#ySZ7J&mlE+jSbyN38YU77=P$+{33b# zPJdrJr+ige7MuQkyXWD?T~>x-=}o-(M!|OcT54SFs>|G&$-7k*PD6*W%BFh{Mmc

y~A_Xps7 zVtTBML=OYUe%Nb`xsLMatRWR_(HVxe-*44rewD$wex68a`Qh|ITi=87op1|^Lr33U z#}B)m{S)R<7tZOrr2dYBx4DLgtCT{~mo!!sO)q@*W)8Zx)gq$La5rK(uank}?g;+2 zo%YwV{b=VuH~ZY6%vC%qx#i(S3E83bG)Lx6EbUs9l{z|^z9?LEvZ!dc*R{<#3=#SE zL7l2~YgV@l9XLVNyRjRLrI2NjIV)sqFno)hlfE6)5vAj>#O}$6$ISZnT#S~7Qy$QA zO>MGYj~4#k_W>m+J>(D1*XYtDk!h4u`ENHeJ4BAGPd===HXFYqZkgVCn%Ps5@F3pO z-tk7#N3rJLRBy6dt~xJu_OBFRuysg>lCxb%e2_?z@xJ&5t97sT*Ctq_f-1k7ke?MEzEqnFTOR2-bE8L`d?7ee14#XF)p>@xzkU*XUoNzKXrVy zOrbsEeE+*#Uwgc`;(JDc)7XwR-z4)s^1e}eM|F_noKtFPeRHGJ_RCzxPbglk;b>=b zMKhmMk%TVq05{&>jIf`}r##%fW3k?%+^Rm6Fuis-RoL4~7ObY6!)nb~WN`T6s_k5l zu-5jic=Kg)Qif8u!lA0gG0D-ecBT74$k?XShmy`XT1V0-b|jra`78G2ds{`K4R(YR zvU)lrWIsAB9UIPaB8N3SRFOBxJ7Xp71~z1K=K2R46^O%{40Ok@_#Zp2S(mlmPw)TA z0zCI#LqHOH9#^etI)lzC`0cR|W+sR;U(9a`q5pZ>4ISBNN<$fY{dA^!COnqGYt z>lADC!A61oTxdx9yWiFHcYfBkW*GL!I!29H$Q6B^0D82Uek5*9?yu-z zHt#rbip(m?XMV_XRd|PSrVV$<{wNfXi-}_?T(Q5sA7cL7Lzb6%XP4h{vYZKe#KHt+ zW(Al}h@8T|QP%8k<^MkWVP~X6hJWD#rMa*0)|z!&8>zMSfU`v>H5%4=Fpd7D8&j+6 zUA+xfHf+lUVMUn+HYmf7I$db(;nv@PpJLRKePpR$P2J`Vo@oWS*Xxrx3URe{5n+5` z3)#N6`?!&R3y%fn?7_Fl)ueXhDPlWaVSo0XlUH+59PQ-9gmVmVS}^1e!zXn^(eE1@ zGV_Tt$FK*s8y+zw;3Gp_*h;mo>ah$Q^e4e+Zl9C>*0G5jWrcTpTYp)WW-#}*6{s9| zzY=hS*=4nxIK)@8mlNibxxwL1?`*Jh3PO-TSZ7qYS(pwwmdrystsZzLMX<}QXc?FS zY$KXnQ?>=)#MWBW{21Df{B=Yj&GANNU3>bsoqdhyB~eB~PxjXoY~_&Q{RcA+k4)p= z0qmpm-v!ZAa~68ZjqOH_h3LLN*3wzlw4Xy;S=q@^-zd^DLx_x@|13@?FcYdpTN^G3 zJy{KIQ>345Wt~t>PN-_DxwiApZa%4j)@&e}i}wdPa_AK17e?Xi=y-meYfz)(Z!|}ECDrn{)1D#c0LlDWVCH=mK);^1sTgSEz7$BY< z=k#>ArBl9+4VJt^WXCSvPS2%(KIroXuIu4-J*{8lAI`>-`@+d1dtO=2v=5vc$$O2q zb``4}J&LQ7J(k%LRGVHHwUfm7LvOgl_N`be*ssE^Ld~(TWJUsOo-W#@-1pJ%L3GAE zr673?wpGBw_nS<?@m@QIItcYA4SSlJxJ6fvX~#~aS7Z8)1Y}W}q)6>gxItEx zgK*`*rKZ?OEzftH<$29!h7nI61o_QSa_uncbVtS8O-E~mKeuvdeMBY{;FM+!_u_E;yma%^g5x;A3L;?zQ%ubk0shy6st5Ikp9h>>xBTPmxA~(aH<#1gzvlIHe+VE&j zHutSKmgTrVNT}y>U`mnR>(Ia?TU8vmTh07xQ<2WF4WiZ4R|}XC=24<_q|X&xVx)OT zT=8`Minuj1)RD3@m$1A!r)Dk+yi4ATsHydKU6XAvjbSf0{wVjxPn05{@?hJXS^a@i zRLXqXmhi1)NnDlJTT9t4oIcC?`_A^{cqu)!X5#})#;!mpZLQ$_CO6M9DY&h{!29JI z=5lrNXoV?#ow@Qf9E1>M4^V->yY8IK2OYX< ztcw2okml?3Sf}tlDL)XI5EYYF?w)PG5%#w6?R_+Li{FC%-re_3#(GViE6fSh-2U;M z4&(g?!Hq?8sLnuQdi+hptu2eH<^;vnoF%t#QOG$C!ApKzfd$ZYJW|Iv>;b03ZgV|w z6a0`oxD6r$gmRMy5;|9pf%Ph6uvU=P&3&n>ix1ykS*YF$p`<#zZ41IeIvKm2g~(i< zE^9l0^r9WTs1iPq$yiH_-cV4uvB^e$`TFU3kX<yEcmVk0qhR|T$H&AH4Cy;`KK7E@@UYFD=;ZY8pQp?E(hInT`B6wFTedbPbiUoS9e&8*y<8x*Y&iLy1q$@C4kt=8wS>7fk_v%%3&L95q=3l`9F$sg zZqrABfs<`B>lW}2t7zpu_STpjaG(1!oPQW+^aQ#QLnO7L0MtI%OGunNJ?a(U;)rdN z9HIa^Wl4;ZN>to&V^hK%fl#8q{duB=Uoy;d}yk1g_pB+dQL zhpwt~x4x)WEZiOxnOtKzEr}YB48F;9yw*Vfb>|$a?@_|LZM1^F|I}QO(NOUDC-wAO zA5(s6_zT6g?WbEe%s(sm_<2S(+5SA6k=oL3_;Ih(=e3kVgTJ>I#QY`c;~)OBCH$u! z|LIzABc%Pe-+o~de*RhO(j)A9qITBx>!O8f)`R%5e_o?i$KJlF_`Ldy>*A1q*;k7i zYCpPO|IzZt_H~a^mvJ;uN@MLmDX0F(OL2~;t zGju4Bf~0pEt}WEX=IC!{2PK7BuQh?YN@Xt9u7_P3?Hy=fx(-pqWLXqImtE@($0#Pc zhA0AkBdFKhhw}te(;U`@A-7VBxBUGzhvvIMae0l|F6PvI-_nu8oP?U>)AoHnh(PMn zjBq$VcGmi28mESyzfJ!feW$)4rs%ZTem6K^%|B|91O)?cBBJ}rA^Xq6M|U09ekkeDP=l z&zd)Ejm;W@NE4YgsocYxZk$`TVm1SaRj?|kb|*0 z%(1o5%jt7^xmVX{$mP6uhzz@{FK98s4kUCMI8g?IsI)+DOhwVqhyd06GE+|_y6hahreM+G5z4ysb&7K%AdR3fY6^5G z_}rc}0D4J4;6=9jxO|KNN<8&wsY)JhjL?1@q4u0tXzXA$&ycy zcWZo@hI!}pWOqeWM|Xqm$$wM{HpELE;TEkd${PAkoElm2V_SW=xfiDzwHmv9`^bTH zU>>8`Ovtc5$L2MrJ24V6oz_5&_Y63jq18KE9|cexzE4gTH&dP?<9YEf4;wk z%>3VwO9ev>0H8x-`~ZRr z`>ow}=DD^w6@(oguFXGa<*(&8Hdqfec5*)L*zKeh8WyHroDwp;fgj!t?^y1H-w32` z|E>|p$mne?v?v$CrCJ9=(p{)P|1Qhi&5UPy+||zyJnwp8VWEZzPF@%fZxqK7V+4k; zJay&>iEEQ!6Oh;Poe6JHlU94+Y!T{4ltG&|q`Y-wZ4-W1h9mmP+~Bh^@DS|H7>X$H z!$@l_rq#mxt?bmRpt_jC+p~-H6h0d3SJq;Hu1#-uifM1%>)0)6Do5j3dReGWbt9lV> z+M`*Yj)cr$cdwMK%_5yMH_(1Tov(dRNeeL+Z>!I9vt8ZHcU!!6uyuyxhT}tCDUg%h z>Dp7$Khg^5AL@|{k!4Pr28W>#a_^4aUZzuiQ5g&b$qc(4W3|rVSE$luS?h_Lf;?CXGW3Gm8_?C&zEqaMO{&JwaS^vy1+X>9_56 z4*QY`b=Vx%GzR50IGU*U>9Kd|4ePaN!{V93PMGw}2hm1xIlqt2Z|&Q$vvj^q4~)0S zppp|-M&gy63j-8m=S-ek_(wdUGx(I11o#alvTx)1zDM|2kmE`PKOjf{aue%|C zw8qLRxh6R*XG~x`H%t}n zyygnvW%TSQx+u^mQq9d)iY3XHPP}#%Ik-GaV-!`adM~wxmL+Y9C_z96b`xso6F}@b z8tpR;YNqC7ws@eeC*f&#_Fsx@Ai5BTULx1J2c5ClPFxx-Q#vbRbJkS#>d6xbNE{wk z%52OUM=a>)Gzg6vou2kWLUhZ=b)TDZ<9=)`lGjeJov=J)${7<#zn~$=4fZILYH}gM zuKJ%yl9ULB5-(y(ry}YzCQ?tbNw=f@;#x8(nee(`1$;DMK5F_U&e=Vfn5LM&>+Kvo zYE7HOWvMmGF^Q36tUa*m;waI-OH=^@<%tS1@e!lsJ@~saCcI6vIvF5`)!*ez-C8?^*+lr~5?(cwA>S z7717etm#4JmT>ASoLT0|dFhpQRIe0}NmqdZb9}>wFV+}l>QOwdZw$wHG&Ej3Z0PBg z<~_Ca@K(tDPJ#izZroj~0)T3irI(Rr`HA46@C1Z@;_hi`z1@a`a!&zLpwYH=oIDQC zBbds;#Pa}sBFXPH4r=oR{HzQ%IWk>YI31{J#$rHWdftGGKx-zq5#yc!6l{aV5nt!>E1*!DA0SXTcAt6t2dq3ASghBjbcrY{Nbf1h~REizHb90m^~v{ zujA?CJxwbhK5lsoVq}~TaAM8dmR$wQbXR;QEsu3|+yP;BI`#2x996yU%?8RH0T2Mq zhuKcY_yNR}K!EpYFnR@qOFH%CLp7wE^D!Sct$U&-gI*!<-5>$(zsU~@Cj#$RU@6di z^&CI&DyRi{)dTREiL%tpH9Q>=n1T5R2_(ib?4FziS+T=d&Y?@(fj09qkA0F1j7lS~ z$K)ku?@qoUHJlnl4?s;`9!Sf8lcq-hxKp_hGVWXXvyPuD>pyp9#EbiWiuYGyTcX$q{Y0AAr*eU?>Do zXR5Xn0CFV6&)?4oKoHu%k5$kSnHgMwtHjw(=j%X>NdmAAJ{NkTAFB0$zyw-gvT`dx zdLkHsPWgJ*yl6n0%v)_3z%1?Hgs}O%zsZhseD^|!KEUBk2_8BgR1-77-AS8)84%b# zLO=wx0-YXh;lE$c%DO`b0bz6~b!mRSf8?pt-&!mMijy)9h{q*ZyE2RRWtS0&22&A4 z3|J}R-2!Ek`x`pU3m$Jq?8~x8W1SPF1p3`b#3+xnc4+;Wwqhe31reV| zyMT{E7K+Dt5gL20@N6bxIJ!1kBqzF{zUdko=H)#$$u%{dpO+)Z+K87h9C6!S7Pb!b zOHaB*Q^C0c03^+8LwTSkCjVo~?99(#NpYbB^1N0}{2W4W(?h(y+r4IjJ4=3(HIxuI zCtJMY5Zx83I7oHU6^bh_gSKRwh1sB2l60Y#y#~mpt=zpH)6wAI#+t{7rj*<$*dt5R z)ICc-E2(in1CM~es_ogX^B+TDcr!4MA$otPH94Un4Ed~^_bV1@2tx=eTUt_%s*?k= zr^K<^Of_q2!B1_>Ebvo~dX0pFCy&er4AI0Cp8(Sf>m|u{-h|(9vD7PPHaqJ8fA_#t z5&DBIDf%R;|HlPwqAzH-&6T(;jvUXcYo{h;`b27Q7!6M10%Np5{0txZ&B1^@*^2eS zlv_HeWpNg!Z#XdxEfCl^>c%nh-~Or@qV^``f1ziLAYp*h+-)zY4zxZ4NkZL=Aw5AN z62pA?U}BZ^@G=6NGPUD?Ysi9++5(7pKcKG+6%Ea2lTdd@rw0&~-T;V+x5gZ!e840# zA%Ds%l2&oKXazwJop-CTUp5`%x{#Bk=?`cw>@mY)gDj6I5?E+9`fot+eg#kkpL>l<3NMF|Hv1t^KG7(fDCnXoBJM(t^MV_Tqkq^3i4{~&O1NdF1VfX(cNE@FI zK4GzNUgWMzcOVPjm)?&G2pE)ldQ zF=pwy=(+~f<&pn%4$!_p4@>s7K4VYWZL%|oXnrRmG^_uGY;(=}(C89pM1T9!jn>Z@ z^Yn5}T@>^?CQx9Z;D#rMQso*Edvx$$}5jqCq<9=_G#_{k3izBlyRek#at zwde0tePk8R`HueGosXE820k~yga4G3=rp=<7aB4Aos2qjwbNRC)c5Y6`&Xhnuh_0s z{0?sYfluW_2!=J#2FFV_lSYz5V>2QZ&%$f^NlQUHrX`Zw--?@ZX;IWfkUbIV!K(~ zJ5z$ni*BhMdG<=1c1X2Jz$?4@?KO!yy3v04i+O4C*_3%^YVj!bGDe90 z)lxL?P8+ys<8dm55Q+2MmrmTdLE&sY-ry$_3nJE*pr%5xpXxRJ!|K#$GRM zoWYZDs1aTZsHrlwRB#MMA$0T!(62WP82WsT&FjXHt@1g5 zaKUy+Bt{#mO^jNUF9K`23ZUC;!A+RrPIv~^eW^zBQ~nv#L>NLp-BSvNhZeh=LH>*a zd1dC~yV|y4sKGewMYAyYbaU$hJc(rbp>_V0xJ5k3EM-9~ z#74rKvNP4}yti+ja!Gc!Wh{!z23x&uS2lzovpHO4ViKq9xgxVibJ{PC+IsccSWj4; zqVYOsXhX-9o;SNfsPu4^M_h`@+g7ID1yT9`C2_J+p}o{QE|;rvs(3UzeYRHDLt?Em zFAda*~#m$d=4Wncczg z{g~9F)H4Jf%;7=ZiCmWZYq~Mi*uzD>m>h#nHYJGVYOg0DmWqSn#KZ&QIV8`0HtoA) zdKNhGGUkW?cmVOzRPXz1>mZ@CXUg7Wojt;o@8q+p1zS#bHdh zwW(>aB6)dO?ba`(rp#+5gNe^iq-X3`Ug2uDdFm0RFo~wOwvg+V9Jen_dSjV*#MTU3 zA`?i&lam(*iunz#(M&YvzUJpai!|~)*qsysq-~hqfeCMwWIHG&znPhJj$9SoQfNn5 z+O#D&T#}LDkz~Z~Sn;&<_UCTY8w4+St;H-k=Ne>!r>y40?ERZ>m%Ob`WPuCy?15^a zMw5{W(AVc{C_45h#)xQT7l#wW%Fq`sCzi69Cu-;)23xkwBNU6{ia)vY&WlR)Pmvzy=%2kIHgiq)bGv~(S4K5`%8pq%t6P5j3d@$4? zD0`t}D%A0U>C3$8oP3d2Y7xTR)(y=$LRq2K7qv>2aDQ66aFxx|S&sprBrW_IZRVt* z3Dga6NxbB6x{0O!c>(feCCyofc<-w8m+P#iW@okc5EvpDulEBHy3GrzsbCh7o>6m7w1AMqTXO}7p zLRsQ=Ib8<_bKxF+&)JNPdw%uF3;0}PIo{!d+aWXs=lVH#BGSEm!UvePEC29Z~(M@JnXqd-zQ6%1Tjz}I^<|qJm~fyXG*~{JsRi7qQAWfZ2W=Xk5<2uYXk)ayv})eDhxt3ol7 zD>+g}Ya;UTraG?E0P_m?=F(jMJZuU!d8V>od1zq-BA7{IgX_8LQN*cCt>mr0S}qo) zCrIQ=B(*wto;#@ECac(79slV&8LX4gyQja7bDJk_pOp>?Ww0{B{D6Q@Jn1>MbRQ(R z?GnXY2zX@5%4ES-$4q^tY#Kf{0c}_!O(U-O+xi7?rRVTkIb;PNda}-cKHht-n!NCu zP$BQ?*c7K!2%`N@>H|;ID#0t^G`#b){)&s4ZgR$4I1Wo)Sh1kDjby3VIH~BtjAAJU z&K;Chrxow6r{EtzTqt+Tr(s4scY&!JKtTE|t;(Sf{wBU_9EIx!HdJ^q?Dee2#Cc+k zh2W5R%Vz$K99L`sCDjloanMtD=AZD^mT6W-NVPJrvYX3{+czQwPrB$#A*$Nq;s``) z7`TpCK>Q-l%Z{J>3=~%mo?UYj0HYhiK0B0$vG8DFIn!T3!cR%EL;+*?4YkHhb|3X67zReBmEbP@y4R?GpLDf2aAsT`9U)s6ft%ag1SV)G?qm-mFLMj} zjcn$2wvCPcRn}=9eQ^a)y7&oT0e&ziqbo%cs$^pCcqy$YI8jUTmkHwJ;u&sQm4=`Z z0Kg=^3jxBPfKx$eE}Hwqfh#Zu5I3c8UFp;5;EN-wAA64|!zhMP%YO`W9%}i zU}1j58C4P4#aUHD51_aUfUA{gHMZ!bq=Tx~+>O4^yuO^>u=t2|quaq>T=R>q+qI-b zhnQ`Kk7x(96KG#IQS>>oq~}xi>No4LN#E4ajp~|oe_9EQzE<-@Uz)d7x}xOqD5{`rJcC_;gyNXaZ71Yae1_#l|M!+g#KNHz! za#o?^rW(`%Hnb{?6ncA-2$e(UZ!q-_s2%I`_YN+7$O9bFTzTn35;`1I7p02-9FW;m zR^GSqy_a|K$T}vXY5slR@@%aH-W5qzDIZxcyh}|2mrju?C80lmIqUH|*CDC4Hac3p z`1KAq!O|t*E?iqRe@k-_!uT1?h)c5>z6HYPYJ&NWD>IQ=@Z$DgOm(Sy>-=)8VAI!Z z<_=>$#Cvx7*7v5`D`)9|HQ-eSDxrc?^7hqspfg?V#YiO3g;#pP4yQ(5VZ3j0;ivlp zT6`wnz|;hfzx`IFniHzF?&2y#8~lh8U-0zyf-erbrkaFbfyEdbLUf68Rlt~R;4bXR zBi;Iez+@voEhYUTn4lTGjAh;X@>YWRCcY+Er&}K6k6WaDogKLyPVQ9f-B%L>uy+T1 zdody89n&sTg1+7cwb%4%;w{;g5dd(vny}iJm%$IFk8HHMCdO%-IpFLQ1L8L>Rx(i3 zs0Gl4MH7h(KH8khmjLBN*ydrDjp5c?1nQ>%Q!Yhd@z8DWk)dwjnrZ0vzGB)^dkGI; zbXr+CgsLnvd`AQ&BplQYk-IjeI={ppm3Z~vgU&DS8VpmDw3Fcfs1({*Nd_#unpDJn z8zKHelUd3IFv;W07kA6Ro?u53_^|AN9ZCU|{X$c{bt z)XRk$X;?3l#q_CCK>7Zbk@BS@H`V6+@(R3eg1s=XlKZSr2ZYd#6r%F-co!IL0AkHo z>Gv!zZ8_4PdNE|N%!7eSGks-Q&@4V2x-v3QmZz5^H}czRMqGR%PGYi`Tj5gf5?$pk zR?{US%HR9dN3kpgOFMZrf|oPYNR`auoDm*l_lgd&+R^$`2+oQq&~0!qr1Q0cb^o|x zlSB9+O^>uWMqjeFJ(|QuIuh}Cyx-&){xTmG;_=W)Ic`#jpB%wM1(SI2FSQZeWm$gt znAG}B{+Gbz1CED|T=v65h4M+ke|I1Xe`&c2+0tP=6KNTJ^(nDEP}V7wfE zDSasfPuW{hKyZsY-Vm)tH*#l368-Wf%yr*x+0cp$wHIU&X0r0Y2<0MCyE38$Tu|Qi zD=2yA3#^D)L55S%c0I5Db{qs2Yf?~e{L}r9UfFw(8*#Ug-nWt-1Tw`t_>)7(q)kwO zcykH{&Kpr3z#>)7X^Zf3oAnup?sGjo7i<{Zi;M#&{(NCQvp(T1C;I*F9wry%?Fu_t zmoKD^U+xiEfNQ~^kmF<7@t90Qb; z_3JFYJ~jzkKz2>Rrm2%w9*xFix@+8h?MBtjQnQn(>4jRXMQ$8u|*CfTU4aI77nxua7X9a z#F*No|NdQfgN`n;~liE z0S>oEoIK9cNuk2{p?rJ(s};FnWL4XhUjDe>dd{i;%7N;NyWhA`=#!!X)H)94sDZCc z;*@v9OWBfa!&e)HX^L%ck9S}E3Tid|BHf>;e85guQuJvSVj@AR?ONP!_<5+oLJ}~q z+c&i;vY1_pd#c#7*ITj*@aoCTc0tAmc^|E6=V+f9MM-AbH!tS!%H&YVUfYY37vjjO zU2gu}$+GF7_|cn|!PAFjpB?@u)i;y8o42E0`1gydmp`6;9XghM_&m7q9~0Vu)$G)V z9{QX6VXsG9?fmwr_mF$NvxicyGfXS_ihuFK*Gu&b46&;-# zy4*-^6CyH8(=|setToFiuCBRP+%{n=gqp=islUm5_-ZQ>^9%$ZZ(C?(&b}-MO&*Zp>RP09gsuQ4BOgDJ`zpbB_BQGMmrp!HDrF*@yt0lILJ-sMu=Hp6z}GRK6%7;!X}0M_LdSL znFB&-1FTucP3OohG7_5?ae1(zMP4|f^i|q<`30bgACb?)sjs#SD@Xbf*M0eVSA=0i z)(Y`9YG5}iDWMJ9+{|;^_99#X6UMlclcSS|`_ucEIeBvQ7ZQ5qy=C-hM{I_bZKH=_ z1mw^F`Qa!d?(tAvNR)nvm+4#KQp%q_*~vBZqz@?xoridlBC{GlIKLMtB6`2L<w&Yre=U8KPx-5)#U=(fU-bYG>oW5c6RErWcuZ3@e)#7*TYfB^-3gHt#KrJDR!I z_M_>(H8P7J@R=kIovEOA)AgIJB!JW#7gPUOyWPNRnujV-7vAo}X? zP~#}fKSbn=L5-M0_lHib)Ct)vPto$t<-MLEWt8e5djA{3F)Kmp%s1^5?y047Qh4ZF z+zv~>a=tDDRI;j83LPr(n!TnAGwPT;b-_s5ZtmQ)aG*&$gqH6j=%a4Y2iiR#xJ%7# z$NO~p!t08Q9uDbe4Qf!HG;3w7Yuz2A4Mhm1AJs3SvY#cHl@kPMTri#oNQ0K<{Ol^r zu7K}mLFbfulVA_%vPCfQuBV=~#9*Ia7;-t~D!(z=(A%ek@|=ywZl;8*gsQF1KirCa z^>fr4zlT#ISXx4SQhZ3P?sJ;!x_^SWKkmg+z)5y&S`i=rgLF?T&H2=AjPJd!aGQ^N zeiM|x!kpJm*F|~#^UsLB!A;=J@e@xk7L%P-&K^BKl;* zFQ*&UrEvSiGri%TbT!49wW0QAEBu$~YVo1vpHFKxX>tqZmzPt&fARmU-X^ z#^HD!bc`)v7VWnLEeQkFoRa)?%}=%fs~N#@*LvHZ)W8Vj-r}lGtUYwz8rBSO4E&Q< z^qYf_P1Q@Yp@}D^uIEO}Rqb#`Y%Wjmz~2oE!n%MD7@Dx(dtcyFf%t$Wdy<#fVBJ7} zj?_cBJW1at86zqsSIrP%q_6@~T+uC&KjPv9k_WtgYaDoF_?MOY?3qS=gh$3q0}j%J zK>0#U7q+4wHsl4rzXGcot2P`tvYXlTj0?IMd`CHpItTP^1cUg77@!~{Ag0@MI0k}n zXsrC;W_q^H{&l0A?@5fe06W36T&WaVrtZK+3S?VKmE8`?xK>b!$?w31g! zJ5i(Hhk=r2d=@}7eVESOOASuUo=UO9?*LbUk@fPK9{&J?0PXyhU-6!O`_uo^#g|5< z)!zGV*jNz}5VZE~>wt&_hbW5HSx~Ve8t2sa^hCU;!|8O|8pYxaakvr_jWOiddm5Ff zsb*4>=y6O$Ay;360}hL}uP7?kAr7c`aR50x?ONxqd!G;c!`|!v<$2cn@89$P4WBxa zxEmP{uUeZup1H-XizRy3_@SJnKUk#m?EpD&45^R@Z7;EDx6Wz`x>{>nZWqoiGljawBU#?Y*D`ro?QP9c{nxMdXUV9&{9M6>3yYWv*|VH`xRgl>uvTF? z$|fQd5E5wyK4@+h%$XI+U>`I~`WC(Yz~91>7~F^*K^yXnYiPNI{+~z@LQ9N&(=9{F zSYDxMJ-*Ic)ixUs+LChM93nM>4o&bKfqS!*x~MCd;W%((&Yz4O1LkkVB(I70oJS~AS8tXUJ*$MVG72(^=n1xzAL~MSTh}4 zCIMF<bcld6Iq83Sf&Z> zf`0l)$rbhCUvs)&X>P>h$=obFa;T7DP&JqnvRv|yqB&x2V`u)$D9F$kyH-6nZtWh= z$VXApY1gnUyFzTkC%bgSql;!X)seK^d&}oRDLEFOn#iuPiNCQ_bo+)~M!sEpygen+ znLU4=krBUMa-fVDUB9}Tk_5|$UY`sQ0CMBMkds6vO^N90ZXY2tQ` zYXUzjZ9NlqW8(&beCMVED=h(En6Z@Bj5tj|H{?6ChFWC2h?2TzZ8~SkhbHU`Ka^P_ zP-Nr{9|0smLr@z2oC-%GezU(6+V_EVqwp{`GB*&d zOCk{c7*vK@0LVuKcX+fT8K?krJpz)Qs^=UhXixo>OnlPoPc+bj?iwSRq!BZOQZZT( zAGINrgXhVe$O^VlkAVRo3WhACMD%nIi=^7ReOeZH3?Y7eD{*>_I2VB|B z&pagcSsTkM+AZ2n^%Qb1-RRGUct|YeS>&fbHufx0B04*2I7H={UFUz+95$?fRXZ5h zn8vMV4^*e~05}|WGr!^rdp3KzeJducJ*G*GI~+v($9f@UvG*~CCVhD?FlZx4RgRD~ zHrmZ`uA>FF;%9mG3$+pvySjRq`SxSR!hFn_u?bTOMChLu_n0VO5@dDW8FD+rXo)$& zZdL{cjOhK0#!B?@B8Tkk7SV z-p}=ILjUy%}*w#ua`Kob_z*GQ4?Y$!)&B*k1M4*9N zfvlk(CfY$$E&(r0Y?^JjED+dFFd+b4c@k5~g3UZ);E`yItD5UgzIO(OR{{dMWxrHf zf_#j$h$gPlK@f@7n2jhofGy|{ddFA;zAna+7mNPLD{`|CZsTLQ;l|Vi>-)>e5)2%J zW5UsQa$Jjhq5_DUa(M-JsCE4r)pb@QJh+0FrZD0F9NjMhpd{SZ&q?Sww+P#4TgSt> z$T<}A4`F9kWD>moUl)$xwLIErWrM%=#44#S4cS5k+P~;EGTXAeeb>mMrf_Nc3voGR zDmYo^TYv-*#ZgRbp5?aNqf+Z|s`QAk1SzA4Boc{8g2Um&|GPv;B=Ya?;3N{9L?psd z(04e1L;}M9Z;{Cqo%O=#%Z}H2H(N6RRA;m0mH;9bJ~n4TfPiItlHBw`>L7)iTNI$P{4SE=T^3WLQw%e1X7>>l71a6P}~}1$rEO ztJ6<2o2UiA@*?|Nykt7gX7VC{wuc_~0n!OAL?J;!L!2MnZx4BQOp~X8xQ9X}Q!55v zvx9_$yy+Qac#Y(00Eylu?h>|&D_SleVqY^L3=w3E<5>^PT3NwqBS%O+@Kp#Tz8eSx z>3R$XL$HDAo&z$h&4$ce{m$&mJQHXV%c==>=Vt&7y}|@%!P4LQ z_0@nt3cqsoR6Fh4du$K-3|EEnil5G974dumKR6_mCP6$!{NFb1uJ0^B_9LUW=vUQS zVVj&>HB4oN`-AF-Sx>ppd7OI$i&O2N0`CRucW0Xq>NaSZhF-Ov>9Qgv00G=8y=eW) zXf~R4z`5**m?T=CF_*VU9@YmmS!>IW7V0h_#1wUysCpZ z!%Nk^LyK{L!N`mSw_VAtg&8ut9v@rF%71qX+iid3gx}%XTNdPc#BK3xIfHXM z|IB^8!dI(f|Brwhlh?pqTU^}DUAy60s_IPjDDKZI+=_9y@n=GM2T;&dFu~8XZ{ZI` zWMus-3SSy%qBwc5-88-G(nZPlXKU`r(Tm_$e=%gm*?#r(XZDQqg2+O04(0 zdj5(-M%G`l$QgH=hIiuOKe};URgjg+{~7;5n$Pdoo|5nU@b=HvL8?kt)biB2T3(d0OCPf?IAGNj6A$8^m-X|v9&J}q9{m6X-5$Ax zr@Nh%eY`#>7AGcQtZ+-!T8>|uUs_RbTKHOQwQy^Y37H9*#eA{dh(mK}DAn~^*0rqt zm6S9cRJtvx?Bjnfjr#A>mH($S4l1_|=;J zPenCc?R?=@|C@q$rS|-0RlMpina_BVfUu;3>rRu)u#0yWc3r>jV6Dh9>OU*ZUO)(s z;}-oLNNM(K2;qJ9rvt}78trNrEOt-31vy8^t(ZSTSGMiuhV_)%FXA37R(Fc-hyKO|yrl)j6zN%^T$p zx2FxolX~mhEq)#U60(b4%yc{t~=x()Z*)hZ9mZle-O2_^4@9vLm`4*Nx14b=_j~5rfBv`$9HDYzLMII3( zt@1I(@mHoUabxB`5g~c&j%jVdYk0|s2Z-o>zRt2P;6lk`%$Dm**Z9X_zI*~un8J=& zS;46DaX75*wlsV}kexzrTAEqnG%X%XHoGQoI{B(%V`TDcDKVyBn=j3ymuA=2)uw&B zj;_BAX@lZF^W&Qb`tN<_$4?=CJbdb4)A2f6W`1k-7CmRg(s$B=)i$eD3$;)-T2_ir z(S7{zou363WsGi`#{zjxdYfcJ>9TEp`cnKi7|Vb2zV8?ly9B;E-C3N^9>|V{^fjSj z>>fc523WECozrR~_)mFHn~!U)bDX})lw9{J9bC~D8gB8pQBijJti_o^FGuDVLb5SXuXKD==M~pq!tx#bT)1@T{PL3 zsM4m6DXaKT`u2wfuHJAqC8cAPu-quIA+XL*^|R(3kaLbI%ee0;tzp*Wn+1?=_VYUH zt@u6veMU#C7TOUHrIu2dzpMWZv?H9)>NrPE><7D_?a0h#hS7XHSv{;T9uc>_T2vQg z4XCuJN^KS4qli9O%+Ln)B(+N+=`gd?za0ipv#XrS9%-bDl1Aj)>cI6ka8G5(_PCeB zGj*JtD0YiaC7C344(%spQ9qsbq7V$%7 zOH3_L$D8w5p`5O7AtZ#f>(2f8s6wy)^K>7sd6*-th$^}hogta8vvc*+M%UbirKdb` zLg50)80>iKmrbE7f@a>sj?VT3No`2rCijV|kliS1Q5U9qOuc;OD>+--G*Da*US%ef zwSTI7M4rccB?MR!u`Fmn8d zh_=DrQQDyqDeS4~?s^RP&0ItJ3JT}kv}Ytei->pOyE}JR2J=$fSKg$up4gKT4|SB= zc9#F)@+)s2%j#~Ud32&hmy91f#J`L87~9>(_dm?iHUU>LF28@aU8xf_ePM3;Z4#WZ^&*FKK-O`TiwVEkq3r7IN+RxkJE`ep>(OwUlE*N8o*>*oex8f9tT zqNGt=*HR{Wq*R*o4Uf;al9SWSQtWl(q9m&&7wDCDm2y1fyRnJpWw9XHHKu`G4m8GK z!`DKSj_;-PL?-Wn1LopS?_A=S3z9!sho2P`mn7c+kMbP2-jM;-&>f69DHJvZai zRymt-Z*F52beX>jHw-do?gi&eN8#ye3Y!%3HMjtdZ*8dr*FfBtmr}QIbBkPH;0eds zTbfhW;MRJUZPEfa?Lk_gZ?3O0_?x@$M`*oaw|6~7=J~r~PScVxATchpWntU;EhdaT zm{HN1dx0$cjelF?#|jzBj9qD(6TArn-ZKK|1H!fnL4nuI0*jaP08f2CfEyODQ9xJ+%>qu|FdO~3fH&d{ySJYRj^eNhgmBgkdnsa> zY)!&UVs5!93aDMdK2MYhdrcGSbq;$2va;ce5n5%OC zCMtBye*Yit-(BtyZxhNXnwaufZxBV4S+KoY-zVHzkBo)fzc;&V$*wDNP?Q9iLc=ar zgh|i?F#uhj1QjsWL!yT`0cEZn-=EqwK0!>0@`F-s#Q!u2vg+ zi}}0r9rMxYwR8wMHpV0FAXR@K_6H>Gf)sy9yaB!)pF!Y&$30$}D9gZO;Sh+KMWZ$3 z4)^b_DT8{JpxHMxWn?Q0UQyq%)3{wx?8DTttj@r zNE8`0~Ed#qxu$)WwiaMk=NCb#k_pO_{H6Sx`O1tqrbB;-N^eM9 zbJN#7TxDe5U%hgpXiZssIz07v(_kavNpwOgs)bcKIssb{!%SYr4AI*Ir}bf%wMe zp^)NhX}A7blhfX6({eklX1lwg^{d(kpO4~Mg{Bbw4Zg1b}1vdB{Z z_4ksLozV_sT1ilTOSA%!qgR6@P*U8pIy?Nma19T$?E?lEwPr z!+G{KiptaGoa(80DGKW9B$F{2$`BV-3i}Vdx;&sPht<#Nuf>n~*>KD@0Yc@qynVw1 z*A!cq*51RIU_TzaTxk5kkF5!XM0ZO2zC{G!|l}z#HI3IHvpITvLy5!Nser+()6Vo-}L$qsh%d{c~qR?n8OZ0)K&Fab(?x>iP#U_)#JY!axPsq%!FMBCfsM&& zsMTmB`2N#99KrHFZP_pYP-bA>bqCWQ3CL#5oI)`PsTO!>wEqd;k_0Zl&!$7aoovTh z15TQBXv7JxnTrktIuq1SNxtfWVJ6pNHS)%IxYC2_mwQ8~xE%iAovsiy2 zHpewv`!2L4VOzoK`KZugX@Ky7brlOAynP!^rgU2QH0tC+V!1*b1BSO(Hs~9S6XBHi z#^>lBXF^q(mlxf?Ct_wlTR641hAZ6(BD7s7m#y7{JI4L>%~ze{x`3zj?74>6?G98)@YZAHnd(sZ>7=9 zrGHKaz#OTy@$mGfljlNwsg=IJco>Sak+OcOoIo$RBtTFh1Gf0mJXRvjV$;^`ir#r+ zTMp02d6Uc`Q7%%tGPf3%J?>a?|59_cy`Y*{XMO_5)T>Z@RBjPVaOy2KxV+q&n5OhC zKF=ad9;(;Im`S$yf@?$=`%I?iSu#`$%aHNSn-#e>YZOmeCJL)87bb5qzmFY8ybbX_ zm_h>&4a}JY=lkctgOa$f6(}E}Ej+lV&_xe-yrW2w;k@+yxw9X~xcTIoYuPOq31fC^ng{#tD+${Q`X&fiOK%GWKdc*$3- zvzN-!mt4v!T$Tcy^hoDyfi>gR{3%9xS`4&s#dD3T^0+*sS^r5zi^@?LVm8JO&ovW` zi{lufknq&2VMpoEvt(CF?Wlp$FdWo+fMJ9m7lj?vL{pV#ZE@?1>%AV@f!;%Hs+R9Q zwMNQSPD3|vprVzyMGy(wN|qPiIb@Iq2!R<^1(PDTrf|AWyOyM_-lcv<64U65K%F$Y z&ZJ#9O+|c#8}5i33e(kxM4A^z6f1AsYSK5Tq2=FE3ue#O(vq4Hyn$PB53Vj<=95Vw z@!Tmoz(Fkc&7DLLkLLO($*GqD6G|l9ir#pp8SwPH=Hzm_J9DadCTf;01lg$JzQA4SD)b`gn_z7C5im@yApJ zPDqyi+78_o;LH5;=JGOlH(jF3Qai?LlGay$Hi^|M(BjCiE0m zmcu<=OUfPBZO4h=cn?s?DiI=BQ#GE{Ze;S&%+^3COm4wAQ)uaQ0^OG2M}MoM>%x-_ z0yBx;t(;M=WOOvhhc1=Zf)~ii{@J%x&*thgxwF@n5_~(4zNU-Q`y5VMjMhR6zj2HQ ZB}yj)2zrcEXk-#$HWum9$aWL}{1-qp7c~F? literal 0 HcmV?d00001 diff --git a/public/testdata/exampleFASTA.fasta.rbwt b/public/testdata/exampleFASTA.fasta.rbwt new file mode 100644 index 0000000000000000000000000000000000000000..f623b8c3941a33feb692883a4fb568c49bd8e7f3 GIT binary patch literal 37548 zcmZ78cT^K;*gyOU2|Xd9i6uw`3nEG&!5}0i5Li$_KtL=YSl6`%fkjs^L0Cn_ifye} zRuLPvwc$FDU{C`A0nrr%d)I`3pa%1P`904c&v`lLd(IyGFw9J5?)$p#8Mfy=LGUsO z;>A5=0EH-CM<|X>bE6f# zKS7ug)bdEKn9eyu+fqUZIbuH>p0c*!!P=8B)`;i-=JvF_y2MdjBWY4xW`F0?wSkh4 z+S){+a=d=l6N0X9f*XVcAk&aX)w2ri`W1UBeTP0Y^Sk$_(&e>PYJ}gT{PCM#Oy)4} zSmF?{e?%g(0?Cpr6xLdXI-W}N7~;A`Fj)KYjY&Dmn>K2;6(MNwQ2WD2z%4=cARS6` z#dg}oH{4(Rbd^gkPHK@1?IgL58RE-^+}nRxlE1+x!^x4G$oNnP;+99-4{`qJ5C?~= zg049m{djIkLn2)sNLxeRcI|-w18x`6hK$lt7V{{a(8zY>+V&2UY3-0QV)RERh0m3^ zo$nHf6?fpb!ks}($ZExy-?}}<+nTL23v)O8&4NB$F~ujo@)>@?bZKoAM7HqK5zlHw+9}BBNd=^`l zRWl>1i@gt%3i{meOa7xDvlPD<9bfx?b{a_@(!^*%B{Ady^Y$^D--_a zsX1HtzLold^4;r<OQn@a}L$h!uX(dd#Y8Q4fTk|_$t+F z-Q0jTHst&@#4>mR>{Vnayti%+x6)3kjj=qyv*QF7#?xpvAuXH{*Ze1|Ime6hOW-HL z)*&MJjLx15*Z!>-OOie%Oh(!Iq16Y9ZwFDFOH19&OG~^;>)~g@8j)%6xkqhE-8-*~ zblxK&&ACSXIR8yBU|m4uD;WjIgyBUdrKVaxV^Ld5v$KbPJn~-jv z5505H_9}N*1;={0a-o@;FI=NoXpdT$TyH!rz5I20Iw%%4jctu|hXFwV3D8@bMBcWPDE?Zxi><`(wUA zYW7hkIozBen&EH4N5Hr3Sz=UPnz^-xeYVyZ#5!rROM1GOH%aI(5IVG3B{3($e}=Dt zUkzUr(nXHaZSAylqY(FJJaKv+u!XvrYABp$`<$+(|FbX~o&x&{IRr0}+3R*%nCX`1 ztE0?%6kCE$SIGyfeH5&1^k#ZP;Ewa~{b7F~|H3~IalE;gC7Z`97=w3oQagOzbYmK& zWsj&lJF7~skn}h3BVq0P_8lR}Yum~;J1}iHUwX8xAu3yrpTW!_KqA>n+Rh(xF=V&} zu1|O`*l$QNmli1D`Yv%W!<9bUPZrZz>HB%SHDSJE)J2v;B3cMP30@3qhjU9R?)RkG z9ZK@s_1Q6%H$b)jWo{sAzk%SICl7R{Ys>b+FMyv4?+rg^YICmGGeG>p;uz=Ru(SQ& zbbkEDpd2iZK1C&BhBXYj4c|9LtbmV!R}L$@o$s{qfr^Ns>&PH{mun>K%{E0VMkcJRvjH%@7kz#oC{JNLQp z+lNdeY%<#WX*UhEx;?FAuSICT`m+x#B?%S1mFA~bvX{cE;BUb1f`71n>f<>lygWNI ztBRj-h|&S8tFPNumvA^?s&5Q>$$z`y+u)zWpMsxQ62eXJFFoC<8PoG_YT|6dDDWy( z&@2;)v9r!mhFAUzZ-lpiB@x-$4X!&2zUYd#95TD+bBxaYY<%zWkDUb}QkZgyUv0j? zTVvZJVcQT>;Wq7&_e<`E^dkpSiBmjwldbf0P>-o7k6;AQnhV{m2!ahSg*C&qah!3~ zcI=tZ=eO6c)!;QRw{ja z$Dq7*!)3ewaNpXZ?9p9bb=Yzh{3Q51_;K*u=fcE+3x}1uZ>X;7EK)9H(z|QtCJDO< zPiwmY#GJb&@N?jc;itk|UGP44%yg$^O=rHX_ZyGb#K$^2l61Czmm#v^TGHq(@V~?V z1HTf!MyImd9T#HTfAIv`;*4fm?*kDz_vN`rN|iI29q2^bpm^8=e;0l?d`85L)U{kY z-}m|JB9%iUi!4!i*6r0v5;=I;!iD1h)xT3I= zgyI^9VE6X7HExH$2LB!Y7QAAzHQBxGzC&A@h8$(~`~*Ri@w{!UeumU8FJSFWjkpJ| zhIfH|gxv4sAIl5kJliI@u#+&1soG=1P=pLDKl=Vot>VxbzuP_VkKhAgzam?&-%rfv z+zOY=7K4Hy@P+P^`#f%)(9dYa@

f#$}C~p*;^M2)v6WS0u*HvXPZ$8b-s4p z+H2~(y3O!R9P1YNsqprkNDZ%;>9N*vKaV=I{lpsG7KKCI?PH%dEUR7L{&wO`csKaI zaVrbHV89e5m2O=T6FiVvy78T&&?5kZvKBZ?i$Ms3;5 z_sbvjo?AAaduwQ=n$lw*1wRAc6}B8nPk%A^Pex@mp=3rB){2k3FDn?eof=-?&~xM1 z#cSQ%Z1{!nBG@*B9Vj3MeU0aQ8f_wX9T{=Vqkt*m*$-VlYS8_=KYh0RJO-ZwKOMGj zEV{Vx1Xm`seO}nV;v z8=|K7b7;1ebzJ2Z2Gv8tw=2}?n0dZ0%bktNCiqTx9lRQzvTjl?=Z20NMCdMVn>~c5 z;Yxfb9SISsWW|@=hHj(Vqe-yBXWMr!+Yz3aqgh!?+Er4kDPoU*GU!6>T=G&U)0|$- zY?WGuO@!|c?+9y)vBL18;c)Cvo$?HINPvi!OA}b@1Y3sq)?Op46`jOqQVu@?zVF=g z;dO2XA@w?sb$tSvM6a0EF@E-e_bb$=P2R-E5$>I{4kya+xCzWzLKKte)781nzV zY!VXU!@nhD&Mio+2>6%!<2HN&{6zTW@T(R($Gj={yyH8ErQ>ax<7K9d_V4n%E-zVG zA|{lDEpOou!Y_v31}{l{uCRG5qqLEGYa$L9`Vm~Io);slWt(iP!>`4ya780^1wIG< z0{mOE_l{@!d!?Ec#FxxbekOMtVfk;!{t-x{)!FBxyn5FC-9fxzrb@$hHF3A>I3c74_#=+A0A_-Ka++^ zy>I`bW$6y|&Q_d-?||?7YzJcOnjHO;UY-AHgOF(Y*N(?A2^8z>U4&LvZRVauqdIIW z+Tbm4Ej)!6!ROU3ou~1!?pGMWyKqF@}l<6qf?v{oonc_YLrV;!8fnX75YwOuH6Y zbs1N`L)Uqk+HM=zHkTMrQ4uTP`|foP`~`US1MaqHN||D@MFWj~>XCG#xM6o-%K5Es zj{o}3=M!Z|;1l4-!PmnNpZ+wfpt~^Vr5RHiaq8O874k_ockSFW>_3fnm_SRksfN#h zPlW#j-$0`+?$({4jvY0NAN}_T>8PPU-99hu{ze1^bkF%Tr~M;*A$$hB6UN56y8adq zbv9H>!C5^~_cM<2gXZ?(v}=mhvE9EA{QEfrSp;zsegphi_~P&FjP@rb-!{GE60|VjTO1b8gb5y33mAWmcL`-0jOsOUsla z*TesW*THXskD2%2kL#oUIbhfzxPIxcYbQ;ow^=WVx{{JhFKH=SJL3E?_<{IrU2tyB zz>gU9S9sOG=l0z{(KFa%$*5U&FS&`vYa$`j)DF))7zOXTVR#_B-J7zqgmVZ>&0#X55}~s&qxU zS%&;Lb*z`yPpdhL=Nl(p`3}DVejU6W#@=e#{*y~D{!-{4S+M!o@-oxH%e^H_t(U!f zfBCn3@O{_LXn5AaxblPP3}d#WU|fRBZ@BFs$$dEAS2Fq|l9Kk6Bpc~lW=b=fEG>-pxnUv{T1fG6QQ;1|LxkFID* z`?#W}+diu|_g=v0Z+Yd8h5s#b8Q6J-(0P4z*aZIwo{3}K(zpHUe>*n5EKJooUQF;OnEx_7S>k@NeM9z+Zy5KQ(6l5YJiKbGPm|Ny-aq*Q);fkvEEU zz&dbP!`fd>E1tqr@Y%+~*TElZ89C@(;YF$0&&lmx0Si86sSXrV>{LFF?6(XY-9TS& z<$%u?eg*s&_?o51uSu%6p8mb7wL5#(m54nI$&}F9Yjz$FR2$0D4=G2$2f`P^J7Vsb zv9EN$V)DkfQ`gO4T$(L?<`(s;=j^Hj|7gaM9|HeYCWpY!fWHFo1Hat+pC4uqebUCf z78VU(Z{fdgj_18$leGoc7B32yo$~uJ2YvPgo&&OW$f#<^#lQjg&~ zJx)w6_1q8NcW(v2%i)LIcz-tg)!);3*Vb-6b*!K4@R&1hgT^oWHTKN&zlrfq30L83 z;b+6|gTGsOdtdjuarxdy&JA36C1+L4v&YH10%m_1mpMzZi=Myx0sL3^eE7@ozj;_L zExz3TwWN;Hp8sxhe8|0yy*U=v*DX>76YG!WQ+nZ@aP6FguZHh(etL9=v8=Q>)Bo^^ zn}K(amDug8d_3}f$nu-JJ!hNg`#a)VfNzF>0dFhZI@HVWj|Goi7vG*f&U>4qXLa=8 z&!^rKkGi9hD|{$I_zCbm@KnrYPfG76@CzOqE0=zH`NM4Gzf1Fk>qyTaFPWFcmn~BQ zf>Pj<;Rj>e!{OOKk4-bnqUDKO?hbtWOVnHEBF{k6-=(Ezis$qe_-uHmfX{%BfDeTq z@pk38y=6yYhi^T)$Uef}u+*~t`KikOMH$DZxOtub<4ZAoUyZ&3J{kVpZ$}G$?$l-2 z)xC^3dn(}O?_IqAbYl-t8m_!vysY5Zyno@(!1tZ|Z1}Z~Z5J~`#9JN~1)krspAvoz{X&-^cUe~jO| z&0*V-*ae=+XO6UgUeBT^Y~~4GTpAPYdGz_NwvP7>lV3C*nkt4Lgll;e>6MK(~?&S(L z`8_{f>VES0{lB`r{_dK)>5q&*;UnPl;JNUAM+;sby|FmqsLhuBf-36yyCKD+JWgL9 zex!23;F}KJWJDZShTU14?kRTZ&#H~ zHT-J$7w|LSUEY75xaic8S5DrljHr!0l;yGwHF=!c`+H|ScUAuSe9L?I-SF11S;%-v zgCnPD)XAH~BPZ?XbNmzMguHq46aPG@;1H)DP2TU|jG7#NBy17#W;QM4#J!t;d?-0_ z?$2nS`wLi?t99ddpVXx6={aNZ@AEP6ci|IYFC!UK!*84!{(jVx8?I5u4=t1boqy}| zvY^3^!%ascYo|zq!r))SuYqkwO6FWSzWVwYj@fJF2F*70%1 z`hK#RR&QAT6swiC8OK+uw*4ilG%nfS4!;2Y1AH{R?TY?>Z5MohRnQK6qhA=C{&(Wf z`d?p9nG#8#zWT1sg*mi-s5Rg%V3#6gcTyL8oVvX=s&Lwkb7#)&IyL@d>3}V-c9)*L zM{SsE@tzHT0=};Vu&|O|Oc17c|B|*X;Ar%|#2N6P;8(&{Ad=18zr2>^ zE*hx#@4ibRZS$=t_1*`Df)OVN28LXXnMKWox5mAA0QNtmDQwl5$HNPLU3%@N#O2Pa z?Nwd>P1fJtX!f4B?+{nA`N(1T(eNZ}Ct|5o|IHUvIW^>es_=0$-+wc;MMryf$Nisf z=XrVm2CY)TN5MaY9e`S5%B(7>pIvq_^U^B*-JDgYMID8OvkP?6Z|kd5F8#M;GTnzeFOiQZi_WWaITq;)Fe{yw_?sweybcmeF6NFI&)MiM-}tSVGLX4vnk zDK0r9Gg`j=x@Ok5t=?Ox=QL69454y#sbyxx1o?vIsQKuvE3szU2CX~PZ?vP;N%+38 za5L-෇sEASVN3q^qC9Dem-*Ss5p-^}cg;OkcK3c1TH^BGRiG8&hbBRhfhePRc zptVuGU*BhUrA)uSN%P%OVm0gRKEJUkgZdK$9b-vfo#@(E3q3X7n8Xj9u`q~a*td^W z`(N0!r;z7-Y0S0-R(8dyJ)!VJ;alK^@Ebh_mXPF#aZVeLvv#HS8}i_c*VHQ>p0Omg z<;hKMgl{^$7~TXQ-B$~7&(QMc*O5op(%%f!e1593&{tj#qU{tYNsgK02jeODDe!%D z;xc4p$v|r9idl=fOw}SUrjI0ll$Gu2itpw9k8>85uMZjC1V0yE3i~H=VuRTUTXI|A zsJWL|dEaOHxY0_*8#!w|@67bLabRymh6z3semU$xWM;>y>H>G&!_5H!)khbNjHYRi zm{;mbZB8sJy;%^-aqt{~br$$Tu$K_8l6`YHmr_g0&&^rDQJ=kOePLUWXa1zOBP?nc z4&L;*bQ1hN_y*W&BzMEekmB-&V}U!$^3B*wZ=N4>mgpFx8P6}vZGYS1n7RtS6ut-c z71HLpA<`+Ar6-~b#SX9PY*U!TyZ{yoT@c+WQp-wSJEQW7%I8*qM zIMy(;?!WggHvSRGrfu(ajT3b%2CNyWAgbY;;e%m0NLSw5X`B;*A^V~>{U#~1AYRpd zv&?_;ZQxj|kR2Z-&&e)$J^Vb_v51+2F`kwCS~DeNVE9aSfx}a?Rg0}WeJJ#?XA6&A zxIiC15Mv<5!oIpM3ejdvqlkZ4#(y8upWMI4o>DZ3Q{Kr9KV*B%S7n+!_Err12>3m) zD-eQ12G}e)rtrJCjymt{bem(9g#r2PRLd=jj+LdHxcUw7{_raxUH&N8 z^8{@sG27`z4P9i$vm*b7e+*v(>yOlYFk^?*&Y^oqROD9XIM0m^o}9}gPyI1Rv(+Z3 z$S-?Z(UGCX>t3h(x3m< z-po*VewS!2YR12N?MAA8hxdc;tFtb`XE>K7Us%0q)6iwa=Q_@78kfG8((Jd| z(?!4S^*jbY3I0C38lI88`P^?~N_TthTix*Zne4oi_EfEeKB4?%bY(&5N;i%YelC0$ z{44l%rO%7suPyohZq(;c@Rt1IAwUad_j=tnDSt)_c^W6>N5|oU9q=0X z1MutMQ?0CsF%w>W_4G~mYO~vYvUtbv2dw`5*39Om15XZo-gphZZ;j?Q{6Ton9jDjM zKeJ)cgtDGH_k8{uq&-k-wkC00Qcws+!>~F;{$ozXE^e!O=%QBEQq<7aUF( zqwwxm@O%A2{yKYi&>Mp4wRO4QfZ#=ohL;4?t=)L1B(KH4PU zm&x#xu>A$_6ztRbwR?3QOFSO-o4w0y;Lsyy+k-yv1Pm3$=r?TL%Zh!>Rq*rS*TJ*l z@3Tvv_o`={ZqMWYKKt^?f7%EB+3NXV5ii(_r|WcA*d2n;hCc%z2>(?wDu`^o`e9>j zy)vjnkS`vj^?!cUtG!^EVqiO=s<;bZ2(N)pfiIy|`Z@St89s{nX%5T%j)eKfu53;j zb)5ZCZo#%4y0N|R#qgite}}(E_-YjtKK(-L?-MxN*G9-D{pVqRp6Nb!>F7UVD*Yx&M zuDV5%n?%a+ta)M~5lPi`b6@)M!YOx96)JYZzlL{%e*>>&gi~J#MvH5eS4}w;K0I#` zHJg~c_!5`tytYfJF;l|#z3Xv}-#whvO)fCMD!BEm5r4ZiaxI*46_)*w?UmaHnUuUpvyuvNsEU&iS zeQG*s>u= z`jvJ2HQJ@2WV@%rjGb;)IibVB^ODsr8n2T(1oIdCYuM<%pE+5}X*0_oD;DT#UU=5Z zTdCF}rFzr(IYq;oS8#dH(%McNO8T-WkrNw52)GU>N9dgfu$ zq!o+*fd3cX0(Ds>yq`+Xdz`&`uGUnpcdVJ;t0kf>1yqtCdn1iz+dJb+;G5y?;0^Ev zN;cK5DA!1nGQ(^yiPg+Ie3Q}Sv%6dE)-E=Ua(Mv%4xSA^5=lug8g*2v$8rd!uX{V%m+DXJ?S%gc-)H9|w-oJ?4~APlCSA8nP2QWF))mKE2R)dy{5yl} zz1Z97W$#W9&Y0IFu=^0&1EZ;$R`c4kFu}MnB88!v$ded_JB8Wf>TP!F-+T#z_koXx ztwav69F^YDJNZw~N`#l4%dGStm9eu7o5PnI>1=XzUhN|I3Gnk^-ywo1Ka114+Z813 zUR``;*Y=ipWsTl`W8=67DYaNpD%%e~A3hV-7PZ{}`C^f4Lj#^9Pa(EC%=e8s=3V$l z<$nJ+@T=h0!4HH#>!6yecr@Ji@2N}w6;MyVe&~BTDz#i8vd>}ff4*+x4J(iTsol=N z`@@%PVOViio$*+tS$fAWu%5BJX-E#|Uc9${^h|!3`kzW4_$%1{J@_f`Ti=^KBc@Rt zK27T_;pIH1Zq*zQ4=8XNG0SrCV}`30JsDmL{|bINynI=6_0{Y2DA^plN#j`h=7()Z zZqmHQwb7RkXSF{{=7IU{?CG>D?0P>jXuJVM>?d9o|Ig^q0NoJ_`9O z&un~n2;K(sv>WVoUZg9e+KkUW0Wz|edioI`r?e4Dwe)Wxu5?m?Mo&65O@)6 zJE9x!H-DxXvy&|z#ayE4nJ6v2SNv|>O1D~rVtcVxci|(vAN)*MOVo;H?4Nszm3|t_ z^@IF2%Btt?^4~K7=T5>-u)?#`7y{q)|9oif9y0OYlqKi!_t_j~??Q=1hl^;LDVWqMIGYRmyDZ4vRf4k*&h40UO|V!QX@5 z3%@ex>bXzO&)qHmHM(~Nx5 z-Z6;e_11PA9JR_5A*)E1iU`X4N6#H?=JEt`vzVpsQnbN<@3Xe!D34gL#!EPNvT zy_Ys$CcnK=`yzlsWTqHpQIEM{_oH$ge@VYaA*{5UcEMYrZdeGv8a}V3t=D9_eeHX9 zbM#?e^;=ZA-YvW1&K+Bu zv|a1_+KMZd4nPcFS;5G1Gs724X z40V%@b^1th`?qmb%WY0&^4mq0hWzBA5$FHh(y!kP_|@<);3N8KSJsOs?nJotraK;s z9jh;NbG!o{jPTxP^?i)*)`4-k83pj$;mxow^WiJ_B5{4VGv~!zU9w$#Icf5Fc*(<{ z)N4a2Jw~_B9T(tF!876az#C{p^njMmpTqGMj!&=Yc3?+y&3e{l`=Bv&B8F;F^aB20 zcsF0dOPbav> zGI(p$JqzIbYVS5GPs*j3B(X%-^q5F4yTe68+|gI0*D)epOd6s0Tlk^yi{LlHkF8-z z!zdhj@9#`YZX;d9HYJuXNvGC)&8FL1Hgu-+_w4JDaoxjTfp^N~8>9p`jzXzvqa-!O z>s{4E#V?k|qON~Bl68s|gbofW`I=xEePpCkRbgeJx$lzuwx^>Sq3VD_l>BP2L z2cHkW68G$|>OVm?C>E#w8a#AI6u zX>@LexvZWbm0i+ckCFJi;5Wf9gin~_W^52OR;-~o1@IU}3YK$fi(W@JrfV(Njt5s7vuJu!t&P+X=2Wec;jfhRz2I;KehPds{Nuj;`9EJP2e&2Q3M`WnmU2sN zi>A(KAJ+_D1b+EVIFWfKieSN$K zYF(#Le};xaTN}(}(j6(Ly12%MMsBt&D?FHgLqZ?Vnh1Xv{xp0DJjxq_yXQOEad(nLb;9@c3M%*u z@PGQZ_-N^p3QH<)WJ0j7Ytk;st%mXyjrJ*_lGx08(tQ-J19%O*7QQi_7525pk|9%d z>wfj0l9+rcBEe*Sb&{`vPH~aBP$t34;2YpO;01zCowaKiJu1DSOQO#yD$1Y|72IDU zr!lnMKd5|4HvD$@$M6F==t(>%acUForGIuv20KNU8$`r8*_nLB%9+N{*?RN3Quqt- z&*7!;Ws-+=RUwQ|B+*2W_S33bXv*F;lh`#`#jDg(Wl62@b?|TD7sJyU+HbKH@z)+u zrFE=2Eu%A-Ua?IVDP<<57hK@xcer??&V%oSFNBwnE?KYSz9|Nw>cD+{dRtbjz9Pe) zYyRkK&d++K{(As?-yaAV;YoOn^qPHe&6m}tmS#sooLVlgp!!xcr`-Q47u#hg$7p82 zkAkOSEbM{jQ{(Nu?@+|cJ0jU`v?^n|SXskX2UmBoh4l2Vz4;sA!{KeMs+J>hr2J5Q)hvZrj~uuO4>&F$N&=y-`;#?Xjl+2)Q)sg6D${yMfl2L3Yq1!tEE ztMchaReXFXH-D97JC&+w?d-BH&khq-WHHHI@U8Fy_-1(F?@g>A+N2(qWpI}uz~rsu zI_i}EA<~!;!G)x@jdvHm8(soWpq6f~5Zm|C#ag$_)?I0&ETJnol4Eeu$5)$Yr>&uP zvUKo6P%DMPkAl~ksJ|Wjq0~gSI{CV0lQNM-zeWi~7Kcytn0+Hiovb5Oe|Y#h5loJlr`6fcgO7%fgZ~4*0xv&G9fkZ9x{B^W&mlBg z8l4b_bQ>wmF-ctnSpq*BJ`uhcKHIIwl1|VhPINaHZk%a}a*2WANL)~}*aj&>UmyG& zJ{x`}yc*uVj-{N^7EP{7dhcq=;%1Rfv5r0>x%nVzTZPcyDT(fb_YUFb!0X_dLMek0 zqSR!MW`C^ulg*5Y@6MvBb$nJjGg@Yu94dxa!Y_dDk6N0_5B8@yGMcH|h%!fgZx^3k zL6J(CZX#{eU0HX_qzw4y@QdIj@aaJ$>r0K*1s)NpA#$@5DQ&?OWDP-;u_dC>!WhFv z_;2ts`1$Z-$a=D~pJa_yMyHd86fqqmDZ;^oqb9LaTjU<+*!T|K8MV(U_zm!xbYVJC z*C{h8zfxmp?90v~s$1(N8YPKtOsecMG1**#@P*HT{|7!U#@^{;g1PciWRps08AWPC zvyxI0TtbVO5|g=@E1d|R1h0Vq51x!{G5Tkjr^SeJTBrV8A@dCn-6_jqq_?sSl!Sz4 z?sE7-_{kA+O6ap z@O^XRE%=SS#R!MQG`9P#+>Zj-6XB*!w6$rRDhYkCNOAHD+q5Ii@zXsOHb zF)dr2{MB^sxWWVpsf+0f6*W+zn{?`A9@P)qgue%03GY8U^oWzuJd#`3o>F%yNH6K1 z9{YTYPEU~G$Bd@zc>Hd{4)zi92EIA``{0&D4fDy?HzA}OC6TUip%3CJZ3&VpbRtOe zCGf*xpCJs?(xbn`=^RuU=2KdYp|_pK{>Yj;PU=VzxKLy5w1RN%?Va!wU_T-Pc)PkQ zZ53|p8sQy5K`7`B>box|h z4f*7i|K520myCd76zEj|<;BDcb!Ygg5 zMjlhj=FX*P%wq_r#^jvAUWFgsqqmD`SqHRa0sK>V7kDaaZ&nQbidOR~)MOy*%#HJY zRhv@$DM=9~T~y;Vp=+-c0}VlebwzyP>Bffa41&JRElJ63a5toDa$0lrQcXo2oyzRy z+DqQT4}j$$De!8hCd*RU9O-87yz-+#HJk2{Jz7heGjpS}ncHZ}&VhW)(XbmxZm4pLX_8f#;*1qbjI9J&9|}Jmb`)|A{{A=d)9hY3gEFpL?Kwo1 zt#x4~O|F|a_m{|khr<}6Wxv7y0ULxohc}bBhgRVN;2MOXTt1OjY_v=#rdOrZ*Yu0* z5*RKWhW`szikPETf0dB*tl`#y(FZrbe`v2t2#k-J-LxT{>=_r^q`n+%8}TpvJ=kdo z2j0fzV+U1O__j0KJLYjhc$rYRCXDVBJY{c+=>XUDLd!e&_pl3*FnFCItA-!!BKk00UdeA3xcTF=Mcup<{x^75-IvXaLp3=rxST2YzXDNAW>e(9`biS$zc#O`#e%%!F|kw?5T3|$T% z1K)R!)bPH+DHL`#HKoqYr4!AGlBFdpA|e|r99g4VHS{Pltr!7ltX-a2>=>g>btH{oM5GpCPjHO4HN6<<@4Lyc$n zGg*7$t)sY2hclH4DW<>S6X8$5KZSo$Zp}H|BuJ-+=T(_Ra<>G16dB9v4o)6m?%Jh| z6x@SXz@LS;M(-f#BxTud)EI;E1<#-rHXE}y`&LBOcjqb%{E?aKLMVf!uR#cOZ$8r!{i^6C3me-RfwSQbzgpie*k2yc&s- z?yGMkrB(sxHR09ph44)0tn$wA_-J!wT+Z1idjNL4ZwD*O-4_^!a z4}4CKsLHZ}9#yZTDnseI4oN6gNyf=oOlhQqsQ0Z)gMSTQ2mcuUwq>hhG{v9MnQU1X zD!Y_cmukVpa=k=CzoIrI#ZNf~Z;iVAA^cBx%hV4JLH6CwT@+mlA$cm7<<;<-D3T$v z#@OUKnMU#uel+}3csKMKRk>mrA-d1faND-?2Bl02n>+TQJ*8LZJlQ`%X-WE-kHh^A z{}x^fZwOa2%mp#u>Y1`;cDg#$%%7m}qPS^cYDZ0Ii%d8UJ_EjQ%=!(U$jjpSFTEKhWA8W{sY^$KyRY=Pmd3q zTWoP~$O@KgW>G@+P;2`o9#vA879%IIcUOWDeg?cX#)u*CZxb{3Ci<5B)8(w5?O*Z& zf65fUYZ)>0y|pAcpC2x=9vX-qAKnc<0Nxa#Z*rZv{OPRu`q%_I86!048q7tB0+Va- zQ%4Qy9sz$AzVA8hTzHAf#LPZSjK5z`BSowU%DQMFlV&{FVsl?7w;6i=+$wkyJ^+3z zd}|CVENd){_$lh*d0m)H5itaWZTSrGa^nwex8vNVBk)h*e}TUVe=joelf2%Lr|xt& zlbV|o4fNI;8BOo(X0H|oCne?A!<(Tl?|Tm12LIQhO#d~_KdH$|+N79ZsodUhK-f)I zQ7ywl6ymmDQhMM$;b*{GpjT2-V`iBR3zil?wRp2x5j^E-tUs$)M<|Q|%95Oj1mgw;Z079!#iczFfna#46EL_+{{!@Zs>b$pU_g z#-A6qBIZ#mdu)%vvPMv$$;nA?PT#Z8oci?-_?_@;;4|Pg-Cb=exhpsOQRJ}>sc&7I z(baUj&3E(R@2n+<&(7iRgFg?y8NL*rrYTald`rf1BvEf*`qtC_4$m9k%3FS#ljJ{0~Gd?-AHH9M-iA7w5^yaq;7LyIi7cQn@&nbj!o zl?M~Nt2yus;m^S@h3`r+PtFYc;m_{>Y0@nLgP0ffp*}H8oNTFzX%h^y-+T~$AN)1= z?eGfMH=DUFnQ1L`QYV%!k}5aJ-xzgmTWLv6Vl8RXsNwsbk=%k;!#AmVWt5C)wn`tH zDN9MH@*^L+RPhgl1xJcSMxjf>XZY9fB>YEseYZT?V92JMgsr`fOc`6*G&oVy$zrys z5(b2ue-%55Fb1OTSHgRsx6+t|N`I~WY#XatYgSq&s-vki|FPPXSW@Q1XmV>z5W)BL zIrriFde5$ljCn3p3zZ8+s9|)a^Gv1|VWd8tKa^&OX_5PqOW`NN*TJuXZ?0!>@5hs+8 zFCx0RPDQXXTUyiL2cWNb4_^o$6&%}JZ!(q1yQJwa{R{159=WKNi9HaPpg8T1K$b%3SN6VA=%1O z7Rkht^b$gsbKn8hmnkp~ceDH|Ozb|~riYiq|AcoyZ{;|uu8a4ynHssqH#Axn$+CS) zU1BdYk*w|vr)3k@{0$2=xE|sUt=ye*)%z}Ro&w}3rPo=8s zRMKXnU$;7ZYq~Z)Mv|awYc{zk1#w-}kbsyGy)0CLf z=o=ZGkSvL#8j>r-b#=5F_|fpr@OpS>f1|psF{^>#=Ln3=>ticoGL&jc4L_49bJMoE z$9KX{h3^kP7`<3q>*4%3K{VfRr*2oXOQI_!`Q_K>#EjSkD|&P+JLA5S1n(HY_tm(e z@O1*Jlcqs++SpVzf*6sJBw0~WbK8E;_DFrRsIxvQQ3&7njAtbLVt6XgkezT6D~=uUbkdqdqC1rc3M;jP(}y2>3DZNu)?|@uj7a?KtQ~WK)}r`Y247 zBX47<=-=!Sg+Ti$;Db{n>*O*4f#4#AL z^3IwXIX6*l*3yvO4BywM&4%ZqcN>4uH(TsyFEk~M2^Xe^_9|`7$tDR&7l%1_k@s%J z(n3(1z%PQI(bpU5A1t3*(^YJ#GpV|C-Jz*TO1bN{ZfOP0US=Mta^k`#!>7Tog||qE z*6>B+Tw0Yz+Xmrwao%AHd&Lx&BnsDBcPYV9HVa-3zZ$*-UMI3NQf#s)IW5e*Ho?or z3RaO@RUOeBJ?lZe(Q=x8BYZJ@9()75c&oTGi>y@%+{6-E+T-3DlTgo1u&1R&i+BK%~@dwo1 zS|Op*$X>1SH5a>S1WqEJ5Z)1e)Nc6w@Vn9}(GMaWjiN4ba^!$;U3#-HofqmRXGc}h zS2i}f&4mwy-v@seJ}WCTi(Vu3V5W;Qgn0XOmeT0h(A?{oEMa%+LuqbX;gjJH!1us2 zdM!;7SHjdC!)_W+jpes?>(U8kT&PhbGMG#}`y23oz#oM7L2p$-6}st_lor06_e=2Q zL{fd3$X=c#aOvVkcemAa$X>(u^^JW!Y6`rn&3AjIKxqn3c5!KQ2o|>0R;d}Ck-aKv zH20FD!Jpq6&2#zGeWG2&1O)nF3QQ{t2{?J`p+k4L9B#D!%h$x}YPgwKG# z3g6coT0Y?S)@2Gs#^`ji!9LDe;Gfu8<3<;0_=dzJnX-owhFTK-I{Z=i;IZ1411i_n z#<0Xc?>C2SElkK@#;Tj*c)bt6nqu@ZTNcl9Fy9ZRKw?Gu&K0eofE&Vp*z^g zs1qA2sIkJ{pJ|Ek&*4dUOY}Yo41G4Y>u?SuiWkL|Ab|OB66zl6usQSK|AhC1{{l~> zN@BL=hTN;OAN#1Fsonm_oC(HM-yx0#jON^a5#D+G;E%%x!&A^JJr7=FwS&Vk44Q3r z&$*z#>oZNQn<$7yNbbuP_9jiAaSxt^Pl6u^zwX6erg8fVw_1u)W;dT7vTNT&|6x~R zYEK+_O&vai{#*y&2A>1Zhfl2F_9`@!YmR>ZP;h2vOYhoKjQ&qM+((rbak!Zyh79Q+ zjxis;7(Na@*JD%$I{~!1Gis<=ksx0*Xz*Ey5#7p!*ieyEQ7a!KSVtG_Q{u~RT^4R;dWlnpKAS;{)FQpFE8~I zE{#Om`rE@x!gs+xfvt-A zOGdrLMvf%&w(`WD1ecG&1@B`->Zz?nA5D_s-QZ8a*TOgJzUSuN;kmCxTRUFjJ~dYZ zU2Nr1C2yUat+bJ;C)rpewJ=}9>%$MiOShURWHj#7)1#^@%o0s+X=Q68Z413sCVS6g zq3R5CJ`w&Myd(S;^m;cH8)xLgwONWZPX^cUBwXWq^mB@jNg_y@aZ1Ie(y_`Dz8*dh zezO<8v|J;_Dc)iq+jnT7e3v;*&gHT(RuJ0#4o$H+^vf&5eQ{Yd*3ws3(7U;f3 z_Uq1$-LbnBe6JiAHE`S}EKXRYPnaG32~UP^gg1eof0gF-OWn&`D3IGxoa*$g>=5Bq z+KFz8<#ErQ+r9qK)11bAjlOgg-W9%>zl!1>wXtS4c zxOqVa{vbR%KYmZ}xpV?foMohPv29i9d8fb1x=^zDx~|?&iw%8auwroe(>eIfzFi(Z z58iWV%*69m;%wJ9-#UNQef|PEkMD0kn<8!BXLi%qWQhMRyg9rH{Ac*HRn~MYV&QTH z@#7rk`d^4+8sfuS=PaMAT9VWrd_>$ zz-ON0<4-ivq$0dfGa@}Dp&mVG3Bk4*z!68GJb{x_6!`)jJCG4PAAq!f$?oPk?WLuZQ3B+%~1zFLIdC zEqG@5a$4OQnQM|GW_>C*jG-ulU~+#Dz6yR4egd9wJ=JR!Dd6^pUGu~QvCT+(f8js2tcv*M{coq1k$Gg)B%-SI# zu~d!T7jC?}X`^-|gsL#jbBLsx$0bPn75+SYDEtL@DbIgvQj#H_-VR}Y{8BAmLHPxe zRJVG{_cM7;Nc@^iT8F<0pAT;XANoE)F^t;cCNE(sJ@L>|?VxN@3O&}V@B5`sv4si6{+rm2i=sbM4?5q^QdJMc;H8t_T*=~@~p%&exY)uGd} ziy=&~1U@LcSJzZN``5?o`x?Fg{w91qyrmD#pS&W~_Ei044YkRG_2edBEtSgW zSMal>`V9Z9hrkcQOPPI4QV_WuMbx+UF-dPz`j0?g)in6*{PT0BzUqI1&W*rN z!)L*-!7~sxJh9hZfJ2i2Z)cpi~E)WL5@w!pQM z?V(7m9(aX^Ofw^08Fi#xmq{2csQcsdgiQ<`mwP1cR{j#7Ogk;(L5q{p;N zeIxo=sh?Bn@Mptch37no9t{4HuZB-mLTt46-3o#BlnE9>Wn^k!%UXuHsb=GrIStw_ z_&e}2@b2)PKScd6@ew|M8kT5cpxEgN^xO>2h~Do+2owvi^K98mr-SDl{t|o)yyvk1 z-xETl5ZUN4_b(J}zKDvMpRbk~k620x*)V^&b6yla9^M(g2wqf8kGQinh9&mk)&Pc& zl}xv{j_|cEspN>pKN3FjgsJ=*d=Y#!{5SY#omW2Je-iv!dwfMQbR=Z%Sr(O25xhq!bTrj14gij=~>+=M+Lt0RO98?v}%=9|ebk)5I=O zG&mncd9^qvm2-OXy6fHA`B;or7iS;gkHc?fL1KKQXCF*MBhvI<>`6`2vw8W1i`vE` zVtBqWckY_kj1mvL9lQnnGx*0=q$dkMZ6hbO>Khp=iW#S~QuHI%?@^oWJ+Np{dDvPO zJ`_F_J`G+aHaYF&J>t-6LaLsDi_wc!(@ryyI!D#0_AQsEmOof&!k599!dJrkg-^9U zrd&JK7J4XX{|A$aqWgC#4uuwGg^H{kjGc!UufzAkcfk+B+wO}Je^~Nim215r`tysc z7v02$d{c$|sW#La{_-mith>Umz|#uj*;2**|KHW1%3_|HEd&Rhn-7CMiy2mT@a19-Mz!JzU!uiaO~?6%BQ z*~l?oPp%^RF=}lor15L>aKEJu?*sn^J{&%eZreoDKEs1FiG<^4yQ%fxXYZDXXx_T( zc))8vPiGCUd+>?yCGZ*WyARkb%fy~ZY9PthReAbL3BIHoQxZ%$Pi)^GYeRclFABaE zegM7{KHik-GwUQFD}}_WXg~P*!89MYoo{ZNmp09A-~31ejvDx0c!opB+Tr6W33aY8JP zg=3d!hN;NIGsK88ss}pfI4+=0MNXs&Pjv$GUS;Cmzdsi=)Z=#y@@v~%_l~+8(|R+M zi?}7Dor+zLs0gnIZv?*$K4cG7NJiL#xL#{x;=X0M&NnsNr3>d&$!E?RUTAW*mNkZV zg?|7q25;Juw&k}r!*;oETKqdia^woB-|VF+q~9vYX&2i0e6GkB{uTU7cun|MVy4qy z1sHZHWi{OP9GI&rzLN58?^GYVg?(npQ^Y>e8+Q%!W>fD0%xV*h2S?cntFJSXEJ}v zV$Yl2F}Dplvs+Z8M^O9Et=Wi^b4&|-t*Vdc4B*eeKZNgvf3upa_|aE&RoLMXzo*2w zbxo?4cW$DNS_8Evcd*1yzX16E;N#(!;LAVrUkWsPmS3N>x75)4jpRXPyAy@&nzt?1 z&4@o_uGS^MKZXAY&!L8YOT3L1&G~q|!s+k$$sM$`D;Ccs86pW(F6r{Eczn~Pz1cFy zz>Guu;=+&9pIpd$aGG|SsJ=t%B0M#6E)jTd_${%VpZKFaYY!Py#n>f?4|7Qns`^eZ zvWWil*?7B0_`5s&Uij1SZ{W551`27MmJ1;8SEz}kM}$=|P{-Fw@>PyZ@B(Iz+7cdhTg@z3oP4yyD-kerCGO{xi;G=cVwiM|N%_L`1{?hL?iB0&l=Z zlKvZb$S00u{!}?XTtqmiDqbr@kaX4jw&NVJf}&SwVL5e}{~!=J}0`4Q|wz z>|UenkYoSn@2~hgHP76f?=sosKbPucE`=cGvr)hiHqg`Tv*9Z zM12n54WA2tp}6~`{grePHSvcj2lj_=S_kHIUcbt~^!Q{BI&$k3P@PLUu_Wr)0RR;PXRQ?BgBZV|#S z_#g0&@J8^Wxu;)mDeBbH%1fR;Eaf#)KgXK)omWUkQvDlaeuK(1l^*Ig_z-wc_`{A& zv7zG3(+}!Tgq&;3$Y2`J|L!dIgq#20!7W8RI%Y!fOvokk;iKWVRI&yKs-Mvc_B0Ud zT{Vl1B{G$lTf)o1Q{V^Si_S4UJVp0&=~(EVoSDDBsS4xed!tXd%u-r)WzTN^A^rrO z2)|DZbLvSv^U1~IH=mwYZ623T9Li>S=)OL&`0Q(s1W&Zc*`DSBkvH(S;5Fc-;F(L# zTwgu&HILqL?&yN#vWC$S1(4*)13HS~8&3*B`@Hg8Y zf8=EOBfiGc^JhtgRU#l()Km5BWS7LxDkhKBGU{9K?8r&i;OS3c+;Xm!Ilkesn`1!o z3c0CjJHM46XZO2UW!^rSGBxvV$q;yH`0e7j*Wkqy!`u$%A6Eaxa*2iA!#=l^pM1(r zv$UqF*0F3#R>nUMUI$(j{xZDHy5Q&M&xzMM4yd z*TH+ipMrOQcaLOVug_wdzc=`GHnTAzE4=bImq(A^%=GzBkN@ucdTbOv2L2{|B)pX> zEz3HMm+6NfCEuwg9rpgfZf>$3@%U|>?M8ebw1U{LV9bH{fX|12A7dbUetqaMbDb8) z^|g1A#e9+6-^hUm5l#;x+SOd*kH8PYzlCpsXYEK$x}U)7SJKX5nSJfRD8F zB9|hHN#`gR@2J5uBA2X#pMXDQnu1vn0xGdLEM)N2I=T4 z@VxMg@H;e-hp%Q57^PBw>J{!&`OtEa-sH{rO;X20c3u{0(&M7mrU&rK@OvfjyN7>S zCGc|It08x1KxMp$ogr1IOVsr(!vmLjPK5}|%lZ4hg}(-W5+3Ku2qAHK-ivzCQwH0O zCZ5O_oKWuX!A!2fv8HQ6nDpGUzZTvJ-U|LI{OMy6f4VOo{wG{CK5u{j)vf8MUca2X z(dWg9?XhEjcNO=*Z|1_G@J{fPEly_EW|XcoFG-}g=J9TZ$1^B@&(#=ScK$ukOu5lG z178aN4nB7C=NDz0y;@e$Z`kU3%cT3vNKr^nt7uD3Mx~HRPVdRJ`R!LRmcX~b*TQoI zI9X8iuNj7XAExNAg|w4HV)X8;t+xoN$!n5{M9o9+bjS_+;iur~=Qn;?)~Jb?7HWNE zJ<*j`sO}fJJkN6b>pRPZuQZ~xy72qqH_rjEYhex2ka@9i=z4klV}}b@8w|Lw$MY{d zc&WnSRp!Kbe)}ytJ9usQ-IACx;42n`O@}|OH4s<3vRmtAo+{J!>#Oxnj#faH zMf5qmDg06ROYnVkq2aG$i#1Yt@9*O{k=cH--Wk?zXdPF zT&boQ&d^7sW?3!b3wNUX6YG^@eBkhRUQeH{C)uC>!Uw`Xg7<}wF<@~JbtseCnJQ|0 zB(S;a?cR`*h#$(YGTY2-OLhqvY%xF%2%ij}3LlU~qCKw?sgytco`ytZq))ILEA-Ig zdGz*T{SycFl~_UediW;zPw=D?Qv5Z>lI;0gs+Zp$s1J$YV|qSPJWwkKD?aizkJP&e4O?ckZ$kMC*qU#i#4R_Ou zPnBkH)mCi`iZ6FxyP?`tX?6Quc>inoWALW%r{SNrTPbx@PiYpBG{oF~jW_teJ+S|C zwi>y+dCZrscgef}o(TU4{tmp-VV9ZZZn}Ljkq9BMz;bXDO6wDU?S%vsdfk zZQ!HfbKs|RY`zg_y5b+qIrP1@*J_Rlz8Y2E_j4(CY|EBCca$V2;h(^Nfd9UEeKtwT zQT2i^ll-}gRy*t4-c|k6Uc0;?-nxe{b8pT{X!|wHFYsUCDews%=bBk4=cWY9^8CBl z(kRauCQ8k^9Zq?u8a(z&9-2A=UjaV`FLDOC{xrjYTI@YG$^m1KX}0%T-*p_%mb2aN z8qhTVEdE&h?s@p%@C0f6yWwf@84#TH4o)D z!!N<}!`s2LC}jG+-ukI1bC{&CYON~e%a=4pO%%Ui#JJwIU=ZjR3D1FC?<~9@e0=7C z>pNNgc|H?-u*c=4jMTGy>ZMO~q!^QghPg({(!Nr73HZ&uDrxY)PaS1SaZ0z{cYr_s zuo*W|y3;E``d9DIWU``@j_rH(AMlsp1K}Is~GX>14^mN5jvfowtWtId?RunqHTT`hlibS!WjrTr z4vR7p_^zq@`z6A6!mGho!ZXv>9SolW&kg_8M3r!y&9+A2>oo2g%5V1pzzYjx-EvaK6z z{vW)K=h5rH$HPB@XFi8{qi3e7^qnA-?y8(*p9CDm|rp@4>%= zPlrDOKk_^|I@WcMfy}+(9X~I1U+hz}R26)fFK#aT?}@aJ#O?_ApYRRvn(!VnoCbHi z$b#033H|Ew>^)gSg~5(%fmJ%W?#o#o`-DUI?&3x%0!RZl)0*f}TNPtE{2}=F{(zG9YZ5jr*Yn`-z?Zo3){zFyNQ^P$wk2f}y4Z)W39R@;t#^O;K5eQdv1lCya5g3sP8Cnu84um1*x zNWB8ff8a^*8}Li;L&0-tSHG_H_50YX*tqN0bTwqyHFt%|&mZJXkw4VwwgO)bzu998 zo=3g9FkSmzJQ%O8O7k|z)T`bzW_Yf*Er*RX85|HZ_q3n=I@UzsPrzS>w-@r6x~Nj| z_H6#-r0@s+WoqeuX&aTC^CHxeQr2PH?T^Bb!kfdp!27&E$iXu=5$#_(WAt}W$>P3U;-NeK_J>buqzWk z?vIqCW}m{!b!o>>)q7l0n1c_1SAkcDuOD*|=%CC7f7rQjJA4OGSAk~BB+tH_`)=&d zr<{iSS$CMCj)A`de+!->)LU#*FUx*T#5BRIGka}WPBei%#cNxc6mcP(J>iHr{3m#K z_#k+iCZ~;`l$J)t@O5D;fw`)E#$3rN$C8-{ya(L~uWhRJ;eW$Nz<=1he%};%Tk@z< zQhzr?rlguobT)lG8={`+eCce1Fr{tY@Kf;b;Je`8MdsUxD%aIFZg^M{{(j6O;LA;% z_^3=yoJp~%iY6bPpoUx$o^TOkE{jRa?^xDvlLHkS;|j%nTT`g*cwc{;EsdM>s$!)q ze}~@&zq$A72)yHI37JIN+1H8=p1+NTE9%@_n-j>24qqfxY521z(5v0R?;f639{+Ff zjne!QY^<(65>pgPJ>@f@x<+EF&73qBrD{%rjUX{24=)VA1Aa3bFHGp`lWvsGqMoj7 z5vo)qFf;qqw5c%Imhdrj@rACd-Gi5b7lO}$uObtr-*`I4WJi^kb^MkJn3Y}2sf`mK zw9DHFxNV;`5Cnf3{w#bC{5R$HzvP_BVPPd>of40>Y0APCefe+A8z16}$2kJt1SG@F8`Zl&(s>d6d7?T|-I!nJ>X#%9-7i=B8GI{vF)^3ElJN-!5+++$uwLT57o&IN*8WF+Of8^Qb~yN9DW#n37+jT?$3(AuZxtg zE8Nb<_mR7(DZ)9NbXEiI_x?N}8w*jS`*Pve;h7aM2g55gN*!!Gs3k(E2zYu%H9bSI ziMi%&S@9g(nzZ)|fs6gW;hEJ@XT#gTE55Y3N6U7O`Yi{+ASvDK=&n$Xj&hi_1!9U1{oudDXZtW;Jhw8w z7Rh*2DZZB}ZR?l#t3x+zxdgP-3u4NC$6keh48PfvFTjrl(w5t0d`}(|N_@B~UPiI5 zA4_I$jrIT2M!Q#A+9~56d=z{qJSP#)&-!XMulSeAm+{XJ8rt43HZtNM_8oT3tflz2 zKFq%`;16F0zu99e!XI$z@Fx{DcPC!UB<(8hAFsD&`mv9H;cc?h2)(Ag)6y&WUU*?e z)HLwqAM*1T2A6LfPh}kv+K9-EOkJIPzh5;t&4*K{v!~|HJNPyD&2vsZ@NJ7_EOP4> z%YNr=Gv@fNtvigYx|u#A^lgdH9@ya?Qt=(0L*suva|XO&)q+yu!bnuZ3aRM_yTqj9 z=$_Kn68E`)lW*--$BlAF;5YXWhv51h@CMHO4W*G3XyCd~*ZPIMb z*4SN5la-9-cy{48_c~GFi>2eTS~3<&d|U0tKR?u^{@5Dim{#g_H{r9;XSrL9GRtp1O*%ov_0T2VUcGWq`_vF%QJ5z~{P4(|iM2Co92)G+pC z-L=r{+CDqI-uI(P1@+0F&kVj_or+({d&5)t;sSgUys#3+3Ha<{12fZAO45i`Mc38Q z@b^e$yyQrX1VL|?~IvXE$rgUK>##;sc z_mug7Jt8(_dtoYm6^8v#a1kVh$LYqTGmNZ}6lQLi7DeykkR_&j(%W!y*ln6r~#n=YRDu~-&sZhge)iK)r31}9%szsBfT@|fYWjnNVK4)}}k zBJf==cG>>2xw3G}-a0fzBuy$^^Tp`6TG+oanwy&G<+L*D@MQRh@EY(2VVu*>^1HIC z3sr1NMmf?RUiCvRVjOJ6;j{SS=IysVI zV>kE22Yz!;^X56KRCu!EFDVaG33;WbtM-qtr;Er1>jiBod{`P&ETlYqD03(;TtU5BO!4=b{J^%1jkh};BnyX!`&$-^lM14iSS(RHXrfn! z_l9TcIjAW@q9(ec9ZGuYEDL`QUK>6S{+@YXM9F%R{s@0$p{P(>0MmeY zscqnvm;3)FAIw$ee60&_3x5Z`6@JNb(Yv*8Qt0XZOTLb0YjkEvq_O(9Y5v?DiD4&` zlB_J?1L0%g|G{riER0^nq-Va;DPns2^xDga4w~fRH}PZ6?Oj<$O@l)B;IrYI;MuNX zUP@`J&oX;YY9o!0`qX^aaZM;34$iD^rZmRqZmcMs`kqUs^Fub>#!2vF@I0sxF2Vmay!w%zQP8<_+bTXVpnU4|m#8zn zu9XZ*i;d&t_7^N);2E`$Gs544|Mcyk9O=v=k@owaIom6SLh;$Xr?VdkuILu-SWQ1! zRrm*94E`E?1boD|e+C($z3+O;o&FAw4$NK;eRcL*h?U*YLzBA5c5Tl^cs+P``1kO= zW1Jqn+e^5cpDCnwcU9PNahx>_xb}5CJoWaTSLY@KS**|}!@q{_gTKo2Um26$zx?d+ z;Stv(+1FnEvKlT9BfV*~let!y7B0LO96UvX;<_Q`xW2kI+2IrH*@fLcy4$jcfa}NTKD&4dKm_CgRf}sFT=mhw!ap-i9ct~ z5!=Jn;s3!ipr+A+uPM7?7;!~!$Axc6-@HjA#f32OmcMTnbq@9`+|}1lN-==vI*oh} z-V~nm_oo2T!~VIiXO4D#88V;bbXGL}eN*|Q{=Qdl%xphd+rq2CZ|=o-0-rwIsI!t@ zYmXa>lvx!VMR&Y_e+IuaJYV#ydzGj>akk}oG4nU*=&nD% zw|XQR=facV)8NVQZ>C>{aMO&A_gr*+z0klR=n{P6g~LGUt$VM{pPx(Xh^mAC244@) zeGT(hQwPQ#8OBunW7}mITQ5IV>mu~T!RlqVkNfIpxM2<%B^{<2>2Ph15lHJ0Lyn z{#cf>2VNIm3BD74qxFD^)H{WutTXSPusz>xNb5P7ZDo1l*&gy2J2?f%cM9;9@TTx< z@H$!FR;h${Jmu#%35#}XbFMjURis;QX>eKnX%PRx&cBB6Pv8UK+5ba*l1FRaVQ;$Z zF}u&2)HLWi*nGh$p*mifSk>nKv+jEEefT8!ba;7qENvU6GVVKmDE{l;)!~iFKYa61 zCX#wD(gnH>@gz?5MZ%ZCe}}&U|AR6!Tw4BM^h31C)W0Xu^B3!lD3^a4+m?@fm{G9w z|C$E>6Mhx`F??3%ns|7zg5F}5PybN*n7{U*H_3CLB9S;6Iyq1)?pzN44_*{CM;iQ_ zW^VdSM+#r)P;pY4%B2&`9Ix`zs+lINBh%!0iTqie@GQukFT&TtU)9|E<(O|LN6ALE zYu){!=al$;uJp}oZ`~a-SH$jTGEKoBhQ9|t1J5Wv^YY6udwEu9K$Xqb=wBW^#Yw;A zBmS0+1wa3Dto9GR4f=NYc=(-$xL0Gg{vFER!6w-GvR6gE!v1K%6%9=dxwxRM7kIJ> zQmex7hVV7;N8y_&0m^(!?n?&Sg^LCXa~&5S2{>nQ_R>t9B~&Vyx|1~F9pMMyZ@`~g zA&uSmI~qNZQJwc!%{%d`ZA$U=)N6T};|50Y^>NPE;Ge;Bqvmki{CS9{U$~K+NhPmH zgVxEb6Z2`OSl!x5Q7dIc>NDA2SZ~2+!C!=b4o}8Acst_)9;XR2#8sK3#yTD-o-gLM z@S2R1ypwJlITj568Gf_IBEf$e`!966oM1X7Vd$iPMpy*(P-^X+_^JmHwxr9KtX8s9 z;YZ;!;j7>)==4wee0H5@DH+R*e<`oYewO)e{P;be_m;&Z(*DVkDtKz-)Lrm>@HZ?) zlI>>R_4#qs2);0Ew(=K=HM}twb9iz~(_BrAwQL{!<{oz{%<=2+%W_%u3v$-Si{#VC zT%xSXqV}lec;xZj6#jLICI5l>jb(Uccqw=;BRo5=$@{idjF7dGn~itMDrD5l^RlKI#Tlr4|3DY$QDPLumZPz|$LEm5aU`r;mlP zdCS9_!F$1*z^9IrHWt;>I^zy_jff@BrDtDTQL%9;Z(eKH3TRp_mp=pV44(%duzCG- z>DRp5X6Q2>NyWxbKB!2g6D*ZlRJs0Q{;$(&HTx-B_)z%G8YmOKdGClP4{aY&zQ3RE zMsq`jfW+76B$D~_x!kz3zd61J1;eMpufVs!ms41iSRbrcQFhm)Kkj|E^s>`;Dt%1z zw90@VF~}*iFcE%p&;35k+3WBhch~FGbKO76EJee#8g<9!kl*_BFjsHcO8KOChh^y3 zD)>%#HF$nw+|$Oing)y&?mn*WALuHCY{o{Fcg<$Hc!!TCkc_Ig-Tw_g4{r^x37LOh%Q<`t3U7%E|XF$Z0T)%aAR^;r_@CNWBhpnzY4Qh{i@?*WN zB#()Y%O=95ub{&I&Yv&FgQG`%ciR4M9ij%_6`q!sZj0eIrP5R{;)1>|k(m5C`iXsF z%30f6`@B0-t3F7ds;oTmBMX_t?{J}2cvH!RqPR?bo_1LAS^i6&| z3vUC@j5)X*o;1cGV!M^H7CGyj7Pr@Ziez(V%+-B^v_Htz=gXiul@Yu*yfS=0e1MEQ zsUkVMePU>KRU2eHXyIrNZi4TJ zpN78O=Sv_-n(zpUei)?s`WpMB6Nr z@9F6-*r34Gw6mt=d{pyxmaU5LGVn*?f5BTUsw7=xd2QQ!SuJwd@#sWPTc5+nIkxqp zmWzKHX|p}9!t24Gg{QlYc~#)g0eyvB!Kz8Bq>q+sQcq?ozTa^a3V*s}+;m*UiSq&c z=DLU}yaar@`oS|>joK)ai)_lK1C+TCs&Bo|PJWzrt!Cr34k!1#gntI_0)HL8;%smm zvqB+_P-cb1D^3+dnf1oyXRbb%ux|cY>Yq$tIea{P2z&_q^1r2&YVD3lbw^$Dv!aJf z%gZIZyI2=5w6UHG*L~7^wFAByJ`uhO-l>H1mNJiCMz7>}nOl>^ud zUc?l0hDYhVqwUdeJrDhO>DRAM&c1dlajdB5e#UxBY+Ki7UVivpx){^pjp3(gv=4vY zxaq-BsoC?k;bC3^$KJ)TJ8kcV+T?`#0;yXx;HBWHF=zY2D|vj(oqYRrAw>OV$3Mdm_44QeDhR)p?e)(4}KT?JNVXgZ3L-2QHPapPPyKTDeX`Lo+>-eoXnb=-^$I7^?0}cz1X? zc-|X$7Q}5Rv`TB|DPJfHOF|~A?d6M;3wv`U#0rYz%AMBRs^KHxFTm@;e{ArO!{{nxftZjg1Vv_f_@mlZiDe{z)xc!)#%~t{2QA_UU zYPxOQ@SX6_;Md{lhEGr=UM4#9x+@w`jKhwpPX%ghi%?Q_{!aUgqI6R4dCbDqv4P06kRpiP8?0EH5~o%=6R34vl>gLnWCJay#uM@ zq1{RTzr2!N@3#L#lkBETqE$bSm!W?mxGVa;TY6&pf;}MzUotc-C^>@ z!=^>%Xd*HFhg$dOg#&-6ZeuG9yfgeI_=YLFDcb_oTL)fL?X2FN?5TOYC9vagG|dj; z9J%yZ;#WcV3-Hh3o!~jQ(su-PEOoV<+yCRG>IXGvq5F$RVyq3@)?Mp{-%gY%!QY49 ztl{3mx7%zv*BRyPS@Blvt?okQUCPfT?Hj+vtBtz3JAd3QD7pk64POi20MDzub7QHA za`#=#(o9d&rMBr2ZL5~0rKKG!r?-0FD_Ob)Ukv{Pp4J?{pT_Ke6N{8>zdH(sX5?fm zdjC-@+=$&)*uvENoOx!@4Za(G82$u2g{&*xwp58vI#~CRRXCw2bo?~af7S6ZKL?im zT#S3)y@H>IUx9al4^WrB&P~;zVsqC_J7;Oj@S{DfxNEr z>sR+skG$L3R99TNR^@?`(?ZS*uYajzNIsZ^zW}cWPl3Og|Euw(%jZeX+^(_a+rr~@ zF)FTA#7>%s)ORI|zWP7t?_doS-W*=g0&~Dd`caDmp8bz1ZdAKoCo0>o$v3A9c$8$S z7#AwM?V#+1_l189e+7Pr?O6}<0`1)g$$af>_f7>f%Lcr+#Vd9<9F=ZWD}VTV96lL7 z3jQhlN46BUxBrG~=#0B8DC=1_-by%6UCJ~zOSyir(Yj*2R0sYud;xqR{KdD?4+%DQ z3Z=KBo<%3Evw8j967#>ea!FAt9(K8fc~HpQ z?f#Sxm0N4)CX&|VSJnqzY#<@X!l@uGDC zJ8B3=_)Lj;ae?RJZ|0gypLh&eE$EikIlGog=PXQJ>VEJoczpo=9Q+~p6!=wkZJBl@ zNhVW|vWeW4?{Dl4t3_-txJ8!nu9wv``9!b7Z@zu}1bi#}SG|pzy@?yTr`Je2=Vr%W zXk1H3skpcJ$zvj1C1+iwmfaESI`D?@G*%eTpHV*a9CXV5ZTM8>_}+%*rG+0ga=Mk1d%$R5(6dp*eOJ{tx_H_&|8Uj$;>c z-$$)E%y0bbDcm@0o4}i&+%dAOkrUbThwFKnKRgxIfhyqN!(X0LD!)12#Jy&Rc`A6>{zi24mTs*ptsYV# z(k~9Yo7z?duM9s9&tZ+8oL4kdpGds7-s|V|n%wQQ8vmLi+)zjtQ=2WfFy1}<4c-EN z^Bk%gys&|>%2`^q!*ZGeJ!&cMnCopD-1UDcdpnd(^asQSI?ls;zze~f!^=u@MX%^y zwzr@b`LXdxwD$vuuSr4pJC;gWnw-O|ra=&QWz&AA1?2=4?h zWP_ZztgGT~Tj$(rm!eaEQrFf7!v#lC<&Cq`7C8eej(V$4;MuUwv{_Rc!7F5+8ak(Z zzgJ$mZpL1Un_ua3+np)r+LFUU?tcHAc2B&8KMYTT4}_;Q*D6T`IBIkLTzVz5tHuH;< zVd+mp54;6@JNz(w;4I@l-f$MB%kvLC`X6%terQ%rvvHJ{YUa-RM9cbDj}7>z@PFVp zduOtCrM6D}ezthBjj^utM$fss6vd1tw)t&y*T}}rYtnr8kdeaAz+Z%?mWoo@F?5cy zpzBe+@L-Berg*ID{G)i_Ps#=+8A| z`?;R$y6(0slZe^m0yY}Pf+K_!2_VKrZaxv#BqYOkS!eRKhqF$y$4Or2d#Ppm;dN`& zQ|0y3W*tfDQx<#NORRhEI2t<``&rfinbkTbPKH~SB}k%W5}C|};crSNvb+7Z29tEu z&z_wfuL*c~IKdvUs{zq9(M>_s;c=b(YJ2$6|A{{>A4&EprcF3Ng{u2DF)Z;@|7X;% z=Lg2BZD?NA_3y!?ng;Qha=lVSAH>3u49}}IE&Ir14`=d2Ii0Shx9$#&4az z*Xc}Rko~mVWg96zp?Re+m8ZN9>+$(}?;Bv3U??8hN@C~s9==}xeBDq|*4gQYdAeA& z1*f&~%*Qb#j~!GJxex$!P-d}I#ez&^Yb8)s~ z{?#ZW!Wo*(wZ>_BSKc2XzlV?ENL_dXRR~;0AH6uObP(|Ddgsy~?}C#tpU=}ge6t)g zBz_H!YcHiC;R|q@WDDIm16ZOswxuEdb(+d0#^u7z&*h0*lXKI`f`6=MRmX&;riDe3g|{Q6J_oK`)gXiFm;PDD@AWa*x7 zqMtN0-!wJy>EY!i6EWhK3*8r#f?OVUSuSrn9?a@ng2qPp@w7!O(rzCqwqiUA;!_NH zc0T-Q_*fg`@k;m&Apc}&ZxE)Q1L&_OLUWh#N7~=Pf<2^)+wcIzYbTaIev%N{!$O8g zl_mw5BKCJ@qsx2+(fzQfcVlmUfl|b1qoB2wM=){h$)UVjE>P6#4@G7#lA|kN03@EO9%R!8%`*c%VH- zaf?1ZVgB5novi<$3+mazW`Wx2UP39`Aj#6iTg$YhOF{;sUB5q?W%WIRwzwYfL zAl%-)0e3|lbw}2}0~@vd3)1u5krl}4Q&aMk{}_Vq<#F5Sy*X<Z>H191R}$O!9WQ|sFIJx z69b{_!A-zV9fCbkNRWXCZ$WC6v8`6O0cN4+Ri6U+qP4a_0`ED!wpX#&$D4`{F+VZm zZm8=BK%w3y65wVgp3V4HE9s=M58}86?rQ$7DA^=cNvDZ(Es~-C;;zjS_f7@RXKZ?y7tfIERLt}d`I!l0xZL2|m*oZjFX$tGFBL@};s^xN8B_UXTzaDB5Y?A=pj6u^9soX4cub;bUN^Zz{UC@GJ-|o+R;-m)2t11yQG(xC`cIZ}A4!k45z90|@e+?w#RNu2fVIqvGHLO+BkozfN>TLGP z0j%1g^ryiBn-8<2>y@1B+#9OJFMiG~O(yg_LPmVvta8*EpND+BlkSi<(a!982Fv{0 zjkSbva(pIiXQZ~TKlby#HGG7wLE`N6pp^7>=Ih4|nb}9mio29A$uSa_*qrTu=?$~_ z>^NeKJ=ZirJV^EXsq-;sr-PlD*i!GY;e*TlF59Q$-p7tb8jPmIY9(Ip{kdhx3)jg} zeZt^Az)8$D^2%GS=i`U%hfC`RVMQNJ*-8+jJAc+U+R;8kVfgQ20Xh}|?80_9v`6+& zH=J**+$j29W8G)(j@s*I1&vp1JN`4Vw(KyscVy$=TLCPv%5`LqOWb}_OMUBE=2LWK z+^YKZN>#z(PoMwOv!1?P7W7#Zv`eY}l$VM1pN{nH!D5mlw+Ec41z6^QZCfYi81|q1 z8zuxUz_lR`+nu?wTCy0<)0bJTrt@ss6`}9HNwaz>t~qZ$!+`vG`JFqgeIDCM%3i&+ zC!Dp#>{Z>AY=FgoiQ&ssr979-d6DsC-DI2_pv=bys42?aoi*5sV{B${W@@H9cJGbv zZcio`a*eNj=@Jiviu3$)8!*q?F;rhCShaT{pVT%P=-$>i?%DWxzPhbVI{~6Q`Tpra zS*}=9kO$r!XhGP*NoYEqjw&pPZ-{)enduVKKkww{vR~IQj8V;aJ{f78y7~k0@XEx& zd#4~E0jcCpf#VXV;ac~Xo-cf3-P=3fdzne8cH6P>|CaWigrU>*La-O!nxTlq?Au)h zkoduxWe~&?nm56&epkz)1>06qAK|mVeH5~y*{b^Y^&je!?-n$U+jjIN&K4Y2Ui)~> ziOvE6r|6jNOxkPw%?*d|oVUkKxwiLKo`&^Dy}Zd7;VH)*#~d-WZL7nbMS z(YIQwJ{%+RJ^_jQK>Nhp=In6oNe5_O!@L zFrYk4>yHlwhMPBFujBxMj=bH(C;+tcK(?z&TzN;Uqk#uL0ADcA_px!!azIe#u?f)h z6&Zi-5N`!utCo}ba2me?)&3*}H_{Ug*{rD5afQnG#3C3zTV3ZH@!yAy^&f^W7hG-e zs06~>3QJ$lK6besdeaQ zvQnS7JAJ5!Xq{cxHE!65w;#Uoi9K&QjoJ1p&wE^XR+M&Mw!+-u-uwV8MZG3OD- zUn9XXvvO;z29$!2ryJZixiUdyv8_+wAnci%`Q;>!=_c*00$cmLXo3(IteiO$I z9Zy#}&IWHgp0_*+ZOgQ*}@0$uUX8UaIO?QiLYZY8fC^^XNQNxmlyz&L^vrJeuOGkza`|39AiMeL@WmKpx8^3>wD8=p73lOyv;E^QIIWGu1PCIgk2nJuiVKE+~dg$i8V7wiTnG3V50{5JLB=z;;CFQtQU*aZI15v`6eU z#j`*0PR~zWWEjI2QCtAt7;80z1*vymnd*l?u@f`$?e&wGJnk87!#z)Vb2vRCx5#Giir4!&@`!n~`}x<6HCiFb71>sD(`uG|y`#z{ zAUwND2Y@!3fHv<%O#cMd5EZa?zgihj9;Xe-`A zhf&Xe!2wL_=pPcq+Kc3n!WGYCunphnc_+JlY$7+3<4_=%e4eABi7VWvYx_0cxv`!6 zS4|o3mZ=*y&Iru?sgm2Lo&ftwvlon~u|-_zx7&6x9X^xMK1y!8*tfYT{Yrhg&My|6 zA`U4>?>t%|mI;EyIX`i@lhPj4^tdXs-BK$4j?$Hxx9Jl5WZR|JQYswZs(wG?=KnvX z!f1vvJ767%&J)L}w)}Q52hvkG`22xvid}4fIzlp(yjjR?Xl8$aIK|9405K^C+XwD)$R**1uO`yIa&c@?hy_;D)i}FAk%C(AI3{uIY|} zC{?F^r}fS`oEKnf%5>04=#Lb_Z?aQoGowQ))w%-8b8hlkt;BM}m`{F{{$P zl=LF$ZDRq~r#45HVtPXhg_a?)dtp%e#OrU1C+FMZ`A?LI^q;M{#AaLW$iIxgcPM{u zY>b1BFQH@T&%9}?GNO1oMR{J5u8SM-m~ejHo;sNNTxNA@mNF67aatrYlPeq@t#G%> z10xkIWX*PCf4SevbRT0D2d}RT$Anr>upf0M744`^M)~J5{bQ7M17{>q&w4&8I+C+02)0*L zI|!~1vb({q+H03B=#d0*}VEWh>qEb9O+yrM`b}+4t zr}vA?$-INly3r5KH(XQV!r-qJ?*G}k=kiDN@P7N*cloHoON8!Y$ zj_h%eMWQU5-&IZ+oO#e+M6JiYx=ZzF)O_|;e{RODx;LNx#mn&*$yco18nSz9uN(ai7g<;@)}oTDn|+V1@?j1s-A|J3I7a>2N7wA^j7 znf8e0_0EDEe_CYKR5#+?(R2Mz!bXy>1v%EH9X_iYzqEjO7#)$ga}WM!{CnkhtLEGH z2@9W|{6nM2+EF}8c>m{dhrXA6>=2KVLf?Lks}1?-`p=cw$`RGw%9p;ccDC~BU4}b) zeoKk}vcl2uNS$@O`Q-O8FQtPgQ(Hyt3k-lu z+Iw|bQmq=;ZC~8qyzPB|OSLogW@Q3>*g=@^-HnoUS@jU#?LIi`5i4WNvm1e)?U~B- z)3wxu0H?74*yO9hnrASzQM{+#%@PIYBBrTO?;^(9P&7*MM(@}>O?mnhal zDfE0~cNLGWs4%?0cqTRdG|3wlKFV`Bhlb4dpr4JimFO;l2<$D9CdGNIx2BX;J-M3G zv2H5zr2Dsr*w};}M86`Bhe~qNM_O-EQi2-y)G0FSQX9NUQsYx8FQ-Pqy*aHFNWnN$qiEj)biungjm2 zGqkt%^lM(;_OF9CNO_aK`KG%+dGNjHl~VQ3zXv+giH|z;zq4_!ASkbFGySuJh^?mLOyq}f);NIHwR^HOi;gc9IWo6vG_?9kb=lJ2erRibwA5yqn z)#$C^|IR@@YmX*xtvqS*RoorU5tQ7&(xoZ+2m9cU?D;zMn4N>zF|Uyn=MAR7SkiLV z*;={sntEm8HrYAho}h7^o01JHtxP#~W)9z+&F#!$JLfo#iI}l~G;P`lP3H{fsM1m& zc0KyxUbHp3Zeaa;rQJDO)&(vl-Mrz8#_>b9x`tqjSv z^8p;hf>@#^8s+w|DDeU9Jud@o8fjis8lvx4>(^HBNI|1`o_dmXZhhDVf!i)wc=+QEcKWUOBY%2p8 z<$ol2xG-e$0B^bsz;Ttv+*t0j=|Kd(c-hrMaz1pQMD?`CdIu6|_WX(1irVCs$zz#V zX6NICqaM2q3h}<$C_1B+0rAYQeJVjc2pgqi0snjl$-AiljM*P{4!wK_i5CmBV&Dyq zT_6z(g%lZ>;fF*EbH4|us7Vo6epZ&=Z2A@NMRzz&;%iJp`6Zv?ojRcN9lgt8RHS{p7YoA8}Ql=v(-tDOd z0ha(hCc*MNkQ8$>{mq_U;^c*d%)a$Y+iX%$bCN;EM4;$6!6Pyn=A3gkCBa%Tkb;u zw-P_Uge5W9bdhJPjTeeeYLOtQSe)W6-1PJU{4NJ6Xz0YDn`Z6A3?Q>JDl%Fl!)l`f z(M%@HG%4FdBSGRJ;D&ogN7ge|=~sN`+Z^b-8V()Z<%xlx3!_2T)|FLi%rPUf2*g3& z=%{%nKCZs0dGd^>JHQ>j!M`u~f+o@;1mKm&;RR7n!Tlet@Yf8JZMq=iKu;sL7f zIZo&Edb8T;(=3AdoJNX24=r7QvD|)o1@Jth)(-S%$z7W3>@Tt8B7TzepRM@~q%42U z_hD_QNz_G9bX9c4JW^(ZqFmqG1S%3_NMOZeRmm4kz%lwb&!cK!%#6mm^ZG!WYRnoP zX4gx|NCfEkpmP|j7Dx(oK?4k!1hw%!8$?#LkF7AMfTaW5%JMV&cswWo+HD-=XYG9; zTBUWq-TklUgN&IgwrxV7O7Og;BGeokWowTfZbLtp+#I*+&YA`LKnzlHwu?&m0oZkV zj&39Y9R~R7kgTdhn?$#6(q*fVOl$tbB3HDTol-#~Km)AT(a7j%#!UaQ0Bo2wktV{w za|>+&HULoRi%iib04P zwxb+2HpKX9<@VxsM&LCBR(RFTc{8u+Kk&~kklg3fS2WX!-Wc0eT<|h{Tg%$yL*o zIP2CNNy{nQ<^)?E0%DVW<3bSc2%qQ4C>SC{oR}JF1`fF z%Ca{vMrNDoP6)Uo8tY**9VdGNVh?Wh3t+L$53s92sCOkg*C#FD3_uF1;-^XOQ-S2} zSnz5UfK~`9b+ZnjYwEPH0=Y(lxn7Wg9ceR&TI+R|ThKT&+nB3jCmRU#Nt^={dp*J7 zP1%l)`StEY0h@QTuQ+5nI^4V7lX@?^pu;^NMvy|PyfK#O7Sa&p=|0kTA3y7GP^Zmv zAZW{K>7H$wrtGxc8q~Y^lzVx1BCln7Z45iLS0!Z{O81~TrP`F&OBIw3lLXoKFfTs0 z-x55e4aeU9&b9*_8{#U!k$5pFsCU&Lu?*O|YcxE{Bau$RR@BycM?1MN=&Wh%0@b~s zpe-isAm8;twIZcNF+ey2D8Va-G=LSUpBn(&-Zk1dtl+dIoNm7oZe6GU$2xS z79<+~WHGWCEM|&A9fSxi$W-{tz$h)L8MT`qf|x4){3-?Qf~QgcRc1q^$Fr8y^a$;S4_cAje8Zc*j3Cg*d}0|8$~PEu z2N-en@n+!B?w5+0cwp1R3K?koR6H}C+$RO&5~8BJD932{4I0OaL7P zYBhi?jCJ@*2{-$j|0vKa0iIeKh9)X7i zJkDWzZFOT=!4+*@!Ynp=80Nn{8bmDqPA=Vjz2Q2(+qHRK0J}AGKw>+DCbs>U!LIa8 z{9>+g6G^qI$#Aop%zFe_*~nfZ3<{tR9jksfome%Sk&c>A61Ux7brNEJH$7u>t$)i| zLhVK)7pPbpRGuqR(TGyr#Qc3*Qe?sFJHQN;a*V7 zeFs*r$Xd)&Zo|^JllOoY*|E=$-9^LYV3{Z1^St5{0$%O;MElffbhmFzLJ3}p<=$Tq zR9#VEPAjr{fGffwDLpNQ9UpR)Dh7Bz@>*POMW9N6Ey`Vd1<21GW#8XILq!) zmR+F1x>_epe|~ShFkE=ni=w6!Zvus+?1Eof4}tVj(0WngGgWNmVWI&rWL#Y>|BkP2 z&Z&tdjY@s!MTOG~I6;`oiaR|vp2Q9Z_1|Puyxng)Hvq|ZHmAkb6W|Cs%E|-7#q3ts zBGMdHM1nU5;SEP`!-KHaiEHdcdP89%jPCCERZO{gGX2;nnm(Q_VzOFQ)h;6t@6~{c zGW@m&a#U+!Z(sS;i|#WQ)E<*B{g1(89wd6(4hlkR`BeY0e?%|RH9Z8`%pMkv` zU-g_xbTG*coS*2i)#dxJ48xZfDYdFXXI}6HKTP0|D!2^t?C)3RfX-KVDt#i^WA2=@uj0{3jGRB^t z_;6Nw&RO)Kt)721bpah*{D+M8VJW7PGRgIuJyNb$ERNMa#K)xEE4{bABup*7#(Z4( zO;pVF{~F(N(j;gk=bv4sZfI`3zQnCESrBAS>7a#W4O}E=1e&JTM(M5~BUxQf4-W)G zt>@;M_1vlp04@W~O9;&khtgLnUFs3~h?K%{*?l)M7NZ`j%|MG@hAS?weJC2yc8}-E zo|<{f)4%~#-hWPXM0VZy_V<)KFYMoada>)=B45=x94{P*WOJK(H7|aEpEn|%Q)K2~ zU$1B{>MwW)`d$fb|6)C}RrC9>{&H5K`f}%BG;uUFBeQayS3>!N}#04hU+N6*b^to5n@|B zG;a67yAIy(Y?=xB8!mFq{tZbSa?_zEMpu&KbZxvY0j;oIKyi*H1l1F>sE?9A zc^w=Q&BE7k?}3J%0Xn;V7a_G4w$>81(`nNVFfbSGlEX6V6#bso>za(p+RK5NABtKh z)T5W8@!acvKBm;j1L;fI{Xgy_zu0p1uO-_lio%qU%g?jJ#A}3z zOM%9Hg6O5LF^Hj^;ny?Udg8aiu(%c&VS`~449n5HssTea+urB!D_1(=+HJMlps?DHv;ACL$fF@{z|E7JcfV^xk#@dAG^!rwxcAT?q`%ZLaPSUIV=mWXJjCj0-5ZYIa~m((y)0h&99OOU8a7qyktm)TG-^X5qxty32}kz=IAur8GT6% z{*AY>KghXH4gxq({y0r$5oD`FFKurhDV2xON2ZT5aE{Nv z9j7S_ewXM@@6$&d8SuPfZRun}jKOn9`%x=Pon4;*eV!v) z;}1HfA5K)RuKY3oPj8+4{@~2Zndu@NcI*Cg;ycz8gVRBY#CuhP){H=VbC*+q(r>`e z>%h2js+QFsr$0p*3wOU$TMxx2`BdE#lvN(?Y)_VOlhd|1w`RGqYpL5jnY_0`tbVuQjMut^{(9}qAnwNW0;3H8#p9m9CID<{-b#mnSZh2ph6xkv^$83{Z&q{Dhk_y%pNhA_)NuV-rIQ)8|YWVAO1v zSe4;IZ;oy^lQr*U5ht*MHF*j1zQIT|s|=j!#a3Z&YB}Xj2>Zx-0I*UnBmb>uVgX?Gl$&f_YxF?4-dO3epp@l^R98r@vdHaZ_RCQN+WZ_LRNxUE4{pPUdZm4i93)ym`y2ExM2$v~N&fz^Nvrak~G zGszkBZ|8EBOvXUVEf-m)CYhLFIf-D%^>i77DHGFWTFZItw_h@t&|ilL%Uo>9V}LS- zW&GEKzg{5I6J;`!Y}JxyIX(&SWF*B6aUECFVac@<+KK3**@{I3M00H0w6SoJC0MHe z84>cF_;7XA6}gX%HR>8Kr{H@D3@DS68I#02`p5QEqa#n*%yQ(KFSPGRRpe6TFOHE{ zwwSx}spJ{!2r-7PrG?YOaovX}d?9>v?ebVx5iq(m^NOkNZbD8%q+|HZ3|b$6NAlUd zuc|Q-&Z>_iTFj_4opqkXl?Mb>*z;{!y;eBfSzCavZ2MNdYCd(T$9)rdi~MmH`_9h* zZ+jr-ULL7|_7zpR1&G0(fEl25pdZ=elmwVh4n@Z@y&=rRTG(ETFqRnfbAJOepi*7U zOF-IJ^^ZK6+C;+ju`Zjck>_+QYyctd&I@9#Y?hRaA0zNoC}@xq=Di|mip00QI5@wy zhG;iSCVz`YzTRpxgioXkO%@fHrn}of8#b95JePqrVj&+_QucdO7NruLN?$67#zTgr z!E&v&JgnqnNizV5Lvh!GZ%Ll%T#^+U$+9GekV1Oa9r0aZ#K)*WPhE58o9Z{?y%!Ju zo#1-C(=)m|nl_D@>1{S}HAIodBA>NG%-z$-h%@{PJ7Hi1Zgr9+H{q#m1y;gPErZoW z$96k=4}#rB6Vqm84jZ_|($}+CuW!p_(UL6Lp$v)$^VgaaeMnK_;tAolTDA-Xh>szB zK7XDUHe(Cw2Kd(7z!2b@9|}qS*Y&|zpOnSncPNMMbcE*(+Twu@FZp}#5D*GLVV=;Y zMb$F?^%}#{>@fJvaLTdhy3`5M22Pd*t($ZH7{crCe-2$-j{Uc5?B9N~Vw{5&pMa&h z?>Y~?P_uLa%UUB;d87Mx(uF;%p%=$9SGx^2=jIH!i2vlA9J4mxXz1&`>boXvIV7qt zzbajPqx*D5dfaz8r-5bo=p&0`R9Sgn4c%%UydrHL+3Lo8XdKjJn9pH>bK2Qc)4AQr zDpZuXxUbxE*yg28`ICRQ<}{*a^xsi~{5!a4W&sNn<3v;}DrlR|I$z;*-Ibr> zh_Z)f!6Q|khc!Zno+OFFC^fbNI9@GOkqdo*v{QZ19q8C}YLcGxKu$*F@>|)ld=>#M ztEnvA%981u>2SO$7*g16{H8w=^IQDF$gbnY z?@MDAe;=>iIPk{PA6+&EkX3<{q9u*7XW_-ND_KZNdf z3Yn8a1hY&HU_u;c{4*0221Dg-6z=P)b89*2fHHf@Cu_XD(<6_!s861Zp?WqSEK~k~ zzO#svV~r4wG)!vpzpf&aqmo=J9Zh1@@-fRn#B-6YPNs$%i)AoQ#o?%PxxT;^J>RQ$ zAp(f=rdaUCEdNzCZGL;^ho(pH5zt*{(#+0!IU$SLWsGUqZrCi@>^3)}<_UI7mV$<4 zKO#5#)3FTG28_N|gq1&*8m^Nk5NjL3eIH$8?+BGLmJ8s9Ei4FB{^}E59?#7cXn=DG zq8Yv`9J~?OUPk>Svt8^eTDp&WY1UeCIjB#m7g!Z5TPVo+9p=bW% z+ChC3g2kgmo>40L5p}jj26mu;^>O={!RmYF@sPGiS&?C=^5@#V3-cs1t|AmU2;^SG>l43DPH7iYB=bD$zL9II!6Acj|)mRu5{pCr%w@G?F`&0kG6-s8_ zyTJ;!S;aavb6%RI(>xxGD>ttDrYhyUiq}Tyu-H_%Xg-^cxp2_%ch|0A(pdQnVX;tp zRL@NWL&?D47+WH<-xKxol8k17QXgL*;LKAP*=NX-inUl~X+TUtMS>z@v34kVB;sG! zN@!@PBDlg(1O@Qld#5KLYS4PvRbWuZ5*xZp@;JLN;q{e4anNlMq$}@(A{RO>0ZoQVv*)K7{ISMLD;8q$m8N!S z*XgFcGsoB+je@(k)o_kn%dE`R!HQ~ij>0>H9Z&U~<|0hUAz^S_>O>&sf-|>+6<K|%+q52-B3qjE3+ zc3AWwqS;y9S!08`j)fsikgagc6QYCFfS;rAk)ieU57Jm&} zjL&69J3VvJC+2^>9B7g#+rNx~}C zJk)84?dg*0?DIFnJT_5m9-!I@_`W_$f)J5;s-D^2WLAR`{TuV~rc7Q?Ept9SV_tO` zRz2)>&8{ymU+(F`2BE7`TjuNIC%`u-(W<|3y1kyVbZ`Ci6f6zZI0Zq*g>KvaJM%%& z75IOY))%g?Kn%>?hpOf3-Pv)eR7>Hp7?<-!2>s-fBviy&g(;)05x*F9)x_SfoqH>I z)TaoFcFaD7!_0sif5C==s(qz}V06H0kquWRI75SBdX^drSwxqhlcrYi-sF_ElbPew z{RDD1`S{67@f+vLsx11n%zUIawbF*M{W0xkm(#|52fLMAr0r$`257*K>XfDebjwBF zsJs%NT+}B9QBcr@4YZt;Ukuf$nK5}L5Mc^lKJ>Ml6(e7@7OwGz5^vaY4&H{vexc&c zU#CCqRmD}i9O<@h$>Y(5S9GLy&*&Va#|mff?$(2s|3&9f$eAa>qk2VNrd3bRvbrBx zKuKe_Z%7v%ftEbc25>(@b>NG@N=2}26aJ4O%4C{- zJN;XCT=v<{HtByn{Z09Uu`lX6S!T0YXEs9|^{G<*|Ay%}nf(5BwC$`bE89_*vYeQq z_E>?2`?f{4_w~sIccQiml}G-fokPw;>z>kJwYjHKkYb~LdZz@eoJ}p*%ExilGu=MY zSXMqRQNOEPqh_8QmYcSMf5dKJk_8O#cq{SJq~VzZh`W<2fR36|&-+uR^;i6a9$EAn zFDUTy;hg-@a0UO0TgYV%LVa+n;Zn0;^>bUAfcF*;zS1d&1W+yD-~2?&vg6b3^p z5J@1I2hG&-Tow<3uC6=zV5)xx)$4PAbzXkvf5#xADTcFL{z%s-*hn-7C>39|f zf0wRhfcpkNkWTMzp0f+sHemyR%6fzL*tVah*7^AIg20MJMQFRow~9?f2hz}!7Lr7J zLT!j~GO&s}+;INzFsFTMR&%3hDTul_sjI!(?*?B%J8m&c{-S-{8-;-VPGp8I{ z+F0JB-+Co~+5PB0e(~hZR`$bx`N@~P|NQ0SU5gGGW6>eMv^dQieqnK%eX;QLTc_C# zN9piCzyI>L|Iko0K;4}fYD&lX9fhy#Syf{trsTnL>kt5 z_BHfzxN9_LwD{DoI6K~7hT6674wnJu_#|y{->cmau0}v#e1$Zq6_jER?+BUI!i<<8 zF;h$-tHSUVjg)7>buPSn%f``D8B|doWpzVLfv)`q-<+ex^=MzsF_E^AsX`Mg z1*}Ev%DAukd-K8seH)=%R3x(PACO zBVm>RvH`BU)cS0O74R=2SqETid}-WwRgWu97STh zdJ=#8>vyYBRP>{U&gy^r`=-?wy;tz;Hh#37fwqXC{mI9&m;D*t8IEsn+a#SXU2WX# z?!GP%mAh7Xy)KD?W3w2dj8?aqW)Blw* zZzT=M$b7(O4jC~>3`E0J_Vj+VbuYlYhXmq?+Hf1}W=^p!h#ajMax#ImZuek|Xb!Ww ziiQdD&gg9Vws*BZK!PC)l7DCq^Wjmi{PMN^@GSm`0r7}F__0kPj176f_dPEzAHv`c z0AW4zS{sx4U|3jY#EM8*Q9$nDy6)-7j7NZS)HLSR#qSP=od0xGNa=^g{l^MNQXVKP z53h`!+P)~NVSb$PHgboKC6l?friQkNyGaaiMZ8T^Gar!)cueQwOx__Bqb#nBJmnG9 zT&{JMhI9$b{3YD&mB8I*oiMa=LFVx0}AkQtg&>hYZ|<(!q}{!PnLT_3rA8UFRa z_%v$!sgF;J^;>2`<|^MkkQ9CrbM;SC{+Dq&+P`$xnt%cq2HKce(Xk3>gxO z_~C?%D$L%*!=v5U%|c|IelGk3w}KK^;==aQKR>$r#^#7n$-fT&fSEK^)saLNVFMTT zfjl;(F~j*i))WpR=)tc#Z;6kjQm$^LUk|)oal^&l`9lY^!$v@W39;7<@=*~6Mj*l= zTIb4k)ktL6FD_ROx~E!kznYP{Yqax0!Yr2(X|%w`Z^Ng%*y6aNTug4wF#rf5q`E>Y z3JSei#(iRrA*pqB;->@q8uRExt6G0&5XSTt=R0>T1JH%QE& z6Dke~5`;{W5Q&n?7ZuuzLHhn6P8cTJqi=!!xyNBxW>~!D#|*2R?K2 zV8;Tjy+_`;df@XBU5A!l-v&a^2Ch#&-5yTx$O6Z%;_MsNe0n2krt%7#P&O{DLTe+N zfP)J&^KsHc+sGLP^}#fyzfP}F-q>E?m(vUUZD5ZXU6sKzR*QrsG>C(Rx#r*X0-6UJ z>Vru07%I&2r)?dEt(Vh42ZZ>RO}!|n5S0tKCI}wz6C!E#JbQY(XWj1rla|iJ{!bwI zZZc$46$72IxbW{*(nOeuPV4qB#qu!?(_g`XNLv%Yv>J>P+S4ud>&Xp3<+I4s&31j( z`Bv(IW;VVxh|!I~dsN8<;q{R^_L(kJ3z4j31@trh@4 zfk@xx2vWG#dYy2)*}^|2lFO+VvQVJa1J}R3i3VlSHJDvKTn8)xLASQqcT8AM14sul zJdi?qkJpx0+HY%LhR85Tw9IY@w~-K9Uje1WT~KBW!E- z)Z(%A_SgWk4#CIk_~_hbvXm(W=)eqq#Yr}Su!0W)FNIYDzS|~`Ux@JuFWMq**jLuV z1c69$KoHEwZrTHLUY=DE=|C@b#p=;Mp#F}p6LO;eY*_|$@#C-n8)d@H=p7(L8Q_!r zzl-?txHPY{ZCt~PCW?ws5f##fibPXmlZi2EB5F08Mix<`&?coKZWT=93K66jn@PW# zwrSG=O!5*CTv;j_jawXJ8cjqfsHmBCtcqLWGEw8lyfgE@&+q*4oclTF{PUc1?sH$~ zzAj`14DAyc-6H94=?QIg{;F%=s}R@owv@-=_J-}Qr@af^%lpCmcQI4Zc7WiW5Z6FV zYj66seS!-Amq#7;X*#FI4*>}PK;-VV{R05NL;quL89}~>&UA0(OoEOs2|>QF<-Fnj z1-`wFdFXmLXei}4;FKNM!D5R!#vdrO&WPp2mE}WEL(WyV9|Bb35~KEa@t3Zp!!mcj z4ar+x)F)nYw4TqGgDV>@bJKqQ!^bcr;f2F_LNmsGp@qm?PCEYJBL0W z2|g7O1&18NHVs@2ieLhbR*8E_)D?S+HE8^ViL;Q$YHauZ$Fa1=s5w+PZsSS2W59I! zc8q-+IMWZsZ(a!?_iPUC^gmgt>kaT+oDEwt40U-jE$7=`!o%lR2B%{~hhBudvFyhp zpKQ5pgao*arav9cz24uxH~oACb{H>+byzrgWY2uxg6H_d$WLC`vNkC6{OOdl87cQ#jIGb zG;VT04vPUXdn|6g6#wcL_R|^R?|nPH7u@&9x+ewXoRJErUV=J?p^p0_gosrn)ld=O zA6mX1S>1uln6)^27Hwx8W(p=-ceZnkwiw|R@j>{rC(EI&0$;MVW5tKltMokk%XWLV zD8#0n0N{v;X)NavQ&CVNe@4ZN)09;v+Af}V*ES56(w*{y+0jHueksIx*yK~*eNH(8x2j?4ZDil^-rzH2Oy*0a>kTizyEKxb-jQK8H-IeKv5xGnYy0TgNC zzpK=;Z0z5(A33yXrEpd}+|iEWyU;n$slMT20i(`nJUR>LE9}G zb7aIR&zi&5W-AoFSHp)2o;R&74E-VDKFwvQfSF4Q3%4!=iKrk8=zi+H&VpQMo_2Jxl#f?_YQ>iw{i6LMHJUL_ zmZxlLOSuh%x=|-63z&VrjdM%@x6W3^RXZP%qxS?HYDd}C?bpFHrmmA+>H$DK zjxxOj=zJWU;2R9sirSrzuc+=C5TFjc2zknk*myl=MnC8}0$oKvfFEsl)rR}VIRAVc zPMNRRZv@i5y53PBMk~07KHZi~ix91;&xCBnP|bs9u(rL=JUr$(A9&ePmHI;w>-Uq#|-MS-lo~ptJLZ@W<{7E#) zd;u#9FwHri2^HBNm6LFv_}P z)(Fmb`tE3v@}NSO(fI|FU`^}R$By#|gBzqe6E-xmN_D^lt0`{-Mx3L#3StMF<+6EG zmXkCK%NAskosXdfDDnE?xMYcwy5dyLgel==dCMq!3wF$58~FU=#y4GVam$--$F^PF zbXSt6h&n z`l=m2O3!=SeS3Y@n%|`Z{Q`UicoDDs}_fTRx0 z<*w6Ps&Prv#OctXiO<2XBO4@o{1vCAXn<`$2?0Er+(Er?r;T}WHpTRgA0^6ay8Kmi zdT2312^W7h3+VtxN`p;70O9J=T6zB25RkGH==t1j!Si6qp2fWN{ac*9rK;)dJzHBe zfL!%Xzr?y+e7yI`|xPv<-DgC~1=f+E3(k@EwyKuLaJC z#nW2=+a~rHtF}y21!(d4V#eiDAhnOUwFu03$7FVDfC{pSaX-N5ip+%TuLJ*<@V;0O zINLz^IoC1cie|5GT3JgzrHq$Qb@kO_s9iqjYFv3Fw=u?z?&|0|e9rp72k6lAW`++7 zisr3wj*F$4ZTjQrDYC=x2gpj@9mS4FsGffyRd$>kjaO)3IYv^Jom?sd0$WPzE}$z1 zEEGo^Cf2)S13c03E(Dr)HDfbi*K&KBBtnYzHQIT?e1L(oL_xky&ZaMCq31!snE%}0 zP-7GE97yA#k0U(-!nK)^Zv~ECO&OZo$$R=&*o(;GCY`5y2(Gnz>BF5V{|5OACfIKh z>Q>a7))HDzJkoFK*-bADO0ZKEL;b1c^^H}zFAnTjcN-Sr?$y;Q$a2|=(zu&RUdd$TebM? zkG>e!@@X1$MyX!|eqeV)w>Tl?ot~R~rcbH5g(nHr|@#7m$*kMR4OnTt7 z8f{p$LI3ZhvnyN2!@_L22E(klc_b@tni`W=vx&#~d^pkwwiK&XoKug$wtL%dYxWKH z&?3MN>|P!zvi5YIvHdS#kt$wLc}s0`|L5CVXw&L12js>MU&%e&qV=R+2u}hpHcfvF z=nAnroL^Gi{!mUT2%d4Gtv?fdkF9GMsmZg6f7r5+6+NB5*ez6`!n1^pCUUYhXaQ!07dRG;^(4uttNyvNKp$>cP`&r-|ao z#>%g@5KtS=STW0sAS!={Gd?ZGH38k1z8u(;X(m{z(u+%*L{?GZ$&8}YNeq^HdpC4D zbGd3j6ZxQh1+cf}xktkbBV?hg|4-W?e~ud1b=fo$20Z|)-mkB(b&z-azfCj!BTvC9 zp5{$Kt9)sI(2(PR$Q=owqGF#Kpfv(^AA0S!w9M|h+_^pvz_zqvAwAFYbd}(H%`cQe zAhQ&6Ku#|g%st$?&2JFWVS$a}Pn9qv3cS4M+Ws`|(&mMrzklEv^%gQy@NoI+8XmH_ zKCaUnv#g^#OUa{Sh)HJo#l*4GBuW~-R!YE8@#w7DOLMiiNm&&3?dXeEP8NcrFJ=Bn z?7TpUH%uhM*%otcSStOCFH4txfS6)(KB9bC$c{U4Lr(sFhWwL3O{;ycHUawCceSib zBdkv^-aL8X?%2hP)A*bCnk#D3UCqVy)QfBokwE$($&mU!;roo!$v6V9_S5J1ISK(v zOiiU^5$~X{6q(*?pvJs^^Si91`!wyRzu%&g|NGuu*4Iw7tZy?VV)pmaoV%hda*e9A zfdnQnFWjZQm2&^Ytw62#YtSw6-4l@YFTbaOn0%jG-_m%2Z|kopfmLmvKtQ#0>+Q_5 zLSm7WQX4}oONM3;-Zyj0Prdm*p_6r^GCtd$SD14y9QYS=tjA@`5aP$h-lZsysa=cR z?q<+`#2<@IMF6cm#e5;Kkfc!nXy&+x|*JWLz-rcbD{HNX1Pa}r6i-3qiO&@Rz92U~U z44VG%Y56Z-!^&C3Pscw9_iFCi{a7(%u;A-=egEN_-_zxrIjTwM4jbb8bZ#*xgMdz} z)-S5!SUWc4`K80Nh2FgVJCPe(`P+v+4{_L5ga&T+xjN*W4G!IYIo=fx1OQET+fMM1 zeN%lp)UxK9va18uO3IGqc^@5us;%*BU5#SptA?4Hisd~qfUQk3(jh1ypyo(mNB+a3 z)j7deGB1MabQO-lRC~Z|1}xrrHb8vE>wqM>jzsb4`+mTC5k9Nr@?1u`(P*{s_J{Z$ zRa4viE$?Ih;7jGISa_^YykSQ>^Np3 z-BBwt+B^>Lc}a$(SG*o}aUvYyZIXugHrYaQVQ(Dlmx9oD9{e1s=9 z5G+Ijq*S3M<{D1Z9zgu_oxrgtJ-2_^4tZF9{AZLo_7G|qT0&ecPuI8<-P}CVk*My9 zl#gr+!=ynBZF6&79mhSlfgrCOsl%I-!*WIx^t_b-(L6wbAb@+wA)$D2!qp+9ICg;3 z5d1jS10It{tr*Es6;a&W=3Yh~7Gxc;YsXOB?YWhbHo2-T0L=-kGuDWc%mnC>x}%2^ zYm`2%l`a&OWE`zJw*%Rji-}#ZG3hX6;G(T~#xWU#r${W3dywE%$Tzh1L6H6wgD7oI13b~nqXLfWWRk3D8StG{*$O-7kl1POJ* zIwYJ_dM);eQ@*}38nvaQN^y`>pQMi<-pJqIr~RZHoA*PZCljNeU^sp8zSV=W)eg3c){) z-M}fWI3lLJKT&Oyh*9p9@a!v-Xt3%BL7AYyhb($G_iy%m)7DD|ITLmRJ~jeb-5-i! zd>fJ5BEg8+Io;ki$5Z#x{N&MIVDRzbxQ_UTPeUA6Ukn93{R94>F%JQEtJVgw$ENQz z2RR%@lvkx`^i@S-`iRwJa9M9D=#!-!40An_U5O>;r%WSgiI8<3CtiikNc#d&?$^~9 z8PAFyIe(65X;kd2FO-GTDL6yjPN|}a%KmMHv4@yyq>z|g$A?VVm3OUZh;U>|+Zr{n zg+TawMbQ9uXGi6D$J%feI^L;%{f+-4l9v#q8r0R(bzYNlT295u_lAnKU%TDzzW9?GGx69?cu($&Uw(6<&IGz z#yEcZMzj=#NV^0Dc*le6E5&D=+8if`vlGN}BIFT8uUczFr;HR*|J+!JJApA$bADB^ z82J(`n?pvRNm8x&oFb7w5HFku3ukZeW+TSud%#pB7(GEPAuc{i9rtx#{y4krdk7bs zd>A*e{msTh+|Od{rfjm~dhYh|_SOkCml8qR^-aK%z3eJP^@&lL6)8M^x5CPRed%Q$ zE%vh}k1A9eTxPF+@SHD8W||Gktg_PC_F>y2?vT?iF3ov4|xBaWg~&0qhvR%{^_6{a(YZ5rcA z$7hB=SyNbFP3kYh%|wD8v4A!FZ}7cWjC5Y1yNCt+H6@crC7=M3W-;B?=^iD1*1B-DCuFSF{fKPOhssQMr6DZ+p z`G=T*_@z9@!=}V-an+aOIbeRT{rxoc*s;~h@QrfVnXcQd)GXfRwu>U*t4!+*(%~rv zWa+{Xg523o?mtW+eAObM9mK3CW}EB+16!+4NR&QesHB8xJlJ-$^dk?me9Us2DkZ*1 zX^*gSV3aErR*gRyC1;e6Q*_^@Bqe-@)@D%pU|awYSOR8lJYHZtg*2{bhtRs2&$16@=cPq zK^g8=hbQUP6O#aa=q^LUZt+vInci+xR8mS`D;5Q0!Qd}+p zB7mO98?A)PL`uHnN!<_F;Z5g}iJKfgd1ni{(=?Kj@8f31E;`X`n+*(xDj!K)`2>UL zUtiqlpGs?nl$q10Lt29Jew*F^H76N9zOsn8=v6y|x2ul(%;i+9o(Z~B)iwIXA~wO~ z+*(b)i~6%nz#P*#^5HigBT~SxGP9bWGr(|g1z}#I^_CQgI(( zq(Yzd)f#CPokOsxN+wh$SYuGFjOS9C>Jr~DM5^chc1dxCC)Ru+yJaoVO`&5xVb7Z= z_YFTXH;5d3fIPR^SgA(CXfBIwMYE^K6KRCoDY%QQ`*hMF{G7gd(t5_+LN+k)akbv> zd`Y1bWmfoivC{lrB5Mm$ec{(6EJr+7|B=M=eCexRE^kgujrClbtdE1?ZL&B;#;AN5 z2ZQaHFjLFSNt6_#CaFjT$H@sCg;9b@9CIf~LzMNzBwa_mJ1L7lDovCe=#|n}ok!(G zFSrMNCV7k`@wo4a=CAlw;``Y?|CntGcm9c&b@QQ2ki~23)=bMq%IQ%PThMVjj(U!l zPvLz=l!y~`B5qx=Y|+jgJG+#h@?x<~eor>9lo`dNR7UKaGH@>V9$6gLkg8yQVO5(i z#fwxBGtF+S%@>#oYt4BF*M)@Gx#6(esAWLOFvfm+!2K&Hm&hkx_HR7d$jhjynlu)> zYAKGyJ0S-hi)?^y=;L25y-;s8+IOj4_+XU6bMC`I3r_b&%J+UdCb#NxBD=4q8jqpD zQ}7=bWiTlpz;)?am#{czOJb}dOS$+Av-Un+_N$8arJbJT2NJ5s0xno1xbiF?gt|h) zk)DuC?h2o&*n#?2=dMV;A(3Zb6PWiKsQBizu?{}7Y3UFS;EXcjKi3F@vEGG2{L0(Sm7Mu};il~;;-uM|x zxc@cXz)tSS^-^n@lB&-Ehc92KA|qK}`R0o&%b1H^ac}kW>CBrWQo;)_JEN!k63k9S z1J9{C_|+05c{s4~%t$P}jVC4lcM_F?69!xFc?7j)CGir8?^ThiKAz549HB!AkitYr z5>ZvymYf||HRB?C2F?tOZY^TmwrQhrdcQ@vEKw5?m)1_RRG;zhiOFWsSzv+>AV3@v zfvP3p%T!VF2Bl=i1duAS1;n*kJP$#*YoI&di_r)Ng~)qtOtmhJWw`ytbjI6yQhl*V zCcmQLkfQ~~P#xpKI!sf?fH(f(;WEn#9dMhT=EizqTj5Wajfx=_`e9&2yUed7TL# zE8+Q+l4pTSyXUZelfNbJ-#ye+#lG$>N^6HDIh`-<8?FUp(#WcYzzD z1Vg8{3C$c+Al7-(n?I}iHZ)G%Iu?a5&L}vLPBzHvf$!wW+<^_X+7+=61f8~sysVr- zr?`kbnFV5vBI{9STfCxHlWWlHE~#3R)IJ`D3HlVze>$yrhl$vZL*czBw6OAwHJi*6 zM#pSVE9*?k^b^h#u!Ca{e)UjUlkUbm=3yw>es4>lU2K)NSac&pLt3jS45v!uR(K%S z7JpTrr&!a{X0@OIp-5;FBj1x#SvFLu4$9l3=Js1pU=${MCxVm=5wIa6Yw1^-G?mUU z&&)u`&Wz6(hpnk$RXkzbTQ?Ih`3pMYc!wU$N_)?QQcW7ZxGOttsv&%#44j$XFOVD% zra~d7T(vq%uK1!(}Z#R`A~k&fB|W1(0I-B);@M z3SOsFOpx5!*l~Fsc~zT&^!~L^EFQp!1#&_s;|h!|l`8J?kmf}7-3r3{ed@`y^LPxE z_pAP{@|2EY$TxRrr>5zn;E7khv;#pElJ*MOL2o?uIup$O`%Di>D<*J$Lrx<+NSQ zPNde6@@A~NW&xIzZrwVwT;;VotXqCFDUsb$+`n|=Cp#g0@poB+ z?AHGyy@h&9eCv1N9Pjs!vttC8_@7-$y7j-N^Zz!b{>Svwr4M`K|JmqYmej(xfD*Ij zeo@Z%`fsU!PC)AN-tn68Pi0$yl2Oh5)KoHwb>@5h-gEyfCsV@yUGO&e?mxBv`Ir(G nodWq+`Hy|?xqmkKpN{|PzW3bdzdQY(@&AVT-|_zQ0s#DfmK}s- literal 0 HcmV?d00001 diff --git a/public/testdata/exampleFASTA.fasta.rsa b/public/testdata/exampleFASTA.fasta.rsa new file mode 100644 index 0000000000000000000000000000000000000000..6e7e213df66b0cb88049c39e405c6b57db0c0d49 GIT binary patch literal 12528 zcmW;Scl?g^_XqHEWqo9aviII(@65`|P9%Ge>{(_~Dl}xZWEMgqX(%aLNJ){5>WhkG zHsts0_s9LX-S>T6@Ao;c^Lo9`d0!t_&qUlhCgS8LUb&NaaaIzABL4sV>5~y97e>6C z_w(1k=u1|d);3?VsQQ>Y8FePRjwpPJsZ(x zU_?=S-?1d(RFx#&O`F89Q}!yFL}~Ab@r#>IT@}46ROY=ocEt169 zQ4uX4iKuMNLv7q=&pa8EsJYC~bOyt_FP!e695rg$_?-^0Pd=gE>KK&he z6^C!!7`RRz>a=cQ)8m=t>w-lI8EuKWt7D?>AGa}ZD+2>h}BA$_3Lv@@e zmPKk>=!b~o;;aI{<1hJrW<<4y>I9qB_a`y5VnnM0_Hj?s&Jin0M@*5+x1A!!&5hV1 zhhOmRbdJQvNo*>b#9iM&?9zrqov7DVj3FXE2}B2HwC zc%^a@%hmMP#yCEk=6rv1=OkvgPvW(vNi1xg#9zPT3$Bl`-sbrcd&<(J+<(vXA8zVg zFXF3P;FT?j6FFhDF5+)>d!uB;nq?6ihDSX11x*%CqN4hKvdtLPH4X4dJv#1;NH3qo_;wyfcZlT@ojpZ2FW^t%bP=Dt7IAk^+^Z(X z*Kx!ebzhD6dP+nm7&Kk(n*QqL^Wd5h2O1?|>p1!=9r2d(4<*s-hlmy(+>1DBtA$CMepqX^`9R-u( zd0=^)>t2fZxK+gAbF}*a-fyGDsu4B(mNGSo&Y$7+`iRDZBkFXCIJY=rt2|#-hX$h} zCXbEiP7{ahb+|`F+JO;01}Aat0IvTJHn=?i-tE4Ps1N5Uz4)V?YvR>6;wjt*roEH+ z`y#wbM%=e4BI9!rJ6p=NV-kyDU9KlD{YajEZ-d+Y)NXfc-`9no?;Dn)xiJwBXR`Ls z_~rX_GkIe6h_RECs05SpHz$#oUb@NqtEu)Lmc+C_cpt3t;y{h1bS|$S;ZTHLC)sxg z4?T35KC(qj6YKXh^Ls7z?-p@P?HbGf#gS?XzvH-)VO$c~aPDq6?Q`8zFq~c<*W`22 zJyoypZJwPycf?)e>1aHD!KTw)JWr0_;`-IK{Htpc>l-Ff#WlNIC9z{6&Q40=gEH#Y z5Qh&%Jl8IXJQHcIKksng`y-S1y`UKDB+(UiW32rXj4HIJ-PB2}et@TTN+NBAB+{wV zg#G-oNyHPS)#dAm?MBKX|;yO)bgHbCSe_tJ@j-}215og!ASH8LZyQ5rZ!++L;5hMHX zyc;5xT#h*NPs9T`;fX6pv$%ec8sXtzbzo@yOWEaLGKs@2)zR-QAC4#_o^^DP-fx@S zHv;dD<7m^0a>v8f_W2nnGM0Z?IJRC+m*m#JaKwCdS*Ny>HsHeTNjwP8TgAWF z_wS4A_A6$h7wLPtd7v@xAI;yy^XRQSTusiv{#JWdmS>mJ^14_5eaehe0uQSsQFb0a zjrZwE4<@FZ=!JEP55qoWh5rDLCAg2JD^V0UXA)Tr{x3y+87ZboASK0l&*A51KqE zhW(Sx9zBOw3Vm5o*f1*7~d(Y(iuh*M7H}i*H>LkWlemel`8Gdi^f!TJ28t=s6 zvPsl`A75c~7_ag+QoqNIq^cRKHQ+1aL25+%-`I6E1qE7RR$`pshN?kUG} z>+u4-%rplV|G z`ER-S=r&yA8z&1Uv3)5`EW>v-KbkIyE#K-x@V^GrXa0#OxL<#M$NKhsw|){Iit`9B z-XX@C&2iap6U#a)sP!@0UDX?h#w5`}JdN$24rcQcv5Tv{bqjyw1KH#EjavqEiJEJ%kdYT;UX9tp2iJgxG1Ug7huv1S!} z*P)$Ed*v#=S$Ma1rt=pco!yt;e1^0CGjrk4_pnTX&4f&5G3(c{S4Q_QQIpKQ;Z>W) z-!xy%(8J6J7vIvuV44XRDw)gfO0BLx@a^$hW@8w3 zoti{7zh9kVerSV(>FGty)+~w`I?0-H{&~6?bst_>qtaqpdQR=caf}x(m>|}d#fr~u zO7el)^jpQb2-cUa`_WcsC_QIDlO*c!r(tHRTzHkg0uJO*oBMI{M?G}{{SDW5aOFY2 z?Y4ej9L@9_ez!B9-eGp2g;q73iD7%Kn?CUu?zXYc1J3y3Xlnmo)oL(ad}ZxP1tM<9 zikoSjTkz>zN{App3MLr+!!4oCSvTBfW02gSb82k(HuKh3JNk09n zy_%`T2ibV#pK#ZEySis%QQSMrvte`8-knyOg~WGCy-(p;DmnaIS}uA)j$C>zeuh4D z4o6qNpoWK>Tg6lzMhk1{H?!yqKd#NtLpzy+>~*tsXQ}BnbzAxbPwk7huZe|8A1_0L zVtlwBjkvxV?|oYxGC$%~$9b==c~=d(HOE=lKX@zOc++eymLt4sr@iLLyT5v$n1$nW z)kD8~LL9&Pt;R&Xq?q&5t!6<`BJlvfP&bcg`|b!6V-ZK9Jd5p#Ed&u96zAU5#sey)myoRv2bH z^-A+3eh^#viQ=fj-zKPq-$PwKl1r(!`btmS!Q)l-eqY{ii+4QD?3Mra!Or65;rH8n zo*~WyF#cH#FVWX7IIQERlhmnsYhKnEXIsJ(U-J}8;-Wo^zrmO4!$w^X=jDrCY4dAb z;sH77ZZZ$*n~9IoW4mR%_d|QgJqK*Bbl?fw`GXoHVk+MTP8s!8`FHE(-V$bxqGt6< zzC*X&-+CS7!7bIJkKSUP9&qTM!Ts`TdDU5$CRU2+R32wp*EZ$})xV*u3OMb3S{B|_ zD~UpKIn9g4zepoBJwFgf6Q0w<{%vuxNLO>Vbx!V3vpdZ$U94rlV%A%P=Udy-PIvvQ zhqF;{9yA8$^n$)kXf1n0FPP8Ag_-v`hlu65?%oR}F`R$hS<(HYoE`C?mHOO}MbD?r zeN*j?TMIVxUwiFzZFTk73CmpD?2&_}3hDpA$ax!`;rj#c(GQ;eK`Sr4>0Er4N7Cvp zc+`D6qGCPIV}6XNt~M>zxZY^~Q`de?@p=&spTp;(`bz&KM);1JF*8_a%adwZLr!bxf12|N4(FPV zqw>przusEe+|-$-VNl!Jx2S2gTAsDo>m^<|BTo|JXnnkRp0(DpVfsRtBlGj9pTwOt zi9P0pzVtBze@gT2ZudBM%JY0~xy$>np!d0Qpq;|@ncNOfCnfP2>{I^Z%2>7StS|EO z3hMnT&1WfVW_3+Xx>|Lsv*!f)sCkKZcrNeVvduju?ZwOgml-bBUqqK__0)O^P2=2T ze)Dy4=Ox!}68k*4RaO7)=H@FOsH2=xwej9eUz;yKcxJ(u7gL!t%q4y2>3{ZUf1_uZ z>ilQ}XN7ykgIi@JHE5z(E|@QI=iD`OLsMt8oe=}+V00QD<~mRdTYW(fWd+59BM)NkjZ^NNF_*`BL)oOc=0kaX_H`X(wx5wZAV&ij_ zUN*jEyT=wTZzl>WoavCnJ- zgX6EmajWNW@5{-3?Zmtj{|6qzU;kb>h-W-$rl`o9M&J(~d@sh!;=a@f&S`i>7w7gd z^7t1Pc*+7IS*_;uzDEpsdYFlEeO^BOvzX7PIa3)3BAp( zYiX1Zeqx<{jn(c`_23=1`KUNPXd)jAA4O)eJ8yq@w zg0IV?M}Il)u*akD_{g&se0mxu(!A#xiT8$QU@{e#tk>7RWpL)I>z*pXhw$}yB^;!y z5wM@Rl+W;<);*J$bd?VH!`(qN*Q;r9)ogPy z9INrhVS3uiM33gHcXvtRW?ojg9AA^e05wW&?>?|tSO*4os-Ya#U&kr6nf8z8V^8t> z;=H_@zXMHEOZmqJYvMug@62TBao_hls=Al%rip8q_+RbwpP%cgI53R&e&qT+FjU5< zEBBo|z87D6())S)yfDo9DVygv@@Oyi8AstK=RB<|JUqiQSKgI_=WbE= z?CQReevjE_dI`@Ke1Dcl6vLmF)o49D3%RZ)d?xV6;>Yw#v%-cBo}<8ipBUTYSyAg; z7W*&O>VChO;9<{7Z!~w}S#SJUPa}(f@p=Yx&L_@Ad6T#gFZWjC!8o{mo@ZJ({3}0v zAC8TS*h3CG@$oC#U1{I7mE8p7A&=Z^+fim@68FPoj98x&=O(rLMQ%lh;;i3B zw=*l}_54CD#>*>hf4T6Lv|_J8r+2t_6dhGjuK_K1r~8_@cCEd#!F#Uj*Vy}HKQoM& ztEusyne~{@aYCIkoO2esj0ZT{r!zjD_nChGsBac3%uD5!69(7o>P2)nSO4q(oo6Fz z(X5df-u1J0n?c4T@rpPO)6e1jw5MLL)|7X-Bqp!J!O^(d+Os2j{UiRPcwev(Pkl~J zYIwd-93SO9*!@}F$LWI34*c$yGj&C1UDYduRyi%VW<(5j5H~AFV@4SAmbK~iVvOKMfy4{Mz`J|{bNG@|MeQ$vCgRSVSqxBx)-R>)ji;L~Qh$k%JueW7)54=}&;}aXz+Par=@fsfRIqucq6ThqVc7E3UQO}avm=9q8 z6J0Dx&resv9}n+P^EU8Hf3w+=*UlMfZvV)A{%ro8N?T`r$HTsy zsXq-dm(_Qs9%DjsPYOXJ;RSK_p<_>>`D!6?mt|Np^Ee#-eIy*X*UGjl=wwARp6&e`J1Y@b(X^}Mz25>H!u zX07Dff6RGwQ0}<-I;$Krxh``O&&fT1Z@pY@i}}|A_Yda@ozVq{8>Lc|F6? z&o|(1C3?I<&l6_hDE!lGu^0UJ_0$&@>K%U1F_8w{`}iQqYjMEvMCb!-iJQbuLllfs>%O;cb?WG9#qG@ z>%@1OUq5agJZpRRf6p>pH-T5DcxHzu^Uu5PfLU}c|F>U>OgKzWU#fNbBl-*PTlR^% zyr*a2%j*N3iS995^4+|T(B5Qxo#!6czOWPyui~5U=BLxM-uJu!Kf2ur*OTTR+^@k4 z%JGnmczIdur(ZHR(B#Pb^vn%Bxt7}Atq0&hKR%R+SL`m$9|!wfNIVPF@*ijRik_+a z{>Vmn;@F70%=G>F?RZ$F5&OF|r>;lfy|+1j@Uc1SIEMya=W{bhh!N+SnMq%?M>cgF zQrG$V7jv-~?=`2Keh>$bn{Qz;)LzB#Di>_B{L3qM;e%L@wN<-mFtO(SU&y7jxm2&& z;NRE3!j+F{;(L6^D#z<)CLTTU1v!cFA?r8gC-ocP3VikzuGeDjhz?X`bzkJt+G?jyMS5o?rtMf3hL99-Iw@1Hl9 zi0w5s-#^8yT-(|_@y=B8l*3Eb8`Tp%i_}_dXc33h=z1mB?otapn#n(#!)F*iXTsMSojlhoghTr6=VkP{oqRRD z`M4_F2lKzv^4F7}>`T-3xh&p+@;W=z%*dA)^RzBq^>==h(i|pLU@f*;I8rH#xpS=N z6aL+Xe#fijayab&(fO;F*}Xr{@%^&&e*XX#8Tck%ml^B)(8x2F4SI5M{oy9p95ai* zt1f)>VH&E1YvnWPUDxpRy3gubvklz&Ol|pG^!v)j{vHL_Ej+R%55BCv4-XLkEWP0& zeo@=Gps&53F{|QGkpgCVIM#VoE%ERAI9@%{%rcvowKBhbM`Jj(%z3F`QCQ>QS8zKZ zzV2!|B(E1-DwpI_cykqhC&T|np4S>rM)2=*^z<~%O>L)+t;~hu>Dgu_~{4+3$R9ACA_l?D+|< zEKr*R_wCK)jWd(_5F#PUiR*`XE$?Y3f*Lr z1Kymzm8Y61eiD2A8fy8Cej%P?qj+Om_^8R+d(C6kZ;Icac1ofo&gF+&jhoFt!#!hI z&NJWRtMcyrf_FJD5_^s-|7ilJ)UM=GzQ~tqK0=#$^nCw5vfp!q=}EN3&2nPes@6Hn z^Pd%Xn$`2tVsM2|Yx~}>e*LVq-usq%&PrvS;YP!SuU%@uKjB^^kMD7Q-E>^(Vcx9_ zTijnfkarx{bK9BK)%cTd?ePbW?9ubpIbSJfJ29v5px*l92c`HIjy-w8GYZ#shg$_0 zwzce&p}n1Hmcdy~k`AP{~U;cy{i5*>eVubkh)yEPd+la#`INc8qifqfrPs^pANm^t@6-SB z5tg+!d6soqZi9Hm+n!096Z$UFf2!*t;w<=pGtX%;Ea9`(Z@N?NUM1z<>Iu)6?ROYv zC1|eY7H9J>VIy20ka2+woWJZ=j8Gu9^IdeQ2-h;UG ze2ST4G@e)IrQ&Y3zzipjtvGO&Cl_pJJ)}y*!+tEL_x+8lBdq1VLGFL$MKec1eFu^R)xlt@pxX=767YE}iE>f7&0{KG^KMx&wbSn@q&K(LxW{j}d#CL?`tAttyXd;6 zlhs}BzxnRtUgk1X?z@~prkLe;$(YLKZ?%7PDZiqf zJ*~_QgUkiP@xyCfUbuFH(ewPXq4{LGXIpi}JP$t-^FJ&%HSo75y{AAKe^aT8m-OC1 zPS>~5{uXtnl}e&J_lyvW z_;ScACw`P#tB2wJo!MyI-mTT_0=zrGYE3G+rZcmt-LkjjTgO?dKDhJmzOU>3KIip%o!2?<>-JB5K8-lrF^Q+UC9(FAh%*Bs z(oBxXQY49wtvPmF5^Y9Cl=&p${G5nvqmpR!49wCb(Q$Rei+dvS_8g-&a{4UK(cRM1|!Yj*(i0-)l z6qZ}xjJThENBZ}eR&d5oQ~UqdI%4N&y4e}AQV@CY{`TdFeUC=`y(wZwuO!C)OY`pk zn8qf%H_x0TdfhqBMlgLyuiH5Fih2vDON4%%*HqE9ZpAW=Ut>=7hjT8LiaE~N% z^ocl~SN?K&bPo>a(p1qTHndHmlDL=Nmc*cC>K|8kPN$VJ5gFl~`2+6_lbGI8E)A0C z0Joe|aZFF8GAHqdwSR{FGWS+W$A`tU&Gmf?;{lda?D6U|bRw2k=frg};wO5&CXPRF z_gaUDM;;d^zGp=e@AHOc1tKQ7F2ia5wLT)XJP&77`!x9)Jb%swvz#;!mvtp*m#!vc zq6PeJ7V`yuke06Bg83(V`Nv@Qc31nEB1VhvN&C!ePFHIquEVEUM7->Ee|mMni@%2X z6gU=EE4T21CiqF~{*>jD7@srZp4<^%-PorMEV@Qy)k0NuW+%~pwA#xfHZ}QhC4Sk7 zR}|&zZ_90XL=M{B-hej^*WVi9|0;f5n`Z(}cwiGcnOWE0J?ZT88~rF^3yn>{%VC&) zVEr%fyl91(K8yH$L&VlU=z16L;dB40z4dgM-@QZB<`y+GeU-Z227lKLJRH%|I^E${ z)*6r6`?tjr{qZ~zC)Wz|&ldb%yeA4IvG6H@!ZwT}=;D zUwY#+{IH!@UpOwq{Y`YYBSR8z%}QdNyhg!ezkLS5pi_<{TKA!+C)F8GsCyrNnY@v@n_eYdg!DTDL5oz&26r$kwm{K`gT)2npRrq_y1FC zpU}iy9yF*tZx`ySPu`Yz4G#wZ{_ov zK3kxVS*DWSm4;90D^(_n0XG?Fyv<$kD5CGAj96v8?wukU^4ZNF@s1h1D2o!?MrYmi zLYzHxdlEad^5{R+i1%=1Dg?IT$bLvT8{UPOQJM98o|9^OZ@V{!IqS^E4;Jk#vQ@Vujhneq3Ccf{}y{NHxXp0RSEv2GV43RKr$lO$61 zQp;kT0k=`P`LH@W$@(lSu$STePhP>mbc^U zOBh}JpE#f8ac@K%dl+|TX}gYIyODQyhEYqt=kub^>*Wn-0S4JK($-UQ=Bt_M=@{Mg z{XU{*RdXk;^{>D)tg}nJgX~j%D{pNE3p^AXAZKef$K%z9=2XB4N^NZ zl9&YBO1^Ifi>qrRn&YhoEL#2t2dCzMJKz( z{p>Qmq=Xr$IB&fq-v^u}_~cu1S?~UNYBA&8<~!frn>LBFbaMz7KTV0qH;*@r;VGS* zZ+OhABKFmDpON!5wSSftcGck z;z?9UjmIUv=Tj$s;Q{i$l|R&^)w-^2E03Su-|-!FeN}z^q;}l1kVclhCs*sXzb+R0 zeZwQa#c7+BaDdUiRC*#_d+NoV=x}BIBt~2N)^%#fx+mzZFRibI)4`|JA`L#fT@MiF z%W7yyUO61%bFMj(K8c%aIE%SAr}g*4z8_BJc1)t!4!n5f7h8v~oRe0;&{`evT-6NK zqlnp!uZ-jOTdi}Lmafr3j=8*Zni>9Mbv(o0VwgiO&#IyKZ;wbn5$|gIv7u&1Shlox zB{3}R?QA3VKdSRhH9Xh)Pb}c?IBV#>f9d6E_@#c`Ojnbp!rWM1-4s=)DR|*RHQ}12 zhs{7VI)v9(6U&cyIZ(xSdco+d>LjN*>=x%dIKEoe9G)HSbo|O`^PhfMb#@ZD|K)+= zEv2u$Uov8V818vPoRiII_#JbE2f=6wET6$cSK3&#QQno!OSyT#o{0KO1lSvYrSUDV zj_G)n^*^%iYTh@wDvcDy6RjOPXkMe8y|8(9xBATH4D9`#TCj3WWA%OTn%d-L*RIgZ zU|!Rg59OlK=glFso*r&z?72IWe)cJiSEDtU)bEwVaxrYjUE%xaw1YlTh7ZFu(HqCr zgvTuX;6t-6uFvy=O?)zy{<^kf#2a<(WBnN!oOwGr>yA;|V$Nobm-+MCYIYobouQlS z&+-NJGX4j7HC3ijHu`T!$|leo436?RO{zPx^|#o}}$eu+MB(C;-bs&a6Y#7VOT-t^Lp> z{-TlVJZ5>7h#zX1KgV`top@yp)bXRuWb0b{e)SS&KkmS__QQG<39_|n6Vnr&=@`M zkRFK7%Ef6qm!4W7iAUjnU7mlIGT(^jVOZ7|`|6A4JGig@Q{7rG^F3x+n%hc)Yvo>^ zZ&kC#Usv&enjT)|t2kTF8;1Vnxr6w6|7f-qcb~5OX*^xhVB7M}5})b$jrD^AX5|Iy z$Ezbu?nv~b<@%GDhW^6`zc$yZ`yKv1ZoSr9_@4OF;&Je3db9VX7v!GVyja4XJX=oZDC4a9@#5U8OZc-f3?5oj2X7FE}?%f=_pAOrw=`a(o;X zd2{hJcwMp2<&_cT-oZJZKeEmsbv--_4DQy`T))Vkzu_~5_qS8t@Lpc))v}KMm;%@L zsXOl%d2&{KwW&$Z<>)S-mpZ9AR?WffAv~UkW#v|KQ=8Xj(SY}_Ti~i^5~Jj?yaC6kCQD5G3g&ujp^>6W$7cQ&y zy=wGfkLT`nkKEJRzm^(3{*v6Rks1C|UgJ~n+BVMIvtGR3%Ved2dCtab^*gh~YhKUb zB;_Q1{|EiRKGkQ?a1;JPEThlCmdCWi%ZQx3Zk;vtyIaI|YQLJH>8o$>+=k9RIPAhB zw!*9SO|V@|Lx01*joy{ozgw7H;d*eR`rgA6#J%k|aoT^L^*eZ9?fvx(>gs#Bw5E?v zdPyf|37@yZK_R_l4gK5(gWgNcMXnokhK|dK(|3QCH9I^81Me&R{bEng9^i0?egC#@ zYV+hw-|wU4+ur9NpV9@*jBn@Ow&odgOwMdKwBp%dC3E&^`4lu8jn(Je|Kf0%e-3My z{Je7xl8^V4-7$E}LyX`VHQjxHC{lho1`e$rsRTHzCS&&J_Y^=9_^&c6>F^89e5XKtJ5 zT0Pw=$A9aLjpoP2CV$Gz$$uAGZ)j)N?8kj^wQa55srAgW`ZsMBTBKgaz>X$| zi}yG8F8kfuFu9l3vrM2jF{Tj9sx~;kjqkmyXC2|2E9G+CjE~Rq^2>Mu|L)DUgEyFE zztd~m$m{S>by}A9!fhWPY;u^_!FZ%tXWZiNqp%d?_K$c_F<58!tc-sihGq9I?uFA_ z^In$Lo(p(Cn8F-c0@v>AWbIk}<>^Z>Rx3}d%M+t{C>>O2l*GP;G&{pQRm6Pg-g0%- zhkZYm$H>v1yXS@fhw!JrQXBMbJ$TOp{1tX-M%|c4oR@mRbenmZ-i}w+JK-?(_>HqW z50djvIX1Ir@psg=YnG-kpUg6Mo0e0 zi4pKBAQ$-!qu;W-%vi83(a&@4{PvjQ-}Ez}jJaFhY1GXi_uWB%gS){AACoeggY7@c znisIlDYKu$NqFdgl^m_La- z^5`c#I*->^YJGh=-no$`cRNq={l2wurQRz}G#jqA_g-^uKIbGk|4+^4Zx3VG9KRjU z^E_88>1+e@On7a`3)+0n6F%b|cgp#F^Beq64))AWoCk3C7QJ;=+ua6>$A10QY;)Mm zqpy?G_&<&JDlL!NW(}_s9rUpK;Qj^gUB*M``!b(sYt7I3@KM;kJ6TMyURng^z3^)P znOS*cTK&%V7n{-=zuExHAM81Olv$()f5Ah4dj9w^I(pJGP_-8h=Db!0p1vPdz?tbk6t&&v%xCn7ka%;8~y*J_est( zPk8QShH5RY?D$)V(=&Xn^gDQc)l37Q7ih3R9=i73&gsseu(&^mYb!Xv%}%?5^Q?0&bUy_d^^GgXKBbT^x&R*zbn>Y4{Hrrbkz`##iv6sRnP#qZJLPy>HHo zUmnBxZ`p3%AG8)OHNzG(d&%WtIJ}{MZR@92a8(A*r+MnHt@(N#GbVj} zT*|Z2r=2tS+;VfrCi=cjt#{!&o5tx;zI(cm8nb^#T4|P%#>KVk0euD5-{E_goR{Ni z-?#j0qI>vr!+-Tz_;kHXjqA;yPLhjj=lQ#BZaRhM>$n~X&)+Lr%f8L$^N#lV&^>fP zPjlYZ>u~S|4OP`M>ZsimufevSyw;k>^n}g!c>Qtx^(q~xkzr55omZTaYY+E~ISAjr z`urIfr4=teI>=+-Ky!XRwFs-~x684Ob5Ln}Pr32?GtJY z?1YoGIPK9`zsu{|ZLsaEj`ft6X|z{S@%U~8-A)x(wx4-q9Xd$mw+OmDn$^ra!#t~w zYUPmYBWBU(Z|E2&sd>Roe71o;&L>uOH5b$Ch_W!`)d%VLrw*Qh`ggxkbZ!1@aS6^J z>ct(c^NiV#9_PKpKVb9Y6~Ec*4?Szr<_EYrVLi2d$oJb;dPYD4sXubxP`?}0hut!m zPc_T(z7c=u%QXFBRc9Z%x)bJQ_}>5WicO5IT(pcBL!wbxAOU-$!tnnwTo1cP(cz(*{Ub-&z zoLSqfu{j-lVDWlN>z0Q@d*?8?EzM~^`hKXInSL0o2B}G0)S-jF=y>MWH0i!K{QF{0 zGe%nTGX4G1!0a=a)>4`CzOl!zdN3?frI(BSPgduZCEQCR`PIVmm1>ywpM+fj-1f)k zP#7HBhX=Uz8A%V`FAewGhxPs(>fRo_1Fsj~GmGJKgnHPzRecurT=Y+|4yOrgoT1yP zYH7E>vy0d=ZXI>G}Y# z>H8o|o-pq$po`*aHiO>}r|{W4c%p^Qcqq(YZutO)X<$Ozi&KiDom#lnj8%zO7jV9Y zb1wQ`!V6zg2df`&wp$H1T)jG*hn7s@TYRT$K}+^I5PFtZAG$bZ%{0^eHr7Y{>SA&~ z&uwUKtbb=WAIyIg7WNu^)O>J=Zx-{sy{Bh+JfM=g={PTm-;dA_tXt{>z1H%D#Wd2M z{=ANu9nwFmpIy)|Iwr9ZZ|jQkW4gFBL{FFde1GTsOPx&8uSc01j=O$?`GDR&=TqB@ zcm{-*eVJgQ551@^FL!Y+J-)S)KP)xTzCZCRb`*koz|IE!|#W*IDww7`+NF9o(Q8pux*`1ozP4zF;tU#o&I_qj9-B7eeR!p zQZJ&T|6re;S3i1<(+@@!NMnn8^DBSbf^WtUy1x`&+B49`d8T@mnmH7|-GUPG=lgW!~%D_K3Pl zX|~+Kr%O2J<}`Pzog#F+HM4#(UER{?di^8|T)rpv*8E{IEVAal@%y5B?XA|tHTQ^| z;Jig{rTqOJOwJUMZ(+|*N7Gs{+Nw!A*4s&o_o%bGn)B&e&I$d*q=ueNO+&Q(h5L6- zbsmF#%NO-1^`4$KA5_m@`2KiWJxG1^qxtPVuWg<6uj|uroJWsE;8yK9&w1L4x19dM z=Q8BQty$u?MLsX>IbseoUp3mekB7nO_yEsk-S@ur58!lT4bPpcShtSnfZcJ~8}8!& zGchw}HMd{DT{GVLm>$i`%D+ZiZ_>Nh8uc^JzMtW--Y`0n&+~K4dhoh68(kOR0~^h? zFn(o#8i4!q{yb5w)>+Kw-ZKy3?WeGGUuv|T&}6{DQ)2C?rwgoD@#+$&BgmYlX=JI)*WuocC@x&qFBzTV|xz#+x(o) z8G$EkXziK`;^sYHx+jOa`@4-Zw>j*G2KbWqQhTl+<9>SCGSmL@%ByFL*r^s_Jb_Lx zccF`Xo-KIIpQ)beIeS%?GrsHcv+wV~=X$>0(C-jS^}B&4Qo~|XGW#F zs~z=()$SMTrK0>B?j!K`S}VV6N!-&Bs=n5E=W(CoFydVi`b zy`v(Yo=+uzd)>oV+Tf=A&t`}nbUjs%QBTEYn`ye5YfJk-4r}xeyv`oS*V{RJyY`eE z7y52aJu~YSzB$7)g=P5Xo9F4e(C7N;L%c=(y!fhFa{_NuhmCshLv?WRaq+_Kbawq& zjik0$3Z775qBGDG|Hk2_bhPvdPi@QF@!TE1*Kc@76CP*(+4R-OzVpQuIn*89Hc#tp z^U*2q^H(sab`SKp-o{g`VPiX7`939TcH*s?9V>(Fb3f$9Cjq!3VLO(d|Hbl)^yKxrJC`R&TpJhzt%=8^Uc}~9S z{yF!*B(7O-DN_#i)~K6-mgJpcgLfFVn?Q?l+0S1N(r^0}dNKTlR?=^8SF_ddP7@o{ zcJq|x$W>+#eY=YN8{K3ErLoiaAKcSSyEjCviuH+oPn16m@PLxOc!|I%N>ZpfCZ`-?yXKdmaM|bHz_1Q=CAcu7?^NmUL zuO=R*^(!y(mb~I~&Hi`ww^sJ?*$j2^h1{pM^t>~ZcyT{E^tVM~u53<&Ut4i*JPyk@ zVb_vw<+Zb~I#eqcalW693aZ^t5AlVs%zzbc;LDp~xcV*UykGPSd9M-cQ!B-`S}k4E zPl|c=E2lf)+toEII`P-n^|R@&zei48&8=l{SIF;+ba?Pj+IpJ*sh53tTsB|5+OxLU ze?5~F^tC>X{nlzmd%dJ*wT(TSgiSYi^`_yPLt*0kD)nhjZ)o$O zbH)jH9`k>6j(XPB&)!9R*M)9p(sW<5;aXh6>id#rfUkM+MwsEa6tA!Un_knLC-b1| zTg((sIup6?7MyN)&v_F!wQ#v%lILD(U{O{*p_lsIZ^jeT2sL}K1UyIc==Of=_4$44 zZ^-kj%DlogOVwj?+S~%)FL>!Rd3=te4+`7!x)}M&f9kqr3eVQ9 zU-LF+?TWlteBJbskN9X4F)tQlK{au8oEh+ccxXnG6RkgwCPuGAo9pLzD1ANku$VLYe*mfJ z&-K+W=2E{z&x)R(E!3mc+X+5WeX;&$jbo$mFP7szJK}Y}&;AdANH1gn literal 0 HcmV?d00001 From 7cf27bb66e06db348581218d3afc28381126cb1d Mon Sep 17 00:00:00 2001 From: Laurent Francioli Date: Mon, 12 Dec 2011 12:22:43 +0100 Subject: [PATCH 35/44] Updated md5sum for MendelianViolationEvaluator test to reflect the change in column alignment in VariantEval. --- .../gatk/walkers/varianteval/VariantEvalIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 0f22e033d..4e3d38c4f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -298,7 +298,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec("-T VariantEval -R "+b37KGReference+" --eval " + variantEvalTestDataRoot + vcfFile + " -ped "+ variantEvalTestDataRoot + pedFile +" -noEV -EV MendelianViolationEvaluator -L 1:10109-10315 -o %s -mvq 0 -noST", 1, - Arrays.asList("85a8fc01a1f50839667bfcd04155f735")); + Arrays.asList("66e72c887124f40933d32254b2dd44a3")); executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec); } From 52c64b971fa771c1c7725e7f4efa3934b0bc60d7 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 12 Dec 2011 09:48:58 -0500 Subject: [PATCH 36/44] Updating MD5s -- really dont know why it didn't update before --- .../sting/queue/pipeline/DataProcessingPipelineTest.scala | 4 ++-- .../sting/queue/pipeline/PacbioProcessingPipelineTest.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala index 483a0b60e..0a5784d51 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -41,7 +41,7 @@ class DataProcessingPipelineTest { " -D " + BaseTest.testDir + "exampleDBSNP.vcf", " -nv ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "69ba216bcf1e2dd9b6bd631ef99efda9" + spec.fileMD5s += testOut -> "a9769a85f15e77505835068353ce4788" PipelineTest.executeTest(spec) } @@ -60,7 +60,7 @@ class DataProcessingPipelineTest { " -bwa /home/unix/carneiro/bin/bwa", " -bwape ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "3134cbeae1561ff8e6b559241f9ed7f5" + spec.fileMD5s += testOut -> "fad1fd2e69287c1beb423ce17fa464d6" PipelineTest.executeTest(spec) } diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala index 355420a93..b0ac5a6c5 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -39,7 +39,7 @@ class PacbioProcessingPipelineTest { " -i " + BaseTest.testDir + "exampleBAM.bam", " -blasr ", " -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString - spec.fileMD5s += testOut -> "91a88b51d00cec40596d6061aa0c9938" + spec.fileMD5s += testOut -> "3a23c96063743ddbc35897331433e205" PipelineTest.executeTest(spec) } } From c8b1c92a6ca2d99e8050d04bb30778a2d4008099 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 12 Dec 2011 10:55:41 -0500 Subject: [PATCH 37/44] Updating the other half of the PPP --- .../sting/queue/qscripts/PacbioProcessingPipeline.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala index 6f5dae2f8..1d3fb2622 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala @@ -177,7 +177,6 @@ class PacbioProcessingPipeline extends QScript { } case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { - this.resources = R this.recal_file = inRecalFile this.output_dir = outPath this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" From d03425df2fc729a3f50efca10f226022babf313a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 12 Dec 2011 14:46:45 -0500 Subject: [PATCH 38/44] TODO optimization targets --- .../walkers/genotyper/ExactAFCalculationModel.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 77a940dcf..ed86897f2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -56,7 +56,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } private static final ArrayList getGLs(GenotypesContext GLs) { - ArrayList genotypeLikelihoods = new ArrayList(); + ArrayList genotypeLikelihoods = new ArrayList(); // TODO -- initialize with size of GLs genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy for ( Genotype sample : GLs.iterateInSampleNameOrder() ) { @@ -364,7 +364,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { else { // all possible likelihoods for a given cell from which to choose the max final int numPaths = set.ACsetIndexToPLIndex.size() + 1; - final double[] log10ConformationLikelihoods = new double[numPaths]; + final double[] log10ConformationLikelihoods = new double[numPaths]; // TODO can be created just once, since you initialize it for ( int j = 1; j < set.log10Likelihoods.length; j++ ) { final double[] gl = genotypeLikelihoods.get(j); @@ -372,6 +372,8 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // initialize for ( int i = 0; i < numPaths; i++ ) + // TODO -- Arrays.fill? + // todo -- is this even necessary? Why not have as else below? log10ConformationLikelihoods[i] = Double.NEGATIVE_INFINITY; // deal with the AA case first @@ -417,6 +419,10 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { } private static double determineCoefficient(int PLindex, final int j, final int[] ACcounts, final int totalK) { + // todo -- arent' there a small number of fixed values that this function can adopt? + // todo -- at a minimum it'd be good to partially compute some of these in ACCounts for performance + // todo -- need to cache PLIndex -> two alleles, compute looping over each PLIndex. Note all other operations are efficient + // todo -- this can be computed once at the start of the all operations // the closed form representation generalized for multiple alleles is as follows: // AA: (2j - totalK) * (2j - totalK - 1) From a70a0f25fb9f7f791ab2b153e5c3d71cdcd1a21c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 12 Dec 2011 17:55:39 -0500 Subject: [PATCH 40/44] Better debug output for SAMDataSource output the name and number of the files being loaded by the GATK instead of "coordinate sorted". --- .../sting/gatk/datasources/reads/SAMDataSource.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java index 2b163ecbd..d70c63bd2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java @@ -711,6 +711,8 @@ public class SAMDataSource { * @param validationStringency validation stringency. */ public SAMReaders(Collection readerIDs, SAMFileReader.ValidationStringency validationStringency) { + int totalNumberOfFiles = readerIDs.size(); + int readerNumber = 1; for(SAMReaderID readerID: readerIDs) { File indexFile = findIndexFile(readerID.samFile); @@ -728,8 +730,7 @@ public class SAMDataSource { reader.enableFileSource(true); reader.setValidationStringency(validationStringency); - final SAMFileHeader header = reader.getFileHeader(); - logger.debug(String.format("Sort order is: " + header.getSortOrder())); + logger.debug(String.format("Processing file (%d of %d) %s...", readerNumber++, totalNumberOfFiles, readerID.samFile)); readers.put(readerID,reader); } From a3c3d72313e199c722cb5ed0be7895c079547ace Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 12 Dec 2011 12:35:36 -0500 Subject: [PATCH 41/44] Added test mode to DPP * in test mode, no @PG tags are output to the final bam file * updated pipeline test to use -test mode. * MD5s are now dependent on BWA version --- .../sting/queue/qscripts/DataProcessingPipeline.scala | 6 ++++++ .../sting/queue/pipeline/DataProcessingPipelineTest.scala | 6 ++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index ccbe648d6..621afe817 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -83,6 +83,10 @@ class DataProcessingPipeline extends QScript { @Input(doc="Define the default platform for Count Covariates -- useful for techdev purposes only.", fullName="default_platform", shortName="dp", required=false) var defaultPlatform: String = "" + @Hidden + @Input(doc="Run the pipeline in test mode only", fullName = "test_mode", shortName = "test", required=false) + var testMode: Boolean = false + /**************************************************************************** * Global Variables @@ -335,6 +339,7 @@ class DataProcessingPipeline extends QScript { this.known ++= qscript.indels this.consensusDeterminationModel = cleanModelEnum this.compress = 0 + this.noPGTag = qscript.testMode; this.scatterCount = nContigs this.analysisName = queueLogDir + outBam + ".clean" this.jobName = queueLogDir + outBam + ".clean" @@ -360,6 +365,7 @@ class DataProcessingPipeline extends QScript { this.out = outBam if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString) else if (qscript.intervals != null) this.intervals :+= qscript.intervals + this.no_pg_tag = qscript.testMode this.scatterCount = nContigs this.isIntermediate = false this.analysisName = queueLogDir + outBam + ".recalibration" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala index 0a5784d51..7e1d09b70 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala @@ -40,8 +40,9 @@ class DataProcessingPipelineTest { " -i " + BaseTest.testDir + "exampleBAM.bam", " -D " + BaseTest.testDir + "exampleDBSNP.vcf", " -nv ", + " -test ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "a9769a85f15e77505835068353ce4788" + spec.fileMD5s += testOut -> "1f85e76de760167a77ed1d9ab4da2936" PipelineTest.executeTest(spec) } @@ -57,10 +58,11 @@ class DataProcessingPipelineTest { " -i " + BaseTest.testDir + "exampleBAM.bam", " -D " + BaseTest.testDir + "exampleDBSNP.vcf", " -nv ", + " -test ", " -bwa /home/unix/carneiro/bin/bwa", " -bwape ", " -p " + projectName).mkString - spec.fileMD5s += testOut -> "fad1fd2e69287c1beb423ce17fa464d6" + spec.fileMD5s += testOut -> "57416a0abdf9524bc92834d466529708" PipelineTest.executeTest(spec) } From 663184ee9d8560860e9551ca109a72f12c582c7c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 12 Dec 2011 18:08:29 -0500 Subject: [PATCH 42/44] Added test mode to PPP * in test mode, no @PG tags are output to the final bam file * updated pipeline test to use -test mode. * MD5s updated accordingly --- .../sting/queue/qscripts/PacbioProcessingPipeline.scala | 5 +++++ .../sting/queue/pipeline/PacbioProcessingPipelineTest.scala | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala index 1d3fb2622..4896eaed3 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/PacbioProcessingPipeline.scala @@ -47,6 +47,10 @@ class PacbioProcessingPipeline extends QScript { @Input(shortName="bwastring", required=false) var bwastring: String = "" + @Hidden + @Input(shortName = "test", fullName = "test_mode", required = false) + var testMode: Boolean = false + val queueLogDir: String = ".qlog/" def script = { @@ -170,6 +174,7 @@ class PacbioProcessingPipeline extends QScript { this.input_file :+= inBam this.recal_file = inRecalFile this.out = outBam + this.no_pg_tag = testMode this.isIntermediate = false this.analysisName = queueLogDir + outBam + ".recalibration" this.jobName = queueLogDir + outBam + ".recalibration" diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala index b0ac5a6c5..50aa66367 100644 --- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala +++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala @@ -38,8 +38,9 @@ class PacbioProcessingPipelineTest { " -R " + BaseTest.testDir + "exampleFASTA.fasta", " -i " + BaseTest.testDir + "exampleBAM.bam", " -blasr ", + " -test ", " -D " + BaseTest.testDir + "exampleDBSNP.vcf").mkString - spec.fileMD5s += testOut -> "3a23c96063743ddbc35897331433e205" + spec.fileMD5s += testOut -> "f0adce660b55cb91d5f987f9a145471e" PipelineTest.executeTest(spec) } } From 5cc1e72fdbb179a54d295f835b52bf11399edc59 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 12 Dec 2011 18:41:04 -0500 Subject: [PATCH 43/44] Parallelized SelectVariants * can now use -nt with SelectVariants for significant speedup in large files * added parallelization integration tests for SelectVariants --- .../walkers/variantutils/SelectVariants.java | 8 ++++++- .../SelectVariantsIntegrationTest.java | 22 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index fc01dae9f..d20fb54aa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.samples.Sample; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -179,7 +180,7 @@ import java.util.*; * * */ -public class SelectVariants extends RodWalker { +public class SelectVariants extends RodWalker implements TreeReducible { @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); /** @@ -609,6 +610,11 @@ public class SelectVariants extends RodWalker { @Override public Integer reduce(Integer value, Integer sum) { return value + sum; } + @Override + public Integer treeReduce(Integer lhs, Integer rhs) { + return lhs + rhs; + } + public void onTraversalDone(Integer result) { logger.info(result + " records processed."); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 6e994be3a..72a07bd0e 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -115,4 +115,26 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testUsingDbsnpName--" + testFile, spec); } + + @Test + public void testParallelization() { + String testfile = validationDataLocation + "test.filtered.maf_annotated.vcf"; + String samplesFile = validationDataLocation + "SelectVariants.samples.txt"; + WalkerTestSpec spec; + + spec = new WalkerTestSpec( + baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"), + 1, + Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") + ); + executeTest("testParallelization (2 threads)--" + testfile, spec); + + spec = new WalkerTestSpec( + baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"), + 1, + Arrays.asList("d18516c1963802e92cb9e425c0b75fd6") + ); + + executeTest("testParallelization (4 threads)--" + testfile, spec); + } } From e47a113c9f18bc75564091554517a297ff9e2ca1 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Mon, 12 Dec 2011 23:02:45 -0500 Subject: [PATCH 44/44] Enabled multi-allelic SNP discovery in the UG. Needs loads of testing so do not use yet. While working in the UG engine, I removed the extraneous and unnecessary MultiallelicGenotypeLikelihoods class: now a VariantContext with PL-annotated Genotypes is passed around instead. Integration tests pass so it must all work, right? --- .../BiallelicGenotypeLikelihoods.java | 94 --------- .../walkers/genotyper/DiploidGenotype.java | 21 +- .../GenotypeLikelihoodsCalculationModel.java | 19 +- ...elGenotypeLikelihoodsCalculationModel.java | 60 ++++-- .../MultiallelicGenotypeLikelihoods.java | 52 ----- ...NPGenotypeLikelihoodsCalculationModel.java | 182 ++++++++++++------ .../genotyper/UnifiedGenotyperEngine.java | 79 +------- 7 files changed, 183 insertions(+), 324 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java deleted file mode 100644 index fbd9c1dbf..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BiallelicGenotypeLikelihoods.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.broadinstitute.sting.utils.variantcontext.Allele; - -public class BiallelicGenotypeLikelihoods { - - private String sample; - private double[] GLs; - private Allele A, B; - private int depth; - - /** - * Create a new object for sample with given alleles and genotype likelihoods - * - * @param sample sample name - * @param A allele A - * @param B allele B - * @param log10AALikelihoods AA likelihoods - * @param log10ABLikelihoods AB likelihoods - * @param log10BBLikelihoods BB likelihoods - * @param depth the read depth used in creating the likelihoods - */ - public BiallelicGenotypeLikelihoods(String sample, - Allele A, - Allele B, - double log10AALikelihoods, - double log10ABLikelihoods, - double log10BBLikelihoods, - int depth) { - this.sample = sample; - this.A = A; - this.B = B; - this.GLs = new double[]{log10AALikelihoods, log10ABLikelihoods, log10BBLikelihoods}; - this.depth = depth; - } - - public String getSample() { - return sample; - } - - public double getAALikelihoods() { - return GLs[0]; - } - - public double getABLikelihoods() { - return GLs[1]; - } - - public double getBBLikelihoods() { - return GLs[2]; - } - - public double[] getLikelihoods() { - return GLs; - } - - public Allele getAlleleA() { - return A; - } - - public Allele getAlleleB() { - return B; - } - - public int getDepth() { - return depth; - } -} - diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java index 106bb1982..09936c112 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java @@ -27,13 +27,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.BaseUtils; -/** - * Created by IntelliJ IDEA. - * User: depristo - * Date: Aug 4, 2009 - * Time: 6:46:09 PM - * To change this template use File | Settings | File Templates. - */ public enum DiploidGenotype { AA ('A', 'A'), AC ('A', 'C'), @@ -110,6 +103,20 @@ public enum DiploidGenotype { return conversionMatrix[index1][index2]; } + /** + * create a diploid genotype, given 2 base indexes which may not necessarily be ordered correctly + * @param baseIndex1 base1 + * @param baseIndex2 base2 + * @return the diploid genotype + */ + public static DiploidGenotype createDiploidGenotype(int baseIndex1, int baseIndex2) { + if ( baseIndex1 == -1 ) + throw new IllegalArgumentException(baseIndex1 + " does not represent a valid base character"); + if ( baseIndex2 == -1 ) + throw new IllegalArgumentException(baseIndex2 + " does not represent a valid base character"); + return conversionMatrix[baseIndex1][baseIndex2]; + } + private static final DiploidGenotype[][] conversionMatrix = { { DiploidGenotype.AA, DiploidGenotype.AC, DiploidGenotype.AG, DiploidGenotype.AT }, { DiploidGenotype.AC, DiploidGenotype.CC, DiploidGenotype.CG, DiploidGenotype.CT }, diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java index 74c55dbfe..b30a25414 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java @@ -35,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.Map; @@ -79,19 +80,17 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable { * @param contexts stratified alignment contexts * @param contextType stratified context type * @param priors priors to use for GLs - * @param GLs hash of sample->GL to fill in * @param alternateAlleleToUse the alternate allele to use, null if not set * @param useBAQedPileup should we use the BAQed pileup or the raw one? - * @return genotype likelihoods per sample for AA, AB, BB + * @return variant context where genotypes are no-called but with GLs */ - public abstract Allele getLikelihoods(RefMetaDataTracker tracker, - ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenotypePriors priors, - Map GLs, - Allele alternateAlleleToUse, - boolean useBAQedPileup); + public abstract VariantContext getLikelihoods(RefMetaDataTracker tracker, + ReferenceContext ref, + Map contexts, + AlignmentContextUtils.ReadOrientation contextType, + GenotypePriors priors, + Allele alternateAlleleToUse, + boolean useBAQedPileup); protected int getFilteredDepth(ReadBackedPileup pileup) { int count = 0; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java index 14d647b6d..653a6f6e7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Haplotype; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -41,8 +42,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.sam.ReadUtils; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; import java.util.*; @@ -243,7 +243,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood // get deletion length int dLen = Integer.valueOf(bestAltAllele.substring(1)); // get ref bases of accurate deletion - int startIdxInReference = (int)(1+loc.getStart()-ref.getWindow().getStart()); + int startIdxInReference = 1+loc.getStart()-ref.getWindow().getStart(); //System.out.println(new String(ref.getBases())); byte[] refBases = Arrays.copyOfRange(ref.getBases(),startIdxInReference,startIdxInReference+dLen); @@ -270,19 +270,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood private final static EnumSet allowableTypes = EnumSet.of(VariantContext.Type.INDEL, VariantContext.Type.MIXED); - public Allele getLikelihoods(RefMetaDataTracker tracker, - ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenotypePriors priors, - Map GLs, - Allele alternateAlleleToUse, - boolean useBAQedPileup) { + public VariantContext getLikelihoods(RefMetaDataTracker tracker, + ReferenceContext ref, + Map contexts, + AlignmentContextUtils.ReadOrientation contextType, + GenotypePriors priors, + Allele alternateAlleleToUse, + boolean useBAQedPileup) { if ( tracker == null ) return null; - GenomeLoc loc = ref.getLocus(); Allele refAllele, altAllele; VariantContext vc = null; @@ -368,10 +366,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(), ref, hsize, numPrefBases); + // start making the VariantContext + final int endLoc = calculateEndPos(alleleList, refAllele, loc); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleleList).referenceBaseForIndel(ref.getBase()); + + // create the genotypes; no-call everyone for now + GenotypesContext genotypes = GenotypesContext.create(); + final List noCall = new ArrayList(); + noCall.add(Allele.NO_CALL); + // For each sample, get genotype likelihoods based on pileup // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them. - // initialize the GenotypeLikelihoods - GLs.clear(); for ( Map.Entry sample : contexts.entrySet() ) { AlignmentContext context = AlignmentContextUtils.stratify(sample.getValue(), contextType); @@ -384,11 +389,12 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood if (pileup != null ) { final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap()); + GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods); - GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), - alleleList, - genotypeLikelihoods, - getFilteredDepth(pileup))); + HashMap attributes = new HashMap(); + attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup)); + attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); + genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); if (DEBUG) { System.out.format("Sample:%s Alleles:%s GL:",sample.getKey(), alleleList.toString()); @@ -399,9 +405,25 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood } } - return refAllele; + return builder.genotypes(genotypes).make(); } + private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { + // for indels, stop location is one more than ref allele length + boolean hasNullAltAllele = false; + for ( Allele a : alleles ) { + if ( a.isNull() ) { + hasNullAltAllele = true; + break; + } + } + + int endLoc = loc.getStart() + refAllele.length(); + if( !hasNullAltAllele ) + endLoc--; + + return endLoc; + } public static HashMap> getIndelLikelihoodMap() { return indelLikelihoodMap.get(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java deleted file mode 100755 index 4f378b24a..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/MultiallelicGenotypeLikelihoods.java +++ /dev/null @@ -1,52 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.variantcontext.Allele; - -import java.util.ArrayList; -import java.util.List; - -/** - * Created by IntelliJ IDEA. - * User: delangel - * Date: 6/1/11 - * Time: 10:38 AM - * To change this template use File | Settings | File Templates. - */ -public class MultiallelicGenotypeLikelihoods { - private String sample; - private double[] GLs; - private List alleleList; - private int depth; - - public MultiallelicGenotypeLikelihoods(String sample, - List A, - double[] log10Likelihoods, int depth) { - /* Check for consistency between likelihood vector and number of alleles */ - int numAlleles = A.size(); - if (log10Likelihoods.length != numAlleles*(numAlleles+1)/2) - throw new StingException(("BUG: Incorrect length of GL vector when creating MultiallelicGenotypeLikelihoods object!")); - - this.sample = sample; - this.alleleList = A; - this.GLs = log10Likelihoods; - this.depth = depth; - } - - public String getSample() { - return sample; - } - - public double[] getLikelihoods() { - return GLs; - } - - public List getAlleles() { - return alleleList; - } - - public int getDepth() { - return depth; - } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index 9bdc754e9..4087443f8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -31,107 +31,147 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.baq.BAQ; +import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; -import org.broadinstitute.sting.utils.variantcontext.Allele; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; +import org.broadinstitute.sting.utils.variantcontext.*; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; +import java.util.*; public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsCalculationModel { - // the alternate allele with the largest sum of quality scores - protected Byte bestAlternateAllele = null; + private static final int MIN_QUAL_SUM_FOR_ALT_ALLELE = 50; + + private boolean ALLOW_MULTIPLE_ALLELES; private final boolean useAlleleFromVCF; protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); + ALLOW_MULTIPLE_ALLELES = UAC.MULTI_ALLELIC; useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; } - public Allele getLikelihoods(RefMetaDataTracker tracker, - ReferenceContext ref, - Map contexts, - AlignmentContextUtils.ReadOrientation contextType, - GenotypePriors priors, - Map GLs, - Allele alternateAlleleToUse, - boolean useBAQedPileup) { + public VariantContext getLikelihoods(RefMetaDataTracker tracker, + ReferenceContext ref, + Map contexts, + AlignmentContextUtils.ReadOrientation contextType, + GenotypePriors priors, + Allele alternateAlleleToUse, + boolean useBAQedPileup) { if ( !(priors instanceof DiploidSNPGenotypePriors) ) throw new StingException("Only diploid-based SNP priors are supported in the SNP GL model"); - byte refBase = ref.getBase(); - Allele refAllele = Allele.create(refBase, true); + final boolean[] basesToUse = new boolean[4]; + final byte refBase = ref.getBase(); + final int indexOfRefBase = BaseUtils.simpleBaseToBaseIndex(refBase); - // find the alternate allele with the largest sum of quality scores + // start making the VariantContext + final GenomeLoc loc = ref.getLocus(); + final List alleles = new ArrayList(); + alleles.add(Allele.create(refBase, true)); + final VariantContextBuilder builder = new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), loc.getStop(), alleles); + + // find the alternate allele(s) that we should be using if ( alternateAlleleToUse != null ) { - bestAlternateAllele = alternateAlleleToUse.getBases()[0]; + basesToUse[BaseUtils.simpleBaseToBaseIndex(alternateAlleleToUse.getBases()[0])] = true; } else if ( useAlleleFromVCF ) { - VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles); + final VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles); - // ignore places where we don't have a variant - if ( vc == null ) + // ignore places where we don't have a SNP + if ( vc == null || !vc.isSNP() ) return null; - if ( !vc.isBiallelic() ) { - // for multi-allelic sites go back to the reads and find the most likely alternate allele - initializeBestAlternateAllele(refBase, contexts, useBAQedPileup); - } else { - bestAlternateAllele = vc.getAlternateAllele(0).getBases()[0]; - } + for ( Allele allele : vc.getAlternateAlleles() ) + basesToUse[BaseUtils.simpleBaseToBaseIndex(allele.getBases()[0])] = true; } else { - initializeBestAlternateAllele(refBase, contexts, useBAQedPileup); + + determineAlternateAlleles(basesToUse, refBase, contexts, useBAQedPileup); + + // how many alternate alleles are we using? + int alleleCounter = countSetBits(basesToUse); + + // if there are no non-ref alleles... + if ( alleleCounter == 0 ) { + // if we only want variants, then we don't need to calculate genotype likelihoods + if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) + return builder.make(); + + // otherwise, choose any alternate allele (it doesn't really matter) + basesToUse[indexOfRefBase == 0 ? 1 : 0] = true; + } } - // if there are no non-ref bases... - if ( bestAlternateAllele == null ) { - // if we only want variants, then we don't need to calculate genotype likelihoods - if ( UAC.OutputMode == UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY ) - return refAllele; - - // otherwise, choose any alternate allele (it doesn't really matter) - bestAlternateAllele = (byte)(refBase != 'A' ? 'A' : 'C'); + // create the alternate alleles and the allele ordering (the ordering is crucial for the GLs) + final int numAltAlleles = countSetBits(basesToUse); + final int[] alleleOrdering = new int[numAltAlleles + 1]; + alleleOrdering[0] = indexOfRefBase; + int alleleOrderingIndex = 1; + int numLikelihoods = 1; + for ( int i = 0; i < 4; i++ ) { + if ( i != indexOfRefBase && basesToUse[i] ) { + alleles.add(Allele.create(BaseUtils.baseIndexToSimpleBase(i), false)); + alleleOrdering[alleleOrderingIndex++] = i; + numLikelihoods += alleleOrderingIndex; + } } + builder.alleles(alleles); - Allele altAllele = Allele.create(bestAlternateAllele, false); + // create the genotypes; no-call everyone for now + GenotypesContext genotypes = GenotypesContext.create(); + final List noCall = new ArrayList(); + noCall.add(Allele.NO_CALL); for ( Map.Entry sample : contexts.entrySet() ) { ReadBackedPileup pileup = AlignmentContextUtils.stratify(sample.getValue(), contextType).getBasePileup(); - if( useBAQedPileup ) { pileup = createBAQedPileup( pileup ); } + if ( useBAQedPileup ) + pileup = createBAQedPileup( pileup ); // create the GenotypeLikelihoods object - DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error); - int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE); + final DiploidSNPGenotypeLikelihoods GL = new DiploidSNPGenotypeLikelihoods((DiploidSNPGenotypePriors)priors, UAC.PCR_error); + final int nGoodBases = GL.add(pileup, true, true, UAC.MIN_BASE_QUALTY_SCORE); if ( nGoodBases == 0 ) continue; - double[] likelihoods = GL.getLikelihoods(); + final double[] allLikelihoods = GL.getLikelihoods(); + final double[] myLikelihoods = new double[numLikelihoods]; - DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(refBase); - DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(refBase, bestAlternateAllele); - DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(bestAlternateAllele); - ArrayList aList = new ArrayList(); - aList.add(refAllele); - aList.add(altAllele); - double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ; + int myLikelihoodsIndex = 0; + for ( int i = 0; i <= numAltAlleles; i++ ) { + for ( int j = i; j <= numAltAlleles; j++ ) { + myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()]; + } + } // normalize in log space so that max element is zero. - GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(), - aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup))); + GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true)); + + HashMap attributes = new HashMap(); + attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(pileup)); + attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); + genotypes.add(new Genotype(sample.getKey(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); } - return refAllele; + return builder.genotypes(genotypes).make(); } - protected void initializeBestAlternateAllele(byte ref, Map contexts, boolean useBAQedPileup) { + private int countSetBits(boolean[] array) { + int counter = 0; + for ( int i = 0; i < array.length; i++ ) { + if ( array[i] ) + counter++; + } + return counter; + } + + // fills in the allelesToUse array + protected void determineAlternateAlleles(boolean[] allelesToUse, byte ref, Map contexts, boolean useBAQedPileup) { int[] qualCounts = new int[4]; for ( Map.Entry sample : contexts.entrySet() ) { @@ -139,7 +179,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC ReadBackedPileup pileup = useBAQedPileup ? createBAQedPileup( sample.getValue().getBasePileup() ) : sample.getValue().getBasePileup(); for ( PileupElement p : pileup ) { // ignore deletions - if ( p.isDeletion() || (! p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE )) + if ( p.isDeletion() || (!p.isReducedRead() && p.getQual() < UAC.MIN_BASE_QUALTY_SCORE) ) continue; final int index = BaseUtils.simpleBaseToBaseIndex(p.getBase()); @@ -149,17 +189,31 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC } } - // set the non-ref base with maximum quality score sum - int maxCount = 0; - bestAlternateAllele = null; - for ( byte altAllele : BaseUtils.BASES ) { - if ( altAllele == ref ) - continue; - int index = BaseUtils.simpleBaseToBaseIndex(altAllele); - if ( qualCounts[index] > maxCount ) { - maxCount = qualCounts[index]; - bestAlternateAllele = altAllele; + if ( ALLOW_MULTIPLE_ALLELES ) { + for ( byte altAllele : BaseUtils.BASES ) { + if ( altAllele == ref ) + continue; + int index = BaseUtils.simpleBaseToBaseIndex(altAllele); + if ( qualCounts[index] >= MIN_QUAL_SUM_FOR_ALT_ALLELE ) { + allelesToUse[index] = true; + } } + } else { + // set the non-ref base which has the maximum quality score sum + int maxCount = 0; + int indexOfMax = 0; + for ( byte altAllele : BaseUtils.BASES ) { + if ( altAllele == ref ) + continue; + int index = BaseUtils.simpleBaseToBaseIndex(altAllele); + if ( qualCounts[index] > maxCount ) { + maxCount = qualCounts[index]; + indexOfMax = index; + } + } + + if ( maxCount > 0 ) + allelesToUse[indexOfMax] = true; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3a86743de..21aaeffba 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -219,14 +219,7 @@ public class UnifiedGenotyperEngine { glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC)); } - Map GLs = new HashMap(); - - Allele refAllele = glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), GLs, alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine); - - if ( refAllele != null ) - return createVariantContextFromLikelihoods(refContext, refAllele, GLs); - else - return null; + return glcm.get().get(model).getLikelihoods(tracker, refContext, stratifiedContexts, type, getGenotypePriors(model), alternateAlleleToUse, useBAQedPileup && BAQEnabledOnCMDLine); } private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, AlignmentContext rawContext) { @@ -261,40 +254,6 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vc, false); } - private VariantContext createVariantContextFromLikelihoods(ReferenceContext refContext, Allele refAllele, Map GLs) { - // no-call everyone for now - List noCall = new ArrayList(); - noCall.add(Allele.NO_CALL); - - Set alleles = new LinkedHashSet(); - alleles.add(refAllele); - boolean addedAltAlleles = false; - - GenotypesContext genotypes = GenotypesContext.create(); - for ( MultiallelicGenotypeLikelihoods GL : GLs.values() ) { - if ( !addedAltAlleles ) { - addedAltAlleles = true; - // ordering important to maintain consistency - for (Allele a: GL.getAlleles()) { - alleles.add(a); - } - } - - HashMap attributes = new HashMap(); - //GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods()); - GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(GL.getLikelihoods()); - attributes.put(VCFConstants.DEPTH_KEY, GL.getDepth()); - attributes.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, likelihoods); - - genotypes.add(new Genotype(GL.getSample(), noCall, Genotype.NO_LOG10_PERROR, null, attributes, false)); - } - - GenomeLoc loc = refContext.getLocus(); - int endLoc = calculateEndPos(alleles, refAllele, loc); - - return new VariantContextBuilder("UG_call", loc.getContig(), loc.getStart(), endLoc, alleles).genotypes(genotypes).referenceBaseForIndel(refContext.getBase()).make(); - } - public VariantCallContext calculateGenotypes(VariantContext vc, final GenotypeLikelihoodsCalculationModel.Model model) { return calculateGenotypes(null, null, null, null, vc, model); } @@ -494,42 +453,6 @@ public class UnifiedGenotyperEngine { return new VariantCallContext(vcCall, confidentlyCalled(phredScaledConfidence, PofF)); } - private int calculateEndPos(Collection alleles, Allele refAllele, GenomeLoc loc) { - // TODO - temp fix until we can deal with extended events properly - // for indels, stop location is one more than ref allele length - boolean isSNP = true, hasNullAltAllele = false; - for (Allele a : alleles){ - if (a.length() != 1) { - isSNP = false; - break; - } - } - for (Allele a : alleles){ - if (a.isNull()) { - hasNullAltAllele = true; - break; - } - } - // standard deletion: ref allele length = del length. endLoc = startLoc + refAllele.length(), alt allele = null - // standard insertion: ref allele length = 0, endLos = startLoc - // mixed: want end loc = start Loc for case {A*,AT,T} but say {ATG*,A,T} : want then end loc = start loc + refAllele.length - // So, in general, end loc = startLoc + refAllele.length, except in complex substitutions where it's one less - // - // todo - this is unnecessarily complicated and is so just because of Tribble's arbitrary vc conventions, should be cleaner/simpler, - // the whole vc processing infrastructure seems too brittle and riddled with special case handling - - - int endLoc = loc.getStart(); - if ( !isSNP) { - endLoc += refAllele.length(); - if(!hasNullAltAllele) - endLoc--; - - } - - return endLoc; - } - private Map getFilteredAndStratifiedContexts(UnifiedArgumentCollection UAC, ReferenceContext refContext, AlignmentContext rawContext, final GenotypeLikelihoodsCalculationModel.Model model) { Map stratifiedContexts = null;