From 3ba918aff1f7cfa05fe2c87852cd164d9a1875cf Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Tue, 17 Jan 2012 11:05:42 -0500 Subject: [PATCH 01/13] Error message cleanup in BAM indexing code. --- .../sting/gatk/datasources/reads/GATKBAMIndex.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index dc703ff23..244438a59 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -88,7 +88,7 @@ public class GATKBAMIndex { seek(0); final byte[] buffer = readBytes(4); if (!Arrays.equals(buffer, BAM_INDEX_MAGIC)) { - throw new RuntimeException("Invalid file header in BAM index " + mFile + + throw new ReviewedStingException("Invalid file header in BAM index " + mFile + ": " + new String(buffer)); } @@ -112,7 +112,7 @@ public class GATKBAMIndex { openIndexFile(); if (referenceSequence >= sequenceCount) - throw new ReviewedStingException("Invalid sequence number " + referenceSequence); + throw new ReviewedStingException("Invalid sequence number " + referenceSequence + " in index file " + mFile); skipToSequence(referenceSequence); @@ -183,12 +183,12 @@ public class GATKBAMIndex { public int getLevelForBin(final Bin bin) { GATKBin gatkBin = new GATKBin(bin); if(gatkBin.getBinNumber() >= MAX_BINS) - throw new SAMException("Tried to get level for invalid bin."); + throw new ReviewedStingException("Tried to get level for invalid bin in index file " + mFile); for(int i = getNumIndexLevels()-1; i >= 0; i--) { if(gatkBin.getBinNumber() >= LEVEL_STARTS[i]) return i; } - throw new SAMException("Unable to find correct bin for bin "+bin); + throw new ReviewedStingException("Unable to find correct bin for bin " + bin + " in index file " + mFile); } /** @@ -352,7 +352,7 @@ public class GATKBAMIndex { fileChannel.read(buffer); } catch(IOException ex) { - throw new ReviewedStingException("Index: unable to read bytes from index file."); + throw new ReviewedStingException("Index: unable to read bytes from index file " + mFile); } } @@ -379,7 +379,7 @@ public class GATKBAMIndex { fileChannel.position(fileChannel.position() + count); } catch(IOException ex) { - throw new ReviewedStingException("Index: unable to reposition file channel."); + throw new ReviewedStingException("Index: unable to reposition file channel of index file " + mFile); } } @@ -388,7 +388,7 @@ public class GATKBAMIndex { fileChannel.position(position); } catch(IOException ex) { - throw new ReviewedStingException("Index: unable to reposition of file channel."); + throw new ReviewedStingException("Index: unable to reposition of file channel of index file " + mFile); } } From cde224746f1c4ce58862ef9891582eca671f2d71 Mon Sep 17 00:00:00 2001 From: Christopher Hartl Date: Tue, 17 Jan 2012 13:51:05 -0500 Subject: [PATCH 02/13] Bait Redesign supports baits that overlap, by picking only the start of intervals. CalibrateGenotypeLikelihoods supports using an external VCF as input for genotype likelihoods. Currently can be a per-sample VCF, but has un-implemented methods for allowing a read-group VCF to be used. Removed the old constrained genotyping code from UGE -- the trellis calculated is exactly the same as that done in the MLE AC estimate; so we should just re-use that one. --- .../genotyper/UnifiedGenotyperEngine.java | 167 ------------------ 1 file changed, 167 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 5d73e8d28..ee5aed3e5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -858,171 +858,4 @@ public class UnifiedGenotyperEngine { return calls; } - - /** - * @param vc variant context with genotype likelihoods - * @param allelesToUse bit vector describing which alternate alleles from the vc are okay to use - * @param exactAC integer array describing the AC from the exact model for the corresponding alleles - * @return genotypes - */ - public static GenotypesContext constrainedAssignGenotypes(VariantContext vc, boolean[] allelesToUse, int[] exactAC ) { - - final GenotypesContext GLs = vc.getGenotypes(); - - // samples - final List sampleIndices = GLs.getSampleNamesOrderedByName(); - - // we need to determine which of the alternate alleles (and hence the likelihoods) to use and carry forward - final int numOriginalAltAlleles = allelesToUse.length; - final List newAlleles = new ArrayList(numOriginalAltAlleles+1); - newAlleles.add(vc.getReference()); - final HashMap alleleIndexMap = new HashMap(); // need this for skipping dimensions - int[] alleleCount = new int[exactAC.length]; - for ( int i = 0; i < numOriginalAltAlleles; i++ ) { - if ( allelesToUse[i] ) { - newAlleles.add(vc.getAlternateAllele(i)); - alleleIndexMap.put(vc.getAlternateAllele(i),i); - alleleCount[i] = exactAC[i]; - } else { - alleleCount[i] = 0; - } - } - final List newAltAlleles = newAlleles.subList(1,newAlleles.size()); - final int numNewAltAlleles = newAltAlleles.size(); - ArrayList likelihoodIndexesToUse = null; - - // an optimization: if we are supposed to use all (or none in the case of a ref call) of the alleles, - // then we can keep the PLs as is; otherwise, we determine which ones to keep - final int[][] PLcache; - if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { - likelihoodIndexesToUse = new ArrayList(30); - PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles]; - - for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) { - int[] alleles = PLcache[PLindex]; - // consider this entry only if both of the alleles are good - if ( (alleles[0] == 0 || allelesToUse[alleles[0] - 1]) && (alleles[1] == 0 || allelesToUse[alleles[1] - 1]) ) - likelihoodIndexesToUse.add(PLindex); - } - } else { - PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles]; - } - - // set up the trellis dimensions - // SAMPLE x alt 1 x alt 2 x alt 3 - // todo -- check that exactAC has alt counts at [1],[2],[3] (and not [0],[1],[2]) - double[][][][] transitionTrellis = new double[sampleIndices.size()+1][exactAC[1]][exactAC[2]][exactAC[3]]; - // N x AC1 x AC2 x AC3; worst performance in multi-allelic where all alleles are moderate frequency - // capped at the MLE ACs* - // todo -- there's an optimization: not all states in the rectangular matrix will be reached, in fact - // todo -- for tT[0] we only care about tT[0][0][0][0], and for tT[1], only combinations of 0,1,2. - int idx = 1; // index of which sample we're on - int prevMaxState = 0; // the maximum state (e.g. AC) reached by the previous sample. Symmetric. (AC capping handled by logic in loop) - // iterate over each sample - for ( String sample : sampleIndices ) { - // push the likelihoods into the next possible states, that is to say - // L[state] = L[prev state] + L[genotype getting into state] - // iterate over each previous state, by dimension - // and contribute the likelihoods for transitions to this state - double[][][] prevState = transitionTrellis[idx-1]; - double[][][] thisState = transitionTrellis[idx]; - Genotype genotype = GLs.get(sample); - if ( genotype.isNoCall() || genotype.isFiltered() ) { - thisState = prevState.clone(); - } else { - double[] likelihoods = genotype.getLikelihoods().getAsVector(); - int dim1min = Math.max(0, alleleCount[0]-2*(sampleIndices.size()-idx+1)); - int dim1max = Math.min(prevMaxState,alleleCount[0]); - int dim2min = Math.max(0,alleleCount[1]-2*(sampleIndices.size()-idx+1)); - int dim2max = Math.min(prevMaxState,alleleCount[1]); - int dim3min = Math.max(0,alleleCount[2]-2*(sampleIndices.size()-idx+1)); - int dim3max = Math.min(prevMaxState,alleleCount[2]); - // cue annoying nested for loop - for ( int a1 = dim1min ; a1 <= dim1max; a1++ ) { - for ( int a2 = dim2min; a2 <= dim2max; a2++ ) { - for ( int a3 = dim3min; a3 <= dim3max; a3++ ) { - double base = prevState[a1][a2][a3]; - for ( int likIdx : likelihoodIndexesToUse ) { - int[] offsets = calculateOffsets(PLcache[likIdx]); - thisState[a1+offsets[1]][a2+offsets[2]][a3+offsets[3]] = base + likelihoods[likIdx]; - } - } - } - } - prevMaxState += 2; - } - idx++; - } - - // after all that pain, we have a fully calculated trellis. Now just march backwards from the EAC state and - // assign genotypes along the greedy path - - GenotypesContext calls = GenotypesContext.create(sampleIndices.size()); - int[] state = alleleCount; - for ( String sample : Utils.reverse(sampleIndices) ) { - --idx; - // the next state will be the maximum achievable state - Genotype g = GLs.get(sample); - if ( g.isNoCall() || ! g.hasLikelihoods() ) { - calls.add(g); - continue; - } - - // subset to the new likelihoods. These are not used except for subsetting in the context iself. - // i.e. they are not a part of the calculation. - final double[] originalLikelihoods = GLs.get(sample).getLikelihoods().getAsVector(); - double[] newLikelihoods; - if ( likelihoodIndexesToUse == null ) { - newLikelihoods = originalLikelihoods; - } else { - newLikelihoods = new double[likelihoodIndexesToUse.size()]; - int newIndex = 0; - for ( int oldIndex : likelihoodIndexesToUse ) - newLikelihoods[newIndex++] = originalLikelihoods[oldIndex]; - - // might need to re-normalize - newLikelihoods = MathUtils.normalizeFromLog10(newLikelihoods, false, true); - } - - // todo -- alter this. For ease of programming, likelihood indeces are - // todo -- used to iterate over achievable states. - double max = Double.NEGATIVE_INFINITY; - int[] bestState = null; - int[] bestAlleles = null; - int bestLikIdx = -1; - for ( int likIdx : likelihoodIndexesToUse ) { - int[] offsets = calculateOffsets(PLcache[likIdx]); - double val = transitionTrellis[idx-1][state[0]-offsets[0]][state[1]-offsets[1]][state[2]-offsets[2]]; - if ( val > max ) { - max = val; - bestState = new int[] { state[0]-offsets[0],state[1]-offsets[1],state[2]-offsets[2]}; - bestAlleles = PLcache[likIdx]; - bestLikIdx = likIdx; - } - } - state = bestState; - List gtAlleles = new ArrayList(2); - gtAlleles.add(newAlleles.get(bestAlleles[0])); - gtAlleles.add(newAlleles.get(bestAlleles[1])); - - final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(bestLikIdx, newLikelihoods); - Map attrs = new HashMap(g.getAttributes()); - if ( numNewAltAlleles == 0 ) - attrs.remove(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY); - else - attrs.put(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, GenotypeLikelihoods.fromLog10Likelihoods(newLikelihoods)); - calls.add(new Genotype(sample, gtAlleles, qual, null, attrs, false)); - - } - return calls; - } - - private static int[] calculateOffsets(int[] alleleIndeces) { - int[] offsets = new int[4]; - for ( int i = 0; i < alleleIndeces.length; i++ ) { - offsets[alleleIndeces[i]]++; - } - - return offsets; - } } From 41d70abe4e852de5cf3da11b80c8b5a0f509bbb3 Mon Sep 17 00:00:00 2001 From: Matt Hanna Date: Tue, 17 Jan 2012 14:47:53 -0500 Subject: [PATCH 04/13] At chartl's request, add the bwa aln -N and bwa aln -m parameters to the bindings. --- public/c/bwa/build_linux.sh | 2 +- public/c/bwa/bwa_gateway.cpp | 2 ++ public/c/bwa/bwa_gateway.h | 2 ++ ...tute_sting_alignment_bwa_c_BWACAligner.cpp | 36 +++++++++++++++++++ .../sting/alignment/bwa/BWAConfiguration.java | 10 ++++++ 5 files changed, 51 insertions(+), 1 deletion(-) diff --git a/public/c/bwa/build_linux.sh b/public/c/bwa/build_linux.sh index b3631a28d..8683bb377 100755 --- a/public/c/bwa/build_linux.sh +++ b/public/c/bwa/build_linux.sh @@ -1,5 +1,5 @@ #!/bin/sh -export BWA_HOME="/humgen/gsa-scr1/hanna/src/bwa-trunk/bwa" +export BWA_HOME="/humgen/gsa-scr1/hanna/src/bio-bwa/bwa" export JAVA_INCLUDE="/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include -I/broad/tools/Linux/x86_64/pkgs/jdk_1.6.0_12/include/linux" export TARGET_LIB="libbwa.so" export EXTRA_LIBS="-lc -lz -lstdc++ -lpthread" diff --git a/public/c/bwa/bwa_gateway.cpp b/public/c/bwa/bwa_gateway.cpp index 00f5aa5bc..088ee43bf 100644 --- a/public/c/bwa/bwa_gateway.cpp +++ b/public/c/bwa/bwa_gateway.cpp @@ -233,6 +233,8 @@ void BWA::set_disallow_indel_within_range(int indel_range) { options.indel_end_s void BWA::set_mismatch_penalty(int penalty) { options.s_mm = penalty; } void BWA::set_gap_open_penalty(int penalty) { options.s_gapo = penalty; } void BWA::set_gap_extension_penalty(int penalty) { options.s_gape = penalty; } +void BWA::set_mode_nonstop() { options.mode |= BWA_MODE_NONSTOP; options.max_top2 = 0x7fffffff; } +void BWA::set_max_entries_in_queue(int max_entries) { options.max_entries = max_entries; } /** * Create a sequence with a set of reasonable initial defaults. diff --git a/public/c/bwa/bwa_gateway.h b/public/c/bwa/bwa_gateway.h index 2d26ec650..62756ec2a 100644 --- a/public/c/bwa/bwa_gateway.h +++ b/public/c/bwa/bwa_gateway.h @@ -60,6 +60,8 @@ class BWA { void set_mismatch_penalty(int penalty); void set_gap_open_penalty(int penalty); void set_gap_extension_penalty(int penalty); + void set_mode_nonstop(); + void set_max_entries_in_queue(int max_entries); // Perform the alignment Alignment* generate_single_alignment(const char* bases, diff --git a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp index 1ccbef0d4..90d70d4a1 100644 --- a/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp +++ b/public/c/bwa/org_broadinstitute_sting_alignment_bwa_c_BWACAligner.cpp @@ -8,11 +8,13 @@ #include "bwa_gateway.h" #include "org_broadinstitute_sting_alignment_bwa_c_BWACAligner.h" +typedef void (BWA::*boolean_setter)(); typedef void (BWA::*int_setter)(int value); typedef void (BWA::*float_setter)(float value); static jobject convert_to_java_alignment(JNIEnv* env, const jbyte* read_bases, const jsize read_length, const Alignment& alignment); static jstring get_configuration_file(JNIEnv* env, jobject configuration, const char* field_name); +static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter); static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter); static void set_float_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, float_setter setter); static void throw_config_value_exception(JNIEnv* env, const char* field_name, const char* message); @@ -100,6 +102,10 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner if(env->ExceptionCheck()) return; set_int_configuration_param(env, configuration, "gapExtensionPenalty", bwa, &BWA::set_gap_extension_penalty); if(env->ExceptionCheck()) return; + set_boolean_configuration_param(env, configuration, "nonStopMode", bwa, &BWA::set_mode_nonstop); + if(env->ExceptionCheck()) return; + set_int_configuration_param(env, configuration, "maxEntriesInQueue", bwa, &BWA::set_max_entries_in_queue); + if(env->ExceptionCheck()) return; } JNIEXPORT jobjectArray JNICALL Java_org_broadinstitute_sting_alignment_bwa_c_BWACAligner_getPaths(JNIEnv *env, jobject instance, jlong java_bwa, jbyteArray java_bases) @@ -357,6 +363,36 @@ static jstring get_configuration_file(JNIEnv* env, jobject configuration, const return path; } +static void set_boolean_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, boolean_setter setter) { + jclass configuration_class = env->GetObjectClass(configuration); + if(configuration_class == NULL) return; + + jfieldID configuration_field = env->GetFieldID(configuration_class, field_name, "Ljava/lang/Boolean;"); + if(configuration_field == NULL) return; + + jobject boxed_value = env->GetObjectField(configuration,configuration_field); + if(env->ExceptionCheck()) return; + + if(boxed_value != NULL) { + jclass boolean_box_class = env->FindClass("java/lang/Boolean"); + if(boolean_box_class == NULL) return; + + jmethodID boolean_extractor = env->GetMethodID(boolean_box_class,"booleanValue", "()Z"); + if(boolean_extractor == NULL) return; + + jboolean value = env->CallBooleanMethod(boxed_value,boolean_extractor); + if(env->ExceptionCheck()) return; + + if(value) + (bwa->*setter)(); + + env->DeleteLocalRef(boolean_box_class); + } + + env->DeleteLocalRef(boxed_value); + env->DeleteLocalRef(configuration_class); +} + static void set_int_configuration_param(JNIEnv* env, jobject configuration, const char* field_name, BWA* bwa, int_setter setter) { jclass configuration_class = env->GetObjectClass(configuration); if(configuration_class == NULL) return; diff --git a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java index 73441cb6a..e453c7f8a 100644 --- a/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java +++ b/public/java/src/org/broadinstitute/sting/alignment/bwa/BWAConfiguration.java @@ -41,4 +41,14 @@ public class BWAConfiguration { * What is the scoring penalty for a gap extension? */ public Integer gapExtensionPenalty = null; + + /** + * Enter bwa's 'non-stop' mode (equivalent to bwa aln -N parameter). + */ + public Boolean nonStopMode = false; + + /** + * Set the max queue size that bwa will use when searching for matches (equivalent to bwa aln -m parameter). + */ + public Integer maxEntriesInQueue = null; } From 75f87db468e27b51d124bf854eea7c7b30284d9f Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Tue, 17 Jan 2012 15:02:45 -0500 Subject: [PATCH 05/13] Replacing Mills file with new gold standard indel set in the resource bundle for release with v1.5 --- .../sting/queue/qscripts/GATKResourcesBundle.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala index 8c9063c29..22ac52453 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala @@ -134,8 +134,8 @@ class GATKResourcesBundle extends QScript { addResource(new Resource("/humgen/1kg/processing/official_release/phase1/ALL.wgs.VQSR_consensus_biallelic.20101123.indels.sites.vcf", "1000G_biallelic.indels", b37, true, false)) - addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Mills_Devine_Indels_2011/ALL.wgs.indels_mills_devine_hg19_leftAligned_collapsed_double_hit.sites.vcf", - "Mills_Devine_2hit.indels", b37, true, true)) + addResource(new Resource("/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/GoldStandardIndel/gold.standard.indel.MillsAnd1000G.b37.vcf", + "Mills_and_1000G_gold_standard.indels", b37, true, true)) // // example call set for wiki tutorial From f2b0575deeb833b808a1aca04028da7733467621 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 16 Jan 2012 14:07:42 -0500 Subject: [PATCH 08/13] Detect unreasonably large allele strings (>2^16) and throw an error -- samtools can emit alleles where the ref is 42M Ns and this caused the GATK (via tribble) to hang in several places. -- Tribble was updated so we actually could read the line properly (rev. to 51 here). -- Still the parsing algorithms in the GATK aren't happy with such a long allele. Instead of optimizing the code around an improper use case I put in a limit of 2^16 bp for any allele, and throw a meaningful exception when encountered. --- .../utils/codecs/vcf/AbstractVCFCodec.java | 16 +++++++++++----- .../{tribble-46.jar => tribble-51.jar} | Bin 301252 -> 304586 bytes .../{tribble-46.xml => tribble-51.xml} | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) rename settings/repository/org.broad/{tribble-46.jar => tribble-51.jar} (87%) rename settings/repository/org.broad/{tribble-46.xml => tribble-51.xml} (51%) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index e44c10f1f..43c878b2f 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -18,6 +18,7 @@ import java.util.zip.GZIPInputStream; public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { + public final static int MAX_EXPLICIT_ALLELE_SIZE = (int)Math.pow(2, 16); protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column @@ -252,7 +253,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { // if we have don't have a header, or we have a header with no genotyping data check that we have eight columns. Otherwise check that we have nine (normal colummns + genotyping data) if (( (header == null || !header.hasGenotypingData()) && nParts != NUM_STANDARD_FIELDS) || - (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) + (header != null && header.hasGenotypingData() && nParts != (NUM_STANDARD_FIELDS + 1)) ) throw new UserException.MalformedVCF("there aren't enough columns for line " + line + " (we expected " + (header == null ? NUM_STANDARD_FIELDS : NUM_STANDARD_FIELDS + 1) + " tokens, and saw " + nParts + " )", lineNo); @@ -518,8 +519,11 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { * @param lineNo the line number for this record */ private static void checkAllele(String allele, boolean isRef, int lineNo) { - if ( allele == null || allele.length() == 0 ) - generateException("Empty alleles are not permitted in VCF records", lineNo); + if ( allele == null || allele.length() == 0 ) + generateException("Empty alleles are not permitted in VCF records", lineNo); + + if ( allele.length() > MAX_EXPLICIT_ALLELE_SIZE ) + generateException(String.format("Allele detected with length %d, exceeding max size %d. Please remove this from the VCF file before continuing", allele.length(), MAX_EXPLICIT_ALLELE_SIZE), lineNo); if ( isSymbolicAllele(allele) ) { if ( isRef ) { @@ -572,12 +576,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { public static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; + final byte ref0 = ref.getBytes()[0]; for ( Allele a : unclippedAlleles ) { if ( a.isSymbolic() ) continue; - if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) { + if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { clipping = false; break; } @@ -589,6 +594,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { protected static int computeReverseClipping(List unclippedAlleles, String ref, int forwardClipping, int lineNo) { int clipping = 0; boolean stillClipping = true; + final byte[] refBytes = ref.getBytes(); while ( stillClipping ) { for ( Allele a : unclippedAlleles ) { @@ -604,7 +610,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { stillClipping = false; else if ( ref.length() == clipping ) generateException("bad alleles encountered", lineNo); - else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] ) + else if ( a.getBases()[a.length()-clipping-1] != refBytes[ref.length()-clipping-1] ) stillClipping = false; } if ( stillClipping ) diff --git a/settings/repository/org.broad/tribble-46.jar b/settings/repository/org.broad/tribble-51.jar similarity index 87% rename from settings/repository/org.broad/tribble-46.jar rename to settings/repository/org.broad/tribble-51.jar index 401fcfc3a92c558e6975e80ed948831447bb6bc2..04121804f22936341a786a08d97a58fc4293f595 100644 GIT binary patch delta 19167 zcmaic34ByV@_)S}c{7ujYeGU!l5l5o5+Fdh5-`Y-0|JN<6#@hZV8RhZ1RR1Kf+E3D zT0Bvs?4lwV$#DF{6WL`w&|Og!5p;JwRsoOoT3r6$di^GupugXLKA(QCUw3tNb#--h zRdpw??sXsgi#uZcI8ApEIf!1Y%PZO%(aZ5X-bX?|E#5k`Hm@LiMqc0Sz61MZ_sJ{k z+b_R=TJPe83m4R+jj5ZNlM(I;ceS_Yc3V$$(XIDS-L1DTP06w{E#nnKlSNbYi?V2H z$T(Hzb{VS_nagwzkzg(JO}Aav82*D@wjz3m-6FNq2sNC0+C5?X-OUc5}n`tRqwFqw0V|`8eVZQ14E;*#S{3&+s z&qE$pEdu^J^V?ND45A)}h~5qYE>uLfJU+a?Asd_?bwamtzWw9!tR9vOd?u$`2oa|| z?m0oS6%%mn=E<--CxiO+F2El1^DJ%BCEH;ta}jQ8@MC zM?Tl|bx?#0?|%Hz-$8DE7Z@}^A^nj^S4-2?a%c!0B+zkzSO*a71_m6eTKS zAc{&PbdB#2a#ClCCxsG3@oXh2HW7uP_#}D{RY{a=RV|vWBzuFaQUG9DU}4mSQt_oS zS$HNOKbka900>WJvY$ddDf`ISYr!y%2-FgtLN^~#7@a1@6^E9Lv8zQT=a^27x}j91 z?&zt9mwGZSD9FW|DGc#EN5)eYL_2L~Eqs!;5F4L^dQoq?@pz1xe|d*UuW~yy%}>r2 za#>$tx^8`$Au=AeySxyhj0U33R+cF~=*#h@k^Sh>|fcR9$%pm+24Jucctf^+v8w4ICZ zbJG3v0GB-IB=7z75Pv_+h<0${Bm8)j@jqrza~M5NPZ+e*pg%b1k1kBeE{1%Pfu3S) zPdjKgzn|e2dkorZ(6a`$bX@8RnPckeY8H=VZNh6b%39sfF(#DERlIO{rTdCckMi{| z>n0w#V6Tdl;8ii0mtR<*B^rteKdEuQs|=j8YG&5Yswu6XsZc>+y&Jxqb%xJZ*4y`e z*>c}IrKw@AhWhz6btCJS)-{N9p~m^{Dj(^p09(OKaLae9e2Ex&QA_q!RP^mWx2Ael z&EnG9x|$`{FrNck6pC1k;?aRBU$iIQeGRCbiQ1a zpnX@YB9mUFBPP8@M-BRuNq?qeCjEt8S18q&UY?p~(i`-qLHkU4i|S1}PH&s^4(I;L zkH68oCY_-7OnRSAnp8{k#I#z?(qYVBXXtY#eLx3A`d;j9Yu;C5e0e9j4N<65$>PQJ zi_;cXFJ4kxHz!Rpld1a9gkt!}gjV?2q%(97ODJbX%`B77(p-~@s8}4o)0PI`=*mpd zx0|i_!h&Q&M181wd`l{Oy6xQ~NJ;y`KTR`27u3|vX_(vcYGsY;bnu+Qc57i?M@`+VB~xk}=0dgQVnMP7PXDSlF= zx*LQN7itJ9)`0FRL{6$E2Mwkn_+lJGZ5(3&$*|!bKWU!KR`TlmD5M>eRGewhD(S$| z)xstfXSj_GZD|+{2R?|Lfi0!(g+&qwGct-BqH@u z=uQ}NN;lv!TFG%Ag(!P6-Og5W-AA#R?l3E3X1Yyewo@4Q7d}PV6BYS9MU-V`?Wf3L zDIvh3?8(yd3-v5Le?LV*i-$}v2#F0T2u%rTrm(CKE-eT>MBeDC1U)t+Hq=k;s`^l*nP~h< zZiA*M|D~=5U9Bh=si#3zR~dATevuMTRDz-yCHiMFVq?(jRJv9QE#=_ND^ri5ZkNmc z9YHHnw<%Y^?(1b+uaX23*^7P07Ikq3*dwb8Mu;qKnkv~-X#{td!Pm@)C=On(EC<^ z-3<$78@>eu=RQcJ2I*icY_dHt&Q(K76inAR zYjRlKOr)9ClrX1r2yBKPy9BdFH)s~-zlOODwH6f;atSTD6otqs2F(U~Y&4Qr&||C{ zp<6U*lC^7?r-C#LO9AUW)$ zGDfTcl~U-CFK1SiD?h;z1qG8aB`Yc{Kfx*U39#Oho*;njBwa6XB{&Z!I1cZia;+dN zHVj{n(#WI)XKWZFonBzZnh6g3L!VU;?#+%hW5Zbi#kyL_V`Fo!bzRy?p=({r+Az%R zAy&|GfsciiP4p~=&;qM^AEi+%7P%jM-47NVz|oH>y^dK!n+Ypd=HxvY~xHP&QYN9`uelJ=JWl@2(oI|VnVkuU@4Y&#@ zmZI8augB0!5E~an0YWeZ%g~@36-DN!_|m4vdY7R;9rBQXbW@vz48tt5gt)LRWV*3X zvM-mEH-aR^)iy2{lpZumR58}b&9=1tpL?2@B7+bGZ)z)SL`A=kDCcqo@qgmMU{+!{ z(4zQk)ITedB$a7ZWq3-3Qn>BN8dT~}X|2=)DPhaCoRY@QJa z4TyIlEg*673bm_m$8o2xey39+Sh+JNPUdz$zsZga3_0?-o$LL^A7n@3KO@jG?}Y2F z6E@O_6HDw!#=4U;WzP52ss0jIkB?rGWVL+x#n-YY--qAj%gX5QZ?-wH<%h-DR-~fT zckE6dl&mjA?7dd$BGTSbLd5u+^-%x7?#hE9Ry+U648?BfpOU4-bhDWB%EG4Be}R9T zvc*dql_as}CM8l_zd{cc-jS-)3tuOlu607duj9w{EnOw$7%>_xH zZO|NpY8|w|g#}jU!m6t`Xd%Nb;%18tTH>Gv7u?gO-0ns$T*lQm@nbn-z1g4@2Ca0! zk&A~Sg(sIrs|{*2XpKR)8no7+b^hY%aZ?fZQVgWRXJ~29+fbmLa z7k_Ux3I4Yzo~#T{&ZWCd>{z2r4jY*;2PT+Me`t0$-6P$EXC^Dl9J#d3#HKb{jPC+W z%pGy{Rmu~-yS9uC4Vk&1eo2jZWS15vj;>aEhyhcSIx%vj5-*Iam5pAGLufFMyCH56 zg4Ky3S*w&|00a#!-E=n(+A0Sjcsrq3*u+<0ZxFo<47_&^hAFOivi6bc$=nB{+>_}i z9Y%Q;8?6T@RHgFlqZGqmFfpxa%Vt5|8ZLb~HeRMXSQ}Sk$X%nSd4pKqWgLvAks$@aBv#lDE(xOg(S1 zD$l7sqWgi9N0f1hoK+Zwt86E^%MX!R%Yzp)SffC$k~Z6t?dV8dx{5x~(;8}%7B^}JMyvFTe|JrJm+293N( z5eC${!ZKk~Is$L*IviYezea|Rz1y-WBo)$Y`IY)^yGH5ag^bwEuEd*nmoRQt$`g<& zwKA>!_);x) zFd^`e3=5g`>e#Yp-%x>FTm~=}r7V0g3f0q2jx`1)EU0C`t1P$BSD*!!2A+&z956A4 zI1jv??c~A$JMXk!Azt`8PopY`HA-|4+BlmamJ+Omh*3IlVMW`R-?J77(FBi_;P`#i zSt*tIgwo8{s1sMHFntqM*fVapRUQRz6uOU+k$Kd6>BLToD?LQXRqhm&!l#OWaCLcM zFM0@rKo5n4Na8G0R_RP(N&m2@gpy3#tKeYqdnzn1Rv9Yb=E7uiLZ8pYcJJ&E(Nl{2JFitfmiwT8({x41 zQw4vI8OkJk|5`Oixj9U3W&c^M?33HBcy6OI*xJcf7ruF954L(vx#FcAN|rxslk%e6 zfBhHkQeF(V_R8?nf4KP*SjeA5h@#JxzW#4^C_hF>v|l}=6sO5-pX18k?0%gmlnl2l zE%;nHW|N?Que>h-#F?L!L9#yO7o|Q`W}6b!>yl*ln|^Aj?19aI0*eaeZ+&hl#VnL! zq|5b5gbUp`X3Rt}pDjegdsc9qQ(M ziJIhf;%gm05Zv5Aw{hi0gEsMz0%DsUy2C}A=}xZO!Vg5{w(?GVmq9)U2RLKt9)q?S z9N|o$dtC^+AjY|w?&IA3PUIdi=s_2(Xs?4F;x_`FskFnvVa`-~6s3=GrrE`DPHu!A zb|Tp6f(8RR563(`biadkIXLL);rgcxdfK4fh?k9+JZe-)<+O^cN-9T{R!o^TrhH^& zN%6#zX`_lqPO7Mc78s*Y_(%i+8>;JMoRbx$igSa~G36zrN;>!PBgLDs!aGahegv0B{@qUQCXN~!nb*mA1^5sn}#Eink8u* zWy?uRt3)6)ZKwtDvUp^yni}>B`wy?0bc7B_yWz}OwXZgY#`@#NsS{L~Pl?c}>|GA9 z*5p8Z4}xY1d=JL=kf86OfZ`Y!|2PK5Kd{4kgE!V2oUz^<-QXX`TciPxet~%$zIYqM zej-kksgeClt!mz0JaCdSq1*#!77okR!Y5p&Ke_=r}h4bn_li2~8-e%eB7c8HkH_02NzKAfz|Y zLMT<6H^j!MSTPmV8rul;#l7)X5`qv`*wt*TNE9+&IRtbL4H~AajV^Wqd9R&<7VP73 zv;{JN*5I1biXDZ`j32cjz-_2IL}i8giX)xRYMh7Xiq!FHH!li+ zY!Prom_&Wl){ z&h(y$Eu|~DoqGojTD9UUpj$*tPzR3X!ASe)R_uFg;ef7#TyDUAu_z3{AF3qkEpN3l822jiPCr34Nk2-c?+f zuZD}UJ5=et`>)=ieq$Q~WlyQQ>_-1Npr+aOM(WF|VZ(Y}SLaEGU80IQu7(F)=-|5v z{&(M0vqLPyAn}#`2VTeOemz1oF4y7(pp!-YMtI}nOH`+S;@{LaA}qk&3&wu%0~q!L zi5DaEc=6(=YNCJor)sEeInDdK`ms$x*?ILrc~`-I?mP8_ZDsxGNA;|2;ynGU`dOwm zaB**j)k7KbfO{YzAY!?#hGj^RScf4MfkNmZCJX5E}5X6v9V=L z(T>?{_;H#RYxf*AOS7#Yf8t`TN=8Wh%a&^+Z0GOdVH%k!$wvhv^VS#8QZl- zdq^suKCGRT{cd_ed))@T=7?5fSiJdS)&Tb|SWLSj#g;@pMVvXMMT_%W!NdoTXimTD z4ed!8(eWQRrM($tSz39wq+k77I1wknz|Jvft_$j? zmOtk4V}2MG%?&Orl6tN{xMLA5=EoBLM6^TWXon`!-q4c9Gwez(T4k{57DuzW#LJJ> zVbn-#_`zWg7cc_`23<0(chCkG-9{T33k~LQ4WHP6bIr2qg_EjhEI=f>NxXGl`$BAv)+5Ey3tCsVLXijt)YZ%yV;NO5YnF&E z`AUKb!_Z)}a4ov`Rlk)e%x|lE)&Xt+aG{{*&jEPjpMizRwkM1_1GbSq(J+J!4>fEIb zwF`1bS1*}6v8KV~({>XP*oVSFpb71URy*hswtTvZkH5i`OHuP&2Us7S#;~J!&HS-)usiq3b6sbJc`;-6eK($3bb{Q7wjpHeE%x zy*M3zc^6_ozCYrGwr_@-Dh__DZ5B;WYCS|t54BVr-K|xGAi7dhEw;X^4H3r;HBlV- zP8&Bk8M4E>JscC&<-h46gd#%qncD3a8*biv> zLuVIYzZn1nXdq0WfGw3~8G87nGz+0L@puy&h{Q%8BYfd|Ez8RY+$|33i-OK_dxo8%a2XX9W6UzTJpFAtIA{^lN_>)fIVod;NC~siN@U>@ z8|0F)iRd4+#2~(u2a)4!BgYBb=W=q`3YNo~2oiZCv&hLz#_4DHQsQr~>5h zVK^TTDl8mfR``UisxUBo927+&6Z#F z>+(jRSO!b?rl98T4pPQKmH|VR<1S3i&zMn`kuirTuF6lH$Kl39=mCmXX^Q6nB05yc z;UWcwW;!xS3eLEa%W&9}vXjEgpQTiHS~j9q={ql-vf6c3h+{=r&=-B-C*j^8aLa({ z3UGNPqA#mZ>qY$<)U5`cjgYG~Sc+>Qbn7g-SseR8sjwfS0XiB67<)_!5J-6v?7*VC z4&xxU=)731@Vc#{Ocb%Y=uY~te2_fNpy`*;5K(T<8t|E6zRmcBoWB5}1Aa$<5q8&dJduy{S%YwvV{r8$Qap$N16AkHYyVIdW{Q?a@C(W_h$zkY%@Ix9_^(s=mj$_Z@W7K_5&t=tF}}Ip`x7*5he| zK6YU{I^&?TjP(=l;!_9x-9i6w&_7-D8GX*w;StRb_%jarf{FgpLI2|SR|b8}?{fy7 z7kA|9^P^1!tFD+fX-rv3#pFp)IAg@WbM>eEkLT&(it?PlrH?*UaeZyj1&|D4M0{Vp zML8u_W$80SPCvc>i0f&Z33q%B8$VM_tiY)zytC^#rZmfhk68_C%`%AUY8rAU*3X>J z=Fn{V#ttEUOW%n-E+sD9gvb7GuKA%Y?$qgVtxNciY@5xdpZM{!@9oM&0qHX%`STZ( z{zDf{IE-_|qLXTRU+2h*va;gxD>xSQEB)7`OSm1UD5jz+nyKhY2#x}l;-(y`8>r4y zLg}Kot-qe$&rlpD-9;-+#mP!Hys~EDg5_y^eK3uqR=+Cn_b!-9m}2rH+*BfP-uO{} zy=#}*N|dQYb0Y-@Thpo9P~4^xqjWOim{*CE0{!OD+1zn~I9sXXm@}|yI%5P%EC{MB znK;SnBhFMhgP<=9^q!&dCgSCDMBD(qo0f$N%Y^7__LKF z{S>v2bd*F_9U^y?R~E}l8g~dLPVe*Aq3S6KZz}3#E7tA zEJg&O-Oz(<*~6?w*SuY1KTC;+CA|E|IPcPr0srS7=Vm%XPA zpa%fJK}jG0+BxX9P+g4>d6gFgT>~agwTKEFUCN2(t@S30CeAHKPDPV$2!$*JHEn!L zznpI?uw)65__{XImt z1Q4k+&^P2qsqnaa1Ch5Bvz8G}pFB)_keU&#?AifK4fln(BD0Lkh6T+!GZZrqZu6{X zMFB@nfu0qWfmp-zR?6j@kPp$pEdGoQX{EeE+=T2?l@RLA_fy|OBQQIB)36_KdENc- z41E3XALwr%6?iPb{WJiYYHlDueIE@BqHqxKK^hD2au>?{z&3(+Qd*(o5DmfFlTz#* z3J_aL{50$Zj5mHeI(m+jqm;|*nQ!+f9q~N?(fe3$x zsL)`0rCyI{Fk#K|j3hyKq+!kVz#{WN$VUQp9Np-J0$YZShVL0IN6`ud5Le+Yp%;kO z0M%-oP&X9e5_e*$_hVA-yJ5*B7PCV4461Pr%g zHbTcKIwI6nSSM3}d;@YJmcVCF z$VEzm!0VUjU2+(NOBss|P{S9f__T}7&D(#b~AW>*bn_7AR^f9s$%#Xu)_x;o>AYSfbIrF$PJHisx zZmdj4LP2D;D~KxqSbK8cZ4{P( za#moh7!V%M?!WZ+mc$2lf=ewn&Fx3gSQnSm`fL!bA-_ZGHf*S=iBpkIxY5$S56AvaA26N3RA%p4n;$bF z6Z&h^lV7bg8ZT(0F?eAU9*dXqnst8Uw8uAG>?mFly!AT!_7nuzU?z#}Tj&V(Cq|3dgZ&ybZV$fczVROz&d%ejja4Qn43* zVK9nL!N@w@HZeI6FE2!nkyJn0PC`>9$_QJP{n>4-$IGey`oykw!KWM!Aak z-$cCm?}dhAYp*2g=yv&?C3>bEk{(#5zizvRjtP2nlyn$Vs`cxo>n6N2^ntSQ&YVoTrz| zT;_azg3PU+uUE+2U+3%NWv?Vv^aL|N9fNWWUK~^wRh-N-&^yrNyI6u1DbIgE%G8)$F)NT0YJ3{ zeo^#waldW8;`~m0-&8Dbum>~y{qJH^KmaF>s04n^jq?J~5#nfr-q{+ux_sMzZ^sD- zCmA-D@Xd|Kci6a-EK20G4PA=SJh4YSvQ$sbx_qqbmRH^Xc`Om9j-xtlwXj9~W5yA6 z)WYG*c*zvqZnU|3mv2b=!#JMj#IftZuetG|Hzm0F=SZ?WfLx8pCwc8cHg0ZQVuLPp z;?Cw1%L60+pXC_VHYCtLKnS?^;(-S9@~wnB8q zspI+^+?Sye#hG-~A?7v)Iy{In^+11Fcj0EehlQrYt>O-y9lu(F1}+&(;!e;0tmsh~ zQJgvw7dl6rU7@FnXI9wbSY7a8$V6;PoID*+e=(0irRo+bD?!)V1yyIcy z-3%pE+`Lk^Pj)vk(5XcMplESoWnf%ian&`e0#$~{U4o{^eD=fybqwDv@Z+T&bGOZ> zdlctyR$7NUYJpaGH#b(cH;Wb@tO}5EVHL=zc(4s+ipX4JGot+-8(BA41L&F?W1f`o z$M-^({&-6O*9*4**SLeSu4p^%1G~I|x->89nm&+q7mni(u2&vcerK_5Ww^O#g5bUk z;MlerRrh?`hCWe@Uu~0m)OhFf+d_yqbrj>7|CR7(z%6m=UQE$*Xe4#k`E@`zTo;dQ z)H{hhj~f0PcK$NqqMn>e#EJh_Ch%)+T-n|%T}=N4Q+L4|nDtv4f%iQADAvXsFJ5aC z=Tlraet1w_USGwqm@bL3^wBkC762~oxx|V!0lc@fs=B+as;l@2Qx&9s2UbY5xYX?s znYY@K)Zv0(U;ubGH!f_4?<_vJ)#l|tA9{G$dDL_2DF17wN&Hch@t2}!fM51~;Z~4! zU^>?&{f2{a-FHV7#Sn2C)}i+cvV}O)pt{BCwR)+;H{c$xvuwgxq}0Df5? zn8m0K0NCUlH7WNmbCw;dCT7nZ_xDNc98?RMG`dv^2QQu>X z({9(}CCgXb4m8CMSyyx%k}oh{7Op^Besc&!{#Zw6^<(%15phR=nl5($O|>lEmZf<_ z7mpSy?ptO{aQg)qj(N8Knj1$KTBy(lHze+~dtUea`*+`t5pd#zvB0mn@#NG1c%)c3 zNsAQYHV34udNW9PzFJZ?ehU7gW!L5ac?Y@f_Ia}I{ANAZ8sO=;$^DOnLn@GPYTWi~ zZfvfTpqJ)rF42rGEck>qpWl%T6OB{HU3T|M3IEicdb|a?>$!}-e2=BasiW+Kt(Blf zH{u#Hk66xwXA3Btu(hqu6c^UxCdR5Qfgx;WfLRYnfD1m{pLlOepz+sSckGe2I;Y5N z3e*)hq3-_2WL^COnkm+x<4LwP8o^#i%)T9DnhjC4AF3tcufzF_j-R1qk_?5q3Ei z1dz4W9`y~$u37P54yTTW>AANUK3zl%)FQ+(5N;9Be$D7|dvVHp4DaDPM-ucO*ZDG6 zM-g%A$k4LS7<7b%u)`gsw)y0Bp869*brom6(VToG$(_z#O1IXt$6p0TP8|*J{7_w- zy({2{ru+0nZDePwC{3Ih42JL^gb5nXV?wqr|9Ny?e?vA18IGx4Rah{lALo~J}1vA?=5b_yjeqS zJ9I@q9WB$^z7}k}Jcwm!mx$QJ*Z$Mbu55Zs)5Y<7Y-}CQVLLFoUw^y7W#_Ui>J}j# z&RIJ&%`N;*Q=Geoil2ntYtOLE&7VWIIAwHb`q+fP7)o#k{KYoALE@&P;x^dCoH}my z?^Ff%)n5ek;RJy}KYE>Axp0au*Na8@1y{+fxbH=rIx2Yo4R!-?DOP+dY@rPz5%&rL yKh~fL^4U*8)D4~av03y5@6fZJ>0s*2R zHV{b=6w9-V%6mS$Pe7ldPkA8vP!Qq!pPjwQz3BISH@}&kopR>PnKP&D+^*EkUgPw1_sne9^?Q_SqXp!5hd0uXd7JXZ`&TE_Aq|2;X z(@N8O&KTb`Gtv|3sTt4XHhomru%Ev*u%G^Pkz*GQ4-lrVszw^uRMp6^-kK^MFqa8c z%64~D-45Sjd7Zkl$jeUMlGt)*L|Uz`PN_8EH>b5E`KnVlw$3I!it%Xp=!9om=|rt` z`O`=}(cipjeYs+Z5q`KJqq8%rtU)I|misO|v8C|=jfhiczsP9h-<27E*wf^g1N6?cLlSaW<^Myyv5dI5n_G6!)|I>jp;qj(MmNO)_b+n;Oy-53;Eq(&!G( zrnzW3KWDgTri*5IXg1C9&|I2l(tHm^(gGJPbkQR2awoqp_Rtbq%B5u{E$3>Vn^sU6 z*WAUWyE$9Qa8_~MY7aGuq)PY013O?u%y5m@8;rlb-&Pia0szIW5jVU`m{AJ zfAP=_CJ<<1(kB1Kp(*~UTAHpcYVKdsFHO&+EGc@4xB`nBQxl6?QEQ7HAiqVM=|P}S z-=ZxvPEgpqIVI!o5M+*leSy*5IyV{8ycf;ChfqWdVb6Gf9jUdlzuQoX4uHN%^r=G4N05jW+l0{ zQ+&49Z5OO;uLp%~6ebR4XFW^$QSwD}N)6k?75uK}_2Y+;ca$unxQ!8SvH-TN z$ag>OqCFH(y1jR6sU9|=9yp)@_P#OpDhEW)rM4(_2HAV#tv|J)!KfKa`81shX%2Ov zGU`hA(yf4Y8xBHGz#IUmy$GwMedzrJ4WTL;N++p^KA;iwC5@zS$Tx~E(ir-U#?f^e zFHD*sVrjCdO;bcBO|y4@CFDmLnV7OA@~ud1sU7kPAlx1Zm;l>_I@qlzfowAx7>62N-XS*)QDT-%@iMcq&e!|Gn7Tc|sj>sC-D zm!cYqYZNW6Q)82Q2$Ug3ZYwgWr*Vx^QPmw)aU%8#nTfSQ+Fk&vj!aWL`N$Y?lorJ0 zOn-s^t~n(}^(pB?eVxH=@YK*x6G21mkqhmS>VRm0Wuk9CtbxZsULePr6<=Tt&=s^l zCnntC@9>-$A2X-b#>|Ov+XXNj4s*U+Jv=!vp1dS?OwLhCAC?mnk(cCA<)kq1s5f4|PjP^2A#Vw75-Vd}NZ_c`+umiOP+SjE{;Bk2m9e z5tWqTKy$B*xUrAIS4N1Hme_aWILi~olf58Eh?Y?VEvH&gICa1f4d^bc>uxZ^O6=8Y zx*gm%iq_C1S_^uvL+gFCk=BD@HbC@lqTOiQM-STc+6qd{1+@l|uaGqvrD1p>MS-Dj z1FIH7V1ZE8%<8h2{vG&stIV4QyS6Nu$BYW58lyEtAFypjPm5ZC{2X&oH|C%hU?I89(U_$#$1o)~NqYRd}_k0?FiSWu~x<05S9 zX2;vJ|L7V?#HqP$-87${czF8UL6NrACNA&&wI3|fel98Q5Q+ZcNnZcj(&)qCW&K^s zU@BVkjin5x-hb44V0XBG>K4c5TC`=HQ~I*}AJKLnd24^Io=?2p9`d2Za8lWW&i&CeC5 z!c?(mOX0Lvz0q34)wk(4ZO_7U{{<6s%9RM?OfRjo*;Ms8|jrV{hCvd;g2%1QfxHQ>=YzmjAnskSYrg?BYVOG|q z8Qgm&XS2BXY?ELZ&2_PDnM@0~$3ioT7MXOXNsCQdV$xDsutaMsFBOT)vV5dyB)hz$ zr)((_@kR%TFZuZuJzXX**Q26M9>zu}&;_RQY^l~>HW(#7HGG&wiqYbqk+}rp$2Qy8 zjJrNsr0Th}Le?1r3#?2QjuDSXEVFRh@3z3GE2TIm;$?Y%t#$^#ud=8vb?;upaJvP zRu`XF;DdIy!*X%7gl$P(#=eE2wlCXZqF+|d5z?}l0Kj&LN$_ zY-dsT754XQ^!NsSzC*8Xv9ITV-S<$-KSCCthYtq_xNjeKTg}kyPf3cBKhG-;Ekc(kyK>Bxafb_tV#bg8n>BD5mR=Sqg9|ceZBPEQk zKx|*NK`EUZp;m*Ia43mJHYVL*F{u4%g>j6T=Qw$bym3`j>qWe2Hcc5V5aD_dULFUS zfM&TB-Pl;pu|Z-`Rt-A#ii8$4QgcT3u)*JkF-*nxBJVwes=QASXxC zXs|^xp7Y2ZtlgGJ6nr`HT;U#Y&mLX+az$Nv@LnTA{%}dSs|HLGdzB)n`gN+1!WP^y zKUFUN9mj-I0SoSQIjcZK%8i{xbk*n6M1&*t|1(=Gj!+6<@-lHm=_;AAS#(g9#hb+y zrJ|}v?+`CVs@jalMJuK9s)~+@F6pXv=V#)d4%h`}M5fn9m*G8wP;+O(0JjyPz#%!aM_&-}L?}I() zBm6TH5@q>rtzOl)eYH_4sKYwBm5oXLM?Yz{5tb{bW^4g+RVVWlET=4vPvp@u zH%iMnLxduWR&Z9v* zTyhsZ;DI^iH)*p;4|>3*TRfCV5+i)Hm5bZl7`)x2hghjDbJ4^6+`-vS7wzI_xr=u5 zbB~9eejbDP$0H^^>VZfHxGdVo*)|vLchO^9MEe1g9yjR;_~d&T{t7Mjge*v}ZWg3Ycaxs7z~zWKjHO~b7Lv+C4_aVySfYVI zgdKS(L60u2MM4+{A&TX!!CFhb2UW?8E=G!cd9W5=y9XWR(a%|Qj0Re;j)z%voSv6I z-J+%D7+nYDr(5&_o#5<6Sgz^Qrpzdvn~s3agr@1~gAmq9?_~FQNwyxMrA55V=E^G; z{SA{;t8ck(h?a_ZZ>u^mL>s7;@=k{SU_uLvt&N4IV5vHzS{7<*mWIX*1%O8 zfvJJ7vp$V}g?KN-nns~EmfY7!6Yh*gaQ8Drv)Wlf%hvdy@UrE>ky^W6yqE;6o?v)J zz+a7q`&^US&=I=f*{eDQr z$H4vvu)9xSJD$XXo}$I{G>oM~Sn4yt`!Mi*7Q1@{y{pi26o1db3_V6a0M<`*!p2|} zShWGYf&HHX!f(X(O~r|{FvnXyx&yq*HudWWGIhsNyJ3>)D1~FXM`#9^#RdKFZ!kcZU( z!_c`Jo{imd8)?;W0^4^dCR&W)8Z8O2D+;PMj@4Q^n(~cet))`4*Ne5SajLj|o|cuW zif?YzIywe`Yo~Tdd7AQt0|AwN^?>%1qtY)_Y6qOIJ6_e&9Sh>+lbWf#+p3cvX;YOq z$^bHD`KM6qH=(xhQ!UrAC-#4-B|8Sk>*uwu3gIg2g64S3RTr*kJJgqrs*TsRj~shw zs-~ZHAoO?X=dzXMkl#vgp;k1rmHxHUJ~UsyKSR}?8>ydhYTqf*-*%ASHdT)|Y=h&= zgdwe8gCp}=r1Uj6>d2R->K?hTr4e0qX@>r~v+U=W>P5=SuS!~}cXevVZqlP8Y#^@= zZ2xWoPIE$-Jat}6l<95sFqyVjkFR>_VZD(v{U^Kicb&C9wpZWNP~qSIE&Wr4Y0lgF zNvG?z_w`a|zS58NOAfI{p3~opv8|sLhof3FgjnRHte*j$bu6GOa38i(kM_YV$)Hhg zh|AFk6?q^%#_~%sXC)DQhCL96rQ9&lgF^`KeG-2b()qKH&Yy*J_TVA*ID-eTvj?y9 z$040&GXQ)c(rGT2=0!kiF5ry0mhGXrb>PfJxvb(OplfyUDc2Krw)mgdw$o`aUjZU_x?`*-#|sBM&N zpS=&YvLFgD!&+L&iUlQ@fI!1S_!@2NX#+lY%$zf+89winOpuZ1gj?Elzj00X%72YC zq8eEcDHykj`U#4iTC$*|+5CA^rZu~*WbWkKOYuGJ0gL>!S(f~v_mVB1L8Rg3KlIwN zyrq^Vt?T;za_~g0fjo6Zc(x=6)BnPW+KJJ#=1iF}@Ai54urwH7m+(a!rCjHJdj5IP1ya5)eJjJEbXn;avIx&_=>eN*ZJ0p2wT<;ZH3 zBeC-x0H0kqMww>>L)bu$3nSH6Gh_Fl86zA3!Nd3qf(np!0S5nN)K*t0l_!s!8h(?Y)q#MbtYdB&&4#R~13qC4@A)wP+fi^i?j1b0~{1+1n zmJ5urEJWv^X{N*Snei6`f)@)xVh>$!81-#-Q+C=M@a|mLjPrtqd;he+Oc=N99^`=V z$M9sfYxN9;KSv3IdS^XHwFOMIR1^{gy+lcUurbMqwDirv7ruQr&e~n;2pCBMwq?2x z@*xF4m{9dVB7*&q3hOwXmZD`bB*zlSilvYd%W<-OHfro@Gyo$GgiD~Iw*@=9Il#FA zVX$qH^2x4C_|^rPJ@f|TKTMXPCX8sln=-x!5gIzXTdym9tqgY+)ITH)w;~!}Po{sbyHi3@ zNEyE(d^Zj!r;c{`p|Blp6r5~pc&qMhZTz4|o`$eHgFQYAL;5SZqP;OW_FGW#9O(BQ zEZgrv$sgpE_C}$NMf0yyFTMfG_YFCFnMjf=_8N)4x33>Nf#v6kL^t?vrw1x&7njO8 z+s)aYup7>g-}Z7=fiQvx(>`i)1i?f5J(%z@6+7VfCp=ICPkMmVQzkv_fpve7vqK(? zeTD%XHtAW`;k$e;I>LoY&Z<0YO*5=xE;`PgpXXsOxafq7UUbn*E_#_OUg4&{ap_eT zy~ZP7_rThIle4!td)uUUTy)X{PJfrP_gwToPkYKkf2V(N=>r%2(?uVeBj_WOK6cS3 z9!Ts@P5R6O0sOg(zF?^T;wip#(Pu4O{Fv*RMMzj!5S4DR=Mhee&Cj9^$P5MaAD=-d=kL644jB#>p zq0u^H1dX)d{YF4Xrdhok;l6jR^!X`tA!}@6_c)7^gCU% z=nuLMD_}k@n50|qK<=>MS^Q7_w~LVhfhH`te|K6!qbrCE4Jw^A?auV^GbgB!$z`Eq zd6qCl7-!*@FojEY>uS`iJ6^ag;o(7|sRe&_v?(GiVTni!e(P9ycUNOE7IuA0H={%R zco78z26exEFc7#bqAd{v1O{|78imJN!b=n6(r!lmx^bcwJpNmz%pE^-#*EVO^GYYA zPn!giE1@RWxPwWDoZ0ca6keAA~w;4IUiKtW_ZW>NueZb6w!<>ba*cA1j z!}<+4-6^(j$f9T}974_hKKUZ-8Wz37_&bUl-%311*)j1|6oIcQc(aP2UJ<{J#O;qG zWS}Ox=s3kVwXs#?#fO!+BNTR=YH=lQiNv#nPS{Q+Ld~^vIMxhH9J!Lptsy+x0JpXX z0kwnKkdJsrM@ZmKkhg_4t}K;-kAo|hc;S4vAQH~&bl^P$?f5WsjfR*s(+8hbSwQ*d z1fFpgHU(f<7Uw=giN`4k!Z_K>mqVaNQV|2Mq`E-Yz*|}n>MAnM*eX@l3kR|{Y|uV7 zW-8K^Ta8&RT;$Pwqahdr_LdcCcK`#nayo!&YX0=#`7`?E;tTb53d=oM-Jw1@uzs@< zM28ycd2LWb5KbKme1^eV9B$*3-k3X_XtehX02Fr+?|;dEeAO}GpYE(GgE zP?ZYn7I^Y#Gt1imcNJ@n1n|_!p3hZ-#Uepd|r78p00X zY7#xNyo~^eErG_d^wlnkM9J4URvajY8CXe8xFxeMH+K%&b+n{nzfsM*o)gogl5*M@ zIWd{>hEZHexoyJY!*4t%|M`7G#FxGrTah`!)^^Oll)z2`f zF2e=>jh2fzx=SR}-6Dfliac5+y5pV-i=c_nT)zSIQuK2H#N#l;Mq^{=k_)ag$|kM0FO;k{=^i%V?!7{(EkbyA4IE^3ZKE@wvlt4`N;^U>w%;_H zOv)>}57XRluPTAkSjdDs=yGWt#5KDn_bE{%WBbA4Q9Du`^_Rj>9sP&xxs>Z`IWq*G{EgMJw-2B(9*KdYnj|1twMd<}m zN!|IAuBh30C4discaOmkdmcvG3qf6Pw?Q=QcPC<4g|{6}=S;;ew~)kUs4H5RPdsRH-`3`A}YjNUsT_ zG>YCt;NvZr0<d-(V+X;c5DKbtvp3}k6 zMr@27$h_$3ezzB-@(L%A`MsW8C1x1)lshCF&otWE5y-8p&;I)tL_9eu2dH?q(Lg@+ zh!MW;>6u2j!T%Q^56?H+R{v*!yfoj)S3%F_3yh|!G;V>>N|m-RFgmKzXA6v>s?=(s z(NmSm78?Cj>4SyFKvin8$mp+1OBNXeRO$Uiff1>98dFr=n(ET!JBn%Se0H` zY)n$6+$Bb-Ds5R}%vPm{rN(4cTDa60q)P8DHF~I0;xc2DDlJ+T7<+b^al5K(x7--6 zN}HD(g*MqjF6hf|mK)7v#VW&_&c5cxsHSDhake=%!Y&4W6=h$H^mmz?=wDG9BTug| zoKXE2J|kJ%IR-7+vi=GOM#vSp002BI%8pDy&%B%gy@ggw$Hxme|2%2Ur8zzxHNT3o z?hE~YUy&0u;e{2L@N=Jyif>JV-C`qNGYZ12`Xz?WcX{awE0zjMiahBvtUpbe9OwzF|9qD=`=q$rNR>Rr{|Tx8(bGIn+9}_Q8d>m_(d7!V>sZl$F20P#kV8xElbPzQ#3ae&0VV zx%zIyt2XvwZc2XFp3mnC0L~fLi!zO}qThN~5vDV8f~3#oe_h zRy$zcFcRwLV)r;@2f?f;E4!@vZw$l5m7(_smX>l4VEtNOo?T}o%fhW9Ob)##&^C?R z-pN#L@5jJXJy6~D61VltQEeAmBW4nPufrQ5*BnD|DDJQN^Q}iY;oiVJi|+-jZKG5h zuHw{IbbN=amlmt)jmv;qd4Vux?Uzq;|1LIixo7`Q3!ykTbqncL|CEASxJyJy-+j)ZIoA7` z+uw^N;?yOi|L-3L^}l$ZgGIj5k?V;R1Mdoe_fa$aeVEQ@LXUg!o%)cqH+2ejJIt?OoY7u~fXGPh6 zrYJCHHW>}(>JzlIc_p%(t8?waYibvKvN;0RDv&}sy_el9LtgY7F>-_7NVofc z9^T@Kk>CJM999qfD#{9;V5oNgdUn^IH-35GStu?}g982VtSCGGoa!3$mX<45YKN5{n=3pkbx4Wzb^$R|nEzif{^L;8b~Nvr$hLJZRKW ztY|NL?RyL*#s2s&cL0iNd-6f-+#5A*t!?J8SCIU-qtBg$70#(u$O?Y@K*1{B5>RvN zwg61R*Qz>3*JHQ*S98hFw*;p7f!jy@yQW4RVsr_eK2 za3N^q>ECh4n_tC9mJl|o3r_uS-fF09PHjR^J@a$W$Q#l~RLf4;3Ve?J8r0VG41#7}`SJ`s@lR)P?dzi>?tn-Khy-wd>R;M1mAC3{HP~*IbwEJermU|4Z4flojePV1w zB2JkhaIb82usO9CHwM=42%x%i2Tc)s&Xng#O zn#N3fnIV@@LXcT@zNW8RuIP$`SOe{ChIZjfFfud4Ap?Kk>5zKsmd&%r;+$~`?0vBCII`X<$?9BCQBGre3Z&4f@&Ikmpix_xEMxXSx2NqklGFk1xEs|ubyW3J znT91JwmOG>%+)mcI79@eP}?a7%7MYW)WmSfFLyhV)%Q?L*b9)7oEnD^qCo@h-`XZu z^@qkkaZsbi4(9yvQBHa&uq-z1^`lJ;Ik2^n`X^op`2-m<>rZM(ry5SL{=E1nqw>dG z8+=3gz+Q)Y8%1Q_kB?x)DU^GEzs2sGCQlE6Gdtw9 zZ~h)UI!>X`t`2Ypq)jlw5pWb7sVAKgbqe*gdg diff --git a/settings/repository/org.broad/tribble-46.xml b/settings/repository/org.broad/tribble-51.xml similarity index 51% rename from settings/repository/org.broad/tribble-46.xml rename to settings/repository/org.broad/tribble-51.xml index bb8df5c87..b38fc4bdb 100644 --- a/settings/repository/org.broad/tribble-46.xml +++ b/settings/repository/org.broad/tribble-51.xml @@ -1,3 +1,3 @@ - + From 62801e430a409ed9857bb310911a2d0e65c8df11 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Mon, 16 Jan 2012 18:49:58 -0500 Subject: [PATCH 09/13] Bugfix for unnecessary optimization -- don't cache the ref bytes --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 43c878b2f..836ba22bf 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -522,7 +522,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { if ( allele == null || allele.length() == 0 ) generateException("Empty alleles are not permitted in VCF records", lineNo); - if ( allele.length() > MAX_EXPLICIT_ALLELE_SIZE ) + if ( MAX_EXPLICIT_ALLELE_SIZE != -1 && allele.length() > MAX_EXPLICIT_ALLELE_SIZE ) generateException(String.format("Allele detected with length %d, exceeding max size %d. Please remove this from the VCF file before continuing", allele.length(), MAX_EXPLICIT_ALLELE_SIZE), lineNo); if ( isSymbolicAllele(allele) ) { @@ -576,13 +576,12 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { public static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; - final byte ref0 = ref.getBytes()[0]; for ( Allele a : unclippedAlleles ) { if ( a.isSymbolic() ) continue; - if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { + if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) { clipping = false; break; } @@ -594,7 +593,6 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { protected static int computeReverseClipping(List unclippedAlleles, String ref, int forwardClipping, int lineNo) { int clipping = 0; boolean stillClipping = true; - final byte[] refBytes = ref.getBytes(); while ( stillClipping ) { for ( Allele a : unclippedAlleles ) { @@ -610,7 +608,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { stillClipping = false; else if ( ref.length() == clipping ) generateException("bad alleles encountered", lineNo); - else if ( a.getBases()[a.length()-clipping-1] != refBytes[ref.length()-clipping-1] ) + else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] ) stillClipping = false; } if ( stillClipping ) From b0560f9440054ebe893e0a7209756f085f399f16 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 17 Jan 2012 15:56:49 -0500 Subject: [PATCH 10/13] Rev. tribble to fix BED codec bug in tribble 51 --- .../{tribble-51.jar => tribble-53.jar} | Bin 304586 -> 304182 bytes .../{tribble-51.xml => tribble-53.xml} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename settings/repository/org.broad/{tribble-51.jar => tribble-53.jar} (92%) rename settings/repository/org.broad/{tribble-51.xml => tribble-53.xml} (51%) diff --git a/settings/repository/org.broad/tribble-51.jar b/settings/repository/org.broad/tribble-53.jar similarity index 92% rename from settings/repository/org.broad/tribble-51.jar rename to settings/repository/org.broad/tribble-53.jar index 04121804f22936341a786a08d97a58fc4293f595..02865df435a3e43dffaad91132c34afaec455d81 100644 GIT binary patch delta 8387 zcmahu30xIb_vg-BnD-t7JRd5X2#C0_sGykQ2Bs~pm}#OSxPYcCZvS)c%tNsMGW_n$d1t@po_p?@;p8dTp%X6u zjJ_&&0N4QR`AKX2Td6jDblM~z7b4~3k60CHMkGt5pH%~yO(nn>&TmV^aG!pP1YXh> zG6}@lyGfR+iF%NUDe(Wp#LR6_ZZbD*_@D`lYkb2b<%HcdF{2{ZsR3vqE%t?RuXa!Y zI;i6M2sOlL*Cs|3%;UaQJ)*mtG6l^o;Al(L#2k;Cpfw<2sB=XaEV_}gCfU!(Np?sV zCnpaxffHR{Fs)AO@uVh6Uq19HmoxVCO|p@unxeBWRMhn4tlGFfupO%wd7afrBR!+J z{XF=>N8kq^8&fg{uuqJ)GRE6aLjatCCQxl84oZ-outu%G=>j>LOHXnJ=*Fr+f$np5 zNPw9Rh=W-Uh=ETpH~1_d@KB;q0so^`-tSmJ=CuuOyH z4rl-?Y_QS>tBCbE`n}o#MNmx88V%MGdYv5%SWlP@cIah53H=tNsFaW!>9&dVZ`NQ7 zp|=ud8;NeWK^Z;EiTu0<6&h4(u%l|~W78E!A9M$uKr6-%kIyjrZA`9ue@Hyz$n=6y zGqhQwfbqHcgY!pE&mWkVHzPNnf!eC|qlWW|p%3kZR2`C_6Wx+^coC-Q&=%V1&;z>a z@Dl9OVK+=;z+!aR16d4w@~4j;JBfidX8O~!X5>!SVI*V;)`tZ-HF#Nvy|7P%{W`n? z2Mku+62+3B3gw$I03BY1gE|}nj|Q*l@H)Jq!(n*S2p!hE&0Fv`daJ_`7^lNKa8!qP z33`ui@52W=9E0OJoPZB?7zNpa<*SX1{euIdbT|p8bf|{YIz`FyV0F6L{#*l&#$k2oKpxq#_Sl@{Nl>1it$bGVS{!9U#Uyx+gfW8+4aaxMU<% z6)~zSbCc0c&;sx6(Agw3+5uXk*+ewg7S*O8a~DX5H0X(cz2RYa1RjSzFbw)a7W9K$ z7zp_=1m?g{SOQPLb1(wdB4s0nzYM7rkPSOv9A?au@G?wV|*8 zG`I#c;CGk}w_pxaU@p^PA#;J7MZqE&fg+5{5{zLQ&cKGMHFT&aLOU>84?-`54kD(v zq@Kj>`4GBkM@6KTbxZhRQ53X)DJ~C&haetN1PAN z^b`z0$QS3i4`-W!<_BTCk|0nI({6)HgU8q%Xr{qn#%@6i4IUq=!4Q56LJ`#)Q9-Q1 z4bU0|Vbq6Wh|CphBiF(Wz=I&>{smhhr)?$+uT7R=kZB2b#3wDc!tR$`EhWvAQM{8z z-r=||nDn#@>+0xAO4L1cm-%Yu%hvxzi5WSCb{6utH)f9M<=^~?1gBqesNI?H8Zh==pp&&z`fnV)$$Oux_`mmuCrjlj@pSjaFdyPekx2e z&hHM&@VQ^|T`6g1nvW^yOvkD|-O7^xyRhg~SdwOt5tQokKTEZ~KS!lhf|SB}<>Sm0 zcn0H0<&xp{jf2Fr(0fe}9`99|xknT_@2kQmH1unLK-$)m({A_l2cXeT9@y=k38|Vq z<1zmLBD3Es{uqEee}GM}Ni4+38SgSy<_1>z77nz@4r17fuXO^9^pf@xF{@;(*=>AT z_HiI#MDKx0jN$y;Zd~4Pmn`4!Z`)V`5yw}VeaP-J-b7F5X_%`(Ht_Eiy)Y-BD^^|Mk1lwW`ctuGwXm zlU@=dYV6FnPAf=JDY!k)?2GQ)ogo`PPktgh??XyFkF;kd(Q_j&0rcD@p){ z8%Lsu9?!x>;4&6yT+{;HB@T#&k#KPB52_n{tqjr%@XXmg%qpRvOnmO?-6Rf33ugEEdC7iHK3y zH9RNMMzOuN)lfu@NwHwxutuV6pc0I{ZDEZVv`w2G3`jf*_Kgq_m-r6Y4}xPSXejO{0>`qb(O*F`_y#BREmp!Gaijf& zC)jm)hR2zf5Ko-~+QgnC0XGCet#MN*C>1QXsES6{N-^St|9x`|nuI zZXos#tf7BGIQ%7kAInCBIgWwGcLJ9OhRJUp758-|buhc|&t;vNzj!#8wT}te2o4OWwo^VR#4jU%9h)pj zY|Uk^7rexJxn<;#|5?Ne-sxlzBH#^(9S zEjxYzJJgu=yNFuPx=K{`diIx%P@a)n*-?K9MeSxOff5?_2J0Cmp{<{@3qBHh<_e2* z$qM;)Kuz^*Jo#vJGI#sbf3h&ozkg=S{UquS`)fkmS_i z`RZ_aaCkx%s}K6iu#H-)I?Y6wwnZIlCda4c>KW6~7c10?SmF}{K2$%KO5#tdr%huw z&#HNv#Ett@{l%Q)@Nd+!PKitSQ+1kVd-(Ez1`?MM%y*b`{4G;KndhZTuYUC_RxKJ`sS^-6(|B{ScL(>~x$kN2OQytuXLu(Wy&s?9JtWX} zCT}Z&(KC69ircaxaUX7VQ_(0&&0BY}che4qqAn#CWJK+D-YLjrSW^Fb0gGuvVg zpTj3g%yKXA+Z>)NFBcm>z zY0xX6_o^%`A8Vw~2r&wBo#Ju6y-v_Hq0xJ&&tBN(~vVc zA)_x-8mhIQuj({rfM?phtFqAHHqI9$nD!{ouN9!ZvSrc{_lm}nAl<$v1Q1gD!wEm8 zuUK!SCpTPI?kn7d=rw66Dnu_N_*S; zT<_l$4e>6?9;?kbH7rzYT5Q?aw-_~by;`fuCjPUSx0L3$JddP`uSC)k-dZBE zmLTE_iHI=X`8r7KT4KfEb)uACC)Nlf=lei$%x$Gh9XIYs@6m`E$do@3}Ti!_NyYm!XT zStqlJU0%`=A_a6XNk65qP!YJ?TKalDa~=0FPo6OOsf8BfmRnZlEJsz-b4?QV0Kqa= zCkZ>`NfWzf0lHPzgK5Hlg_Rr5>xbsv#72`wk9t;WCe{k+BD)iIwR2HODlJ}tN*|e8 zhu$NNIm8!U`gNion^#MBiPV)CTq=i;u0)c19g$+h3dG8cDM&r@!G3&cqS3aV7jH|} zcyVQ=wUBu&vx_I;s~3&>$#r-$X^j^(%a|@FC=HHUMFw9YQd^OI1Ou>dm8ELv?8CqQ z8j1BA2Yt)(UX_KO*N8t|7MjGX=V2CBcp49SS@2m|c5wMv_+vJ}V3VoNj8lvX(cO32`BE?$xt80FA-*SX&gE ztOe^(wR5c!E;gcRDKz5KRt1?TK%;v-p}Z}o&^zmR(!Es=e~GY(5|NIRP=-lFw5Tz- zQxqFKN~*o+o3v*H?i?EC%buybvatU_QXu)8N!Ca2{<`i}JTYkCN2A)Svhe1+Cad#9 zB~cWtw<7Xp*1<3K*#T(O_c-#DrMeoYnX$nNF57@!#$7N;JvQ)&dy^^sl(6BlsTNzk z0((f{!aY)oTyE)W*n6+NUX9-tXwZjc?^Rjo`=zNWrvy(E+9NAVP)qY4OsrE?8c`VCs5f(2R57%?#bh1d>$cIRGf2eqGRM%*1WO2~H z2)tKi;j5j9tGx)@VtI0VBTm2faKg3{dmmN92VYI{*yVe$UDv`N)!e0`u%zq##LCV8Vp z`{%h!&g-qSZk8PjK%=!+DLyL;HGG|?CRF^2)YI(YKD|-6!af`M+2anx?4^`j9 z`(yv^f$)?i=q!!IdWu(`s4HAljnnM3&05+1+fe)PGu}AhH%GC*ry3wOdKvZ-!+RGj zhP3Uxf%wf!b8JW2>;Jw-Yc2EoDMGLM{T|vb3VPy}s+IeY7`Ab4PPc}PKJAM|>*h8L zC@p?z$@*#8xQl4i&!c^f?nwoRiDlL_+-0coiBO9c6^^CkT$z;}myxElh!xJ~@%W4^ zw+_iJ<%qr5(!}m<&2`~dYVG4ue?^Mj*d5WR-da!uJ{) z^;7lZHpHJI=T!SD`?n;ykYxEyueW0SW@}E=1*_xRb1^7|H~HIg$M)$`RE`ePpnt7+ zugbzQQ>2@j>{I~O!=hyp?PwiZif>>(1_AyX4SClj delta 8941 zcmaJm2Ur!?(sOnXx3Ls2f`CX7D@{Z(ia{l6o)H9$F$SXoBK8WFSOH^+ih?e35DO@z zsIf$%*Yb=SV~G{>VvKoFAuloJkI__gPmCuBpGIMB}w}YG3o$V1OIo3OCiMsmY{XJ(@bs{3W=_;UppRLLi|^2;_Zzsd;JuJAUzV}y5&@SX{_Tc926;FLQ# z(feH5E)(qL_dT3ruMs{l!agI^)Qo&NN3x`#hNu@VtVXe1)^`*QP`9!eZ`bkJnPakZ zk|s>e&J`>l;u19rhDH*yU)}X;YjsFzjOshA^@|6fw*uWDo?o6*zy=2u=nAn4^n(Ni z4#8msj=(_zBvgTa!gvBMGjlStCJ``>&Y3nRH#_C1wP}@zxd^IxT?Sxa7}?P;kp77U?RH`L%r3EYWZGbeY#)N#R@Ke!(MR^V&6p}y!s2NDPbs`W{M z>UU+~tRY|WQZtf$g(76k400>Cjy$HD?j4sD(cYNxG^XMBeh3x}k1L*^VCFp@y z3*G^I!3cg3hmfMtK~H!dpSX;8E<>F^BCrRicLy1-!9XDdXTn2an`U)WSgh2l}Ic0q_qg-=UWS?YG#AofDGbNh9|db612#Y= zRKj?ug3*{Fj4Tvwyo{E`f(zPq9)=)fKt=c9{7_u}RaB}ocqsnH zCtx+gF!B^y8{suV9z#1Lq`hi{;q)>1;Z%Q|@*>Ut3r24*^!n>Cg8LrXqr_h10Z=ap z-HQ`iz-U`o*d$n}t~x<~kJ6Y)OVrQ>d4|fp!Vf_k0%?OzgLj10d;l!v9B>tY5@A&k z*Z}@v5eL9+m^QxwTq5=^k;%Iar(5N1;SXeQ96bW<(yZ~F-H}l65Xcf+!d)E&=x0o^20S^a5yluHn|m7*&eO%E0U7m4 z?ouO+Me$hGG*@AU@vKGn%J1{5P+xQM`>0l^JyCcR@RZ~vT7XErM&VO;bxlS;w~oUi zJY8e_%$^Ykp`&^$V-N{bBQslx{22&VI_cFf;7oJ{!3wp#v1848q)r#k3!d!9E2c?)#mZ zzTAI1MWFJ`--!{;1DZoq(ZRfy>_neVLyx$_W|)r5E~sJ&CLj;!U|5A|7>fbj9+p5Z z@|tkjg&625U`CaP!YssE^l&#c;HFs6{pjOi80AKoO-w|47^bF<_D-LJ>rwQ#AC5O1 zJJ1U?;yPKv4?wsT12OXNJ8@B%tyL_GVc2ZN+gb}OG+Twd}@oZoGx782HPeKn$2b^+~XwI=SAW9>3+MJ`D3v%j;*e z+7WVd`((}MH*+*oU#cG?T1s{Ax7~z+KYaVKFtBF#FZay4rCkK(>ZAFRJ74Dv^$CrP zMC&41LWb-~ z2g67|iK57CcK>_g$2zVe!L~WW$ytJj#Bv2zz)A&EE>XdZO=Mdi$^mjGC{)wV1{;j9 znm`YAZ5!O*5#%iscB~49S+QD~?ak_jNG&YQ@}VO!Emq#f?K8CdF*aBRMAGnd0;7tO+kF~ye|Z^QC0(vx3 z>PkdKLYFyyek`F#YQ;vTOQQ1GCZtOnyac^Vt`r_1pe{vHyk=DHCTX82m8`>dUB&F} z(yyBS!}m)&H8RslDMa%}#Cgf2>Gs@JX_6?7oHdNqU6%|kop!0ur2)~Oox)`1BbkNV zkUFyqUrGKfuLLXQq?3}_HtEMSKLXJ$fu%6UV^*tMWJ6rW`iNcfz^F?ClsiMwtFpjQp*J zJkecV6C&KV^yrr*tFVvdqbD0&Df_eIFUlTlcNz7zZ5b}F*BH8G$mcaD&Wp=28libF z@ilr;Q>Wz!YSq+^o=nr9jpaaXGxk(bbJI@4c4-P7WRUT57Ym$8y9-C9&7{2ruyH2s zB!G)EsmK`SGKKWdW$OX{rEj&!&R~5I09BUOtCT z6f?iufy_5(wwU?!4Vo!{$hma90Ltdlu>v6TXpR78&%?0cvG~nAIz#~7=hFcKcym4- zDS-dX*SV4w&{xDv)dHF(fQAJ$!SRuc4OmED6~Wf{-HN@xkoF741^N`bM-?o>R+kU{ zN^SpD7bIn~6N{*?`g5U0y_IEPH)c^qu)H_7DC!)3MdYKCogVMi1@8D=W)Tf%MT@9a zkZwLVsFxl`t z+EPkfp=-9nlU3yDOEuxcj~DizC;XJqwUk}SqwR!e9&-MPrD{S&YqhS-h4sqU*`2yr zJ2&2|3u-Hox)ZC&r&{sbosS~k*{c?u3W!KeBvNx(`=ZLdxpZ#Y#yj2sd_*`&xhWV} zrve%%6iF>WDV}FgN)+2zpo`wYC%0U}$so1&C58Q3plf%3ko(2g(K~$bp5FeeE?D^m zSA_LhOxp@y+A;hd;CSF}=ypr3zY+M1%8n))8|j`725$It&JfSy7wFoxd=s_%{u?eu zjrzu$O?p$`lzfgzzO5$2G*>fjo7v$vb&gLtV!0jR!#tPh_VnnnXLFSqfDh-mNcv3| z(@Z`3gON?Ob2QQHZ`&LHge43k{_OA)`>IQD)m&}1bSVuJ{*GLVlfN_wQk-;SKFf4n zlGJl+RIKKF;JdZsS6whJl7+0G{wyPgxUi3x>hx1CCY8ovw(#McF+ICz^v6uZ#4dFv z1{SbP*C~7%E`KOi!|zy1yx8W|x;&>>1jp(k@?O*E?XL`4PTd^)eV=W(S$sI_YZ#}| zACE_U9riVLJ%7)xA0}$_KgSbA=p#I}W;v?5WU@n)wR8%;vD{v+Z}uaO{(6Q3?ZXDV zrK3i^g{a{R9VmrWBI=p23~?CA!_upS(}zA2m-%s*PVL2T=fk;6y95vz3ZGghJa^}qg|$gyo_ zjGx_k3qz8RZcdW3o^qy8cDVufrCp&eFJmpr>rGj~8rqx%TO~I(w@@$G4+lS{0YO*~ zaPUuI_Fr|uf<}5@7A;F2tPYt(7~C&JdG|wgd8_p_8nhZ`Q+qpB@nzFy<68m`<{(}G z?V|bJx%>=g`D>`RXj$1$tMx@5qc~FHz;s}{*XtNjWxJ>Fg*pg*-d}?&PZtvbEIk!f zjb*y&<|0HbPUo}%tnQ#>VbP-`1MA6jS(zMvD1$Xyi}>;)>MFL*r_FB9KKL&VAHACB zU0v{V2J2i*BiX2Y^kPGi9>O1uzMFQdB>*3to#+c^a{Asv>eZlm^_Q`s<70>u)!|~U zKpb=5CV8`4#k$njuXV^zLs~wZwRpIh(trq`lzTh{8v?c)1>w<)UP zuoTG5>vZP?u0wqNfhMdeJ0(vx*^V#Z_@Yl7_#k%tl+>Kvwd0A3_?in%@DA1~9QDx` z8mjIKsac~`p>V`?_CW~^VdfG&5S^NQC+AEMq|uI^U#l98jXvNTc;m0Sp!_Bm{ep1$ z?#TncdVoki_zSN6S6y)MCr;Il)!mg$Z0cRf@Jy8%-^ItP5YKNuoKxfJJ&iZxI)>VN z>veA)UC+JwUx6wur9nc0*|mSnT!z({59d@}{8SSX{Y0`pv-a`KSAHwP?B;`iUbp|M z3r0vRqzu!z_`Vd(u9fO*-z~+p6HFSa@=M9LF#`rNL`8JvC_k2OkNYA%n<-|yuv?6_ z7TVseIyvfjtigOdXI~TV)dd6MxPbm5;wF?K@AcRD1ZHI!4H3LU!YW+xZ%h8yC4LBL z{8bkm&E%wktfD72_dXkRmBTlnlvQ}laJ_m3Q45`I8`$*?dKCFBzEc>0p9}eLJ^*(w(3n!@%L)rC*E6I?IV$10L_^^VR!3pr zMm{@utv1{6AL7e?DA%=q%28uBX{hwgXi(556!_9c9Y1O#;?Heug5QiNVbIabc_h6p z03RKl4BA+w(Jy=(Mg49UW!{9M5-m_NrVax0W_-{tkuxlDBbu$eWx_viN4bS%k;KMna27T`!ZN?hDLj@hqj$Rb< z+G*S!KAex?puaSx;}7sN3D-(25_+J%_7hxy4`=!q1G`d*VNkpryQ6eAuCZM)Er5=BK&0vGK;u2kB9TaXXJ(gM<1NaEXcZzf1y`C|u zooo2&JD5LwILB$#US$^upIOrbqN85A1$$I7`_!vWb)i zyA;1FJnn6*z6zV_x@_zQ##HIsHWx9H-!zR0-{HN)j+HH||MqvRj(lkUbZOgGT`;tf V)X1Jp!NL< - + From 0c7865fdb57fc44708fcd43b24e508b7c6260bba Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 17 Jan 2012 19:18:04 -0500 Subject: [PATCH 12/13] UnitTest for reverseAlleleClipping -- No code modified yet, just implementing a unit test to ensure correctness of the existing code --- .../utils/codecs/vcf/AbstractVCFCodec.java | 1 + .../utils/codecs/vcf/VCFCodecUnitTest.java | 91 +++++++++++++++++++ 2 files changed, 92 insertions(+) create mode 100644 public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 836ba22bf..4726b4e00 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -617,6 +617,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { return clipping; } + /** * clip the alleles, based on the reference * diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java new file mode 100644 index 000000000..7681ed7d1 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFCodecUnitTest.java @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// our package +package org.broadinstitute.sting.utils.codecs.vcf; + + +// the imports for unit testing. + + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.*; +import org.testng.Assert; +import org.testng.annotations.BeforeSuite; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + + +public class VCFCodecUnitTest extends BaseTest { + + // -------------------------------------------------------------------------------- + // + // Provider + // + // -------------------------------------------------------------------------------- + + private class AlleleClippingTestProvider extends TestDataProvider { + final String ref; + final List alleles = new ArrayList(); + final int expectedClip; + + private AlleleClippingTestProvider(final int expectedClip, final String ref, final String ... alleles) { + super(AlleleClippingTestProvider.class); + this.ref = ref; + for ( final String allele : alleles ) + this.alleles.add(Allele.create(allele)); + this.expectedClip = expectedClip; + } + + @Override + public String toString() { + return String.format("ref=%s allele=%s reverse clip %d", ref, alleles, expectedClip); + } + } + + @DataProvider(name = "AlleleClippingTestProvider") + public Object[][] MakeAlleleClippingTest() { + // pair clipping + new AlleleClippingTestProvider(0, "ATT", "CCG"); + new AlleleClippingTestProvider(1, "ATT", "CCT"); + new AlleleClippingTestProvider(2, "ATT", "CTT"); + new AlleleClippingTestProvider(2, "ATT", "ATT"); // cannot completely clip allele + + // triplets + new AlleleClippingTestProvider(0, "ATT", "CTT", "CGG"); + new AlleleClippingTestProvider(1, "ATT", "CTT", "CGT"); // the T can go + new AlleleClippingTestProvider(2, "ATT", "CTT", "CTT"); // both Ts can go + + return AlleleClippingTestProvider.getTests(AlleleClippingTestProvider.class); + } + + + @Test(dataProvider = "AlleleClippingTestProvider") + public void TestAlleleClipping(AlleleClippingTestProvider cfg) { + int result = AbstractVCFCodec.computeReverseClipping(cfg.alleles, cfg.ref, 0, 1); + Assert.assertEquals(result, cfg.expectedClip); + } +} \ No newline at end of file From 763c81d52002cb3e6af5310951b49bc03fde54ef Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 17 Jan 2012 21:06:02 -0500 Subject: [PATCH 13/13] No longer enforce MAX_ALLELE_SIZE in VCF codec -- Instead issue a warning when a large (>1MB) record is encountered -- Optimized ref.getBytes()[i] => (byte)ref.charAt(i), which avoids an implicit O(n) allocation each iteration through computeReverseClipping() --- .../sting/utils/codecs/vcf/AbstractVCFCodec.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java index 4726b4e00..1bdee802b 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java @@ -18,7 +18,7 @@ import java.util.zip.GZIPInputStream; public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { - public final static int MAX_EXPLICIT_ALLELE_SIZE = (int)Math.pow(2, 16); + public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20); protected final static Logger log = Logger.getLogger(VCFCodec.class); protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column @@ -522,8 +522,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { if ( allele == null || allele.length() == 0 ) generateException("Empty alleles are not permitted in VCF records", lineNo); - if ( MAX_EXPLICIT_ALLELE_SIZE != -1 && allele.length() > MAX_EXPLICIT_ALLELE_SIZE ) - generateException(String.format("Allele detected with length %d, exceeding max size %d. Please remove this from the VCF file before continuing", allele.length(), MAX_EXPLICIT_ALLELE_SIZE), lineNo); + if ( MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) + log.warn(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo)); if ( isSymbolicAllele(allele) ) { if ( isRef ) { @@ -576,12 +576,13 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { public static int computeForwardClipping(List unclippedAlleles, String ref) { boolean clipping = true; + final byte ref0 = (byte)ref.charAt(0); for ( Allele a : unclippedAlleles ) { if ( a.isSymbolic() ) continue; - if ( a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0]) ) { + if ( a.length() < 1 || (a.getBases()[0] != ref0) ) { clipping = false; break; } @@ -608,7 +609,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec { stillClipping = false; else if ( ref.length() == clipping ) generateException("bad alleles encountered", lineNo); - else if ( a.getBases()[a.length()-clipping-1] != ref.getBytes()[ref.length()-clipping-1] ) + else if ( a.getBases()[a.length()-clipping-1] != ((byte)ref.charAt(ref.length()-clipping-1)) ) stillClipping = false; } if ( stillClipping )