diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
index e2d2a3d1f..e2663359b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/ContextCovariate.java
@@ -126,8 +126,8 @@ public class ContextCovariate implements StandardCovariate {
     private BitSet contextWith(byte[] bases, int offset, int contextSize) {
         BitSet result = null;
         if (offset - contextSize + 1 >= 0) {
-            String context = new String(Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1));
-            if (!context.contains("N"))
+            final byte[] context = Arrays.copyOfRange(bases, offset - contextSize + 1, offset + 1);
+            if (!BaseUtils.containsBase(context, BaseUtils.N))
                 result = BitSetUtils.bitSetFrom(context);
         }
         return result;
diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
index 61812629c..3871ca987 100644
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@@ -101,6 +101,17 @@ public class BaseUtils {
         return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
     }
 
+    /**
+     * @return true iff the bases array contains at least one instance of base
+     */
+    static public boolean containsBase(final byte[] bases, final byte base) {
+        for ( final byte b : bases ) {
+            if ( b == base )
+                return true;
+        }
+        return false;
+    }
+
     /**
      * Converts a IUPAC nucleotide code to a pair of bases
      *
diff --git a/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java b/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java
index 6d3493211..98c901bcd 100644
--- a/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BitSetUtils.java
@@ -130,32 +130,32 @@ public class BitSetUtils {
         if (number < 0)
             throw new ReviewedStingException("dna conversion cannot handle negative numbers. Possible overflow?");
 
-        int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
-        number -= combinationsFor(length - 1); // subtract the the number of combinations of the preceding context from the number to get to the quasi-canonical representation
+        final int length = contextLengthFor(number); // the length of the context (the number of combinations is memoized, so costs zero to separate this into two method calls)
+        number -= combinationsFor(length - 1); // subtract the number of combinations of the preceding context from the number to get to the quasi-canonical representation
 
-        String dna = "";
+        StringBuilder dna = new StringBuilder();
         while (number > 0) { // perform a simple base_10 to base_4 conversion (quasi-canonical)
             byte base = (byte) (number % 4);
             switch (base) {
                 case 0:
-                    dna = "A" + dna;
+                    dna.append('A');
                     break;
                 case 1:
-                    dna = "C" + dna;
+                    dna.append('C');
                     break;
                 case 2:
-                    dna = "G" + dna;
+                    dna.append('G');
                     break;
                 case 3:
-                    dna = "T" + dna;
+                    dna.append('T');
                     break;
             }
             number /= 4;
         }
         for (int j = dna.length(); j < length; j++)
-            dna = "A" + dna; // add leading A's as necessary (due to the "quasi" canonical status, see description above)
+            dna.append('A'); // add leading A's as necessary (due to the "quasi" canonical status, see description above)
 
-        return dna;
+        return dna.reverse().toString(); // make sure to reverse the string since we should have been pre-pending all along
     }
 
     /**
@@ -178,27 +178,18 @@ public class BitSetUtils {
      * @return the bitset representing the dna sequence
      */
     public static BitSet bitSetFrom(String dna) {
-        if (dna.length() > MAX_DNA_CONTEXT)
-            throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, dna, dna.length()));
+        return bitSetFrom(dna.getBytes());
+    }
 
-        long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
-        long preContext = combinationsFor(dna.length() - 1); // the sum of all combinations that preceded the length of the dna string
-        for (int i = 0; i < dna.length(); i++) {
+    public static BitSet bitSetFrom(final byte[] dna) {
+        if (dna.length > MAX_DNA_CONTEXT)
+            throw new ReviewedStingException(String.format("DNA Length cannot be bigger than %d. dna: %s (%d)", MAX_DNA_CONTEXT, new String(dna), dna.length));
+
+        final long preContext = combinationsFor(dna.length - 1); // the sum of all combinations that preceded the length of the dna string
+        long baseTen = 0; // the number in base_10 that we are going to use to generate the bit set
+        for (final byte base : dna) {
             baseTen *= 4;
-            switch (dna.charAt(i)) {
-                case 'A':
-                    baseTen += 0;
-                    break;
-                case 'C':
-                    baseTen += 1;
-                    break;
-                case 'G':
-                    baseTen += 2;
-                    break;
-                case 'T':
-                    baseTen += 3;
-                    break;
-            }
+            baseTen += BaseUtils.simpleBaseToBaseIndex(base);
         }
         return bitSetFrom(baseTen + preContext); // the number representing this DNA string is the base_10 representation plus all combinations that preceded this string length.
     }