diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
index 72c2b2829..311e33f8a 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/RecalDataManager.java
@@ -36,6 +36,7 @@ import org.broadinstitute.sting.utils.recalibration.BaseRecalibration;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList;
import java.util.List;
@@ -284,7 +285,7 @@ public class RecalDataManager {
public static void parseColorSpace(final GATKSAMRecord read) {
// If this is a SOLID read then we have to check if the color space is inconsistent. This is our only sign that SOLID has inserted the reference base
- if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
+ if (ReadUtils.isSOLiDRead(read)) {
if (read.getAttribute(RecalDataManager.COLOR_SPACE_INCONSISTENCY_TAG) == null) { // Haven't calculated the inconsistency array yet for this read
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
@@ -382,7 +383,7 @@ public class RecalDataManager {
}
public static boolean checkNoCallColorSpace(final GATKSAMRecord read) {
- if (read.getReadGroup().getPlatform().toUpperCase().contains("SOLID")) {
+ if (ReadUtils.isSOLiDRead(read)) {
final Object attr = read.getAttribute(RecalDataManager.COLOR_SPACE_ATTRIBUTE_TAG);
if (attr != null) {
byte[] colorSpace;
@@ -611,21 +612,17 @@ public class RecalDataManager {
final Comparable[][] covariateValues_offset_x_covar = new Comparable[readLength][numRequestedCovariates];
final Comparable[] tempCovariateValuesHolder = new Comparable[readLength];
- // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
- for (int i = 0; i < numRequestedCovariates; i++) {
+ for (int i = 0; i < numRequestedCovariates; i++) { // Loop through the list of requested covariates and compute the values of each covariate for all positions in this read
requestedCovariates.get(i).getValues(gatkRead, tempCovariateValuesHolder, modelType);
- for (int j = 0; j < readLength; j++) {
- //copy values into a 2D array that allows all covar types to be extracted at once for
- //an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
- covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j];
- }
+ for (int j = 0; j < readLength; j++)
+ covariateValues_offset_x_covar[j][i] = tempCovariateValuesHolder[j]; // copy values into a 2D array that allows all covar types to be extracted at once for an offset j by doing covariateValues_offset_x_covar[j]. This avoids the need to later iterate over covar types.
}
return covariateValues_offset_x_covar;
}
/**
- * Perform a ceratin transversion (A <-> C or G <-> T) on the base.
+ * Perform a certain transversion (A <-> C or G <-> T) on the base.
*
* @param base the base [AaCcGgTt]
* @return the transversion of the base, or the input base if it's not one of the understood ones
diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
index 673b1524d..61812629c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@@ -2,57 +2,59 @@ package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-
/**
* BaseUtils contains some basic utilities for manipulating nucleotides.
*/
public class BaseUtils {
- public final static byte A = (byte)'A';
- public final static byte C = (byte)'C';
- public final static byte G = (byte)'G';
- public final static byte T = (byte)'T';
+ public final static byte A = (byte) 'A';
+ public final static byte C = (byte) 'C';
+ public final static byte G = (byte) 'G';
+ public final static byte T = (byte) 'T';
- public final static byte N = (byte)'N';
- public final static byte D = (byte)'D';
+ public final static byte N = (byte) 'N';
+ public final static byte D = (byte) 'D';
//
// todo -- we need a generalized base abstraction using the Base enum.
//
- public final static byte[] BASES = { 'A', 'C', 'G', 'T' };
- public final static byte[] EXTENDED_BASES = { 'A', 'C', 'G', 'T', 'N', 'D' };
+ public final static byte[] BASES = {'A', 'C', 'G', 'T'};
+ public final static byte[] EXTENDED_BASES = {'A', 'C', 'G', 'T', 'N', 'D'};
public enum Base {
- A ( 'A', 0 ),
- C ( 'C', 1 ),
- G ( 'G', 2 ),
- T ( 'T', 3 );
+ A('A', 0),
+ C('C', 1),
+ G('G', 2),
+ T('T', 3);
byte b;
int index;
+
private Base(char base, int index) {
- this.b = (byte)base;
+ this.b = (byte) base;
this.index = index;
}
public byte getBase() { return b; }
- public char getBaseAsChar() { return (char)b; }
+
+ public char getBaseAsChar() { return (char) b; }
+
public int getIndex() { return index; }
public boolean sameBase(byte o) { return b == o; }
- public boolean sameBase(char o) { return b == (byte)o; }
- public boolean sameBase(int i) { return index == i; }
- }
+ public boolean sameBase(char o) { return b == (byte) o; }
+
+ public boolean sameBase(int i) { return index == i; }
+ }
// todo -- fix me (enums?)
public static final byte DELETION_INDEX = 4;
public static final byte NO_CALL_INDEX = 5; // (this is 'N')
- public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte)'G');
- public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte)'C');
- public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte)'A');
- public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte)'T');
-
+ public static int gIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'G');
+ public static int cIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'C');
+ public static int aIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'A');
+ public static int tIndex = BaseUtils.simpleBaseToBaseIndex((byte) 'T');
/// In genetics, a transition is a mutation changing a purine to another purine nucleotide (A <-> G) or
// a pyrimidine to another pyrimidine nucleotide (C <-> T).
@@ -64,28 +66,31 @@ public class BaseUtils {
/**
* Returns the base substitution type of the 2 state SNP
+ *
* @param base1
* @param base2
* @return
*/
- public static BaseSubstitutionType SNPSubstitutionType( byte base1, byte base2 ) {
+ public static BaseSubstitutionType SNPSubstitutionType(byte base1, byte base2) {
BaseSubstitutionType t = isTransition(base1, base2) ? BaseSubstitutionType.TRANSITION : BaseSubstitutionType.TRANSVERSION;
//System.out.printf("SNPSubstitutionType( char %c, char %c ) => %s%n", base1, base2, t);
return t;
}
- public static boolean isTransition( byte base1, byte base2 ) {
+ public static boolean isTransition(byte base1, byte base2) {
int b1 = simpleBaseToBaseIndex(base1);
int b2 = simpleBaseToBaseIndex(base2);
return b1 == 0 && b2 == 2 || b1 == 2 && b2 == 0 ||
- b1 == 1 && b2 == 3 || b1 == 3 && b2 == 1;
+ b1 == 1 && b2 == 3 || b1 == 3 && b2 == 1;
}
- public static boolean isTransversion( byte base1, byte base2 ) {
- return ! isTransition(base1, base2);
+ public static boolean isTransversion(byte base1, byte base2) {
+ return !isTransition(base1, base2);
}
- /** Private constructor. No instantiating this class! */
+ /**
+ * Private constructor. No instantiating this class!
+ */
private BaseUtils() {}
static public boolean basesAreEqual(byte base1, byte base2) {
@@ -96,7 +101,6 @@ public class BaseUtils {
return extendedBaseToBaseIndex(base1) == extendedBaseToBaseIndex(base2);
}
-
/**
* Converts a IUPAC nucleotide code to a pair of bases
*
@@ -163,33 +167,37 @@ public class BaseUtils {
/**
* Converts a simple base to a base index
*
- * @param base [AaCcGgTt]
+ * @param base [AaCcGgTt]
* @return 0, 1, 2, 3, or -1 if the base can't be understood
*/
static public int simpleBaseToBaseIndex(byte base) {
switch (base) {
case '*': // the wildcard character counts as an A
case 'A':
- case 'a': return 0;
+ case 'a':
+ return 0;
case 'C':
- case 'c': return 1;
+ case 'c':
+ return 1;
case 'G':
- case 'g': return 2;
+ case 'g':
+ return 2;
case 'T':
- case 't': return 3;
+ case 't':
+ return 3;
- default: return -1;
+ default:
+ return -1;
}
}
-
/**
* Converts a simple base to a base index
*
- * @param base [AaCcGgTt]
+ * @param base [AaCcGgTt]
* @return 0, 1, 2, 3, or -1 if the base can't be understood
*/
@Deprecated
@@ -197,29 +205,37 @@ public class BaseUtils {
switch (base) {
case '*': // the wildcard character counts as an A
case 'A':
- case 'a': return 0;
+ case 'a':
+ return 0;
case 'C':
- case 'c': return 1;
+ case 'c':
+ return 1;
case 'G':
- case 'g': return 2;
+ case 'g':
+ return 2;
case 'T':
- case 't': return 3;
+ case 't':
+ return 3;
- default: return -1;
+ default:
+ return -1;
}
}
static public int extendedBaseToBaseIndex(byte base) {
switch (base) {
case 'd':
- case 'D': return DELETION_INDEX;
+ case 'D':
+ return DELETION_INDEX;
case 'n':
- case 'N': return NO_CALL_INDEX;
+ case 'N':
+ return NO_CALL_INDEX;
- default: return simpleBaseToBaseIndex(base);
+ default:
+ return simpleBaseToBaseIndex(base);
}
}
@@ -232,11 +248,6 @@ public class BaseUtils {
return simpleBaseToBaseIndex(base) != -1;
}
- @Deprecated
- static public boolean isNBase(char base) {
- return isNBase((byte)base);
- }
-
static public boolean isNBase(byte base) {
return base == 'N' || base == 'n';
}
@@ -244,68 +255,83 @@ public class BaseUtils {
/**
* Converts a base index to a simple base
*
- * @param baseIndex 0, 1, 2, 3
+ * @param baseIndex 0, 1, 2, 3
* @return A, C, G, T, or '.' if the index can't be understood
*/
static public byte baseIndexToSimpleBase(int baseIndex) {
switch (baseIndex) {
- case 0: return 'A';
- case 1: return 'C';
- case 2: return 'G';
- case 3: return 'T';
- default: return '.';
+ case 0:
+ return 'A';
+ case 1:
+ return 'C';
+ case 2:
+ return 'G';
+ case 3:
+ return 'T';
+ default:
+ return '.';
}
}
@Deprecated
static public char baseIndexToSimpleBaseAsChar(int baseIndex) {
- return (char)baseIndexToSimpleBase(baseIndex);
+ return (char) baseIndexToSimpleBase(baseIndex);
}
/**
* Converts a base index to a base index representing its cross-talk partner
*
- * @param baseIndex 0, 1, 2, 3
+ * @param baseIndex 0, 1, 2, 3
* @return 1, 0, 3, 2, or -1 if the index can't be understood
*/
static public int crossTalkPartnerIndex(int baseIndex) {
switch (baseIndex) {
- case 0: return 1; // A -> C
- case 1: return 0; // C -> A
- case 2: return 3; // G -> T
- case 3: return 2; // T -> G
- default: return -1;
+ case 0:
+ return 1; // A -> C
+ case 1:
+ return 0; // C -> A
+ case 2:
+ return 3; // G -> T
+ case 3:
+ return 2; // T -> G
+ default:
+ return -1;
}
}
/**
* Converts a base to the base representing its cross-talk partner
*
- * @param base [AaCcGgTt]
+ * @param base [AaCcGgTt]
* @return C, A, T, G, or '.' if the base can't be understood
*/
@Deprecated
static public char crossTalkPartnerBase(char base) {
- return (char)baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
+ return (char) baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
}
/**
* Return the complement of a base index.
*
- * @param baseIndex the base index (0:A, 1:C, 2:G, 3:T)
+ * @param baseIndex the base index (0:A, 1:C, 2:G, 3:T)
* @return the complementary base index
*/
static public byte complementIndex(int baseIndex) {
switch (baseIndex) {
- case 0: return 3; // a -> t
- case 1: return 2; // c -> g
- case 2: return 1; // g -> c
- case 3: return 0; // t -> a
- default: return -1; // wtf?
+ case 0:
+ return 3; // a -> t
+ case 1:
+ return 2; // c -> g
+ case 2:
+ return 1; // g -> c
+ case 3:
+ return 0; // t -> a
+ default:
+ return -1; // wtf?
}
}
- /**
+ /**
* Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base).
*
* @param base the base [AaCcGgTt]
@@ -314,20 +340,25 @@ public class BaseUtils {
static public byte simpleComplement(byte base) {
switch (base) {
case 'A':
- case 'a': return 'T';
+ case 'a':
+ return 'T';
case 'C':
- case 'c': return 'G';
+ case 'c':
+ return 'G';
case 'G':
- case 'g': return 'C';
+ case 'g':
+ return 'C';
case 'T':
- case 't': return 'A';
- default: return base;
+ case 't':
+ return 'A';
+ default:
+ return base;
}
}
@Deprecated
static public char simpleComplement(char base) {
- return (char)simpleComplement((byte)base);
+ return (char) simpleComplement((byte) base);
}
/**
@@ -349,7 +380,7 @@ public class BaseUtils {
/**
* Complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form)
*
- * @param bases the byte array of bases
+ * @param bases the byte array of bases
* @return the complement of the base byte array
*/
static public byte[] simpleComplement(byte[] bases) {
@@ -382,7 +413,7 @@ public class BaseUtils {
/**
* Complement a char array of bases
*
- * @param bases the char array of bases
+ * @param bases the char array of bases
* @return the complement of the base char array
*/
@Deprecated
@@ -399,7 +430,7 @@ public class BaseUtils {
/**
* Reverse complement a String of bases. Preserves ambiguous bases.
*
- * @param bases the String of bases
+ * @param bases the String of bases
* @return the reverse complement of the String
*/
@Deprecated
@@ -407,11 +438,10 @@ public class BaseUtils {
return new String(simpleReverseComplement(bases.getBytes()));
}
-
/**
* Complement a String of bases. Preserves ambiguous bases.
*
- * @param bases the String of bases
+ * @param bases the String of bases
* @return the complement of the String
*/
@Deprecated
@@ -451,7 +481,7 @@ public class BaseUtils {
/**
* Returns the most common base in the basecounts array. To be used with pileup.getBaseCounts.
*
- * @param baseCounts counts of a,c,g,t in order.
+ * @param baseCounts counts of a,c,g,t in order.
* @return the most common base
*/
static public byte mostFrequentSimpleBase(int[] baseCounts) {
@@ -461,13 +491,13 @@ public class BaseUtils {
/**
* For the most frequent base in the sequence, return the percentage of the read it constitutes.
*
- * @param sequence the read sequence
- * @return the percentage of the read that's made up of the most frequent base
+ * @param sequence the read sequence
+ * @return the percentage of the read that's made up of the most frequent base
*/
static public double mostFrequentBaseFraction(byte[] sequence) {
int[] baseCounts = new int[4];
- for ( byte base : sequence ) {
+ for (byte base : sequence) {
int baseIndex = simpleBaseToBaseIndex(base);
if (baseIndex >= 0) {
@@ -477,7 +507,7 @@ public class BaseUtils {
int mostFrequentBaseIndex = mostFrequentBaseIndex(baseCounts);
- return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length);
+ return ((double) baseCounts[mostFrequentBaseIndex]) / ((double) sequence.length);
}
// --------------------------------------------------------------------------------
@@ -531,50 +561,50 @@ public class BaseUtils {
static public byte getRandomBase(char excludeBase) {
return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
}
-
-
- /** Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
+
+ /**
+ * Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
* that for all i = 0... seq.length-1, seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p).
- * The sequence does not have to contain whole number of periods. For instance, "ACACACAC" has a period
- * of 2 (it has a period of 4 as well), and so does
- * "ACACA"; similarly, smallest periods of "CTCCTC", "CTCCT", and "CTCC" are all equal to 3. The "trivial" period is
+ * The sequence does not have to contain whole number of periods. For instance, "ACACACAC" has a period
+ * of 2 (it has a period of 4 as well), and so does
+ * "ACACA"; similarly, smallest periods of "CTCCTC", "CTCCT", and "CTCC" are all equal to 3. The "trivial" period is
* the length of the string itself, and it will always be returned if no smaller period can be found in the specified period range
* or if specified minPeriod is greater than the sequence length.
- *
+ *
* @param seq
* @return
*/
public static int sequencePeriod(byte[] seq, int minPeriod) {
- int period = ( minPeriod > seq.length ? seq.length : minPeriod );
- // we assume that bases [0,period-1] repeat themselves and check this assumption
- // until we find correct period
-
- for ( int pos = period ; pos < seq.length ; pos++ ) {
-
- int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
- // if our current hypothesis holds, base[pos] must be the same as base[offset]
-
- if ( Character.toUpperCase( seq[pos] ) !=
- Character.toUpperCase( seq[offset] )
- ) {
-
- // period we have been trying so far does not work.
- // two possibilities:
- // A) offset = 0, i.e. current position pos must be start of the next repeat, but it is not;
- // in this case only bases from start up to the current one, inclusive, may form a repeat, if at all;
- // so period is at least pos+1 (remember, pos is 0-based), then on the next loop re-entrance
- // pos will be autoincremented and we will be checking next base
- // B) offset != 0, i.e. the current base breaks the repeat, but maybe it starts a new one?
- // hence we should first check if it matches the first base of the sequence, and to do that
- // we set period to pos (thus trying the hypothesis that bases from start up to the current one,
- // non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
- // on the next loop re-entrance after pos is autoincremented)
- if ( offset == 0 ) period = pos+1;
- else period = pos-- ;
-
- }
- }
- return period;
+ int period = (minPeriod > seq.length ? seq.length : minPeriod);
+ // we assume that bases [0,period-1] repeat themselves and check this assumption
+ // until we find correct period
+
+ for (int pos = period; pos < seq.length; pos++) {
+
+ int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
+ // if our current hypothesis holds, base[pos] must be the same as base[offset]
+
+ if (Character.toUpperCase(seq[pos]) != Character.toUpperCase(seq[offset])) {
+
+ // period we have been trying so far does not work.
+ // two possibilities:
+ // A) offset = 0, i.e. current position pos must be start of the next repeat, but it is not;
+ // in this case only bases from start up to the current one, inclusive, may form a repeat, if at all;
+ // so period is at least pos+1 (remember, pos is 0-based), then on the next loop re-entrance
+ // pos will be autoincremented and we will be checking next base
+ // B) offset != 0, i.e. the current base breaks the repeat, but maybe it starts a new one?
+ // hence we should first check if it matches the first base of the sequence, and to do that
+ // we set period to pos (thus trying the hypothesis that bases from start up to the current one,
+ // non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
+ // on the next loop re-entrance after pos is autoincremented)
+ if (offset == 0)
+ period = pos + 1;
+ else
+ period = pos--;
+
+ }
+ }
+ return period;
}
}