2009-04-13 08:46:23 +08:00
|
|
|
package org.broadinstitute.sting.utils;
|
|
|
|
|
|
2009-04-14 22:49:12 +08:00
|
|
|
/**
 * BaseUtils contains some basic utilities for manipulating nucleotides.
 *
 * Bases are represented either as characters ([AaCcGgTt]) or as base
 * indices (0:A, 1:C, 2:G, 3:T).  All methods are static and stateless.
 *
 * @author Kiran Garimella
 */
public class BaseUtils {
    /** Private constructor.  No instantiating this class! */
    private BaseUtils() {}

    /**
     * Converts a simple base to a base index
     *
     * @param base  [AaCcGgTt]
     * @return 0, 1, 2, 3, or -1 if the base can't be understood
     */
    static public int simpleBaseToBaseIndex(char base) {
        switch (base) {
            case 'A':
            case 'a': return 0;

            case 'C':
            case 'c': return 1;

            case 'G':
            case 'g': return 2;

            case 'T':
            case 't': return 3;

            default: return -1;
        }
    }

    /**
     * Converts a base index to a simple base
     *
     * @param baseIndex  0, 1, 2, 3
     * @return A, C, G, T, or '.' if the index can't be understood
     */
    static public char baseIndexToSimpleBase(int baseIndex) {
        switch (baseIndex) {
            case 0: return 'A';
            case 1: return 'C';
            case 2: return 'G';
            case 3: return 'T';
            default: return '.';
        }
    }

    /**
     * Converts a base index to a base index representing its cross-talk partner
     *
     * @param baseIndex  0, 1, 2, 3
     * @return 1, 0, 3, 2, or -1 if the index can't be understood
     */
    static public int crossTalkPartnerIndex(int baseIndex) {
        switch (baseIndex) {
            case 0: return 1; // A -> C
            case 1: return 0; // C -> A
            case 2: return 3; // G -> T
            case 3: return 2; // T -> G
            default: return -1;
        }
    }

    /**
     * Converts a base to the base representing its cross-talk partner
     *
     * @param base  [AaCcGgTt]
     * @return C, A, T, G (always upper case), or '.' if the base can't be understood
     */
    static public char crossTalkPartnerBase(char base) {
        // An unrecognised base maps to index -1, whose partner is -1, which renders as '.'.
        return baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
    }

    /**
     * Return the complement of a base index.
     *
     * @param baseIndex  the base index (0:A, 1:C, 2:G, 3:T)
     * @return the complementary base index, or -1 if the index can't be understood
     */
    static public byte complementIndex(int baseIndex) {
        switch (baseIndex) {
            case 0: return 3; // a -> t
            case 1: return 2; // c -> g
            case 2: return 1; // g -> c
            case 3: return 0; // t -> a
            default: return -1; // unrecognised index
        }
    }

    /**
     * Return the complement of a base.
     *
     * @param base  the base [AaCcGgTt]
     * @return the complementary base as an upper-case character in byte form
     *         (T, G, C, A), or '.' if the base can't be understood
     */
    static public byte simpleComplement(char base) {
        switch (base) {
            case 'A':
            case 'a': return 'T';
            case 'C':
            case 'c': return 'G';
            case 'G':
            case 'g': return 'C';
            case 'T':
            case 't': return 'A';
            default: return '.';
        }
    }

    /**
     * Reverse complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form)
     *
     * Bases that {@link #simpleComplement(char)} does not understand come back as '.'.
     * The input array is not modified.
     *
     * @param bases  the byte array of bases
     * @return a new array holding the reverse complement of the base byte array
     */
    static public byte[] simpleReverseComplement(byte[] bases) {
        byte[] rcbases = new byte[bases.length];

        for (int i = 0; i < bases.length; i++) {
            // Output position i takes the complement of the base i positions from the end.
            rcbases[i] = simpleComplement((char) bases[bases.length - 1 - i]);
        }

        return rcbases;
    }

    /**
     * Reverse complement a String of bases.  The characters 'N' and '.' are
     * carried through unchanged; any other character that
     * {@link #simpleComplement(char)} does not understand comes back as '.'.
     *
     * @param bases  the String of bases
     * @return the reverse complement of the String
     */
    static public String simpleReverseComplement(String bases) {
        char[] rcbases = new char[bases.length()];

        for (int i = 0; i < bases.length(); i++) {
            // Walk the input from its last character towards its first.
            char base = bases.charAt(bases.length() - 1 - i);
            char rcbase = (base == 'N' || base == '.') ? base : (char) simpleComplement(base);

            rcbases[i] = rcbase;
        }

        return new String(rcbases);
    }

    /**
     * Reverse a byte array of bases
     *
     * The input array is not modified.
     *
     * @param bases  the byte array of bases
     * @return a new array holding the bases in reverse order
     */
    static public byte[] reverse(byte[] bases) {
        byte[] rcbases = new byte[bases.length];

        for (int i = 0; i < bases.length; i++) {
            // Mirror the index so the first output element is the last input element.
            rcbases[i] = bases[bases.length - 1 - i];
        }

        return rcbases;
    }
}
|