/* * The Broad Institute * SOFTWARE COPYRIGHT NOTICE AGREEMENT * This software and its documentation are copyright 2008 by the * Broad Institute/Massachusetts Institute of Technology. All rights are reserved. * * This software is supplied without any warranty or guaranteed support whatsoever. * Neither the Broad Institute nor MIT can be responsible for its use, misuse, * or functionality. */ package edu.mit.broad.sam; /** * Utilty methods. */ final class SAMUtils { private static final byte COMPRESSED_EQUAL_LOW = 0; private static final byte COMPRESSED_A_LOW = 1; private static final byte COMPRESSED_C_LOW = 2; private static final byte COMPRESSED_G_LOW = 4; private static final byte COMPRESSED_T_LOW = 8; private static final byte COMPRESSED_N_LOW = 15; private static final byte COMPRESSED_EQUAL_HIGH = COMPRESSED_EQUAL_LOW << 4; private static final byte COMPRESSED_A_HIGH = COMPRESSED_A_LOW << 4; private static final byte COMPRESSED_C_HIGH = COMPRESSED_C_LOW << 4; private static final byte COMPRESSED_G_HIGH = COMPRESSED_G_LOW << 4; private static final byte COMPRESSED_T_HIGH = (byte)(COMPRESSED_T_LOW << 4); private static final byte COMPRESSED_N_HIGH = (byte)(COMPRESSED_N_LOW << 4); private SAMUtils() { } static int unpackInt16(final byte[] buffer, final int offset) { return ((buffer[offset] & 0xFF) | ((buffer[offset+1] & 0xFF) << 8)); } static int unpackInt32(final byte[] buffer, final int offset) { return ((buffer[offset] & 0xFF) | ((buffer[offset+1] & 0xFF) << 8) | ((buffer[offset+2] & 0xFF) << 16) | ((buffer[offset+3] & 0xFF) << 24)); } /** * Convert from a byte array containing =AaCcGgTtNn, to a byte array half as long, * with =, A, C, G, T converted to 0, 1, 2, 4, 8, 15 * @param readBases * @return */ static byte[] bytesToCompressedBases(final byte[] readBases) { final byte[] compressedBases = new byte[(readBases.length + 1)/2]; int i; for (i = 1; i < readBases.length; i+=2) { compressedBases[i/2] = (byte)(charToCompressedBaseHigh(readBases[i-1]) | charToCompressedBaseLow(readBases[i])); } // Last nybble if (i == readBases.length) { compressedBases[i/2] = charToCompressedBaseHigh((char)readBases[i-1]); } return compressedBases; } static byte[] compressedBasesToBytes(final int length, final byte[] compressedBases, final int compressedOffset) { final byte[] ret = new byte[length]; int i; for (i = 1; i < length; i+=2) { ret[i-1] = compressedBaseToByteHigh(compressedBases[i/2 + compressedOffset]); ret[i] = compressedBaseToByteLow(compressedBases[i/2 + compressedOffset]); } // Last nybble if (i == length) { ret[i-1] = compressedBaseToByteHigh(compressedBases[i/2 + compressedOffset]); } return ret; } /** * * @param base One of =AaCcGgTtNn * @return nybble-encoded equivalent */ private static byte charToCompressedBaseLow(final int base) { switch (base) { case '=': return COMPRESSED_EQUAL_LOW; case 'a': case 'A': return COMPRESSED_A_LOW; case 'c': case 'C': return COMPRESSED_C_LOW; case 'g': case 'G': return COMPRESSED_G_LOW; case 't': case 'T': return COMPRESSED_T_LOW; case 'n': case 'N': case '.': return COMPRESSED_N_LOW; default: throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); } } private static byte charToCompressedBaseHigh(final int base) { switch (base) { case '=': return COMPRESSED_EQUAL_HIGH; case 'a': case 'A': return COMPRESSED_A_HIGH; case 'c': case 'C': return COMPRESSED_C_HIGH; case 'g': case 'G': return COMPRESSED_G_HIGH; case 't': case 'T': return COMPRESSED_T_HIGH; case 'n': case 'N': case '.': return COMPRESSED_N_HIGH; default: throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); } } /** * * @param base One of COMPRESSED_* * @return one of ACGTN= */ private static byte compressedBaseToByteLow(final int base) { switch (base & 0xf) { case COMPRESSED_EQUAL_LOW: return '='; case COMPRESSED_A_LOW: return 'A'; case COMPRESSED_C_LOW: return 'C'; case COMPRESSED_G_LOW: return 'G'; case COMPRESSED_T_LOW: return 'T'; case COMPRESSED_N_LOW: return 'N'; default: throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); } } private static byte compressedBaseToByteHigh(final int base) { switch ((byte)(base & 0xf0)) { case COMPRESSED_EQUAL_HIGH: return '='; case COMPRESSED_A_HIGH: return 'A'; case COMPRESSED_C_HIGH: return 'C'; case COMPRESSED_G_HIGH: return 'G'; case COMPRESSED_T_HIGH: return 'T'; case COMPRESSED_N_HIGH: return 'N'; default: throw new IllegalArgumentException("Bad byte passed to charToCompressedBase: " + base); } } static String bytesToHexString(final byte[] data) { final char[] chars = new char[2 * data.length]; for (int i = 0; i < data.length; i++) { final byte b = data[i]; chars[2*i] = toHexDigit((b >> 4) & 0xF); chars[2*i+1] = toHexDigit(b & 0xF); } return new String(chars); } static byte[] hexStringToBytes(final String s) throws NumberFormatException { if (s.length() % 2 != 0) { throw new NumberFormatException("Hex representation of byte string does not have even number of hex chars: " + s); } final byte[] ret = new byte[s.length() / 2]; for (int i = 0; i < ret.length; ++i) { ret[i] = (byte) (fromHexDigit(s.charAt(i * 2)) << 4 + fromHexDigit(s.charAt(i * 2 + 1))); } return ret; } static String phredToFastq(final byte[] data) { if (data == null) { return null; } return phredToFastq(data, 0, data.length); } static String phredToFastq(final byte[] buffer, final int offset, final int length) { final char[] chars = new char[length]; for (int i = 0; i < length; i++) { chars[i] = phredToFastq(buffer[offset+i] & 0xFF); } return new String(chars); } static char phredToFastq(final int phredScore) { if (phredScore < 0 || phredScore > 63) { throw new IllegalArgumentException("Cannot encode phred score: " + phredScore); } return (char) (33 + phredScore); } static byte[] fastqToPhred(final String fastq) { if (fastq == null) { return null; } final int length = fastq.length(); final byte[] scores = new byte[length]; for (int i = 0; i < length; i++) { scores[i] = (byte) fastqToPhred(fastq.charAt(i)); } return scores; } static int fastqToPhred(final char ch) { if (ch < 33 || ch > 126) { throw new IllegalArgumentException("Invalid fastq character: " + ch); } return (ch - 33); } private static char toHexDigit(final int value) { return (char) ((value < 10) ? ('0' + value) : ('A' + value - 10)); } private static int fromHexDigit(final char c) throws NumberFormatException { final int ret = Character.digit(c, 16); if (ret == -1) { throw new NumberFormatException("Not a valid hex digit: " + c); } return ret; } /** * calculate the bin given an alignment in [beg,end) * Copied from SAM spec. */ static int reg2bin(final int beg, int end) { --end; if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14); if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17); if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20); if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23); if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26); return 0; } }