BaseUtils now has a clearer distinction between byte and char routines. All char routines are @Deprecated now; please use the byte versions instead. Better organization of reverse(), which now lives in Utils rather than BaseUtils.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3400 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-05-20 14:05:13 +00:00
parent 6ce3835622
commit 727822adb4
17 changed files with 114 additions and 80 deletions

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.alignment;
import net.sf.samtools.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
/**
* Represents an alignment of a read to a site in the reference genome.
@ -203,7 +204,7 @@ public class Alignment {
read.setCigar(alignment.getCigar());
if(alignment.isNegativeStrand()) {
read.setReadBases(BaseUtils.simpleReverseComplement(read.getReadBases()));
read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities()));
read.setBaseQualities(Utils.reverse(read.getBaseQualities()));
}
read.setAttribute("NM",alignment.getEditDistance());
read.setAttribute("MD",alignment.getMismatchingPositions());

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.alignment.bwa;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.alignment.reference.packing.PackUtils;
import org.broadinstitute.sting.alignment.reference.bwt.BWT;
import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter;
@ -173,7 +174,7 @@ public class BWTFiles {
writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile);
// Write the encoded files for the reverse version of this reference sequence.
byte[] reverseReferenceSequence = BaseUtils.reverse(normalizedReferenceSequence);
byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence);
rpacFile = File.createTempFile("bwt",".rpac");
rbwtFile = File.createTempFile("bwt",".rbwt");

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.alignment.bwa.BWAAligner;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.Alignment;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import java.io.File;
import java.util.*;
@ -133,7 +134,7 @@ public class BWAJavaAligner extends BWAAligner {
List<Alignment> successfulMatches = new ArrayList<Alignment>();
Byte[] uncomplementedBases = normalizeBases(read.getReadBases());
Byte[] complementedBases = normalizeBases(BaseUtils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases())));
Byte[] complementedBases = normalizeBases(Utils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases())));
List<LowerBound> forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT);
List<LowerBound> reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT);

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.commandline.Argument;
import net.sf.samtools.SAMRecord;
@ -69,7 +70,7 @@ public class BamToFastqWalker extends ReadWalker<Integer, Integer> {
} else {
out.println(BaseUtils.simpleReverseComplement(read.getReadString()));
out.println("+");
out.println(BaseUtils.reverse(read.getBaseQualityString()));
out.println(Utils.reverse(read.getBaseQualityString()));
}
if ( sqbw != null ) {

View File

@ -102,7 +102,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
// calculate the sum of quality scores for each base
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup();
List<Pair<String,Integer>> all_events = pileup.getEventStringsWithCounts(BaseUtils.charSeq2byteSeq(ref));
List<Pair<String,Integer>> all_events = pileup.getEventStringsWithCounts(Utils.charSeq2byteSeq(ref));
for ( Pair<String,Integer> p : all_events ) {
if ( p.second > bestIndelCount ) {
bestIndelCount = p.second;

View File

@ -841,8 +841,8 @@ public class MultiSampleCaller extends LocusWalker<MultiSampleCaller.MultiSample
char PickAlt(char ref, double[] allele_likelihoods)
{
Integer[] perm = MathUtils.sortPermutation(allele_likelihoods);
if (perm[3] != BaseUtils.simpleBaseToBaseIndex(ref)) { return BaseUtils.baseIndexToSimpleBase(perm[3]); }
else { return BaseUtils.baseIndexToSimpleBase(perm[2]); }
if (perm[3] != BaseUtils.simpleBaseToBaseIndex(ref)) { return BaseUtils.baseIndexToSimpleBaseAsChar(perm[3]); }
else { return BaseUtils.baseIndexToSimpleBaseAsChar(perm[2]); }
}
double Compute_discovery_lod(char ref, ClassicGenotypeLikelihoods[] genotype_likelihoods)

View File

@ -359,7 +359,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
}
public char getSecondaryBase ( SAMRecord read, int offset ) {
return BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex( ( (byte[]) read.getAttribute("SQ") )[offset] ) );
return BaseUtils.baseIndexToSimpleBaseAsChar(QualityUtils.compressedQualityToBaseIndex( ( (byte[]) read.getAttribute("SQ") )[offset] ) );
}
public boolean baseIsUsable ( RefMetaDataTracker tracker, ReferenceContext ref, ReadBackedPileup pileup, AlignmentContext context ) {

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.commandline.Argument;
import net.sf.samtools.SAMRecord;
@ -75,7 +76,7 @@ public class PairedQualityScoreCountsWalker extends ReadWalker<Pair<byte[],Boole
}
private Pair<byte[],Boolean> getCorrectlyOrientedBaseQualities(SAMRecord read) {
byte[] quals = read.getReadNegativeStrandFlag() ? BaseUtils.reverse(read.getBaseQualities()) : read.getBaseQualities();
byte[] quals = read.getReadNegativeStrandFlag() ? Utils.reverse(read.getBaseQualities()) : read.getBaseQualities();
return new Pair<byte[], Boolean>(quals, read.getFirstOfPairFlag());
}

View File

@ -100,18 +100,18 @@ public class ReadErrorRateWalker extends ReadWalker<boolean[], ReadErrorRateColl
if (useNextBestBase || useNextRandomBase || useNonNextBestBase) {
byte nextBestBase;
if (useNextBestBase) {
nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(sq[cycle]));
nextBestBase = BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(sq[cycle]));
} else if (useNonNextBestBase) {
nextBestBase = bases[cycle];
Random generator = new Random();
while (nextBestBase == bases[cycle] || nextBestBase == (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(sq[cycle]))) {
nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
while (nextBestBase == bases[cycle] || nextBestBase == BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(sq[cycle]))) {
nextBestBase = BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
}
} else {
nextBestBase = bases[cycle];
Random generator = new Random();
while (nextBestBase == bases[cycle]) {
nextBestBase = (byte) BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
nextBestBase = BaseUtils.baseIndexToSimpleBase(generator.nextInt(4));
}
}

View File

@ -131,7 +131,7 @@ public class SnpCallRateByCoverageWalker extends LocusWalker<List<String>, Strin
coverage,
((float) coverage)/((float) reads.size()),
goodIterations,
BaseUtils.baseIndexToSimpleBase(ref.getBaseIndex()),
(char)BaseUtils.baseIndexToSimpleBase(ref.getBaseIndex()),
call == null ? "./." : call.getGenotypeString(),
vcCall.getGenotypeString(),
call == null ? 0 : call.getType() == vcCall.getType() ? 1 : 0,

View File

@ -96,7 +96,7 @@ public class ComputeConfusionMatrix extends LocusWalker<Integer, Integer> {
}
}
String fwAltBase = String.format("%c", BaseUtils.baseIndexToSimpleBase(altBaseIndex));
String fwAltBase = String.format("%c", (char)BaseUtils.baseIndexToSimpleBase(altBaseIndex));
//String rcAltBase = BaseUtils.simpleComplement(fwAltBase);
for (int readIndex = 0; readIndex < context.getReads().size(); readIndex++) {

View File

@ -9,6 +9,7 @@ import net.sf.samtools.SAMRecord;
import java.io.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
/**
* Created by IntelliJ IDEA.
@ -68,7 +69,7 @@ public class BamToFastq extends CommandLineProgram {
} else {
out.println(BaseUtils.simpleReverseComplement(read.getReadString()));
out.println('+');
out.println(BaseUtils.reverse(read.getBaseQualityString()));
out.println(Utils.reverse(read.getBaseQualityString()));
}
}
inReader.close();

View File

@ -8,6 +8,9 @@ import java.util.Random;
* BaseUtils contains some basic utilities for manipulating nucleotides.
*/
public class BaseUtils {
//
// todo -- we need a generalized base abstraction using the Base enum.
//
public final static char[] BASES = { 'A', 'C', 'G', 'T' };
public final static char[] EXTENDED_BASES = { 'A', 'C', 'G', 'T', 'N', 'D' };
@ -79,7 +82,7 @@ public class BaseUtils {
private BaseUtils() {}
static public boolean basesAreEqual(byte base1, byte base2) {
return simpleBaseToBaseIndex((char)base1) == simpleBaseToBaseIndex((char)base2);
return simpleBaseToBaseIndex(base1) == simpleBaseToBaseIndex(base2);
}
@ -89,6 +92,7 @@ public class BaseUtils {
* @param code
* @return 0, 1, 2, 3, or -1 if the base can't be understood
*/
@Deprecated
static public char[] iupacToBases(char code) {
char[] bases = new char[2];
switch (code) {
@ -151,6 +155,7 @@ public class BaseUtils {
* @param base [AaCcGgTt]
* @return 0, 1, 2, 3, or -1 if the base can't be understood
*/
@Deprecated
static public int simpleBaseToBaseIndex(char base) {
switch (base) {
case '*': // the wildcard character counts as an A
@ -170,6 +175,7 @@ public class BaseUtils {
}
}
@Deprecated
static public int extendedBaseToBaseIndex(char base) {
switch (base) {
case 'd':
@ -185,6 +191,7 @@ public class BaseUtils {
return simpleBaseToBaseIndex((char)base);
}
@Deprecated
static public boolean isRegularBase(char base) {
return simpleBaseToBaseIndex(base) != -1;
}
@ -193,6 +200,7 @@ public class BaseUtils {
return isRegularBase((char)base);
}
@Deprecated
static public boolean isNBase(char base) {
return isNBase((byte)base);
}
@ -201,14 +209,13 @@ public class BaseUtils {
return base == 'N';
}
/**
* Converts a base index to a simple base
*
* @param baseIndex 0, 1, 2, 3
* @return A, C, G, T, or '.' if the index can't be understood
*/
static public char baseIndexToSimpleBase(int baseIndex) {
static public byte baseIndexToSimpleBase(int baseIndex) {
switch (baseIndex) {
case 0: return 'A';
case 1: return 'C';
@ -218,6 +225,11 @@ public class BaseUtils {
}
}
/**
 * Char-typed variant of {@link #baseIndexToSimpleBase(int)}: performs the same
 * lookup and widens the resulting byte to a char.
 *
 * @param baseIndex the base index to convert
 * @return the value of baseIndexToSimpleBase(baseIndex), cast to char
 * @deprecated char routines are deprecated; use the byte-returning
 *             {@link #baseIndexToSimpleBase(int)} instead
 */
@Deprecated
static public char baseIndexToSimpleBaseAsChar(int baseIndex) {
    final byte asByte = baseIndexToSimpleBase(baseIndex);
    return (char) asByte;
}
/**
* Converts a base index to a base index representing its cross-talk partner
*
@ -240,8 +252,9 @@ public class BaseUtils {
* @param base [AaCcGgTt]
* @return C, A, T, G, or '.' if the base can't be understood
*/
@Deprecated
static public char crossTalkPartnerBase(char base) {
return baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
return (char)baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
}
/**
@ -260,7 +273,6 @@ public class BaseUtils {
}
}
public static byte getSecondBase(final SAMRecord read, int offset) {
byte base2 = '.'; // todo -- what should the default char really be?
@ -268,7 +280,7 @@ public class BaseUtils {
byte[] compressedQuals = (byte[]) read.getAttribute("SQ");
if (offset != -1 && compressedQuals != null && compressedQuals.length == read.getReadLength()) {
base2 = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset]));
base2 = BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset]));
}
}
else if (read.getAttribute("E2") != null) {
@ -290,6 +302,7 @@ public class BaseUtils {
* @param base the base [AaCcGgTt]
* @return the transition of the base, or the input base if it's not one of the understood ones
*/
// todo -- are these right? Put into recalibator if really color space specific
static public char transition(char base) {
switch (base) {
case 'A':
@ -310,6 +323,7 @@ public class BaseUtils {
* @param base the base [AaCcGgTt]
* @return the transversion of the base, or the input base if it's not one of the understood ones
*/
// todo -- are these right? Put into recalibator if really color space specific
static public char transversion(char base) {
switch (base) {
case 'A':
@ -330,7 +344,7 @@ public class BaseUtils {
* @param base the base [AaCcGgTt]
* @return the complementary base, or the input base if it's not one of the understood ones
*/
static public char simpleComplement(char base) {
static public byte simpleComplement(byte base) {
switch (base) {
case 'A':
case 'a': return 'T';
@ -344,6 +358,11 @@ public class BaseUtils {
}
}
/**
 * Char-typed variant of {@link #simpleComplement(byte)}: narrows the char to a
 * byte, complements it, and widens the result back to a char.
 *
 * @param base the base, as a char
 * @return the value of simpleComplement((byte) base), cast to char
 * @deprecated char routines are deprecated; use {@link #simpleComplement(byte)} instead
 */
@Deprecated
static public char simpleComplement(char base) {
    final byte narrowed = (byte) base;
    final byte complemented = simpleComplement(narrowed);
    return (char) complemented;
}
/**
* Reverse complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form)
*
@ -382,6 +401,7 @@ public class BaseUtils {
* @param bases the char array of bases
* @return the reverse complement of the char byte array
*/
@Deprecated
static public char[] simpleReverseComplement(char[] bases) {
char[] rcbases = new char[bases.length];
@ -398,6 +418,7 @@ public class BaseUtils {
* @param bases the char array of bases
* @return the complement of the base char array
*/
@Deprecated
static public char[] simpleComplement(char[] bases) {
char[] rcbases = new char[bases.length];
@ -429,47 +450,6 @@ public class BaseUtils {
return new String(simpleComplement(bases.getBytes()));
}
/**
* Reverse a byte array of bases
*
* @param bases the byte array of bases
* @return the reverse of the base byte array
*/
static public byte[] reverse(byte[] bases) {
byte[] rcbases = new byte[bases.length];
for (int i = 0; i < bases.length; i++) {
rcbases[i] = bases[bases.length - i - 1];
}
return rcbases;
}
/**
* Reverse an int array of bases
*
* @param bases the int array of bases
* @return the reverse of the base int array
*/
static public int[] reverse(int[] bases) {
int[] rcbases = new int[bases.length];
for (int i = 0; i < bases.length; i++) {
rcbases[i] = bases[bases.length - i - 1];
}
return rcbases;
}
/**
* Reverse (NOT reverse-complement!!) a string
*
* @param bases input string
* @return the reversed string
*/
static public String reverse(String bases) {
return new String( reverse( bases.getBytes() )) ;
}
/**
* For the most frequent base in the sequence, return the percentage of the read it constitutes.
@ -498,6 +478,12 @@ public class BaseUtils {
return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length);
}
// --------------------------------------------------------------------------------
//
// random bases
//
// --------------------------------------------------------------------------------
/**
* Return a random base index (A=0, C=1, G=2, T=3).
*
@ -529,7 +515,7 @@ public class BaseUtils {
*
* @return a random base (A, C, G, T)
*/
static public char getRandomBase() {
static public byte getRandomBase() {
return getRandomBase('.');
}
@ -539,7 +525,7 @@ public class BaseUtils {
* @param excludeBase the base to exclude
* @return a random base, excluding the one specified (A, C, G, T)
*/
static public char getRandomBase(char excludeBase) {
static public byte getRandomBase(char excludeBase) {
return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
}
@ -587,15 +573,6 @@ public class BaseUtils {
}
return period;
}
public static byte[] charSeq2byteSeq(char[] seqIn) {
byte[] seqOut = new byte[seqIn.length];
for ( int i = 0; i < seqIn.length; i++ ) {
seqOut[i] = (byte)seqIn[i];
}
return seqOut;
}
}
/* code snippet for testing sequencePeriod():

View File

@ -215,6 +215,6 @@ public class QualityUtils {
* @return the reverse of the quality array
*/
static public byte[] reverseQualityArray( byte[] quals ) {
return BaseUtils.reverse(quals); // no sense in duplicating functionality
return Utils.reverse(quals); // no sense in duplicating functionality
}
}

View File

@ -420,6 +420,56 @@ public class Utils {
return "{" + join(", ", pairs) + "}";
}
/**
 * Returns a new byte array holding the elements of the input in the opposite
 * order. The input array itself is not modified.
 *
 * @param bases the byte array to reverse
 * @return a freshly allocated array with the elements of bases back to front
 */
static public byte[] reverse(byte[] bases) {
    final int n = bases.length;
    final byte[] reversed = new byte[n];
    int src = n;
    for (int dst = 0; dst < n; dst++) {
        // src walks down from the end while dst walks up from the start
        reversed[dst] = bases[--src];
    }
    return reversed;
}
/**
 * Returns a new int array holding the elements of the input in the opposite
 * order. The input array itself is not modified.
 *
 * @param bases the int array to reverse
 * @return a freshly allocated array with the elements of bases back to front
 */
static public int[] reverse(int[] bases) {
    final int last = bases.length - 1;
    final int[] flipped = new int[bases.length];
    // Walk the source from its tail; the mirror position in the output is (last - i).
    for (int i = last; i >= 0; i--) {
        flipped[last - i] = bases[i];
    }
    return flipped;
}
/**
 * Reverse (NOT reverse-complement!!) a string.
 *
 * The reversal works on characters rather than on the platform-default byte
 * encoding of the string, so the result does not depend on the JVM's default
 * charset and multi-byte characters are not corrupted. For plain single-byte
 * text such as base or quality strings the output is the same as reversing
 * the underlying bytes.
 *
 * @param bases input string
 * @return the reversed string
 */
static public String reverse(String bases) {
    return new StringBuilder(bases).reverse().toString();
}
/**
 * Converts a char array into a byte array of the same length by casting each
 * char to a byte. The cast keeps only the low-order 8 bits of each char.
 *
 * @param seqIn the char sequence to convert
 * @return a new byte array where element i is (byte) seqIn[i]
 */
public static byte[] charSeq2byteSeq(char[] seqIn) {
    final byte[] narrowed = new byte[seqIn.length];
    int pos = 0;
    for (final char c : seqIn) {
        narrowed[pos++] = (byte) c;
    }
    return narrowed;
}
}

View File

@ -74,7 +74,7 @@ public class DupUtils {
}
private static Pair<Byte, Byte> baseProbs2BaseAndQual(double[] probs, int maxQScore) {
char bestBase = 0;
byte bestBase = 0;
double bestProb = Double.NEGATIVE_INFINITY;
double sumProbs = 0;
@ -101,13 +101,13 @@ public class DupUtils {
// System.out.printf("encoded Q %2d%n", qual);
// }
return new Pair<Byte, Byte>((byte)bestBase, qual);
return new Pair<Byte, Byte>(bestBase, qual);
}
private static void print4BaseQuals(String header, double[] probs) {
System.out.printf("%s log10(P(b)) is ", header);
for ( int i = 0; i < 4; i++ ) {
System.out.printf("%c=%+.8f ", BaseUtils.baseIndexToSimpleBase(i), probs[i]);
System.out.printf("%c=%+.8f ", (char)BaseUtils.baseIndexToSimpleBase(i), probs[i]);
}
System.out.printf("%n");
}

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.pileup.*;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Utils;
public class AlignmentUtils {
@ -348,7 +349,7 @@ public class AlignmentUtils {
public static byte [] getQualsInCycleOrder(SAMRecord read) {
if ( isReadUnmapped(read) || ! read.getReadNegativeStrandFlag() ) return read.getBaseQualities();
return BaseUtils.reverse(read.getBaseQualities());
return Utils.reverse(read.getBaseQualities());
}
/** Takes the alignment of the read sequence <code>readSeq</code> to the reference sequence <code>refSeq</code>