From 727822adb45a9b4da8fac00716e3585b2bfeffa7 Mon Sep 17 00:00:00 2001 From: depristo Date: Thu, 20 May 2010 14:05:13 +0000 Subject: [PATCH] BaseUtils has a clearer distinction between byte and char routines. All char routines are @Deprecated now. Please use bytes. Better organization of reverse(), now in Utils not BaseUtils. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3400 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/alignment/Alignment.java | 3 +- .../sting/alignment/bwa/BWTFiles.java | 3 +- .../alignment/bwa/java/BWAJavaAligner.java | 3 +- .../gatk/walkers/fasta/BamToFastqWalker.java | 3 +- .../SimpleIndelCalculationModel.java | 2 +- .../multisamplecaller/MultiSampleCaller.java | 4 +- ...seTransitionTableCalculatorJavaWalker.java | 2 +- .../PairedQualityScoreCountsWalker.java | 3 +- .../walkers/ReadErrorRateWalker.java | 8 +- .../walkers/SnpCallRateByCoverageWalker.java | 2 +- .../diagnostics/ComputeConfusionMatrix.java | 2 +- .../sting/playground/tools/BamToFastq.java | 3 +- .../broadinstitute/sting/utils/BaseUtils.java | 95 +++++++------------ .../sting/utils/QualityUtils.java | 2 +- .../org/broadinstitute/sting/utils/Utils.java | 50 ++++++++++ .../sting/utils/duplicates/DupUtils.java | 6 +- .../sting/utils/sam/AlignmentUtils.java | 3 +- 17 files changed, 114 insertions(+), 80 deletions(-) diff --git a/java/src/org/broadinstitute/sting/alignment/Alignment.java b/java/src/org/broadinstitute/sting/alignment/Alignment.java index 1bc1465fa..49ae57f0a 100644 --- a/java/src/org/broadinstitute/sting/alignment/Alignment.java +++ b/java/src/org/broadinstitute/sting/alignment/Alignment.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.alignment; import net.sf.samtools.*; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; /** * Represents an alignment of a read to a site in the reference genome. 
@@ -203,7 +204,7 @@ public class Alignment { read.setCigar(alignment.getCigar()); if(alignment.isNegativeStrand()) { read.setReadBases(BaseUtils.simpleReverseComplement(read.getReadBases())); - read.setBaseQualities(BaseUtils.reverse(read.getBaseQualities())); + read.setBaseQualities(Utils.reverse(read.getBaseQualities())); } read.setAttribute("NM",alignment.getEditDistance()); read.setAttribute("MD",alignment.getMismatchingPositions()); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java b/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java index ac1f046bd..c79ffb4a3 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/BWTFiles.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.alignment.bwa; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.alignment.reference.packing.PackUtils; import org.broadinstitute.sting.alignment.reference.bwt.BWT; import org.broadinstitute.sting.alignment.reference.bwt.BWTWriter; @@ -173,7 +174,7 @@ public class BWTFiles { writeEncodedReferenceSequence(normalizedReferenceSequence,pacFile,bwtFile,saFile); // Write the encoded files for the reverse version of this reference sequence. 
- byte[] reverseReferenceSequence = BaseUtils.reverse(normalizedReferenceSequence); + byte[] reverseReferenceSequence = Utils.reverse(normalizedReferenceSequence); rpacFile = File.createTempFile("bwt",".rpac"); rbwtFile = File.createTempFile("bwt",".rbwt"); diff --git a/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java b/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java index 95544fb9e..81186c53e 100644 --- a/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java +++ b/java/src/org/broadinstitute/sting/alignment/bwa/java/BWAJavaAligner.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.alignment.bwa.BWAAligner; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.Alignment; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import java.io.File; import java.util.*; @@ -133,7 +134,7 @@ public class BWAJavaAligner extends BWAAligner { List successfulMatches = new ArrayList(); Byte[] uncomplementedBases = normalizeBases(read.getReadBases()); - Byte[] complementedBases = normalizeBases(BaseUtils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases()))); + Byte[] complementedBases = normalizeBases(Utils.reverse(BaseUtils.simpleReverseComplement(read.getReadBases()))); List forwardLowerBounds = LowerBound.create(uncomplementedBases,forwardBWT); List reverseLowerBounds = LowerBound.create(complementedBases,reverseBWT); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqWalker.java index cc0b7fe13..5ddfb9dfc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/fasta/BamToFastqWalker.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.WalkerName; 
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.commandline.Argument; import net.sf.samtools.SAMRecord; @@ -69,7 +70,7 @@ public class BamToFastqWalker extends ReadWalker { } else { out.println(BaseUtils.simpleReverseComplement(read.getReadString())); out.println("+"); - out.println(BaseUtils.reverse(read.getBaseQualityString())); + out.println(Utils.reverse(read.getBaseQualityString())); } if ( sqbw != null ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java index 56212c79f..a1500557c 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SimpleIndelCalculationModel.java @@ -102,7 +102,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel { // calculate the sum of quality scores for each base ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup(); - List> all_events = pileup.getEventStringsWithCounts(BaseUtils.charSeq2byteSeq(ref)); + List> all_events = pileup.getEventStringsWithCounts(Utils.charSeq2byteSeq(ref)); for ( Pair p : all_events ) { if ( p.second > bestIndelCount ) { bestIndelCount = p.second; diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/multisamplecaller/MultiSampleCaller.java b/java/src/org/broadinstitute/sting/oneoffprojects/multisamplecaller/MultiSampleCaller.java index 473602584..fdc1a61cb 100644 --- a/java/src/org/broadinstitute/sting/oneoffprojects/multisamplecaller/MultiSampleCaller.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/multisamplecaller/MultiSampleCaller.java @@ -841,8 +841,8 @@ public class MultiSampleCaller extends LocusWalker getCorrectlyOrientedBaseQualities(SAMRecord 
read) { - byte[] quals = read.getReadNegativeStrandFlag() ? BaseUtils.reverse(read.getBaseQualities()) : read.getBaseQualities(); + byte[] quals = read.getReadNegativeStrandFlag() ? Utils.reverse(read.getBaseQualities()) : read.getBaseQualities(); return new Pair(quals, read.getFirstOfPairFlag()); } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java index b57f85f69..92aca4643 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/ReadErrorRateWalker.java @@ -100,18 +100,18 @@ public class ReadErrorRateWalker extends ReadWalker, Strin coverage, ((float) coverage)/((float) reads.size()), goodIterations, - BaseUtils.baseIndexToSimpleBase(ref.getBaseIndex()), + (char)BaseUtils.baseIndexToSimpleBase(ref.getBaseIndex()), call == null ? "./." : call.getGenotypeString(), vcCall.getGenotypeString(), call == null ? 0 : call.getType() == vcCall.getType() ? 
1 : 0, diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/ComputeConfusionMatrix.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/ComputeConfusionMatrix.java index 9f4238dd9..a4c8318b9 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/ComputeConfusionMatrix.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/diagnostics/ComputeConfusionMatrix.java @@ -96,7 +96,7 @@ public class ComputeConfusionMatrix extends LocusWalker { } } - String fwAltBase = String.format("%c", BaseUtils.baseIndexToSimpleBase(altBaseIndex)); + String fwAltBase = String.format("%c", (char)BaseUtils.baseIndexToSimpleBase(altBaseIndex)); //String rcAltBase = BaseUtils.simpleComplement(fwAltBase); for (int readIndex = 0; readIndex < context.getReads().size(); readIndex++) { diff --git a/java/src/org/broadinstitute/sting/playground/tools/BamToFastq.java b/java/src/org/broadinstitute/sting/playground/tools/BamToFastq.java index f1080c065..7abc5d160 100644 --- a/java/src/org/broadinstitute/sting/playground/tools/BamToFastq.java +++ b/java/src/org/broadinstitute/sting/playground/tools/BamToFastq.java @@ -9,6 +9,7 @@ import net.sf.samtools.SAMRecord; import java.io.*; import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; /** * Created by IntelliJ IDEA. 
@@ -68,7 +69,7 @@ public class BamToFastq extends CommandLineProgram { } else { out.println(BaseUtils.simpleReverseComplement(read.getReadString())); out.println('+'); - out.println(BaseUtils.reverse(read.getBaseQualityString())); + out.println(Utils.reverse(read.getBaseQualityString())); } } inReader.close(); diff --git a/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/java/src/org/broadinstitute/sting/utils/BaseUtils.java index cdbda9a6a..b1c2b3cc5 100644 --- a/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -8,6 +8,9 @@ import java.util.Random; * BaseUtils contains some basic utilities for manipulating nucleotides. */ public class BaseUtils { + // + // todo -- we need a generalized base abstraction using the Base enum. + // public final static char[] BASES = { 'A', 'C', 'G', 'T' }; public final static char[] EXTENDED_BASES = { 'A', 'C', 'G', 'T', 'N', 'D' }; @@ -79,7 +82,7 @@ public class BaseUtils { private BaseUtils() {} static public boolean basesAreEqual(byte base1, byte base2) { - return simpleBaseToBaseIndex((char)base1) == simpleBaseToBaseIndex((char)base2); + return simpleBaseToBaseIndex(base1) == simpleBaseToBaseIndex(base2); } @@ -89,6 +92,7 @@ public class BaseUtils { * @param code * @return 0, 1, 2, 3, or -1 if the base can't be understood */ + @Deprecated static public char[] iupacToBases(char code) { char[] bases = new char[2]; switch (code) { @@ -151,6 +155,7 @@ public class BaseUtils { * @param base [AaCcGgTt] * @return 0, 1, 2, 3, or -1 if the base can't be understood */ + @Deprecated static public int simpleBaseToBaseIndex(char base) { switch (base) { case '*': // the wildcard character counts as an A @@ -170,6 +175,7 @@ public class BaseUtils { } } + @Deprecated static public int extendedBaseToBaseIndex(char base) { switch (base) { case 'd': @@ -185,6 +191,7 @@ public class BaseUtils { return simpleBaseToBaseIndex((char)base); } + @Deprecated static public boolean 
isRegularBase(char base) { return simpleBaseToBaseIndex(base) != -1; } @@ -193,6 +200,7 @@ public class BaseUtils { return isRegularBase((char)base); } + @Deprecated static public boolean isNBase(char base) { return isNBase((byte)base); } @@ -201,14 +209,13 @@ public class BaseUtils { return base == 'N'; } - /** * Converts a base index to a simple base * * @param baseIndex 0, 1, 2, 3 * @return A, C, G, T, or '.' if the index can't be understood */ - static public char baseIndexToSimpleBase(int baseIndex) { + static public byte baseIndexToSimpleBase(int baseIndex) { switch (baseIndex) { case 0: return 'A'; case 1: return 'C'; @@ -218,6 +225,11 @@ public class BaseUtils { } } + @Deprecated + static public char baseIndexToSimpleBaseAsChar(int baseIndex) { + return (char)baseIndexToSimpleBase(baseIndex); + } + /** * Converts a base index to a base index representing its cross-talk partner * @@ -240,8 +252,9 @@ public class BaseUtils { * @param base [AaCcGgTt] * @return C, A, T, G, or '.' if the base can't be understood */ + @Deprecated static public char crossTalkPartnerBase(char base) { - return baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base))); + return (char)baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base))); } /** @@ -260,7 +273,6 @@ public class BaseUtils { } } - public static byte getSecondBase(final SAMRecord read, int offset) { byte base2 = '.'; // todo -- what should the default char really be? 
@@ -268,7 +280,7 @@ public class BaseUtils { byte[] compressedQuals = (byte[]) read.getAttribute("SQ"); if (offset != -1 && compressedQuals != null && compressedQuals.length == read.getReadLength()) { - base2 = (byte) BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset])); + base2 = BaseUtils.baseIndexToSimpleBase(QualityUtils.compressedQualityToBaseIndex(compressedQuals[offset])); } } else if (read.getAttribute("E2") != null) { @@ -290,6 +302,7 @@ public class BaseUtils { * @param base the base [AaCcGgTt] * @return the transition of the base, or the input base if it's not one of the understood ones */ + // todo -- are these right? Put into recalibator if really color space specific static public char transition(char base) { switch (base) { case 'A': @@ -310,6 +323,7 @@ public class BaseUtils { * @param base the base [AaCcGgTt] * @return the transversion of the base, or the input base if it's not one of the understood ones */ + // todo -- are these right? 
Put into recalibator if really color space specific static public char transversion(char base) { switch (base) { case 'A': @@ -330,7 +344,7 @@ public class BaseUtils { * @param base the base [AaCcGgTt] * @return the complementary base, or the input base if it's not one of the understood ones */ - static public char simpleComplement(char base) { + static public byte simpleComplement(byte base) { switch (base) { case 'A': case 'a': return 'T'; @@ -344,6 +358,11 @@ public class BaseUtils { } } + @Deprecated + static public char simpleComplement(char base) { + return (char)simpleComplement((byte)base); + } + /** * Reverse complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form) * @@ -382,6 +401,7 @@ public class BaseUtils { * @param bases the char array of bases * @return the reverse complement of the char byte array */ + @Deprecated static public char[] simpleReverseComplement(char[] bases) { char[] rcbases = new char[bases.length]; @@ -398,6 +418,7 @@ public class BaseUtils { * @param bases the char array of bases * @return the complement of the base char array */ + @Deprecated static public char[] simpleComplement(char[] bases) { char[] rcbases = new char[bases.length]; @@ -429,47 +450,6 @@ public class BaseUtils { return new String(simpleComplement(bases.getBytes())); } - /** - * Reverse a byte array of bases - * - * @param bases the byte array of bases - * @return the reverse of the base byte array - */ - static public byte[] reverse(byte[] bases) { - byte[] rcbases = new byte[bases.length]; - - for (int i = 0; i < bases.length; i++) { - rcbases[i] = bases[bases.length - i - 1]; - } - - return rcbases; - } - - /** - * Reverse an int array of bases - * - * @param bases the int array of bases - * @return the reverse of the base int array - */ - static public int[] reverse(int[] bases) { - int[] rcbases = new int[bases.length]; - - for (int i = 0; i < bases.length; i++) { - rcbases[i] = bases[bases.length - i - 1]; - } - - 
return rcbases; - } - - /** - * Reverse (NOT reverse-complement!!) a string - * - * @param bases input string - * @return the reversed string - */ - static public String reverse(String bases) { - return new String( reverse( bases.getBytes() )) ; - } /** * For the most frequent base in the sequence, return the percentage of the read it constitutes. @@ -498,6 +478,12 @@ public class BaseUtils { return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length); } + // -------------------------------------------------------------------------------- + // + // random bases + // + // -------------------------------------------------------------------------------- + /** * Return a random base index (A=0, C=1, G=2, T=3). * @@ -529,7 +515,7 @@ public class BaseUtils { * * @return a random base (A, C, G, T) */ - static public char getRandomBase() { + static public byte getRandomBase() { return getRandomBase('.'); } @@ -539,7 +525,7 @@ public class BaseUtils { * @param excludeBase the base to exclude * @return a random base, excluding the one specified (A, C, G, T) */ - static public char getRandomBase(char excludeBase) { + static public byte getRandomBase(char excludeBase) { return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase))); } @@ -587,15 +573,6 @@ public class BaseUtils { } return period; } - - public static byte[] charSeq2byteSeq(char[] seqIn) { - byte[] seqOut = new byte[seqIn.length]; - for ( int i = 0; i < seqIn.length; i++ ) { - seqOut[i] = (byte)seqIn[i]; - } - return seqOut; - } - } /* code snippet for testing sequencePeriod(): diff --git a/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/java/src/org/broadinstitute/sting/utils/QualityUtils.java index 827b5916a..86537ce4f 100755 --- a/java/src/org/broadinstitute/sting/utils/QualityUtils.java +++ b/java/src/org/broadinstitute/sting/utils/QualityUtils.java @@ -215,6 +215,6 @@ public class QualityUtils { * @return the reverse of the quality 
array */ static public byte[] reverseQualityArray( byte[] quals ) { - return BaseUtils.reverse(quals); // no sense in duplicating functionality + return Utils.reverse(quals); // no sense in duplicating functionality } } diff --git a/java/src/org/broadinstitute/sting/utils/Utils.java b/java/src/org/broadinstitute/sting/utils/Utils.java index 547858fd5..9780acc9f 100755 --- a/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/java/src/org/broadinstitute/sting/utils/Utils.java @@ -420,6 +420,56 @@ public class Utils { return "{" + join(", ", pairs) + "}"; } + + /** + * Reverse a byte array of bases + * + * @param bases the byte array of bases + * @return the reverse of the base byte array + */ + static public byte[] reverse(byte[] bases) { + byte[] rcbases = new byte[bases.length]; + + for (int i = 0; i < bases.length; i++) { + rcbases[i] = bases[bases.length - i - 1]; + } + + return rcbases; + } + + /** + * Reverse an int array of bases + * + * @param bases the int array of bases + * @return the reverse of the base int array + */ + static public int[] reverse(int[] bases) { + int[] rcbases = new int[bases.length]; + + for (int i = 0; i < bases.length; i++) { + rcbases[i] = bases[bases.length - i - 1]; + } + + return rcbases; + } + + /** + * Reverse (NOT reverse-complement!!) 
a string + * + * @param bases input string + * @return the reversed string + */ + static public String reverse(String bases) { + return new String( reverse( bases.getBytes() )) ; + } + + public static byte[] charSeq2byteSeq(char[] seqIn) { + byte[] seqOut = new byte[seqIn.length]; + for ( int i = 0; i < seqIn.length; i++ ) { + seqOut[i] = (byte)seqIn[i]; + } + return seqOut; + } } diff --git a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java index 09d914b3a..4f04bbecc 100644 --- a/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java +++ b/java/src/org/broadinstitute/sting/utils/duplicates/DupUtils.java @@ -74,7 +74,7 @@ public class DupUtils { } private static Pair baseProbs2BaseAndQual(double[] probs, int maxQScore) { - char bestBase = 0; + byte bestBase = 0; double bestProb = Double.NEGATIVE_INFINITY; double sumProbs = 0; @@ -101,13 +101,13 @@ public class DupUtils { // System.out.printf("encoded Q %2d%n", qual); // } - return new Pair((byte)bestBase, qual); + return new Pair(bestBase, qual); } private static void print4BaseQuals(String header, double[] probs) { System.out.printf("%s log10(P(b)) is ", header); for ( int i = 0; i < 4; i++ ) { - System.out.printf("%c=%+.8f ", BaseUtils.baseIndexToSimpleBase(i), probs[i]); + System.out.printf("%c=%+.8f ", (char)BaseUtils.baseIndexToSimpleBase(i), probs[i]); } System.out.printf("%n"); } diff --git a/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java b/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java index 9b0dcaf8b..d3d519395 100644 --- a/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java +++ b/java/src/org/broadinstitute/sting/utils/sam/AlignmentUtils.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.pileup.*; import org.broadinstitute.sting.utils.StingException; import 
org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.Utils; public class AlignmentUtils { @@ -348,7 +349,7 @@ public class AlignmentUtils { public static byte [] getQualsInCycleOrder(SAMRecord read) { if ( isReadUnmapped(read) || ! read.getReadNegativeStrandFlag() ) return read.getBaseQualities(); - return BaseUtils.reverse(read.getBaseQualities()); + return Utils.reverse(read.getBaseQualities()); } /** Takes the alignment of the read sequence readSeq to the reference sequence refSeq