From 83e1454a11493f84b6bc3a324a117bd3f18e121a Mon Sep 17 00:00:00 2001 From: kiran Date: Thu, 21 May 2009 20:35:31 +0000 Subject: [PATCH] Added a method to determine the fraction of a sequence that's taken up by the most frequent base. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@781 348d0f76-0448-11de-a6fe-93d51630548a --- .../broadinstitute/sting/utils/BaseUtils.java | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/java/src/org/broadinstitute/sting/utils/BaseUtils.java index 9519d175e..29335b333 100644 --- a/java/src/org/broadinstitute/sting/utils/BaseUtils.java +++ b/java/src/org/broadinstitute/sting/utils/BaseUtils.java @@ -160,4 +160,31 @@ public class BaseUtils { return rcbases; } + + /** + * For the most frequent base in the sequence, return the percentage of the read it constitutes. + * + * @param sequence the read sequence + * @return the percentage of the read that's made up of the most frequent base + */ + static public double mostFrequentBaseFraction(byte[] sequence) { + int[] baseCounts = new int[4]; + + for ( byte base : sequence ) { + int baseIndex = simpleBaseToBaseIndex((char) base); + + if (baseIndex >= 0) { + baseCounts[baseIndex]++; + } + } + + int mostFrequentBaseIndex = 0; + for (int baseIndex = 1; baseIndex < 4; baseIndex++) { + if (baseCounts[baseIndex] > baseCounts[mostFrequentBaseIndex]) { + mostFrequentBaseIndex = baseIndex; + } + } + + return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length); + } }