Added a method to determine the fraction of a sequence that's taken up by the most frequent base.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@781 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2009-05-21 20:35:31 +00:00
parent bdf772f017
commit 83e1454a11
1 changed files with 27 additions and 0 deletions

View File

@ -160,4 +160,31 @@ public class BaseUtils {
return rcbases;
}
/**
 * Computes the fraction of a read that is made up of its most frequent base.
 *
 * <p>Each byte is mapped to a base index via {@code simpleBaseToBaseIndex}; bytes for which
 * that lookup returns a negative index are not tallied for any base, but they still count
 * toward the read length used as the denominator.
 *
 * @param sequence the read sequence, one byte per base
 * @return a fraction in the range [0.0, 1.0] (not a percentage): the count of the most
 *         frequent tallied base divided by {@code sequence.length}; {@code 0.0} when the
 *         sequence is empty
 * @throws NullPointerException if {@code sequence} is {@code null}
 */
public static double mostFrequentBaseFraction(byte[] sequence) {
    // An empty read would otherwise evaluate 0.0 / 0.0 and return NaN, which makes every
    // threshold comparison in a caller false; report "no dominant base" instead.
    if (sequence.length == 0) {
        return 0.0;
    }

    int[] baseCounts = new int[4];
    for (byte base : sequence) {
        int baseIndex = simpleBaseToBaseIndex((char) base);
        // Negative index: the helper did not map this byte to one of the four tallied bases.
        if (baseIndex >= 0) {
            baseCounts[baseIndex]++;
        }
    }

    // Only the winning count matters for the result, so ties need no special handling.
    int highestCount = 0;
    for (int count : baseCounts) {
        highestCount = Math.max(highestCount, count);
    }

    return ((double) highestCount) / ((double) sequence.length);
}
}