Added a method to determine the fraction of a sequence that's taken up by the most frequent base.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@781 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
bdf772f017
commit
83e1454a11
|
|
@ -160,4 +160,31 @@ public class BaseUtils {
|
||||||
|
|
||||||
return rcbases;
|
return rcbases;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For the most frequent base in the sequence, return the percentage of the read it constitutes.
|
||||||
|
*
|
||||||
|
* @param sequence the read sequence
|
||||||
|
* @return the percentage of the read that's made up of the most frequent base
|
||||||
|
*/
|
||||||
|
static public double mostFrequentBaseFraction(byte[] sequence) {
|
||||||
|
int[] baseCounts = new int[4];
|
||||||
|
|
||||||
|
for ( byte base : sequence ) {
|
||||||
|
int baseIndex = simpleBaseToBaseIndex((char) base);
|
||||||
|
|
||||||
|
if (baseIndex >= 0) {
|
||||||
|
baseCounts[baseIndex]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int mostFrequentBaseIndex = 0;
|
||||||
|
for (int baseIndex = 1; baseIndex < 4; baseIndex++) {
|
||||||
|
if (baseCounts[baseIndex] > baseCounts[mostFrequentBaseIndex]) {
|
||||||
|
mostFrequentBaseIndex = baseIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return ((double) baseCounts[mostFrequentBaseIndex])/((double) sequence.length);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue