RR bug fix: when determining the most common base at a position, break ties in favor of the base with the highest sum of base qualities. Otherwise, a site with one Q2 N and one Q30 C ends up as an N in the consensus. Perhaps we shouldn't even care which base has the most observations - the choice could be determined solely by which base has the highest sum of base qualities - but I'm not sure that's what users would expect.

This commit is contained in:
Eric Banks 2012-09-24 21:46:14 -04:00
parent 9464dfdbf2
commit 11a71e0390
1 changed files with 13 additions and 9 deletions

View File

@ -7,7 +7,7 @@ import java.util.EnumMap;
import java.util.Map;
/**
* An object to keep track of the number of occurences of each base and it's quality.
* An object to keep track of the number of occurrences of each base and its quality.
*
* User: depristo
* Date: 4/8/11
@ -83,8 +83,6 @@ import java.util.Map;
}
}
@Ensures("result >= 0")
public int getCount(byte base) {
return getCount(BaseIndex.byteToBase(base));
@ -183,7 +181,7 @@ import java.util.Map;
public BaseIndex baseIndexWithMostCounts() {
BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
for (BaseIndex i : counts.keySet())
if (counts.get(i) > counts.get(maxI))
if (hasHigherCount(i, maxI))
maxI = i;
return maxI;
}
@ -192,17 +190,23 @@ import java.util.Map;
public BaseIndex baseIndexWithMostCountsWithoutIndels() {
BaseIndex mostCounts = MAX_BASE_INDEX_WITH_NO_COUNTS;
for (BaseIndex index : counts.keySet())
if (index.isNucleotide() && counts.get(index) > counts.get(mostCounts))
if (index.isNucleotide() && hasHigherCount(index, mostCounts))
mostCounts = index;
return mostCounts;
}
/**
 * Decides whether one base index outranks another, comparing observation counts first
 * and using the summed qualities only to break an exact tie in counts.
 *
 * NOTE(review): both lookups unbox the map value, so this assumes each index has an
 * entry in {@code counts} (and in {@code sumQuals} when counts tie) - confirm that the
 * maps are fully populated before this is called.
 *
 * @param targetIndex the base index being evaluated as the potential new winner
 * @param testIndex   the base index it is compared against
 * @return true if {@code targetIndex} has a strictly higher count than {@code testIndex},
 *         or if the counts are equal and its {@code sumQuals} entry is strictly higher;
 *         false otherwise (including when both count and quality sum are equal)
 */
private boolean hasHigherCount(final BaseIndex targetIndex, final BaseIndex testIndex) {
final int targetCount = counts.get(targetIndex);
final int testCount = counts.get(testIndex);
// Strict comparisons on both levels: a complete tie returns false.
return ( targetCount > testCount || (targetCount == testCount && sumQuals.get(targetIndex) > sumQuals.get(testIndex)) );
}
@Ensures("result >=0")
public int totalCountWithoutIndels() {
int sum = 0;
for (BaseIndex index : counts.keySet())
if (index.isNucleotide())
sum += counts.get(index);
for (Map.Entry<BaseIndex, Integer> entry : counts.entrySet())
if (entry.getKey().isNucleotide())
sum += entry.getValue();
return sum;
}
@ -222,6 +226,6 @@ import java.util.Map;
}
public Object[] countsArray() {
return (Object []) counts.values().toArray();
return counts.values().toArray();
}
}