1) Minor fixes to avoid crashes vs CG indel files:
- Add count for complex events, not just insertions and deletions
- Correctly handle large indels that fall outside the bounds of the histogram array: added a count of out-of-bounds indels, which avoids exceptions.

2) Cosmetic fix for R script assessing UG calling performance: draw red y=x line on top of Simulated vs Estimated AC to get a better view of under/over-estimation of AC.



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4758 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
delangel 2010-11-30 21:08:25 +00:00
parent af84462f3e
commit 2ac938fe4e
2 changed files with 27 additions and 7 deletions

View File

@ -49,9 +49,10 @@ for ( i in 1:(dim(results)[1]) ) {
results[i,]$specificity = x$specificity
}
for ( depth in DEPTHS )
for ( depth in DEPTHS ) {
boxplot(called.AC ~ sim.AC, data = subset(d, called.DP == depth * NS), main = paste("Depth of coverage ", depth), xlab = "Simulation AC", ylab = "Called AC", outwex=0.5, col = "cornflowerblue")
abline(a=0,b=1,col="red",lwd=3)
}
print(results)
par(mfcol=c(2,1))

View File

@ -45,7 +45,7 @@ public class IndelStatistics extends VariantEvaluator {
private static final int INDEL_SIZE_LIMIT = 100;
private static final int NUM_SCALAR_COLUMNS = 8;
private static final int NUM_SCALAR_COLUMNS = 10;
static int len2Index(int ind) {
return ind+INDEL_SIZE_LIMIT+NUM_SCALAR_COLUMNS;
@ -67,6 +67,9 @@ public class IndelStatistics extends VariantEvaluator {
COLUMN_KEYS[5] = "number_het_deletions";
COLUMN_KEYS[6] = "number_homozygous_deletions";
COLUMN_KEYS[7] = "number of homozygous reference sites";
COLUMN_KEYS[8] = "number of complex events";
COLUMN_KEYS[9] = "number of long indels";
for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++)
COLUMN_KEYS[k] = "indel_size_len"+Integer.valueOf(index2len(k));
}
@ -132,7 +135,7 @@ public class IndelStatistics extends VariantEvaluator {
*/
public void incrValue(VariantContext vc) {
int eventLength = 0;
boolean isInsertion = false;
boolean isInsertion = false, isDeletion = false;
if ( vc.isInsertion() ) {
eventLength = vc.getAlternateAllele(0).length();
@ -141,8 +144,18 @@ public class IndelStatistics extends VariantEvaluator {
} else if ( vc.isDeletion() ) {
indelSummary.get(ALL_SAMPLES_KEY)[2]++;
eventLength = -vc.getReference().length();
isDeletion = true;
}
indelSummary.get(ALL_SAMPLES_KEY)[len2Index(eventLength)]++;
else {
indelSummary.get(ALL_SAMPLES_KEY)[8]++;
}
// make sure event doesn't overstep array boundaries
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
indelSummary.get(ALL_SAMPLES_KEY)[len2Index(eventLength)]++;
else
indelSummary.get(ALL_SAMPLES_KEY)[9]++;
for( final String sample : vc.getGenotypes().keySet() ) {
if ( indelSummary.containsKey(sample) ) {
@ -153,10 +166,16 @@ public class IndelStatistics extends VariantEvaluator {
if (isInsertion) {
indelSummary.get(sample)[1]++;
}
else
else if (isDeletion)
indelSummary.get(sample)[2]++;
else
indelSummary.get(sample)[8]++;
// update histogram
indelSummary.get(sample)[len2Index(eventLength)]++;
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
indelSummary.get(sample)[len2Index(eventLength)]++;
else
indelSummary.get(sample)[9]++;
if (g.isHet())
if (isInsertion)