1) Minor fixes to avoid crashes vs CG indel files: - Add count for complex events, not just insertions and deletions - Correctly handle large indels that fall out of bounds of the histogram array: added a count of out-of-bounds indels and avoid exceptions. 2) Cosmetic fix for R script assessing UG calling performance: draw red y=x line on top of Simulated vs Estimated AC to get a better view of under/over-estimation of AC. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4758 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
af84462f3e
commit
2ac938fe4e
|
|
@ -49,9 +49,10 @@ for ( i in 1:(dim(results)[1]) ) {
|
||||||
results[i,]$specificity = x$specificity
|
results[i,]$specificity = x$specificity
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( depth in DEPTHS )
|
for ( depth in DEPTHS ) {
|
||||||
boxplot(called.AC ~ sim.AC, data = subset(d, called.DP == depth * NS), main = paste("Depth of coverage ", depth), xlab = "Simulation AC", ylab = "Called AC", outwex=0.5, col = "cornflowerblue")
|
boxplot(called.AC ~ sim.AC, data = subset(d, called.DP == depth * NS), main = paste("Depth of coverage ", depth), xlab = "Simulation AC", ylab = "Called AC", outwex=0.5, col = "cornflowerblue")
|
||||||
|
abline(a=0,b=1,col="red",lwd=3)
|
||||||
|
}
|
||||||
print(results)
|
print(results)
|
||||||
|
|
||||||
par(mfcol=c(2,1))
|
par(mfcol=c(2,1))
|
||||||
|
|
|
||||||
|
|
@ -45,7 +45,7 @@ public class IndelStatistics extends VariantEvaluator {
|
||||||
|
|
||||||
|
|
||||||
private static final int INDEL_SIZE_LIMIT = 100;
|
private static final int INDEL_SIZE_LIMIT = 100;
|
||||||
private static final int NUM_SCALAR_COLUMNS = 8;
|
private static final int NUM_SCALAR_COLUMNS = 10;
|
||||||
|
|
||||||
static int len2Index(int ind) {
|
static int len2Index(int ind) {
|
||||||
return ind+INDEL_SIZE_LIMIT+NUM_SCALAR_COLUMNS;
|
return ind+INDEL_SIZE_LIMIT+NUM_SCALAR_COLUMNS;
|
||||||
|
|
@ -67,6 +67,9 @@ public class IndelStatistics extends VariantEvaluator {
|
||||||
COLUMN_KEYS[5] = "number_het_deletions";
|
COLUMN_KEYS[5] = "number_het_deletions";
|
||||||
COLUMN_KEYS[6] = "number_homozygous_deletions";
|
COLUMN_KEYS[6] = "number_homozygous_deletions";
|
||||||
COLUMN_KEYS[7] = "number of homozygous reference sites";
|
COLUMN_KEYS[7] = "number of homozygous reference sites";
|
||||||
|
COLUMN_KEYS[8] = "number of complex events";
|
||||||
|
COLUMN_KEYS[9] = "number of long indels";
|
||||||
|
|
||||||
for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++)
|
for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++)
|
||||||
COLUMN_KEYS[k] = "indel_size_len"+Integer.valueOf(index2len(k));
|
COLUMN_KEYS[k] = "indel_size_len"+Integer.valueOf(index2len(k));
|
||||||
}
|
}
|
||||||
|
|
@ -132,7 +135,7 @@ public class IndelStatistics extends VariantEvaluator {
|
||||||
*/
|
*/
|
||||||
public void incrValue(VariantContext vc) {
|
public void incrValue(VariantContext vc) {
|
||||||
int eventLength = 0;
|
int eventLength = 0;
|
||||||
boolean isInsertion = false;
|
boolean isInsertion = false, isDeletion = false;
|
||||||
|
|
||||||
if ( vc.isInsertion() ) {
|
if ( vc.isInsertion() ) {
|
||||||
eventLength = vc.getAlternateAllele(0).length();
|
eventLength = vc.getAlternateAllele(0).length();
|
||||||
|
|
@ -141,8 +144,18 @@ public class IndelStatistics extends VariantEvaluator {
|
||||||
} else if ( vc.isDeletion() ) {
|
} else if ( vc.isDeletion() ) {
|
||||||
indelSummary.get(ALL_SAMPLES_KEY)[2]++;
|
indelSummary.get(ALL_SAMPLES_KEY)[2]++;
|
||||||
eventLength = -vc.getReference().length();
|
eventLength = -vc.getReference().length();
|
||||||
|
isDeletion = true;
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
indelSummary.get(ALL_SAMPLES_KEY)[8]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// make sure event doesn't overstep array boundaries
|
||||||
|
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
|
||||||
indelSummary.get(ALL_SAMPLES_KEY)[len2Index(eventLength)]++;
|
indelSummary.get(ALL_SAMPLES_KEY)[len2Index(eventLength)]++;
|
||||||
|
else
|
||||||
|
indelSummary.get(ALL_SAMPLES_KEY)[9]++;
|
||||||
|
|
||||||
|
|
||||||
for( final String sample : vc.getGenotypes().keySet() ) {
|
for( final String sample : vc.getGenotypes().keySet() ) {
|
||||||
if ( indelSummary.containsKey(sample) ) {
|
if ( indelSummary.containsKey(sample) ) {
|
||||||
|
|
@ -153,10 +166,16 @@ public class IndelStatistics extends VariantEvaluator {
|
||||||
if (isInsertion) {
|
if (isInsertion) {
|
||||||
indelSummary.get(sample)[1]++;
|
indelSummary.get(sample)[1]++;
|
||||||
}
|
}
|
||||||
else
|
else if (isDeletion)
|
||||||
indelSummary.get(sample)[2]++;
|
indelSummary.get(sample)[2]++;
|
||||||
|
else
|
||||||
|
indelSummary.get(sample)[8]++;
|
||||||
|
|
||||||
// update histogram
|
// update histogram
|
||||||
|
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
|
||||||
indelSummary.get(sample)[len2Index(eventLength)]++;
|
indelSummary.get(sample)[len2Index(eventLength)]++;
|
||||||
|
else
|
||||||
|
indelSummary.get(sample)[9]++;
|
||||||
|
|
||||||
if (g.isHet())
|
if (g.isHet())
|
||||||
if (isInsertion)
|
if (isInsertion)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue