Output improvements to GenotypeConcordance calculations
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1331 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
72dda0b85c
commit
56f769f2ce
|
|
@ -21,16 +21,12 @@ import java.util.ArrayList;
|
||||||
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
public class GenotypeConcordance extends BasicVariantAnalysis implements GenotypeAnalysis {
|
||||||
private String dbName;
|
private String dbName;
|
||||||
|
|
||||||
private static final int TRUTH_REF = 0;
|
private static final int REF = 0;
|
||||||
private static final int TRUTH_VAR_HET = 1;
|
private static final int VAR_HET = 1;
|
||||||
private static final int TRUTH_VAR_HOM = 2;
|
private static final int VAR_HOM = 2;
|
||||||
private static final int TRUTH_UNKNOWN = 3;
|
private static final int UNKNOWN = 3;
|
||||||
|
private static final int NO_CALL = 3; // synonym
|
||||||
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"};
|
private static final String[] TRUTH_NAMES = {"IS_REF", "IS_VAR_HET", "IS_VAR_HOM", "UNKNOWN"};
|
||||||
|
|
||||||
private static final int CALL_REF = 0;
|
|
||||||
private static final int CALL_VAR_HET = 1;
|
|
||||||
private static final int CALL_VAR_HOM = 2;
|
|
||||||
private static final int NO_CALL = 3;
|
|
||||||
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"};
|
private static final String[] CALL_NAMES = {"CALLED_REF", "CALLED_VAR_HET", "CALLED_VAR_HOM", "NO_CALL"};
|
||||||
|
|
||||||
private int[][] table = new int[4][4];
|
private int[][] table = new int[4][4];
|
||||||
|
|
@ -54,29 +50,29 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
|
|
||||||
int truthIndex, callIndex;
|
int truthIndex, callIndex;
|
||||||
if ( chip == null )
|
if ( chip == null )
|
||||||
truthIndex = TRUTH_UNKNOWN;
|
truthIndex = UNKNOWN;
|
||||||
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
|
else if ( chip.isReference() && Utils.countOccurrences(ref, chip.getGenotype().get(0)) == chip.getGenotype().get(0).length() )
|
||||||
truthIndex = TRUTH_REF;
|
truthIndex = REF;
|
||||||
else if ( isHet(chip) )
|
else if ( isHet(chip) )
|
||||||
truthIndex = TRUTH_VAR_HET;
|
truthIndex = VAR_HET;
|
||||||
else
|
else
|
||||||
truthIndex = TRUTH_VAR_HOM;
|
truthIndex = VAR_HOM;
|
||||||
|
|
||||||
// todo -- FIXME on countOccurrences
|
// todo -- FIXME on countOccurrences
|
||||||
if ( eval == null )
|
if ( eval == null )
|
||||||
callIndex = NO_CALL;
|
callIndex = NO_CALL;
|
||||||
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
|
else if ( eval.isReference() && Utils.countOccurrences(ref, eval.getGenotype().get(0)) == eval.getGenotype().get(0).length() )
|
||||||
callIndex = CALL_REF;
|
callIndex = REF;
|
||||||
else if ( isHet(eval) )
|
else if ( isHet(eval) )
|
||||||
callIndex = CALL_VAR_HET;
|
callIndex = VAR_HET;
|
||||||
else
|
else
|
||||||
callIndex = CALL_VAR_HOM;
|
callIndex = VAR_HOM;
|
||||||
|
|
||||||
if ( chip != null || eval != null ) {
|
if ( chip != null || eval != null ) {
|
||||||
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
//System.out.printf("TEST: %d/%d %s vs. %s%n", truthIndex, callIndex, chip, eval);
|
||||||
table[truthIndex][callIndex]++;
|
table[truthIndex][callIndex]++;
|
||||||
truth_totals[truthIndex]++;
|
truth_totals[truthIndex]++;
|
||||||
calls_totals[callIndex]++;
|
if ( callIndex != NO_CALL ) calls_totals[callIndex]++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -86,53 +82,84 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void addCalledGenotypeConcordance(List<String> s) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
sb.append("CALLED_GENOTYPE_CONCORDANCE\t");
|
||||||
|
for ( int i = 0; i < 4; i++ ) {
|
||||||
|
int nConcordantCallsI = table[i][i];
|
||||||
|
String value = "N/A";
|
||||||
|
if ( i != UNKNOWN )
|
||||||
|
value = String.format("%s\t", cellPercent(nConcordantCallsI, calls_totals[i]-table[UNKNOWN][i]));
|
||||||
|
sb.append(value);
|
||||||
|
}
|
||||||
|
s.add(sb.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many overall calls were made that aren't NO_CALLS or UNKNOWNS?
|
||||||
|
*/
|
||||||
|
private int getNCalled() {
|
||||||
|
int n = 0;
|
||||||
|
for ( int i = 0; i < 4; i++ )
|
||||||
|
for ( int j = 0; j < 4; j++ )
|
||||||
|
if ( i != NO_CALL && j != NO_CALL ) n += table[i][j];
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addOverallStats(List<String> s) {
|
||||||
|
int nConcordantRefCalls = table[REF][REF];
|
||||||
|
int nConcordantHetCalls = table[VAR_HET][VAR_HET];
|
||||||
|
int nConcordantVarHomCalls = table[VAR_HOM][VAR_HOM];
|
||||||
|
int nVarCalls = table[VAR_HOM][VAR_HET] + table[VAR_HOM][VAR_HOM] + table[VAR_HET][VAR_HET] + table[VAR_HET][VAR_HOM];
|
||||||
|
int nConcordantVarCalls = nConcordantHetCalls + nConcordantVarHomCalls;
|
||||||
|
int nConcordantCalls = nConcordantRefCalls + nConcordantVarCalls;
|
||||||
|
int nTrueVar = truth_totals[VAR_HET] + truth_totals[VAR_HOM];
|
||||||
|
int nCalled = getNCalled();
|
||||||
|
s.add(String.format("VARIANT_SENSITIVITY %s", cellPercent(nVarCalls, nTrueVar)));
|
||||||
|
s.add(String.format("VARIANT_CONCORDANCE %s", cellPercent(nConcordantVarCalls, nVarCalls)));
|
||||||
|
s.add(String.format("OVERALL_CONCORDANCE %s", cellPercent(nConcordantCalls, nCalled)));
|
||||||
|
}
|
||||||
|
|
||||||
public List<String> done() {
|
public List<String> done() {
|
||||||
List<String> s = new ArrayList<String>();
|
List<String> s = new ArrayList<String>();
|
||||||
s.add(String.format("name %s", dbName));
|
s.add(String.format("name %s", dbName));
|
||||||
s.add(String.format("\t\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
s.add(String.format("TRUTH_STATE\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL\t\tTOTALS\tTRUE_GENOTYPE_CONCORDANCE\tGENOTYPE_SENSITIVITY"));
|
||||||
for (int i=0; i < 4; i++) {
|
for (int i=0; i < 4; i++) {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
sb.append(TRUTH_NAMES[i] + "\t");
|
sb.append(String.format("%15s ", TRUTH_NAMES[i]));
|
||||||
for (int j=0; j < 4; j++)
|
for (int j=0; j < 4; j++)
|
||||||
sb.append(table[i][j] +" (" + cellPercent(table[i][j], truth_totals[i]) + ")\t\t");
|
sb.append(String.format("%9d ", table[i][j]));
|
||||||
sb.append(truth_totals[i]);
|
sb.append(String.format("%9d ", truth_totals[i]));
|
||||||
if ( i == TRUTH_VAR_HET || i == TRUTH_VAR_HOM ) {
|
if ( i == VAR_HET || i == VAR_HOM ) {
|
||||||
sb.append("\t"+cellPercent(table[i][i], table[i][CALL_REF]+table[i][CALL_VAR_HET]+table[i][CALL_VAR_HOM]) + "\t\t\t");
|
sb.append(String.format("\t%s\t\t", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM])));
|
||||||
sb.append(cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i]));
|
sb.append(String.format("%s", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i])));
|
||||||
|
} else {
|
||||||
|
sb.append("\tN/A\t\t\tN/A");
|
||||||
}
|
}
|
||||||
s.add(sb.toString());
|
s.add(sb.toString());
|
||||||
}
|
}
|
||||||
s.add("VARIANT_SENSITIVITY: " + cellPercent(table[TRUTH_VAR_HET][CALL_VAR_HET]+table[TRUTH_VAR_HET][CALL_VAR_HOM]+table[TRUTH_VAR_HOM][CALL_VAR_HET]+table[TRUTH_VAR_HOM][CALL_VAR_HOM], truth_totals[TRUTH_VAR_HET]+truth_totals[TRUTH_VAR_HOM]));
|
|
||||||
s.add("\n");
|
addCalledGenotypeConcordance(s);
|
||||||
s.add(String.format("\t\tCALLED_REF\tCALLED_VAR_HET\tCALLED_VAR_HOM\tNO_CALL"));
|
addOverallStats(s);
|
||||||
for (int i=0; i < 4; i++) {
|
|
||||||
StringBuffer sb = new StringBuffer();
|
|
||||||
sb.append(TRUTH_NAMES[i] + "\t");
|
|
||||||
for (int j=0; j < 4; j++)
|
|
||||||
sb.append(table[i][j] + " (" + cellPercent(table[i][j], calls_totals[j]) + ")\t\t");
|
|
||||||
s.add(sb.toString());
|
|
||||||
}
|
|
||||||
s.add(String.format("TOTALS\t%d\t\t%d\t\t%d\t\t%d", calls_totals[CALL_REF], calls_totals[CALL_VAR_HET], calls_totals[CALL_VAR_HOM], calls_totals[NO_CALL]));
|
|
||||||
s.add("\n");
|
|
||||||
for (int i=0; i < 4; i++) {
|
for (int i=0; i < 4; i++) {
|
||||||
for (int j=0; j < 4; j++) {
|
for (int j=0; j < 4; j++) {
|
||||||
s.add(TRUTH_NAMES[i]+"_"+CALL_NAMES[j]+"_COUNT "+table[i][j]);
|
s.add(String.format("%s_%s_%s %d", TRUTH_NAMES[i], CALL_NAMES[j], "NO_SITES", table[i][j]));
|
||||||
s.add(TRUTH_NAMES[i]+"_"+CALL_NAMES[j]+"_PERCENT_OF_TRUTH "+cellPercent(table[i][j], truth_totals[i]));
|
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_TRUTH", cellPercent(table[i][j], truth_totals[i])));
|
||||||
s.add(TRUTH_NAMES[i]+"_"+CALL_NAMES[j]+"_PERCENT_OF_CALLS "+cellPercent(table[i][j], calls_totals[j]));
|
s.add(String.format("%s_%s_%s %s", TRUTH_NAMES[i], CALL_NAMES[j], "PERCENT_OF_CALLS", cellPercent(table[i][j], calls_totals[j])));
|
||||||
}
|
}
|
||||||
if ( i == TRUTH_VAR_HET || i == TRUTH_VAR_HOM ) {
|
if ( i == VAR_HET || i == VAR_HOM ) {
|
||||||
s.add(TRUTH_NAMES[i]+"_TRUE_GENOTYPE_CONCORDANCE "+cellPercent(table[i][i], table[i][CALL_REF]+table[i][CALL_VAR_HET]+table[i][CALL_VAR_HOM]));
|
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "TRUE_GENOTYPE_CONCORDANCE", cellPercent(table[i][i], table[i][REF]+table[i][VAR_HET]+table[i][VAR_HOM])));
|
||||||
s.add(TRUTH_NAMES[i]+"_GENOTYPE_SENSITIVITY "+cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i]));
|
s.add(String.format("%s_%s %s", TRUTH_NAMES[i], "GENOTYPE_SENSITIVITY", cellPercent(truth_totals[i]-table[i][NO_CALL], truth_totals[i])));
|
||||||
} }
|
}
|
||||||
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String cellPercent(int count, int total) {
|
private static String cellPercent(int count, int total) {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
if ( total == 0 )
|
total = Math.max(total, 0);
|
||||||
sb.append(0);
|
sb.append(String.format("%.2f", (100.0*count)/total));
|
||||||
else
|
|
||||||
sb.append(100*count/total);
|
|
||||||
sb.append("%");
|
sb.append("%");
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -147,4 +174,4 @@ public class GenotypeConcordance extends BasicVariantAnalysis implements Genotyp
|
||||||
|
|
||||||
return genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
|
return genotype.get(0).charAt(0) != genotype.get(0).charAt(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -217,7 +217,7 @@ public class VariantEvalWalker extends RefWalker<Integer, Integer> {
|
||||||
stream.printf("%sAnalysis class %s%n", COMMENT_STRING, analysis );
|
stream.printf("%sAnalysis class %s%n", COMMENT_STRING, analysis );
|
||||||
stream.printf("%sAnalysis time %s%n", COMMENT_STRING, now );
|
stream.printf("%sAnalysis time %s%n", COMMENT_STRING, now );
|
||||||
for ( String line : analysis.done()) {
|
for ( String line : analysis.done()) {
|
||||||
stream.printf("%s %s%n", COMMENT_STRING, line);
|
stream.printf("%s%s%n", COMMENT_STRING, line);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue