From 534486a2549451d3101b37fa6fe519a28ac9ba59 Mon Sep 17 00:00:00 2001 From: chartl Date: Thu, 17 Sep 2009 14:00:25 +0000 Subject: [PATCH] Output formatting changed: - summary output now reported as a percentage rather than proportion; 2 sigfigs - fixed minor bug where FNR was calculated over total calls rather than total variant sites - column headers are_now_contiguous_strings - spacing fixed - "No Call" separated from "Ref Call" as its own column git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1644 348d0f76-0448-11de-a6fe-93d51630548a --- .../PooledGenotypeConcordance.java | 59 ++++++++++++------- 1 file changed, 37 insertions(+), 22 deletions(-) diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java index cfc6a4d96..b0237a8ec 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/PooledGenotypeConcordance.java @@ -89,9 +89,9 @@ public class PooledGenotypeConcordance extends BasicVariantAnalysis implements G } class PooledConcordanceTable { - private final int CALL_INDECES = 5; + private final int CALL_INDECES = 6; private final int TRUTH_INDECES = 4; - private final int NO_CALL = 0; + private final int NO_CALL = 5; private final int REF_CALL = 0; // synonym private final int VARIANT_CALL_NONHAPMAP = 1; private final int VARIANT_CALL_MATCH = 2; @@ -299,35 +299,40 @@ class PooledConcordanceTable { nFullHapmapRefSites += table[TRUTH_REF][callIndex]; } - int nRefsCalledCorrectly = table[TRUTH_REF][NO_CALL]; + int nRefsCalledCorrectly = table[TRUTH_REF][REF_CALL]; + int nHapmapRefsNotCalled = table[TRUTH_REF][NO_CALL]; int nRefsCalledAsSNP = table[TRUTH_REF][VARIANT_CALL]; int nSNPsCalledCorrectly = table[TRUTH_VAR][VARIANT_CALL_MATCH]; int 
nSNPsCalledIncorrectly = table[TRUTH_VAR][VARIANT_CALL_MISMATCH]; int nSNPsCalledAsRef = table[TRUTH_VAR][REF_CALL]; + int nHapmapSNPsNotCalled = table[TRUTH_VAR][NO_CALL]; int nSNPsOnHapmapSNP = table[TRUTH_VAR][VARIANT_CALL_MATCH] + table[TRUTH_VAR][VARIANT_CALL_MISMATCH]; int nSNPsAtNonHapmap = table[NO_TRUTH_DATA][VARIANT_CALL_NONHAPMAP]; int nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals = table[TRUTH_UNKNOWN][VARIANT_CALL_UNKNOWN]; - out.add(String.format("| Total Number of SNP Calls: %d", nSNPCallSites)); - out.add(String.format("| Total SNP calls on non-Hapmap sites %d", nSNPsAtNonHapmap)); - out.add(String.format("| Number of Hapmap Sites: %d", nHapmapSites)); + out.add(String.format("| Total Number of SNP Calls:\t\t%d", nSNPCallSites)); + out.add(String.format("| Total SNP calls on non-Hapmap sites\t\t%d", nSNPsAtNonHapmap)); + out.add(String.format("| Number of Hapmap Sites:\t\t%d", nHapmapSites)); out.add("| Data on Hapmap Reference Sites"); - out.add(String.format("| \t+ Sites where all Hapmap chips were ref: %d", nFullHapmapRefSites)); - out.add(String.format("| \t\t- Reference sites correctly called: %d (%f)", nRefsCalledCorrectly, ((double)nRefsCalledCorrectly/nFullHapmapRefSites))); - out.add(String.format("| \t\t- Reference sites called as variant: %d (%f)", nRefsCalledAsSNP, ((double)nRefsCalledAsSNP/nFullHapmapRefSites))); + out.add(String.format("| \t+ Sites where all Hapmap chips were ref: \t\t%d", nFullHapmapRefSites)); + out.add(String.format("| \t\t- Reference sites correctly called:\t\t%d\t(%d%%)", nRefsCalledCorrectly, divideToPercent(nRefsCalledCorrectly, nFullHapmapRefSites))); + out.add(String.format("| \t\t- Reference sites called as variant:\t\t%d\t(%d%%)", nRefsCalledAsSNP, divideToPercent(nRefsCalledAsSNP, nFullHapmapRefSites))); + out.add(String.format("| \t\t- Reference sites not confidently called:\t%d\t(%d%%)", nHapmapRefsNotCalled, divideToPercent(nHapmapRefsNotCalled, nFullHapmapRefSites))); out.add(String.format("| \t+ Sites where all 
seen Hapmap chips were ref, but not all Hapmap chips available: %d", nUnknownHapmapSites)); - out.add(String.format("| \t\t- Putative reference sites called ref: %d (%f)", table[TRUTH_UNKNOWN][REF_CALL], ((double)table[TRUTH_UNKNOWN][REF_CALL]/nUnknownHapmapSites))); - out.add(String.format("| \t\t- Putative reference sites called SNP: %d (%f)", nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals, ((double)nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals/nUnknownHapmapSites))); + out.add(String.format("| \t\t- Putative reference sites called ref:\t\t\t%d\t(%d%%)", table[TRUTH_UNKNOWN][REF_CALL], divideToPercent(table[TRUTH_UNKNOWN][REF_CALL], nUnknownHapmapSites))); + out.add(String.format("| \t\t- Putative reference sites called SNP:\t\t\t%d\t(%d%%)", nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals, divideToPercent(nSNPsAtHapmapWithRefDataOnSubsetOfIndividuals, nUnknownHapmapSites))); + out.add(String.format("| \t\t- Putative reference sites not confidently called:\t%d\t(%d%%)", table[TRUTH_UNKNOWN][NO_CALL], divideToPercent(table[TRUTH_UNKNOWN][NO_CALL], nUnknownHapmapSites))); out.add("| Data on Hapmap Variant Sites"); - out.add(String.format("| \t+ Number of Hapmap SNP Sites: %d", nHapmapSNPSites)); - out.add(String.format("| \t\t- SNP sites incorrectly called ref: %d (%f)", nSNPsCalledAsRef, ((double)nSNPsCalledAsRef/nHapmapSNPSites))); - out.add(String.format("| \t+ SNP calls on Hapmap SNP Sites: %d", nSNPsOnHapmapSNP)); - out.add(String.format("| \t\t- SNP sites correctly called SNP: %d (%f)", nSNPsCalledCorrectly, ((double)nSNPsCalledCorrectly/nSNPsOnHapmapSNP))); - out.add(String.format("| \t\t- SNP sites called a different base: %d (%f)", nSNPsCalledIncorrectly, ((double)nSNPsCalledIncorrectly/nSNPsOnHapmapSNP))); - out.add(String.format("| Calls on reference N: %d", variantCallsAtRefN)); + out.add(String.format("| \t+ Number of Hapmap SNP Sites:\t\t\t%d", nHapmapSNPSites)); + out.add(String.format("| \t\t- SNP sites incorrectly called ref:\t%d\t(%d%%)", 
nSNPsCalledAsRef, divideToPercent(nSNPsCalledAsRef, nHapmapSNPSites))); + out.add(String.format("| \t\t- SNP sites not confidently called:\t%d\t(%d%%)", nHapmapSNPsNotCalled, divideToPercent(nHapmapSNPsNotCalled, nHapmapSNPSites))); + out.add(String.format("| \t+ SNP calls on Hapmap SNP Sites:\t\t%d", nSNPsOnHapmapSNP)); + out.add(String.format("| \t\t- SNP sites correctly called SNP:\t%d\t(%d%%)", nSNPsCalledCorrectly, divideToPercent(nSNPsCalledCorrectly, nSNPsOnHapmapSNP))); + out.add(String.format("| \t\t- SNP sites called a different base:\t%d\t(%d%%)", nSNPsCalledIncorrectly, divideToPercent(nSNPsCalledIncorrectly, nSNPsOnHapmapSNP))); + out.add(String.format("| Calls on reference N:\t\t%d", variantCallsAtRefN)); out.add("----------------------- Output By Allele Frequency ------------------------"); out.add(""); - out.add("FREQUENCY \tFALSE POSITIVES\tTRUE NEGATIVES\tFALSE NEGATIVES\tTRUE POSITIVES\tMISCALLS\tFALSE NEGATIVE RATE\tFALSE POSITIVE RATE"); + out.add("FREQUENCY \tFALSE_POSITIVES\tTRUE_NEGATIVES\tFALSE_NEGATIVES\tTRUE_POSITIVES\tMISCALLS\tNO_CALLS\tFALSE_NEGATIVE_RATE\tFALSE_POSITIVE_RATE"); for( int i = 0; i < getLargestOutputAlleleFrequencyIndex(); i ++) { double freq = freqIndexToFrequency(i); int nRefsCalledAsSNPFreq = tableByHMFrequency[i][TRUTH_REF][VARIANT_CALL]; @@ -335,11 +340,17 @@ class PooledConcordanceTable { int nSNPsCalledIncorrectlyFreq = tableByHMFrequency[i][TRUTH_VAR][VARIANT_CALL_MISMATCH]; int nSNPsCalledCorrectlyFreq = tableByHMFrequency[i][TRUTH_VAR][VARIANT_CALL_MATCH]; int nSNPsCalledAsRefFreq = tableByHMFrequency[i][TRUTH_VAR][REF_CALL]; + int nNoCallsAtFreq = tableByHMFrequency[i][TRUTH_VAR][NO_CALL] + tableByHMFrequency[i][TRUTH_REF][NO_CALL]; int nSnpsCalledAtFreq = nSNPsCalledIncorrectlyFreq + nSNPsCalledCorrectlyFreq + nSNPsCalledAsRefFreq; - double fnrate = ((double) nSNPsCalledAsRefFreq / nSnpsCalledAtFreq); - double fprate = ((double) nRefsCalledAsSNPFreq / (nRefsCalledAsSNPFreq + nRefsCalledCorrectFreq)); - 
out.add(String.format("%f\t%d\t\t%d\t\t%d\t\t\t%d\t%d\t\t%f\t\t\t%f", - freq, nRefsCalledAsSNPFreq, nRefsCalledCorrectFreq, nSNPsCalledAsRefFreq, nSNPsCalledCorrectlyFreq, nSNPsCalledIncorrectlyFreq, fnrate, fprate)); + int nSNPsAtFreq = 0; + for( int j = 0; j < CALL_INDECES; j ++ ) { + nSNPsAtFreq += tableByHMFrequency[i][TRUTH_VAR][j]; + } + int nSNPsNoCallFreq = tableByHMFrequency[i][TRUTH_VAR][NO_CALL]; + double fnrate = ((double) (nSNPsNoCallFreq + nSNPsCalledAsRefFreq) / (nSNPsAtFreq)); + double fprate = ((double) nRefsCalledAsSNPFreq / (nRefsCalledAsSNPFreq + nRefsCalledCorrectFreq + tableByHMFrequency[i][TRUTH_REF][NO_CALL])); + out.add(String.format("%f\t%d\t\t%d\t\t%d\t\t\t%d\t%d\t\t%d\t\t%f\t\t\t%f", + freq, nRefsCalledAsSNPFreq, nRefsCalledCorrectFreq, nSNPsCalledAsRefFreq, nSNPsCalledCorrectlyFreq, nSNPsCalledIncorrectlyFreq, nNoCallsAtFreq, fnrate, fprate)); } return out; @@ -357,4 +368,8 @@ class PooledConcordanceTable { return freqIndex; } + /** Returns 100*numerator/denominator floored to an int; returns 0 when denominator is 0 so an infinite quotient is never cast to int. */ + public int divideToPercent( int numerator, int denominator ) { + return denominator == 0 ? 0 : (int) Math.floor(100.0*((double) numerator)/ denominator ); + } }