From 8b2d387643604e0e96a042de2bd1c9c287cabd5e Mon Sep 17 00:00:00 2001 From: chartl Date: Fri, 15 Oct 2010 19:07:43 +0000 Subject: [PATCH] Added in an eval module that calculates the dispersion histograms between eval and comp (e.g. M_{i,j} = # of times eval observed to have AC i, comp AC j -- for af it's i/100 vs j/100 ) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4507 348d0f76-0448-11de-a6fe-93d51630548a --- .../varianteval/GenotypeConcordance.java | 10 +- .../AlleleFrequencyComparison.java | 134 ++++++++++++++++++ 2 files changed, 139 insertions(+), 5 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/AlleleFrequencyComparison.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java index 7c24a356e..02a576598 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/GenotypeConcordance.java @@ -404,15 +404,15 @@ public class GenotypeConcordance extends VariantEvaluator implements StandardEva } private int getAC(VariantContext vc) { - if ( vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass().isAssignableFrom(List.class) ) { + if ( List.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()) ) { return ((List) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY)).get(0); - } else if ( vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass().isAssignableFrom(Integer.class)) { + } else if ( Integer.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass())) { return (Integer) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY); - } else if ( vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass().isAssignableFrom(String.class)) { + } else if ( String.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()) ) { // two ways of parsing String ac = (String) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY); if ( ac.startsWith("[") ) { - return Integer.parseInt(ac.replaceAll("[","").replaceAll("]","")); + return Integer.parseInt(ac.replaceAll("\\[","").replaceAll("\\]","")); } else { try { return Integer.parseInt(ac); @@ -421,7 +421,7 @@ public class GenotypeConcordance extends VariantEvaluator implements StandardEva } } } else { - throw new UserException(String.format("The format of the AC field does not appear to be of integer-list or String format")); + throw new UserException(String.format("The format of the AC field does not appear to be of integer-list or String format, class was %s",vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass())); } } } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/AlleleFrequencyComparison.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/AlleleFrequencyComparison.java new file mode 100755 index 000000000..16331ac03 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval/AlleleFrequencyComparison.java @@ -0,0 +1,134 @@ +package org.broadinstitute.sting.gatk.walkers.varianteval; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broad.tribble.vcf.VCFConstants; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.report.tags.Analysis; +import org.broadinstitute.sting.utils.report.tags.DataPoint; +import org.broadinstitute.sting.utils.report.utils.TableType; + +import java.util.List; + +/** + * + */ + +@Analysis(name = "Allele Frequency Comparison", description = "Compare allele frequency and counts between eval and comp") +public class AlleleFrequencyComparison extends VariantEvaluator { + private static int MAX_AC_COUNT = 100; // todo -- command line argument? + + @DataPoint(description="Counts of eval frequency versus comp frequency") + AFTable afTable = new AFTable(); + + @DataPoint(description="Counts of eval AC versus comp AC") + ACTable acTable = new ACTable(MAX_AC_COUNT); + + public boolean enabled() { return true; } + + public int getComparisonOrder() { return 2; } + + public String getName() { return "Allele Frequency Comparison"; } + + public AlleleFrequencyComparison(VariantEvalWalker parent) { + super(parent); + } + + public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) { + if ( ! (isValidVC(eval) && isValidVC(comp)) ) { + return null; + } else { + afTable.update(getAF(eval),getAF(comp)); + acTable.update(getAC(eval),getAC(comp)); + } + + return null; // there is nothing interesting + } + + private static boolean isValidVC(final VariantContext vc) { + return (vc != null && !vc.isFiltered() && vc.getAlternateAlleles().size() == 1 && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) && vc.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)); + } + + private static double getAF(VariantContext vc) { + return ((List) vc.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY)).get(0); + } + + private static int getAC(VariantContext vc) { + return ((List) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY)).get(0); + } +} + +class AFTable implements TableType { + + protected int[][] afCounts = new int[101][101]; + + public Object[] getRowKeys() { + String[] afKeys = new String[101]; + for ( int f = 0; f < 101; f ++ ) { + afKeys[f] = String.format("%.2f",(f+0.0)/100.0); + } + + return afKeys; + } + + public Object[] getColumnKeys() { + return getRowKeys(); // nice thing about symmetric tables + } + + public Object getCell(int i, int j) { + return afCounts[i][j]; + } + + public String getName() { + return "Allele Frequency Concordance"; + } + + public void update(double eval, double comp) { + afCounts[af2index(eval)][af2index(comp)]++; + } + + private int af2index(double d) { + return (int) Math.round(100*d); + } +} + +class ACTable implements TableType { + protected int[][] acCounts; + protected int maxAC; + + public ACTable(int acMaximum) { + maxAC = acMaximum; + acCounts = new int[acMaximum+1][acMaximum+1]; + } + + public Object[] getRowKeys() { + String[] acKeys = new String[maxAC+1]; + for ( int i = 0 ; i <= maxAC ; i ++ ) { + acKeys[i] = String.format("%d",i); + } + + return acKeys; + } + + public Object[] getColumnKeys() { + return getRowKeys(); + } + + public Object getCell(int i, int j) { + return acCounts[i][j]; + } + + public String getName() { + return "Allele Counts Concordance"; + } + + public void update(int eval, int comp) { + eval = eval > maxAC ? maxAC : eval; + comp = comp > maxAC ? maxAC : comp; + + acCounts[eval][comp]++; + } + +}