From 58f0ecff89ace63e874538574b1e490a73d2c30c Mon Sep 17 00:00:00 2001 From: kiran Date: Thu, 27 Jan 2011 22:09:59 +0000 Subject: [PATCH] Fixes to support evaluations with TableType elements - each such object now gets a separate entry in the output table. Added codon degeneracy stratification. Handle null elements in reports (useful for debugging). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5101 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/report/GATKReportColumn.java | 8 +- .../sting/gatk/report/GATKReportTable.java | 4 +- .../newvarianteval/NewVariantEvalWalker.java | 92 ++++++++++++----- .../evaluators/GenotypeConcordance.java | 25 +++-- .../MendelianViolationEvaluator.java | 20 ++-- .../evaluators/SimpleMetricsByAC.java | 16 ++- .../stratifications/Degeneracy.java | 98 +++++++++++++++++++ .../util/NewEvaluationContext.java | 40 ++++---- 8 files changed, 232 insertions(+), 71 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/Degeneracy.java diff --git a/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index ecec286aa..440597754 100755 --- a/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -66,10 +66,12 @@ public class GATKReportColumn extends TreeMap { int maxWidth = columnName.length(); for (Object obj : this.values()) { - int width = obj.toString().length(); + if (obj != null) { + int width = obj.toString().length(); - if (width > maxWidth) { - maxWidth = width; + if (width > maxWidth) { + maxWidth = width; + } } } diff --git a/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java b/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java index c5203f254..ab08e65fa 100755 --- a/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java +++ b/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java @@ -108,7 +108,7 @@ public class GATKReportTable { * @return true if the name is valid, false if otherwise */ private boolean isValidName(String name) { - Pattern p = Pattern.compile("[^a-zA-Z0-9_]"); + Pattern p = Pattern.compile("[^a-zA-Z0-9_\\-\\.]"); Matcher m = p.matcher(name); return !m.find(); @@ -546,7 +546,7 @@ public class GATKReportTable { Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey); if (needsPadding) { out.printf(" "); } - out.printf(columnWidths.get(columnName), obj.toString()); + out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString()); needsPadding = true; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/NewVariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/NewVariantEvalWalker.java index e8a84e075..eba88c972 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/NewVariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/NewVariantEvalWalker.java @@ -17,6 +17,8 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; +import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche; +import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.StandardEval; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.RequiredStratification; @@ -30,12 +32,12 @@ import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.Stat import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.classloader.PluginManager; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.report.utils.TableType; import org.broadinstitute.sting.utils.vcf.VCFUtils; +import java.io.File; import java.io.PrintStream; import java.lang.reflect.Field; import java.util.*; @@ -84,6 +86,15 @@ public class NewVariantEvalWalker extends RodWalker implements @Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false) protected double MIN_PHASE_QUALITY = 10.0; + @Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false) + protected String FAMILY_STRUCTURE; + + @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false) + protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50; + + @Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false) + private String TRANCHE_FILENAME = null; + // Variables private Set jexlExpressions = new TreeSet(); private Set compNames = new TreeSet(); @@ -317,14 +328,7 @@ public class NewVariantEvalWalker extends RodWalker implements for (Field field : datamap.keySet()) { field.setAccessible(true); - if (field.get(vei) instanceof TableType) { - TableType t = (TableType) field.get(vei); - - for ( Object o : t.getColumnKeys() ) { - String c = (String) o; - table.addColumn(c, 0.0); - } - } else { + if (! (field.get(vei) instanceof TableType) ) { table.addColumn(field.getName(), 0.0); } } @@ -381,6 +385,16 @@ public class NewVariantEvalWalker extends RodWalker implements // Initialize select expressions jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)); + // Add select expressions for anything in the tranches file + if ( TRANCHE_FILENAME != null ) { + // we are going to build a few select names automatically from the tranches file + for ( Tranche t : Tranche.readTraches(new File(TRANCHE_FILENAME)) ) { + logger.info("Adding select for all variant above the pCut of : " + t); + SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod)); + SELECT_NAMES.add(String.format("FDR-%.2f", t.fdr)); + } + } + // Initialize the set of stratifications and evaluations to use stratificationObjects = initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE); Set> evaluationObjects = initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE); @@ -617,6 +631,22 @@ public class NewVariantEvalWalker extends RodWalker implements return MIN_PHASE_QUALITY; } + /** + * Return the family structure to be used with the MendelianViolationEvaluator module + * @return the family structure string + */ + public String getFamilyStructure() { + return FAMILY_STRUCTURE; + } + + /** + * Return the mendelian violation qual threshold to be used with the MendelianViolationEvaluator module + * @return the mendelian violation qual threshold + */ + public double getMendelianViolationQualThreshold() { + return MENDELIAN_VIOLATION_QUAL_THRESHOLD; + } + /** * Collect relevant information from each variant in the supplied VCFs */ @@ -703,12 +733,10 @@ public class NewVariantEvalWalker extends RodWalker implements for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) { ve.finalizeEvaluation(); - GATKReportTable table = report.getTable(ve.getClass().getSimpleName()); AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); Map datamap = scanner.getData(); - // handle TableTypes for (Field field : datamap.keySet()) { try { field.setAccessible(true); @@ -716,6 +744,26 @@ public class NewVariantEvalWalker extends RodWalker implements if (field.get(ve) instanceof TableType) { TableType t = (TableType) field.get(ve); + String subTableName = ve.getClass().getSimpleName() + "." + field.getName(); + String subTableDesc = datamap.get(field).description(); + + report.addTable(subTableName, subTableDesc); + GATKReportTable table = report.getTable(subTableName); + + table.addPrimaryKey("entry", false); + table.addColumn(subTableName, subTableName); + + for ( VariantStratifier vs : stratificationObjects ) { + String columnName = vs.getClass().getSimpleName(); + + table.addColumn(columnName, "unknown"); + } + + for ( Object o : t.getColumnKeys() ) { + String c = (String) o; + table.addColumn(c, 0.0); + } + for (int row = 0; row < t.getRowKeys().length; row++) { String r = (String) t.getRowKeys()[row]; @@ -732,27 +780,19 @@ public class NewVariantEvalWalker extends RodWalker implements table.set(newStateKey, c, t.getCell(row, col)); } } - } - } catch (IllegalAccessException e) { - throw new StingException("IllegalAccessException: " + e); - } - } + } else { + GATKReportTable table = report.getTable(ve.getClass().getSimpleName()); - for ( VariantStratifier vs : stratificationObjects ) { - String columnName = vs.getClass().getSimpleName(); + for ( VariantStratifier vs : stratificationObjects ) { + String columnName = vs.getClass().getSimpleName(); - table.set(stateKey.toString(), columnName, stateKey.get(vs.getClass().getSimpleName())); - } + table.set(stateKey.toString(), columnName, stateKey.get(vs.getClass().getSimpleName())); + } - for (Field field : datamap.keySet()) { - try { - field.setAccessible(true); - - if ( !(field.get(ve) instanceof TableType) ) { table.set(stateKey.toString(), field.getName(), field.get(ve)); } } catch (IllegalAccessException e) { - throw new ReviewedStingException("Unable to access a data field in a VariantEval analysis module: " + e); + throw new StingException("IllegalAccessException: " + e); } } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/GenotypeConcordance.java index 354b61d9c..558a7d1d3 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/GenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/GenotypeConcordance.java @@ -246,7 +246,7 @@ public class GenotypeConcordance extends VariantEvaluator { private boolean warnedAboutValidationData = false; - public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) { + public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { String interesting = null; // sanity check that we at least have either eval or validation data @@ -321,7 +321,10 @@ public class GenotypeConcordance extends VariantEvaluator { sampleStats.incrValue(sample, truth, called); if ( evalAC != null && validationAC != null) { alleleCountStats.incrValue(evalAC,truth,called); - alleleCountStats.incrValue(validationAC,truth,called); + + //System.err.println(evalAC + " " + validationAC); + + //alleleCountStats.incrValue(validationAC,truth,called); } } } @@ -461,12 +464,18 @@ class SampleStats implements TableType { * @return a list of objects, in this case strings, that are the column names */ public Object[] getColumnKeys() { - return new String[]{"total_true_ref","%_ref/ref","n_ref/no-call", - "n_ref/ref","n_ref/het","n_ref/hom", - "total_true_het","%_het/het","n_het/no-call", - "n_het/ref","n_het/het","n_het/hom", - "total_true_hom","%_hom/hom","n_hom/no-call", - "n_hom/ref","n_hom/het","n_hom/hom"}; +// return new String[]{"total_true_ref","%_ref/ref","n_ref/no-call", +// "n_ref/ref","n_ref/het","n_ref/hom", +// "total_true_het","%_het/het","n_het/no-call", +// "n_het/ref","n_het/het","n_het/hom", +// "total_true_hom","%_hom/hom","n_hom/no-call", +// "n_hom/ref","n_hom/het","n_hom/hom"}; + return new String[]{"total_true_ref","pct_ref_vs_ref","n_ref_vs_no-call", + "n_ref_vs_ref","n_ref_vs_het","n_ref_vs_hom", + "total_true_het","pct_het_vs_het","n_het_vs_no-call", + "n_het_vs_ref","n_het_vs_het","n_het_vs_hom", + "total_true_hom","pct_hom_vs_hom","n_hom_vs_no-call", + "n_hom_vs_ref","n_hom_vs_het","n_hom_vs_hom"}; } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/MendelianViolationEvaluator.java index 7cb38bc92..01b711d7b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/MendelianViolationEvaluator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/MendelianViolationEvaluator.java @@ -6,6 +6,7 @@ import org.broad.tribble.util.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -60,6 +61,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator { long KidHomVar_ParentHomRef; TrioStructure trio; + double mendelianViolationQualThreshold; private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)"); @@ -82,25 +84,21 @@ public class MendelianViolationEvaluator extends VariantEvaluator { } // todo: fix + public void initialize(NewVariantEvalWalker walker) { + trio = parseTrioDescription(walker.getFamilyStructure()); + mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold(); + } - //public MendelianViolationEvaluator(VariantEvalWalker parent) { - //super(parent); - - //if (enabled()) { - //trio = parseTrioDescription(parent.FAMILY_STRUCTURE); - //parent.getLogger().debug(String.format("Found a family pattern: %s mom=%s dad=%s child=%s", - //parent.FAMILY_STRUCTURE, trio.mom, trio.dad, trio.child)); - //} - //} public boolean enabled() { //return getVEWalker().FAMILY_STRUCTURE != null; - return false; + return true; } private double getQThreshold() { //return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred - return 0.0; + return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred + //return 0.0; } public String getName() { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/SimpleMetricsByAC.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/SimpleMetricsByAC.java index e1c6fefad..0a24f829e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/SimpleMetricsByAC.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/evaluators/SimpleMetricsByAC.java @@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker; +import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.Degeneracy; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.Sample; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis; import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint; @@ -171,8 +172,19 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval @Override public boolean stateIsApplicable(StateKey stateKey) { - String className = Sample.class.getSimpleName(); + String sampleClassName = Sample.class.getSimpleName(); + String degeneracyClassName = Degeneracy.class.getSimpleName(); - return !(stateKey.containsKey(className) && !stateKey.get(className).equalsIgnoreCase("all")); + //return !(stateKey.containsKey(sampleClassName) && !stateKey.get(sampleClassName).equalsIgnoreCase("all")); + + if (stateKey.containsKey(sampleClassName) && !stateKey.get(sampleClassName).equalsIgnoreCase("all")) { + return false; + } + + if (stateKey.containsKey(degeneracyClassName) && !stateKey.get(degeneracyClassName).equalsIgnoreCase("all")) { + return false; + } + + return true; } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/Degeneracy.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/Degeneracy.java new file mode 100755 index 000000000..92d67df94 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/stratifications/Degeneracy.java @@ -0,0 +1,98 @@ +package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications; + +import org.broad.tribble.util.variantcontext.VariantContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Set; + +public class Degeneracy extends VariantStratifier { + private ArrayList states; + + private HashMap degeneracies; + + @Override + public void initialize(Set jexlExpressions, Set compNames, Set knownNames, Set evalNames, Set sampleNames) { + states = new ArrayList(); + states.add("1-fold"); + states.add("2-fold"); + states.add("3-fold"); + states.add("4-fold"); + states.add("6-fold"); + states.add("all"); + + degeneracies = new HashMap(); + degeneracies.put("Ile", "3-fold"); + degeneracies.put("Leu", "6-fold"); + degeneracies.put("Val", "4-fold"); + degeneracies.put("Phe", "2-fold"); + degeneracies.put("Met", "1-fold"); + degeneracies.put("Cys", "2-fold"); + degeneracies.put("Ala", "4-fold"); + degeneracies.put("Gly", "4-fold"); + degeneracies.put("Pro", "4-fold"); + degeneracies.put("Thr", "4-fold"); + degeneracies.put("Ser", "6-fold"); + degeneracies.put("Tyr", "2-fold"); + degeneracies.put("Try", "1-fold"); + degeneracies.put("Trp", "1-fold"); + degeneracies.put("Gln", "2-fold"); + degeneracies.put("Asn", "2-fold"); + degeneracies.put("His", "2-fold"); + degeneracies.put("Glu", "2-fold"); + degeneracies.put("Asp", "2-fold"); + degeneracies.put("Lys", "2-fold"); + degeneracies.put("Arg", "6-fold"); + degeneracies.put("Stop", "3-fold"); + } + + public ArrayList getAllStates() { + return states; + } + + public ArrayList getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String sampleName) { + ArrayList relevantStates = new ArrayList(); + + relevantStates.add("all"); + + if (eval != null && eval.isVariant()) { + String type = null; + String aa = null; + + if (eval.getAttributeAsString("refseq.functionalClass") != null) { + type = eval.getAttributeAsString("refseq.functionalClass"); + aa = eval.getAttributeAsString("refseq.variantAA"); + } else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) { + int annotationId = 1; + String key; + + do { + key = String.format("refseq.functionalClass_%d", annotationId); + + String newtype = eval.getAttributeAsString(key); + + if ( newtype != null && + ( type == null || + ( type.equals("silent") && !newtype.equals("silent") ) || + ( type.equals("missense") && newtype.equals("nonsense") ) ) + ) { + type = newtype; + + String aakey = String.format("refseq.variantAA_%d", annotationId); + aa = eval.getAttributeAsString(aakey); + } + + annotationId++; + } while (eval.getAttributeAsString(key) != null); + } + + if (aa != null && degeneracies.containsKey(aa)) { + relevantStates.add(degeneracies.get(aa)); + } + } + + return relevantStates; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/NewEvaluationContext.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/NewEvaluationContext.java index 11079870e..ed0e01a5e 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/NewEvaluationContext.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/newvarianteval/util/NewEvaluationContext.java @@ -52,29 +52,31 @@ public class NewEvaluationContext extends HashMap { public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) { for ( VariantEvaluator evaluation : evaluationInstances.values() ) { - // we always call update0 in case the evaluation tracks things like number of bases covered - //evaluation.update0(tracker, ref, context); + synchronized ( evaluation ) { + // we always call update0 in case the evaluation tracks things like number of bases covered + //evaluation.update0(tracker, ref, context); - // the other updateN methods don't see a null context - if ( tracker == null ) - continue; + // the other updateN methods don't see a null context + if ( tracker == null ) + continue; - // now call the single or paired update function - switch ( evaluation.getComparisonOrder() ) { - case 1: - if (eval != null) { - evaluation.update1(eval, tracker, ref, context); - } + // now call the single or paired update function + switch ( evaluation.getComparisonOrder() ) { + case 1: + if (eval != null) { + evaluation.update1(eval, tracker, ref, context); + } - break; - case 2: - if (eval != null) { - evaluation.update2(eval, comp, tracker, ref, context); - } + break; + case 2: + if (eval != null) { + evaluation.update2(eval, comp, tracker, ref, context); + } - break; - default: - throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation); + break; + default: + throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation); + } } } }