Fixes to support evaluations with TableType elements - each such object now gets a separate entry in the output table. Added codon degeneracy stratification. Handle null elements in reports (useful for debugging).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5101 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kiran 2011-01-27 22:09:59 +00:00
parent a264b16358
commit 58f0ecff89
8 changed files with 232 additions and 71 deletions

View File

@ -66,10 +66,12 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
int maxWidth = columnName.length();
for (Object obj : this.values()) {
int width = obj.toString().length();
if (obj != null) {
int width = obj.toString().length();
if (width > maxWidth) {
maxWidth = width;
if (width > maxWidth) {
maxWidth = width;
}
}
}

View File

@ -108,7 +108,7 @@ public class GATKReportTable {
* @return true if the name is valid, false if otherwise
*/
private boolean isValidName(String name) {
Pattern p = Pattern.compile("[^a-zA-Z0-9_]");
Pattern p = Pattern.compile("[^a-zA-Z0-9_\\-\\.]");
Matcher m = p.matcher(name);
return !m.find();
@ -546,7 +546,7 @@ public class GATKReportTable {
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
if (needsPadding) { out.printf(" "); }
out.printf(columnWidths.get(columnName), obj.toString());
out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
needsPadding = true;
}

View File

@ -17,6 +17,8 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.Tranche;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.StandardEval;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.RequiredStratification;
@ -30,12 +32,12 @@ import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.Stat
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.report.utils.TableType;
import org.broadinstitute.sting.utils.vcf.VCFUtils;
import java.io.File;
import java.io.PrintStream;
import java.lang.reflect.Field;
import java.util.*;
@ -84,6 +86,15 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
protected double MIN_PHASE_QUALITY = 10.0;
@Argument(shortName="family", doc="If provided, genotypes in will be examined for mendelian violations: this argument is a string formatted as dad+mom=child where these parameters determine which sample names are examined", required=false)
protected String FAMILY_STRUCTURE;
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
private String TRANCHE_FILENAME = null;
// Variables
private Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions = new TreeSet<VariantContextUtils.JexlVCMatchExp>();
private Set<String> compNames = new TreeSet<String>();
@ -317,14 +328,7 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
for (Field field : datamap.keySet()) {
field.setAccessible(true);
if (field.get(vei) instanceof TableType) {
TableType t = (TableType) field.get(vei);
for ( Object o : t.getColumnKeys() ) {
String c = (String) o;
table.addColumn(c, 0.0);
}
} else {
if (! (field.get(vei) instanceof TableType) ) {
table.addColumn(field.getName(), 0.0);
}
}
@ -381,6 +385,16 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
// Initialize select expressions
jexlExpressions.addAll(VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS));
// Add select expressions for anything in the tranches file
if ( TRANCHE_FILENAME != null ) {
// we are going to build a few select names automatically from the tranches file
for ( Tranche t : Tranche.readTraches(new File(TRANCHE_FILENAME)) ) {
logger.info("Adding select for all variant above the pCut of : " + t);
SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
SELECT_NAMES.add(String.format("FDR-%.2f", t.fdr));
}
}
// Initialize the set of stratifications and evaluations to use
stratificationObjects = initializeStratificationObjects(NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
Set<Class<? extends VariantEvaluator>> evaluationObjects = initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
@ -617,6 +631,22 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
return MIN_PHASE_QUALITY;
}
/**
* Return the family structure to be used with the MendelianViolationEvaluator module
* @return the family structure string
*/
public String getFamilyStructure() {
return FAMILY_STRUCTURE;
}
/**
* Return the mendelian violation qual threshold to be used with the MendelianViolationEvaluator module
* @return the mendelian violation qual threshold
*/
public double getMendelianViolationQualThreshold() {
return MENDELIAN_VIOLATION_QUAL_THRESHOLD;
}
/**
* Collect relevant information from each variant in the supplied VCFs
*/
@ -703,12 +733,10 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
ve.finalizeEvaluation();
GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
Map<Field, DataPoint> datamap = scanner.getData();
// handle TableTypes
for (Field field : datamap.keySet()) {
try {
field.setAccessible(true);
@ -716,6 +744,26 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
if (field.get(ve) instanceof TableType) {
TableType t = (TableType) field.get(ve);
String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
String subTableDesc = datamap.get(field).description();
report.addTable(subTableName, subTableDesc);
GATKReportTable table = report.getTable(subTableName);
table.addPrimaryKey("entry", false);
table.addColumn(subTableName, subTableName);
for ( VariantStratifier vs : stratificationObjects ) {
String columnName = vs.getClass().getSimpleName();
table.addColumn(columnName, "unknown");
}
for ( Object o : t.getColumnKeys() ) {
String c = (String) o;
table.addColumn(c, 0.0);
}
for (int row = 0; row < t.getRowKeys().length; row++) {
String r = (String) t.getRowKeys()[row];
@ -732,27 +780,19 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
table.set(newStateKey, c, t.getCell(row, col));
}
}
}
} catch (IllegalAccessException e) {
throw new StingException("IllegalAccessException: " + e);
}
}
} else {
GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
for ( VariantStratifier vs : stratificationObjects ) {
String columnName = vs.getClass().getSimpleName();
for ( VariantStratifier vs : stratificationObjects ) {
String columnName = vs.getClass().getSimpleName();
table.set(stateKey.toString(), columnName, stateKey.get(vs.getClass().getSimpleName()));
}
table.set(stateKey.toString(), columnName, stateKey.get(vs.getClass().getSimpleName()));
}
for (Field field : datamap.keySet()) {
try {
field.setAccessible(true);
if ( !(field.get(ve) instanceof TableType) ) {
table.set(stateKey.toString(), field.getName(), field.get(ve));
}
} catch (IllegalAccessException e) {
throw new ReviewedStingException("Unable to access a data field in a VariantEval analysis module: " + e);
throw new StingException("IllegalAccessException: " + e);
}
}
}

View File

@ -246,7 +246,7 @@ public class GenotypeConcordance extends VariantEvaluator {
private boolean warnedAboutValidationData = false;
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, NewEvaluationContext group) {
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
String interesting = null;
// sanity check that we at least have either eval or validation data
@ -321,7 +321,10 @@ public class GenotypeConcordance extends VariantEvaluator {
sampleStats.incrValue(sample, truth, called);
if ( evalAC != null && validationAC != null) {
alleleCountStats.incrValue(evalAC,truth,called);
alleleCountStats.incrValue(validationAC,truth,called);
//System.err.println(evalAC + " " + validationAC);
//alleleCountStats.incrValue(validationAC,truth,called);
}
}
}
@ -461,12 +464,18 @@ class SampleStats implements TableType {
* @return a list of objects, in this case strings, that are the column names
*/
public Object[] getColumnKeys() {
return new String[]{"total_true_ref","%_ref/ref","n_ref/no-call",
"n_ref/ref","n_ref/het","n_ref/hom",
"total_true_het","%_het/het","n_het/no-call",
"n_het/ref","n_het/het","n_het/hom",
"total_true_hom","%_hom/hom","n_hom/no-call",
"n_hom/ref","n_hom/het","n_hom/hom"};
// return new String[]{"total_true_ref","%_ref/ref","n_ref/no-call",
// "n_ref/ref","n_ref/het","n_ref/hom",
// "total_true_het","%_het/het","n_het/no-call",
// "n_het/ref","n_het/het","n_het/hom",
// "total_true_hom","%_hom/hom","n_hom/no-call",
// "n_hom/ref","n_hom/het","n_hom/hom"};
return new String[]{"total_true_ref","pct_ref_vs_ref","n_ref_vs_no-call",
"n_ref_vs_ref","n_ref_vs_het","n_ref_vs_hom",
"total_true_het","pct_het_vs_het","n_het_vs_no-call",
"n_het_vs_ref","n_het_vs_het","n_het_vs_hom",
"total_true_hom","pct_hom_vs_hom","n_hom_vs_no-call",
"n_hom_vs_ref","n_hom_vs_het","n_hom_vs_hom"};
}

View File

@ -6,6 +6,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -60,6 +61,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
long KidHomVar_ParentHomRef;
TrioStructure trio;
double mendelianViolationQualThreshold;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
@ -82,25 +84,21 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
}
// todo: fix
public void initialize(NewVariantEvalWalker walker) {
trio = parseTrioDescription(walker.getFamilyStructure());
mendelianViolationQualThreshold = walker.getMendelianViolationQualThreshold();
}
//public MendelianViolationEvaluator(VariantEvalWalker parent) {
//super(parent);
//if (enabled()) {
//trio = parseTrioDescription(parent.FAMILY_STRUCTURE);
//parent.getLogger().debug(String.format("Found a family pattern: %s mom=%s dad=%s child=%s",
//parent.FAMILY_STRUCTURE, trio.mom, trio.dad, trio.child));
//}
//}
public boolean enabled() {
//return getVEWalker().FAMILY_STRUCTURE != null;
return false;
return true;
}
private double getQThreshold() {
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
return 0.0;
return mendelianViolationQualThreshold / 10; // we aren't 10x scaled in the GATK a la phred
//return 0.0;
}
public String getName() {

View File

@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.Degeneracy;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.Sample;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
@ -171,8 +172,19 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
@Override
public boolean stateIsApplicable(StateKey stateKey) {
String className = Sample.class.getSimpleName();
String sampleClassName = Sample.class.getSimpleName();
String degeneracyClassName = Degeneracy.class.getSimpleName();
return !(stateKey.containsKey(className) && !stateKey.get(className).equalsIgnoreCase("all"));
//return !(stateKey.containsKey(sampleClassName) && !stateKey.get(sampleClassName).equalsIgnoreCase("all"));
if (stateKey.containsKey(sampleClassName) && !stateKey.get(sampleClassName).equalsIgnoreCase("all")) {
return false;
}
if (stateKey.containsKey(degeneracyClassName) && !stateKey.get(degeneracyClassName).equalsIgnoreCase("all")) {
return false;
}
return true;
}
}

View File

@ -0,0 +1,98 @@
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
public class Degeneracy extends VariantStratifier {
private ArrayList<String> states;
private HashMap<String, String> degeneracies;
@Override
public void initialize(Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions, Set<String> compNames, Set<String> knownNames, Set<String> evalNames, Set<String> sampleNames) {
states = new ArrayList<String>();
states.add("1-fold");
states.add("2-fold");
states.add("3-fold");
states.add("4-fold");
states.add("6-fold");
states.add("all");
degeneracies = new HashMap<String, String>();
degeneracies.put("Ile", "3-fold");
degeneracies.put("Leu", "6-fold");
degeneracies.put("Val", "4-fold");
degeneracies.put("Phe", "2-fold");
degeneracies.put("Met", "1-fold");
degeneracies.put("Cys", "2-fold");
degeneracies.put("Ala", "4-fold");
degeneracies.put("Gly", "4-fold");
degeneracies.put("Pro", "4-fold");
degeneracies.put("Thr", "4-fold");
degeneracies.put("Ser", "6-fold");
degeneracies.put("Tyr", "2-fold");
degeneracies.put("Try", "1-fold");
degeneracies.put("Trp", "1-fold");
degeneracies.put("Gln", "2-fold");
degeneracies.put("Asn", "2-fold");
degeneracies.put("His", "2-fold");
degeneracies.put("Glu", "2-fold");
degeneracies.put("Asp", "2-fold");
degeneracies.put("Lys", "2-fold");
degeneracies.put("Arg", "6-fold");
degeneracies.put("Stop", "3-fold");
}
public ArrayList<String> getAllStates() {
return states;
}
public ArrayList<String> getRelevantStates(ReferenceContext ref, VariantContext comp, String compName, VariantContext eval, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("all");
if (eval != null && eval.isVariant()) {
String type = null;
String aa = null;
if (eval.getAttributeAsString("refseq.functionalClass") != null) {
type = eval.getAttributeAsString("refseq.functionalClass");
aa = eval.getAttributeAsString("refseq.variantAA");
} else if (eval.getAttributeAsString("refseq.functionalClass_1") != null) {
int annotationId = 1;
String key;
do {
key = String.format("refseq.functionalClass_%d", annotationId);
String newtype = eval.getAttributeAsString(key);
if ( newtype != null &&
( type == null ||
( type.equals("silent") && !newtype.equals("silent") ) ||
( type.equals("missense") && newtype.equals("nonsense") ) )
) {
type = newtype;
String aakey = String.format("refseq.variantAA_%d", annotationId);
aa = eval.getAttributeAsString(aakey);
}
annotationId++;
} while (eval.getAttributeAsString(key) != null);
}
if (aa != null && degeneracies.containsKey(aa)) {
relevantStates.add(degeneracies.get(aa));
}
}
return relevantStates;
}
}

View File

@ -52,29 +52,31 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
public void apply(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantContext comp, VariantContext eval) {
for ( VariantEvaluator evaluation : evaluationInstances.values() ) {
// we always call update0 in case the evaluation tracks things like number of bases covered
//evaluation.update0(tracker, ref, context);
synchronized ( evaluation ) {
// we always call update0 in case the evaluation tracks things like number of bases covered
//evaluation.update0(tracker, ref, context);
// the other updateN methods don't see a null context
if ( tracker == null )
continue;
// the other updateN methods don't see a null context
if ( tracker == null )
continue;
// now call the single or paired update function
switch ( evaluation.getComparisonOrder() ) {
case 1:
if (eval != null) {
evaluation.update1(eval, tracker, ref, context);
}
// now call the single or paired update function
switch ( evaluation.getComparisonOrder() ) {
case 1:
if (eval != null) {
evaluation.update1(eval, tracker, ref, context);
}
break;
case 2:
if (eval != null) {
evaluation.update2(eval, comp, tracker, ref, context);
}
break;
case 2:
if (eval != null) {
evaluation.update2(eval, comp, tracker, ref, context);
}
break;
default:
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
break;
default:
throw new ReviewedStingException("BUG: Unexpected evaluation order " + evaluation);
}
}
}
}