Cleanup in VariantEval. Example of molten VariantEval output

-- Moved a variety of useful formatting routines for ratios, percentages, etc., into VariantEvaluator.java so everyone can share.  Code updated to use these routines where appropriate
-- Added variantWasSingleton() to VariantEvaluator, which can be used to determine if a site, even after subsetting to specific samples, was a singleton in the original full VCF
-- TableType, which used to be an interface, is now an abstract class, allowing us to implement some general functionality and avoid duplication.
-- This included creating a getRowName() function whose value used to be hardcoded as "row" but can now be overridden.
-- #### This allows us to implement molten tables, which are vastly easier to use than multi-row data sets.  See IndelHistogram class (in later commit) for example of molten VE output
This commit is contained in:
Mark DePristo 2012-03-22 21:14:44 -04:00
parent e4d49357ce
commit bcf80cc7b3
10 changed files with 75 additions and 67 deletions

View File

@ -93,6 +93,7 @@ import java.util.*;
*/
@Reference(window=@Window(start=-50, stop=50))
public class VariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
public static final String IS_SINGLETON_KEY = "ISSINGLETON";
@Output
protected PrintStream out;
@ -494,7 +495,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if (field.get(ve) instanceof TableType) {
TableType t = (TableType) field.get(ve);
String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
final DataPoint dataPointAnn = datamap.get(field);
GATKReportTable table;
@ -509,17 +510,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
table.addColumn(vs.getName(), "unknown");
}
table.addColumn("row", "unknown");
for ( Object o : t.getColumnKeys() ) {
String c;
if (o instanceof String) {
c = (String) o;
} else {
c = o.toString();
}
table.addColumn(t.getRowName(), "unknown");
for ( final Object o : t.getColumnKeys() ) {
final String c = o.toString();
table.addColumn(c, 0.0);
}
} else {
@ -527,7 +521,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
for (int row = 0; row < t.getRowKeys().length; row++) {
String r = (String) t.getRowKeys()[row];
final String r = t.getRowKeys()[row].toString();
for ( VariantStratifier vs : stratificationObjects ) {
final String columnName = vs.getName();
@ -535,17 +529,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
for (int col = 0; col < t.getColumnKeys().length; col++) {
String c;
if (t.getColumnKeys()[col] instanceof String) {
c = (String) t.getColumnKeys()[col];
} else {
c = t.getColumnKeys()[col].toString();
}
String newStateKey = stateKey.toString() + r;
final String c = t.getColumnKeys()[col].toString();
final String newStateKey = stateKey.toString() + r;
table.set(newStateKey, c, t.getCell(row, col));
table.set(newStateKey, "row", r);
table.set(newStateKey, t.getRowName(), r);
}
}
} else {

View File

@ -30,7 +30,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
@DataPoint(description = "Number of variants per base", format = "%.8f")
public double variantRatePerBp = 0;
@DataPoint(description = "Number of snp loci", format = "%d")
public long nSNPs = 0;
@DataPoint(description = "Number of mnp loci", format = "%d")
@ -47,7 +46,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
@DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)", format = "%d")
public long nMixed = 0;
@DataPoint(description = "Number of no calls loci", format = "%d")
public long nNoCalls = 0;
@DataPoint(description = "Number of het loci", format = "%d")
@ -72,8 +70,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
public double indelRate = 0;
@DataPoint(description = "indel rate per base pair", format = "%.2f")
public double indelRatePerBp = 0;
@DataPoint(description = "deletion to insertion ratio", format = "%.2f")
public double deletionInsertionRatio = 0;
@DataPoint(description = "insertion to deletion ratio", format = "%.2f")
public double insertionDeletionRatio = 0;
private double perLocusRate(long n) {
return rate(n, nProcessedLoci);
@ -113,12 +111,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
case SNP:
nVariantLoci++;
nSNPs++;
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
if (variantWasSingleton(vc1)) nSingletons++;
break;
case MNP:
nVariantLoci++;
nMNPs++;
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
if (variantWasSingleton(vc1)) nSingletons++;
break;
case INDEL:
nVariantLoci++;
@ -201,6 +199,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
hetHomRatio = ratio(nHets, nHomVar);
indelRate = perLocusRate(nDeletions + nInsertions + nComplex);
indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions + nComplex);
deletionInsertionRatio = ratio(nDeletions, nInsertions);
insertionDeletionRatio = ratio(nInsertions, nDeletions);
}
}

View File

@ -59,7 +59,7 @@ public class GenotypeConcordance extends VariantEvaluator {
private boolean discordantInteresting = false;
static class FrequencyStats implements TableType {
static class FrequencyStats extends TableType {
class Stats {
public Stats(int found, int missed) { nFound = found; nMissed = missed; }
public long nFound = 0;
@ -103,7 +103,7 @@ public class GenotypeConcordance extends VariantEvaluator {
}
}
static class QualityScoreHistograms implements TableType {
static class QualityScoreHistograms extends TableType {
final static int NUM_BINS = 20;
final HashMap<Integer,Integer> truePositiveQualityScoreMap = new HashMap<Integer,Integer>(); // A HashMap holds all the quality scores until we are able to bin them appropriately
final HashMap<Integer,Integer> falsePositiveQualityScoreMap = new HashMap<Integer,Integer>();
@ -362,7 +362,7 @@ public class GenotypeConcordance extends VariantEvaluator {
/**
* a table of sample names to genotype concordance figures
*/
class SampleStats implements TableType {
class SampleStats extends TableType {
private final int nGenotypeTypes;
// sample to concordance stats object
@ -448,7 +448,7 @@ class SampleStats implements TableType {
/**
* a table of sample names to genotype concordance summary statistics
*/
class SampleSummaryStats implements TableType {
class SampleSummaryStats extends TableType {
protected final static String ALL_SAMPLES_KEY = "allSamples";
protected final static String[] COLUMN_KEYS = new String[]{
"percent_comp_ref_called_ref",

View File

@ -376,7 +376,7 @@ class PhaseStats {
/**
* a table of sample names to genotype phasing statistics
*/
class SamplePhasingStatistics implements TableType {
class SamplePhasingStatistics extends TableType {
private HashMap<String, PhaseStats> sampleStats = null;
private double minPhaseQuality;

View File

@ -87,13 +87,8 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
public String indelNoveltyRate = "NA";
public void initialize(VariantEvalWalker walker) {}
@Override public boolean enabled() { return true; }
public int getComparisonOrder() {
return 2;
}
@Override public int getComparisonOrder() { return 2; }
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
@ -156,12 +151,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
// TODO -- implement me
}
private final String noveltyRate(final int all, final int known) {
final int novel = all - known;
final double rate = (novel / (1.0 * all));
return all == 0 ? "NA" : String.format("%.2f", rate);
}
public void finalizeEvaluation() {
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
@ -170,7 +159,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
TiTvRatio = (double)nTi / (double)nTv;
SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete);
SNPNoveltyRate = formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs);
indelNoveltyRate = formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs);
}
}

View File

@ -49,4 +49,46 @@ public abstract class VariantEvaluator {
return true;
}
/**
 * Reports whether a variant was flagged as a singleton in the original input
 * evaluation set, independent of any sample subsetting applied since.
 *
 * The answer is read from the {@link VariantEvalWalker#IS_SINGLETON_KEY}
 * attribute of the context; a context without that attribute is treated as
 * not being a singleton.
 *
 * @param eval the variant context to inspect
 * @return true if eval was originally a singleton site
 */
protected static final boolean variantWasSingleton(final VariantContext eval) {
    // sites that never had the flag attached are reported as non-singletons
    final boolean valueWhenUnflagged = false;
    return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, valueWhenUnflagged);
}
/**
 * Convenience function that formats the novelty rate, i.e. the fraction of
 * all variants that are not known, as a string via formattedPercent.
 *
 * @param known number of variants from all that are known
 * @param all   number of all variants
 * @return a String novelty rate, or NA if all == 0
 */
protected static final String formattedNoveltyRate(final int known, final int all) {
    // novel variants are whatever remains once the known ones are removed
    final int novel = all - known;
    return formattedPercent(novel, all);
}
/**
 * Convenience function that formats the fraction x / total as a %.2f string.
 *
 * Note that the value is the plain fraction; it is not multiplied by 100.
 *
 * @param x     number of objects part of total that meet some criteria
 * @param total count of all objects, including x
 * @return a String fraction of x in total, or NA if total == 0
 */
protected static final String formattedPercent(final int x, final int total) {
    if (total == 0) {
        // nothing was counted, so the fraction is undefined
        return "NA";
    }
    final double fraction = x / (1.0 * total);
    return String.format("%.2f", fraction);
}
/**
 * Convenience function that formats the ratio num / denom as a %.2f string.
 *
 * @param num   number of observations in the numerator
 * @param denom number of observations in the denominator
 * @return a String formatted ratio, or NA if denom == 0
 */
protected static final String formattedRatio(final int num, final int denom) {
    if (denom == 0) {
        // a ratio over zero observations is undefined
        return "NA";
    }
    final double ratio = num / (1.0 * denom);
    return String.format("%.2f", ratio);
}
}

View File

@ -54,7 +54,7 @@ public class VariantQualityScore extends VariantEvaluator {
@DataPoint(description = "average variant quality for each allele count")
AlleleCountStats alleleCountStats = null;
static class TiTvStats implements TableType {
static class TiTvStats extends TableType {
final static int NUM_BINS = 20;
final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
final long transitionByQuality[] = new long[NUM_BINS];
@ -73,10 +73,6 @@ public class VariantQualityScore extends VariantEvaluator {
return columnKeys;
}
public String getName() {
return "TiTvStats";
}
public String getCell(int x, int y) {
return String.valueOf(titvByQuality[y]);
}
@ -143,7 +139,7 @@ public class VariantQualityScore extends VariantEvaluator {
}
}
class AlleleCountStats implements TableType {
class AlleleCountStats extends TableType {
final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
@ -163,10 +159,6 @@ public class VariantQualityScore extends VariantEvaluator {
return new String[]{"alleleCount","avgQual"};
}
public String getName() {
return "AlleleCountStats";
}
public String getCell(int x, int y) {
int iii = 0;
for( final Integer key : qualityListMap.keySet() ) {

View File

@ -255,9 +255,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
private final String noveltyRate(Type type) {
final int all = allVariantCounts.all(type);
final int known = knownVariantCounts.all(type);
final int novel = all - known;
final double rate = (novel / (1.0 * all));
return all == 0 ? "NA" : String.format("%.2f", rate);
return formattedNoveltyRate(known, all);
}
public void finalizeEvaluation() {

View File

@ -9,9 +9,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util;
*
* an interface for turning arbitrary objects into tables
*/
public interface TableType {
public Object[] getRowKeys();
public Object[] getColumnKeys();
public Object getCell(int x, int y);
public String getName();
/**
 * Base class for objects that can be presented as a table of cells
 * addressed by row and column keys.
 */
public abstract class TableType {
    /** @return the keys of the table's rows, one per row */
    public abstract Object[] getRowKeys();

    /** @return the keys of the table's columns, one per column */
    public abstract Object[] getColumnKeys();

    /**
     * @param x index of the row
     * @param y index of the column
     * @return the value of the cell at row x, column y
     */
    public abstract Object getCell(int x, int y);

    /**
     * @return the name of this table; unless overridden, the simple class
     *         name of the concrete table type
     */
    public String getName() {
        return getClass().getSimpleName();
    }

    /**
     * @return the name of the column under which row keys are written;
     *         "row" unless overridden
     */
    public String getRowName() {
        return "row";
    }
}

View File

@ -310,7 +310,7 @@ public class VariantEvalUtils {
final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) {
builder.attribute("ISSINGLETON", true);
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);
}
VariantContextUtils.calculateChromosomeCounts(builder, true);