Cleanup in VariantEval. Example of molten VariantEval output
-- Moved a variety of useful formatting routines for ratios, percentages, etc, into VariantEvaluator.java so everyone can share. Code updated to use these routines where appropriate -- Added variantWasSingleton() to VariantEvaluator, which can be used to determine if a site, even after subsetting to specific samples, was a singleton in the original full VCF -- TableType, which used to be an interface, is now an abstract class, allowing us to implement some general functionality and avoid duplication. -- This included creating a getRowName() function that used to be hardcoded as "row" but now can be overridden. -- #### This allows us to implement molten tables, which are vastly easier to use than multi-row data sets. See IndelHistogram class (in later commit) for example of molten VE output
This commit is contained in:
parent
e4d49357ce
commit
bcf80cc7b3
|
|
@ -93,6 +93,7 @@ import java.util.*;
|
|||
*/
|
||||
@Reference(window=@Window(start=-50, stop=50))
|
||||
public class VariantEvalWalker extends RodWalker<Integer, Integer> implements TreeReducible<Integer> {
|
||||
public static final String IS_SINGLETON_KEY = "ISSINGLETON";
|
||||
|
||||
@Output
|
||||
protected PrintStream out;
|
||||
|
|
@ -494,7 +495,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
if (field.get(ve) instanceof TableType) {
|
||||
TableType t = (TableType) field.get(ve);
|
||||
|
||||
String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
|
||||
final String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
|
||||
final DataPoint dataPointAnn = datamap.get(field);
|
||||
|
||||
GATKReportTable table;
|
||||
|
|
@ -509,17 +510,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
table.addColumn(vs.getName(), "unknown");
|
||||
}
|
||||
|
||||
table.addColumn("row", "unknown");
|
||||
|
||||
for ( Object o : t.getColumnKeys() ) {
|
||||
String c;
|
||||
|
||||
if (o instanceof String) {
|
||||
c = (String) o;
|
||||
} else {
|
||||
c = o.toString();
|
||||
}
|
||||
table.addColumn(t.getRowName(), "unknown");
|
||||
|
||||
for ( final Object o : t.getColumnKeys() ) {
|
||||
final String c = o.toString();
|
||||
table.addColumn(c, 0.0);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -527,7 +521,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
|
||||
for (int row = 0; row < t.getRowKeys().length; row++) {
|
||||
String r = (String) t.getRowKeys()[row];
|
||||
final String r = t.getRowKeys()[row].toString();
|
||||
|
||||
for ( VariantStratifier vs : stratificationObjects ) {
|
||||
final String columnName = vs.getName();
|
||||
|
|
@ -535,17 +529,10 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
}
|
||||
|
||||
for (int col = 0; col < t.getColumnKeys().length; col++) {
|
||||
String c;
|
||||
if (t.getColumnKeys()[col] instanceof String) {
|
||||
c = (String) t.getColumnKeys()[col];
|
||||
} else {
|
||||
c = t.getColumnKeys()[col].toString();
|
||||
}
|
||||
|
||||
String newStateKey = stateKey.toString() + r;
|
||||
final String c = t.getColumnKeys()[col].toString();
|
||||
final String newStateKey = stateKey.toString() + r;
|
||||
table.set(newStateKey, c, t.getCell(row, col));
|
||||
|
||||
table.set(newStateKey, "row", r);
|
||||
table.set(newStateKey, t.getRowName(), r);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
@DataPoint(description = "Number of variants per base", format = "%.8f")
|
||||
public double variantRatePerBp = 0;
|
||||
|
||||
|
||||
@DataPoint(description = "Number of snp loci", format = "%d")
|
||||
public long nSNPs = 0;
|
||||
@DataPoint(description = "Number of mnp loci", format = "%d")
|
||||
|
|
@ -47,7 +46,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
@DataPoint(description = "Number of mixed loci (loci that can't be classified as a SNP, Indel or MNP)", format = "%d")
|
||||
public long nMixed = 0;
|
||||
|
||||
|
||||
@DataPoint(description = "Number of no calls loci", format = "%d")
|
||||
public long nNoCalls = 0;
|
||||
@DataPoint(description = "Number of het loci", format = "%d")
|
||||
|
|
@ -72,8 +70,8 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
public double indelRate = 0;
|
||||
@DataPoint(description = "indel rate per base pair", format = "%.2f")
|
||||
public double indelRatePerBp = 0;
|
||||
@DataPoint(description = "deletion to insertion ratio", format = "%.2f")
|
||||
public double deletionInsertionRatio = 0;
|
||||
@DataPoint(description = "insertion to deletion ratio", format = "%.2f")
|
||||
public double insertionDeletionRatio = 0;
|
||||
|
||||
private double perLocusRate(long n) {
|
||||
return rate(n, nProcessedLoci);
|
||||
|
|
@ -113,12 +111,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
case SNP:
|
||||
nVariantLoci++;
|
||||
nSNPs++;
|
||||
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
|
||||
if (variantWasSingleton(vc1)) nSingletons++;
|
||||
break;
|
||||
case MNP:
|
||||
nVariantLoci++;
|
||||
nMNPs++;
|
||||
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
|
||||
if (variantWasSingleton(vc1)) nSingletons++;
|
||||
break;
|
||||
case INDEL:
|
||||
nVariantLoci++;
|
||||
|
|
@ -201,6 +199,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
hetHomRatio = ratio(nHets, nHomVar);
|
||||
indelRate = perLocusRate(nDeletions + nInsertions + nComplex);
|
||||
indelRatePerBp = perLocusRInverseRate(nDeletions + nInsertions + nComplex);
|
||||
deletionInsertionRatio = ratio(nDeletions, nInsertions);
|
||||
insertionDeletionRatio = ratio(nInsertions, nDeletions);
|
||||
}
|
||||
}
|
||||
|
|
@ -59,7 +59,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
private boolean discordantInteresting = false;
|
||||
|
||||
static class FrequencyStats implements TableType {
|
||||
static class FrequencyStats extends TableType {
|
||||
class Stats {
|
||||
public Stats(int found, int missed) { nFound = found; nMissed = missed; }
|
||||
public long nFound = 0;
|
||||
|
|
@ -103,7 +103,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
}
|
||||
|
||||
static class QualityScoreHistograms implements TableType {
|
||||
static class QualityScoreHistograms extends TableType {
|
||||
final static int NUM_BINS = 20;
|
||||
final HashMap<Integer,Integer> truePositiveQualityScoreMap = new HashMap<Integer,Integer>(); // A HashMap holds all the quality scores until we are able to bin them appropriately
|
||||
final HashMap<Integer,Integer> falsePositiveQualityScoreMap = new HashMap<Integer,Integer>();
|
||||
|
|
@ -362,7 +362,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
/**
|
||||
* a table of sample names to genotype concordance figures
|
||||
*/
|
||||
class SampleStats implements TableType {
|
||||
class SampleStats extends TableType {
|
||||
private final int nGenotypeTypes;
|
||||
|
||||
// sample to concordance stats object
|
||||
|
|
@ -448,7 +448,7 @@ class SampleStats implements TableType {
|
|||
/**
|
||||
* a table of sample names to genotype concordance summary statistics
|
||||
*/
|
||||
class SampleSummaryStats implements TableType {
|
||||
class SampleSummaryStats extends TableType {
|
||||
protected final static String ALL_SAMPLES_KEY = "allSamples";
|
||||
protected final static String[] COLUMN_KEYS = new String[]{
|
||||
"percent_comp_ref_called_ref",
|
||||
|
|
|
|||
|
|
@ -376,7 +376,7 @@ class PhaseStats {
|
|||
/**
|
||||
* a table of sample names to genotype phasing statistics
|
||||
*/
|
||||
class SamplePhasingStatistics implements TableType {
|
||||
class SamplePhasingStatistics extends TableType {
|
||||
private HashMap<String, PhaseStats> sampleStats = null;
|
||||
private double minPhaseQuality;
|
||||
|
||||
|
|
|
|||
|
|
@ -87,13 +87,8 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
public String indelNoveltyRate = "NA";
|
||||
|
||||
|
||||
public void initialize(VariantEvalWalker walker) {}
|
||||
|
||||
@Override public boolean enabled() { return true; }
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 2;
|
||||
}
|
||||
@Override public int getComparisonOrder() { return 2; }
|
||||
|
||||
public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
|
||||
|
|
@ -156,12 +151,6 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
// TODO -- implement me
|
||||
}
|
||||
|
||||
private final String noveltyRate(final int all, final int known) {
|
||||
final int novel = all - known;
|
||||
final double rate = (novel / (1.0 * all));
|
||||
return all == 0 ? "NA" : String.format("%.2f", rate);
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
processedMultiSnpRatio = (double)nMultiSNPs / (double)nProcessedLoci;
|
||||
variantMultiSnpRatio = (double)nMultiSNPs / (double)nSNPs;
|
||||
|
|
@ -170,7 +159,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
|
||||
TiTvRatio = (double)nTi / (double)nTv;
|
||||
|
||||
SNPNoveltyRate = noveltyRate(nMultiSNPs, knownSNPsPartial + knownSNPsComplete);
|
||||
indelNoveltyRate = noveltyRate(nMultiSNPs, knownIndelsPartial + knownIndelsComplete);
|
||||
SNPNoveltyRate = formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs);
|
||||
indelNoveltyRate = formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,4 +49,46 @@ public abstract class VariantEvaluator {
|
|||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the variant in vc was a singleton in the original input evaluation
|
||||
* set, regardless of variant context subsetting that has occurred.
|
||||
* @param eval the variant context whose IS_SINGLETON_KEY attribute is checked
|
||||
* @return true if eval was originally a singleton site
|
||||
*/
|
||||
protected static final boolean variantWasSingleton(final VariantContext eval) {
|
||||
return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats the novelty rate as a %.2f string
|
||||
*
|
||||
* @param known number of variants from all that are known
|
||||
* @param all number of all variants
|
||||
* @return a String novelty rate, or NA if all == 0
|
||||
*/
|
||||
protected static final String formattedNoveltyRate(final int known, final int all) {
|
||||
return formattedPercent(all - known, all);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats the fraction x / total as a %.2f string
|
||||
*
|
||||
* @param x number of objects part of total that meet some criteria
|
||||
* @param total count of all objects, including x
|
||||
* @return a String percent rate, or NA if total == 0
|
||||
*/
|
||||
protected static final String formattedPercent(final int x, final int total) {
|
||||
return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats a ratio as a %.2f string
|
||||
*
|
||||
* @param num number of observations in the numerator
|
||||
* @param denom number of observations in the denominator
|
||||
* @return a String formatted ratio, or NA if denom == 0
|
||||
*/
|
||||
protected static final String formattedRatio(final int num, final int denom) {
|
||||
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
@DataPoint(description = "average variant quality for each allele count")
|
||||
AlleleCountStats alleleCountStats = null;
|
||||
|
||||
static class TiTvStats implements TableType {
|
||||
static class TiTvStats extends TableType {
|
||||
final static int NUM_BINS = 20;
|
||||
final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
|
||||
final long transitionByQuality[] = new long[NUM_BINS];
|
||||
|
|
@ -73,10 +73,6 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
return columnKeys;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "TiTvStats";
|
||||
}
|
||||
|
||||
public String getCell(int x, int y) {
|
||||
return String.valueOf(titvByQuality[y]);
|
||||
}
|
||||
|
|
@ -143,7 +139,7 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
}
|
||||
}
|
||||
|
||||
class AlleleCountStats implements TableType {
|
||||
class AlleleCountStats extends TableType {
|
||||
final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
|
||||
final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
|
||||
|
||||
|
|
@ -163,10 +159,6 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
return new String[]{"alleleCount","avgQual"};
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "AlleleCountStats";
|
||||
}
|
||||
|
||||
public String getCell(int x, int y) {
|
||||
int iii = 0;
|
||||
for( final Integer key : qualityListMap.keySet() ) {
|
||||
|
|
|
|||
|
|
@ -255,9 +255,7 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
private final String noveltyRate(Type type) {
|
||||
final int all = allVariantCounts.all(type);
|
||||
final int known = knownVariantCounts.all(type);
|
||||
final int novel = all - known;
|
||||
final double rate = (novel / (1.0 * all));
|
||||
return all == 0 ? "NA" : String.format("%.2f", rate);
|
||||
return formattedNoveltyRate(known, all);
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
|
|
|
|||
|
|
@ -9,9 +9,11 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.util;
|
|||
*
|
||||
* an interface for turning arbitrary objects into tables
|
||||
*/
|
||||
public interface TableType {
|
||||
public Object[] getRowKeys();
|
||||
public Object[] getColumnKeys();
|
||||
public Object getCell(int x, int y);
|
||||
public String getName();
|
||||
public abstract class TableType {
|
||||
public abstract Object[] getRowKeys();
|
||||
public abstract Object[] getColumnKeys();
|
||||
public abstract Object getCell(int x, int y);
|
||||
public String getName() { return getClass().getSimpleName(); }
|
||||
public String getRowName() { return "row"; }
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -310,7 +310,7 @@ public class VariantEvalUtils {
|
|||
final int newAlleleCount = vcsub.getHetCount() + 2 * vcsub.getHomVarCount();
|
||||
|
||||
if (originalAlleleCount == newAlleleCount && newAlleleCount == 1) {
|
||||
builder.attribute("ISSINGLETON", true);
|
||||
builder.attribute(VariantEvalWalker.IS_SINGLETON_KEY, true);
|
||||
}
|
||||
|
||||
VariantContextUtils.calculateChromosomeCounts(builder, true);
|
||||
|
|
|
|||
Loading…
Reference in New Issue