VariantEval updates

-- Performance optimizations
-- Tables are now cleanly formatted (floats are %.2f printed)
-- VariantSummary is a standard report now
-- Removed CompEvalGenotypes (it didn't do anything)
-- Deleted unused classes in GenotypeConcordance
-- Updated integration tests as appropriate
This commit is contained in:
Mark DePristo 2011-11-23 13:02:07 -05:00
parent 5a4856b82e
commit 4107636144
12 changed files with 281 additions and 358 deletions

View File

@ -312,16 +312,17 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
String aastr = (ancestralAlignments == null) ? null : new String(ancestralAlignments.getSubsequenceAt(ref.getLocus().getContig(), ref.getLocus().getStart(), ref.getLocus().getStop()).getBases());
// --------- track --------- sample - VariantContexts -
HashMap<RodBinding<VariantContext>, HashMap<String, Set<VariantContext>>> evalVCs = variantEvalUtils.bindVariantContexts(tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals);
HashMap<RodBinding<VariantContext>, HashMap<String, Set<VariantContext>>> compVCs = variantEvalUtils.bindVariantContexts(tracker, ref, comps, byFilterIsEnabled, false, false, false);
HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> evalVCs = variantEvalUtils.bindVariantContexts(tracker, ref, evals, byFilterIsEnabled, true, perSampleIsEnabled, mergeEvals);
HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> compVCs = variantEvalUtils.bindVariantContexts(tracker, ref, comps, byFilterIsEnabled, false, false, false);
// for each eval track
for ( final RodBinding<VariantContext> evalRod : evals ) {
final HashMap<String, Set<VariantContext>> evalSet = evalVCs.containsKey(evalRod) ? evalVCs.get(evalRod) : new HashMap<String, Set<VariantContext>>(0);
final Map<String, Collection<VariantContext>> emptyEvalMap = Collections.emptyMap();
final Map<String, Collection<VariantContext>> evalSet = evalVCs.containsKey(evalRod) ? evalVCs.get(evalRod) : emptyEvalMap;
// for each sample stratifier
for ( final String sampleName : sampleNamesForStratification ) {
Set<VariantContext> evalSetBySample = evalSet.get(sampleName);
Collection<VariantContext> evalSetBySample = evalSet.get(sampleName);
if ( evalSetBySample == null ) {
evalSetBySample = new HashSet<VariantContext>(1);
evalSetBySample.add(null);
@ -337,8 +338,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// for each comp track
for ( final RodBinding<VariantContext> compRod : comps ) {
// no sample stratification for comps
final HashMap<String, Set<VariantContext>> compSetHash = compVCs.get(compRod);
final Set<VariantContext> compSet = (compSetHash == null || compSetHash.size() == 0) ? new HashSet<VariantContext>(0) : compVCs.get(compRod).values().iterator().next();
final HashMap<String, Collection<VariantContext>> compSetHash = compVCs.get(compRod);
final Collection<VariantContext> compSet = (compSetHash == null || compSetHash.size() == 0) ? Collections.<VariantContext>emptyList() : compVCs.get(compRod).values().iterator().next();
// find the comp
final VariantContext comp = findMatchingComp(eval, compSet);
@ -382,7 +383,7 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
return null;
}
private VariantContext findMatchingComp(final VariantContext eval, final Set<VariantContext> comps) {
private VariantContext findMatchingComp(final VariantContext eval, final Collection<VariantContext> comps) {
// if no comps, return null
if ( comps == null || comps.isEmpty() )
return null;
@ -447,11 +448,11 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
TableType t = (TableType) field.get(ve);
String subTableName = ve.getClass().getSimpleName() + "." + field.getName();
String subTableDesc = datamap.get(field).description();
final DataPoint dataPointAnn = datamap.get(field);
GATKReportTable table;
if (!report.hasTable(subTableName)) {
report.addTable(subTableName, subTableDesc);
report.addTable(subTableName, dataPointAnn.description());
table = report.getTable(subTableName);
table.addPrimaryKey("entry", false);

View File

@ -1,35 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
/**
 * Mutable pairing of the comp-track and eval-track genotypes observed at a single locus.
 */
class NewCompEvalGenotypes {
    private GenomeLoc loc;
    private Genotype compGt;
    private Genotype evalGt;

    /**
     * @param loc    the locus the genotypes were observed at
     * @param compGt the genotype from the comp track
     * @param evalGt the genotype from the eval track
     */
    public NewCompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
        this.loc = loc;
        this.compGt = compGt;
        this.evalGt = evalGt;
    }

    /** @return the locus these genotypes were observed at */
    public GenomeLoc getLocus() {
        return loc;
    }

    /** @return the genotype from the comp track */
    public Genotype getCompGenotype() {
        return compGt;
    }

    /**
     * @deprecated misspelled accessor retained for source compatibility;
     *             use {@link #getCompGenotype()} instead.
     */
    @Deprecated
    public Genotype getCompGenotpye() {
        return getCompGenotype();
    }

    /** @return the genotype from the eval track */
    public Genotype getEvalGenotype() {
        return evalGt;
    }

    public void setCompGenotype(Genotype compGt) {
        this.compGt = compGt;
    }

    public void setEvalGenotype(Genotype evalGt) {
        this.evalGt = evalGt;
    }
}

View File

@ -28,13 +28,13 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
@DataPoint(description = "number of eval sites at comp sites")
long nVariantsAtComp = 0;
@DataPoint(description = "percentage of eval sites at comp sites")
@DataPoint(description = "percentage of eval sites at comp sites", format = "%.2f" )
double compRate = 0.0;
@DataPoint(description = "number of concordant sites")
long nConcordant = 0;
@DataPoint(description = "the concordance rate")
@DataPoint(description = "the concordance rate", format = "%.2f")
double concordantRate = 0.0;
public int getComparisonOrder() {

View File

@ -62,17 +62,17 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
public long nHomDerived = 0;
// calculations that get set in the finalizeEvaluation method
@DataPoint(description = "heterozygosity per locus rate")
@DataPoint(description = "heterozygosity per locus rate", format = "%.2e")
public double heterozygosity = 0;
@DataPoint(description = "heterozygosity per base pair")
@DataPoint(description = "heterozygosity per base pair", format = "%.2f")
public double heterozygosityPerBp = 0;
@DataPoint(description = "heterozygosity to homozygosity ratio")
@DataPoint(description = "heterozygosity to homozygosity ratio", format = "%.2f")
public double hetHomRatio = 0;
@DataPoint(description = "indel rate (insertion count + deletion count)")
@DataPoint(description = "indel rate (insertion count + deletion count)", format = "%.2e")
public double indelRate = 0;
@DataPoint(description = "indel rate per base pair")
@DataPoint(description = "indel rate per base pair", format = "%.2f")
public double indelRatePerBp = 0;
@DataPoint(description = "deletion to insertion ratio")
@DataPoint(description = "deletion to insertion ratio", format = "%.2f")
public double deletionInsertionRatio = 0;
private double perLocusRate(long n) {

View File

@ -1,159 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
@Analysis(description = "Build 1000 Genome Phase I paper summary of variants table")
public class G1KPhaseITable extends VariantEvaluator {
    // basic counts on various rates found
    @DataPoint(description = "Number of samples")
    public long nSamples = 0;
    @DataPoint(description = "Number of processed loci")
    public long nProcessedLoci = 0;
    @DataPoint(description = "Number of SNPs")
    public long nSNPs = 0;
    @DataPoint(description = "SNP Novelty Rate")
    public String SNPNoveltyRate = "NA";
    @DataPoint(description = "Mean number of SNPs per individual")
    public long nSNPsPerSample = 0;
    @DataPoint(description = "Number of Indels")
    public long nIndels = 0;
    @DataPoint(description = "Indel Novelty Rate")
    public String IndelNoveltyRate = "NA";
    @DataPoint(description = "Mean number of Indels per individual")
    public long nIndelsPerSample = 0;
    @DataPoint(description = "Number of SVs")
    public long nSVs = 0;
    @DataPoint(description = "SV Novelty Rate")
    public String SVNoveltyRate = "NA";
    @DataPoint(description = "Mean number of SVs per individual")
    public long nSVsPerSample = 0;

    // per-type counters over all samples; knownVariantCounts only counts sites also seen in a comp track
    Map<VariantContext.Type, Integer> allVariantCounts, knownVariantCounts;
    // per-sample counters: sample name -> (variant type -> count)
    Map<String, Map<VariantContext.Type, Integer>> countsPerSample;

    /** Builds a fresh type->count map pre-seeded with 0 for the three tracked types (SNP/INDEL/SYMBOLIC). */
    private final Map<VariantContext.Type, Integer> makeCounts() {
        Map<VariantContext.Type, Integer> counts = new EnumMap<VariantContext.Type, Integer>(VariantContext.Type.class);
        counts.put(VariantContext.Type.SNP, 0);
        counts.put(VariantContext.Type.INDEL, 0);
        counts.put(VariantContext.Type.SYMBOLIC, 0);
        return counts;
    }

    /** Initializes all counters for the samples the walker will evaluate. */
    public void initialize(VariantEvalWalker walker) {
        countsPerSample = new HashMap<String, Map<VariantContext.Type, Integer>>();
        nSamples = walker.getSampleNamesForEvaluation().size();
        for ( String sample : walker.getSampleNamesForEvaluation() ) {
            countsPerSample.put(sample, makeCounts());
        }
        allVariantCounts = makeCounts();
        knownVariantCounts = makeCounts();
    }

    @Override public boolean enabled() { return true; }

    public int getComparisonOrder() {
        return 2; // we only need to see each eval track
    }

    /** Accumulates the processed-locus counter (skipped bases plus this locus, if there is a reference). */
    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
    }

    /**
     * Counts one eval/comp pair: bumps the site-level counters (and the known counters when a
     * matching comp record exists), then the per-sample counters for every called non-hom-ref genotype.
     */
    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( eval == null || eval.isMonomorphicInSamples() ) return null;
        switch (eval.getType()) {
            case SNP:
            case INDEL:
            case SYMBOLIC:
                allVariantCounts.put(eval.getType(), allVariantCounts.get(eval.getType()) + 1);
                // comp != null means this eval site was also present in the comp (known) track
                if ( comp != null )
                    knownVariantCounts.put(eval.getType(), knownVariantCounts.get(eval.getType()) + 1);
                break;
            default:
                throw new UserException.BadInput("Unexpected variant context type: " + eval);
        }
        // count variants per sample
        for (final Genotype g : eval.getGenotypes()) {
            if ( ! g.isNoCall() && ! g.isHomRef() ) {
                int count = countsPerSample.get(g.getSampleName()).get(eval.getType());
                countsPerSample.get(g.getSampleName()).put(eval.getType(), count + 1);
            }
        }
        return null; // we don't capture any interesting sites
    }

    /** Mean count of the given type across all samples, rounded to the nearest int. */
    private final int perSampleMean(VariantContext.Type type) {
        long sum = 0;
        for ( Map<VariantContext.Type, Integer> count : countsPerSample.values() ) {
            sum += count.get(type);
        }
        return (int)(Math.round(sum / (1.0 * countsPerSample.size())));
    }

    /** Fraction of sites of the given type NOT seen in the comp track, as "%.2f", or "NA" when no sites were seen. */
    private final String noveltyRate(VariantContext.Type type) {
        int all = allVariantCounts.get(type);
        int known = knownVariantCounts.get(type);
        int novel = all - known;
        double rate = (novel / (1.0 * all));
        return all == 0 ? "NA" : String.format("%.2f", rate);
    }

    /** Folds the accumulated counters into the published @DataPoint fields. */
    public void finalizeEvaluation() {
        nSNPs = allVariantCounts.get(VariantContext.Type.SNP);
        nIndels = allVariantCounts.get(VariantContext.Type.INDEL);
        nSVs = allVariantCounts.get(VariantContext.Type.SYMBOLIC);
        nSNPsPerSample = perSampleMean(VariantContext.Type.SNP);
        nIndelsPerSample = perSampleMean(VariantContext.Type.INDEL);
        nSVsPerSample = perSampleMean(VariantContext.Type.SYMBOLIC);
        SNPNoveltyRate = noveltyRate(VariantContext.Type.SNP);
        IndelNoveltyRate = noveltyRate(VariantContext.Type.INDEL);
        SVNoveltyRate = noveltyRate(VariantContext.Type.SYMBOLIC);
    }
}

View File

@ -445,39 +445,6 @@ class SampleStats implements TableType {
}
}
/**
 * Sample stats stratified by allele count (AC) rather than by sample name.
 * Rows are named evalAC0..evalACn and compAC0..compACm, where n/m are twice the
 * number of genotypes in the eval/comp contexts.
 */
class ACStats extends SampleStats {
    private final String[] rowKeys;

    public ACStats(VariantContext evalvc, VariantContext compvc, int nGenotypeTypes) {
        super(nGenotypeTypes);
        // todo -- assuming ploidy 2 here...
        final int maxEvalAC = 2 * evalvc.getGenotypes().size();
        final int maxCompAC = 2 * compvc.getGenotypes().size();
        rowKeys = new String[maxEvalAC + maxCompAC + 2];
        for ( int ac = 0; ac <= maxEvalAC; ac++ ) {
            final String key = String.format("evalAC%d", ac);
            concordanceStats.put(key, new long[nGenotypeTypes][nGenotypeTypes]);
            rowKeys[ac] = key;
        }
        for ( int ac = 0; ac <= maxCompAC; ac++ ) {
            final String key = String.format("compAC%d", ac);
            concordanceStats.put(key, new long[nGenotypeTypes][nGenotypeTypes]);
            rowKeys[maxEvalAC + 1 + ac] = key;
        }
    }

    public String getName() {
        return "Allele Count Statistics";
    }

    public Object[] getRowKeys() {
        if ( rowKeys == null ) {
            throw new StingException("RowKeys is null!");
        }
        return rowKeys;
    }
}
/**
* a table of sample names to genotype concordance summary statistics
*/
@ -637,79 +604,3 @@ class SampleSummaryStats implements TableType {
}
}
/**
 * SampleSummaryStats stratified by allele count (AC) rather than by sample name.
 * Rows are the all-samples summary followed by evalAC0..evalACn and compAC0..compACm.
 */
class ACSummaryStats extends SampleSummaryStats {
    private final String[] rowKeys;

    public ACSummaryStats (final VariantContext evalvc, final VariantContext compvc) {
        final int maxEvalAC = 2 * evalvc.getGenotypes().size();
        final int maxCompAC = 2 * compvc.getGenotypes().size();
        concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]);
        rowKeys = new String[maxEvalAC + maxCompAC + 3];
        rowKeys[0] = ALL_SAMPLES_KEY;
        for ( int ac = 0; ac <= maxEvalAC; ac++ ) {
            final String key = String.format("evalAC%d", ac);
            concordanceSummary.put(key, new double[COLUMN_KEYS.length]);
            rowKeys[1 + ac] = key;
        }
        for ( int ac = 0; ac <= maxCompAC; ac++ ) {
            final String key = String.format("compAC%d", ac);
            concordanceSummary.put(key, new double[COLUMN_KEYS.length]);
            rowKeys[2 + maxEvalAC + ac] = key;
        }
    }

    public String getName() {
        return "Allele Count Summary Statistics";
    }

    public Object[] getRowKeys() {
        if ( rowKeys == null) {
            throw new StingException("rowKeys is null!!");
        }
        return rowKeys;
    }
}
/**
 * Comparator that orders AC-stratified row names: "evalAC&lt;n&gt;" rows sort before
 * "compAC&lt;n&gt;" rows, and within each group rows sort by ascending allele count;
 * non-String keys sort first.
 */
class CompACNames implements Comparator<Object> {
    final Logger myLogger;
    private boolean info = true; // log the "sorting" message at most once

    public CompACNames(Logger l) {
        myLogger = l;
    }

    /**
     * All CompACNames instances impose the same ordering, so any two are equal.
     * Null-safe (the previous version NPE'd on null, violating the equals contract).
     */
    @Override
    public boolean equals(Object o) {
        return o != null && o.getClass() == CompACNames.class;
    }

    /** Constant hash, consistent with equals(): all instances are interchangeable. */
    @Override
    public int hashCode() {
        return CompACNames.class.hashCode();
    }

    public int compare(Object o1, Object o2) {
        if ( info ) {
            myLogger.info("Sorting AC names");
            info = false;
        }
        // Integer.compare avoids the int-overflow risk of subtracting ranks,
        // which can span from Integer.MIN_VALUE/4 up past +1 + parsed AC.
        return Integer.compare(getRank(o1), getRank(o2));
    }

    /**
     * Maps a row key to its sort rank: non-Strings and unrecognized Strings get the
     * minimum rank, eval rows come next (by AC), comp rows last (by AC).
     */
    public int getRank(Object o) {
        if ( o.getClass() != String.class ) {
            return Integer.MIN_VALUE/4;
        } else {
            String s = (String) o;
            if ( s.startsWith("eval") ) {
                return Integer.MIN_VALUE/4 + 1 + parseAC(s);
            } else if ( s.startsWith("comp") ) {
                return 1 + parseAC(s);
            } else {
                return Integer.MIN_VALUE/4;
            }
        }
    }

    /** Extracts the allele count from a key shaped like "evalAC<n>" or "compAC<n>". */
    public int parseAC(String s) {
        String[] g = s.split("AC");
        return Integer.parseInt(g[1]);
    }
}

View File

@ -16,19 +16,19 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
long nTi = 0;
@DataPoint(description = "number of transversion loci")
long nTv = 0;
@DataPoint(description = "the transition to transversion ratio")
@DataPoint(description = "the transition to transversion ratio", format = "%.2f")
double tiTvRatio = 0.0;
@DataPoint(description = "number of comp transition sites")
long nTiInComp = 0;
@DataPoint(description = "number of comp transversion sites")
long nTvInComp = 0;
@DataPoint(description = "the transition to transversion ratio for comp sites")
@DataPoint(description = "the transition to transversion ratio for comp sites", format = "%.2f")
double TiTvRatioStandard = 0.0;
@DataPoint(description = "number of derived transition loci")
long nTiDerived = 0;
@DataPoint(description = "number of derived transversion loci")
long nTvDerived = 0;
@DataPoint(description = "the derived transition to transversion ratio")
@DataPoint(description = "the derived transition to transversion ratio", format = "%.2f")
double tiTvDerivedRatio = 0.0;
public boolean enabled() {

View File

@ -30,10 +30,10 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
@DataPoint(description = "FN") int FN = 0;
@DataPoint(description = "TN") int TN = 0;
@DataPoint(description = "Sensitivity") double sensitivity = 0;
@DataPoint(description = "Specificity") double specificity = 0;
@DataPoint(description = "PPV") double PPV = 0;
@DataPoint(description = "FDR") double FDR = 0;
@DataPoint(description = "Sensitivity", format = "%.2f") double sensitivity = 0;
@DataPoint(description = "Specificity", format = "%.2f") double specificity = 0;
@DataPoint(description = "PPV", format = "%.2f") double PPV = 0;
@DataPoint(description = "FDR", format = "%.2f") double FDR = 0;
@DataPoint(description = "CompMonoEvalNoCall") int CompMonoEvalNoCall = 0;
@DataPoint(description = "CompMonoEvalFiltered") int CompMonoEvalFiltered = 0;

View File

@ -0,0 +1,223 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.Collection;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
@Analysis(description = "1000 Genomes Phase I summary of variants table")
public class VariantSummary extends VariantEvaluator implements StandardEval {
    // basic counts on various rates found
    @DataPoint(description = "Number of samples")
    public long nSamples = 0;
    @DataPoint(description = "Number of processed loci")
    public long nProcessedLoci = 0;
    @DataPoint(description = "Number of SNPs")
    public long nSNPs = 0;
    @DataPoint(description = "Overall TiTv ratio", format = "%.2f")
    public double TiTvRatio = 0;
    @DataPoint(description = "SNP Novelty Rate")
    public String SNPNoveltyRate = "NA";
    @DataPoint(description = "Mean number of SNPs per individual")
    public long nSNPsPerSample = 0;
    @DataPoint(description = "Mean TiTv ratio per individual", format = "%.2f")
    public double TiTvRatioPerSample = 0;
    @DataPoint(description = "Mean depth of coverage per sample at SNPs", format = "%.1f")
    public double SNPDPPerSample = 0;
    @DataPoint(description = "Number of Indels")
    public long nIndels = 0;
    @DataPoint(description = "Indel Novelty Rate")
    public String IndelNoveltyRate = "NA";
    @DataPoint(description = "Mean number of Indels per individual")
    public long nIndelsPerSample = 0;
    @DataPoint(description = "Mean depth of coverage per sample at Indels", format = "%.1f")
    public double IndelDPPerSample = 0;
    @DataPoint(description = "Number of SVs")
    public long nSVs = 0;
    @DataPoint(description = "SV Novelty Rate")
    public String SVNoveltyRate = "NA";
    @DataPoint(description = "Mean number of SVs per individual")
    public long nSVsPerSample = 0;

    // per-type counters; knownVariantCounts only counts eval sites also seen in a comp track
    TypeSampleMap allVariantCounts, knownVariantCounts;
    TypeSampleMap countsPerSample;
    TypeSampleMap transitionsPerSample, transversionsPerSample;
    // NOTE(review): inc() adds 1 whenever a DP attribute is present, so the *DPPerSample
    // data points report the mean count of DP-annotated variants per sample, not the
    // mean depth itself -- confirm this is the intended metric
    TypeSampleMap depthPerSample;

    /** Sentinel sample key under which site-level (all-samples) counts are accumulated. */
    private final static String ALL = "ALL";

    /**
     * variant type -> (sample name -> count), where every per-type map also carries
     * the reserved ALL key for site-level totals.
     */
    private class TypeSampleMap extends EnumMap<VariantContext.Type, Map<String, Integer>> {
        public TypeSampleMap(final Collection<String> samples) {
            super(VariantContext.Type.class);
            for ( VariantContext.Type type : VariantContext.Type.values() ) {
                Map<String, Integer> bySample = new HashMap<String, Integer>(samples.size());
                for ( final String sample : samples ) {
                    bySample.put(sample, 0);
                }
                bySample.put(ALL, 0);
                this.put(type, bySample);
            }
        }

        /** Increments the counter for (type, sample) by one. */
        public final void inc(final VariantContext.Type type, final String sample) {
            final int count = this.get(type).get(sample);
            get(type).put(sample, count + 1);
        }

        /** @return the site-level (ALL-bucket) count for the given type */
        public final int all(VariantContext.Type type) {
            return get(type).get(ALL);
        }

        /** @return the per-sample mean count for the given type, excluding the ALL bucket, rounded to int */
        public final int meanValue(VariantContext.Type type) {
            long sum = 0;
            int n = 0;
            for ( final Map.Entry<String, Integer> pair : get(type).entrySet() ) {
                // use equals, not ==, to reliably skip the reserved ALL bucket
                if ( ! ALL.equals(pair.getKey()) ) {
                    n++;
                    sum += pair.getValue();
                }
            }
            return n == 0 ? 0 : (int) Math.round(sum / (1.0 * n));
        }

        /**
         * Mean of ratio(this[type][sample], denoms[type][sample]) over either the
         * ALL bucket alone (allP == true) or every real sample (allP == false).
         */
        public final double ratioValue(VariantContext.Type type, TypeSampleMap denoms, boolean allP) {
            double sum = 0;
            int n = 0;
            for ( final String sample : get(type).keySet() ) {
                final boolean isAll = ALL.equals(sample);
                if ( allP == isAll ) {
                    final long num = get(type).get(sample);
                    final long denom = denoms.get(type).get(sample);
                    sum += ratio(num, denom);
                    n++;
                }
            }
            // bug fix: the previous Math.round() here truncated TiTv ratios to whole
            // numbers, defeating the %.2f formatting of the TiTv data points
            return n == 0 ? 0.0 : sum / n;
        }
    }

    /** Allocates all counter tables for the samples the walker will evaluate. */
    public void initialize(VariantEvalWalker walker) {
        nSamples = walker.getSampleNamesForEvaluation().size();
        countsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
        transitionsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
        transversionsPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
        allVariantCounts = new TypeSampleMap(walker.getSampleNamesForEvaluation());
        knownVariantCounts = new TypeSampleMap(walker.getSampleNamesForEvaluation());
        depthPerSample = new TypeSampleMap(walker.getSampleNamesForEvaluation());
    }

    @Override public boolean enabled() { return true; }

    public int getComparisonOrder() {
        return 2; // we only need to see each eval track
    }

    /** Accumulates the processed-locus counter (skipped bases plus this locus, if there is a reference). */
    public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        nProcessedLoci += context.getSkippedBases() + (ref == null ? 0 : 1);
    }

    /**
     * Counts one eval/comp pair: site-level counts (plus Ti/Tv for SNPs and DP presence),
     * then the same metrics per sample for every called non-hom-ref genotype.
     */
    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( eval == null || eval.isMonomorphicInSamples() ) return null;
        TypeSampleMap titvTable = null;
        switch (eval.getType()) {
            case SNP:
                titvTable = VariantContextUtils.isTransition(eval) ? transitionsPerSample : transversionsPerSample;
                titvTable.inc(eval.getType(), ALL);
                // intentional fall-through: SNPs also get the generic counts below
            case INDEL:
            case SYMBOLIC:
                allVariantCounts.inc(eval.getType(), ALL);
                // comp != null means this eval site was also present in the comp (known) track
                if ( comp != null )
                    knownVariantCounts.inc(eval.getType(), ALL);
                if ( eval.hasAttribute(VCFConstants.DEPTH_KEY) )
                    depthPerSample.inc(eval.getType(), ALL);
                break;
            default:
                throw new UserException.BadInput("Unexpected variant context type: " + eval);
        }
        // per sample metrics
        for (final Genotype g : eval.getGenotypes()) {
            if ( ! g.isNoCall() && ! g.isHomRef() ) {
                countsPerSample.inc(eval.getType(), g.getSampleName());
                // update transition / transversion ratio
                if ( titvTable != null ) titvTable.inc(eval.getType(), g.getSampleName());
                if ( g.hasAttribute(VCFConstants.DEPTH_KEY) )
                    depthPerSample.inc(eval.getType(), g.getSampleName());
            }
        }
        return null; // we don't capture any interesting sites
    }

    /** Fraction of sites of the given type NOT seen in a comp track, as "%.2f", or "NA" when no sites were seen. */
    private final String noveltyRate(VariantContext.Type type) {
        final int all = allVariantCounts.all(type);
        final int known = knownVariantCounts.all(type);
        final int novel = all - known;
        final double rate = (novel / (1.0 * all));
        return all == 0 ? "NA" : String.format("%.2f", rate);
    }

    /** Folds the accumulated counters into the published @DataPoint fields. */
    public void finalizeEvaluation() {
        nSNPs = allVariantCounts.all(VariantContext.Type.SNP);
        nIndels = allVariantCounts.all(VariantContext.Type.INDEL);
        nSVs = allVariantCounts.all(VariantContext.Type.SYMBOLIC);
        TiTvRatio = transitionsPerSample.ratioValue(VariantContext.Type.SNP, transversionsPerSample, true);
        TiTvRatioPerSample = transitionsPerSample.ratioValue(VariantContext.Type.SNP, transversionsPerSample, false);
        nSNPsPerSample = countsPerSample.meanValue(VariantContext.Type.SNP);
        nIndelsPerSample = countsPerSample.meanValue(VariantContext.Type.INDEL);
        nSVsPerSample = countsPerSample.meanValue(VariantContext.Type.SYMBOLIC);
        SNPNoveltyRate = noveltyRate(VariantContext.Type.SNP);
        IndelNoveltyRate = noveltyRate(VariantContext.Type.INDEL);
        SVNoveltyRate = noveltyRate(VariantContext.Type.SYMBOLIC);
        SNPDPPerSample = depthPerSample.meanValue(VariantContext.Type.SNP);
        IndelDPPerSample = depthPerSample.meanValue(VariantContext.Type.INDEL);
    }
}

View File

@ -6,4 +6,5 @@ import java.lang.annotation.RetentionPolicy;
/**
 * Marks a field of a VariantEvaluator as a reportable data point; the field's value is
 * read reflectively and emitted as a report table column.
 */
@Retention(RetentionPolicy.RUNTIME) // must survive to runtime so the report builder can read it via reflection
public @interface DataPoint {
    String description() default ""; // the description, optional
    String format() default ""; // printf-style format for the value (e.g. "%.2f"); "" means default formatting
}

View File

@ -246,7 +246,7 @@ public class VariantEvalUtils {
field.setAccessible(true);
if (!(field.get(vei) instanceof TableType)) {
table.addColumn(field.getName(), 0.0);
table.addColumn(field.getName(), 0.0, datamap.get(field).format());
}
}
} catch (InstantiationException e) {
@ -297,6 +297,7 @@ public class VariantEvalUtils {
* Additional variant contexts per sample are automatically generated and added to the map unless the sample name
* matches the ALL_SAMPLE_NAME constant.
*
*
* @param tracker the metadata tracker
* @param ref the reference context
* @param tracks the list of tracks to process
@ -308,7 +309,7 @@ public class VariantEvalUtils {
*
* @return the mapping of track to VC list that should be populated
*/
public HashMap<RodBinding<VariantContext>, HashMap<String, Set<VariantContext>>>
public HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>>
bindVariantContexts(RefMetaDataTracker tracker,
ReferenceContext ref,
List<RodBinding<VariantContext>> tracks,
@ -319,11 +320,11 @@ public class VariantEvalUtils {
if ( tracker == null )
return null;
HashMap<RodBinding<VariantContext>, HashMap<String, Set<VariantContext>>> bindings = new HashMap<RodBinding<VariantContext>, HashMap<String, Set<VariantContext>>>();
HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>> bindings = new HashMap<RodBinding<VariantContext>, HashMap<String, Collection<VariantContext>>>();
RodBinding<VariantContext> firstTrack = tracks.isEmpty() ? null : tracks.get(0);
for ( RodBinding<VariantContext> track : tracks ) {
HashMap<String, Set<VariantContext>> mapping = new HashMap<String, Set<VariantContext>>();
HashMap<String, Collection<VariantContext>> mapping = new HashMap<String, Collection<VariantContext>>();
for ( VariantContext vc : tracker.getValues(track, ref.getLocus()) ) {
@ -352,9 +353,9 @@ public class VariantEvalUtils {
if ( mergeTracks && bindings.containsKey(firstTrack) ) {
// go through each binding of sample -> value and add all of the bindings from this entry
HashMap<String, Set<VariantContext>> firstMapping = bindings.get(firstTrack);
for ( Map.Entry<String, Set<VariantContext>> elt : mapping.entrySet() ) {
Set<VariantContext> firstMappingSet = firstMapping.get(elt.getKey());
HashMap<String, Collection<VariantContext>> firstMapping = bindings.get(firstTrack);
for ( Map.Entry<String, Collection<VariantContext>> elt : mapping.entrySet() ) {
Collection<VariantContext> firstMappingSet = firstMapping.get(elt.getKey());
if ( firstMappingSet != null ) {
firstMappingSet.addAll(elt.getValue());
} else {
@ -369,9 +370,9 @@ public class VariantEvalUtils {
return bindings;
}
private void addMapping(HashMap<String, Set<VariantContext>> mappings, String sample, VariantContext vc) {
private void addMapping(HashMap<String, Collection<VariantContext>> mappings, String sample, VariantContext vc) {
if ( !mappings.containsKey(sample) )
mappings.put(sample, new LinkedHashSet<VariantContext>());
mappings.put(sample, new ArrayList<VariantContext>(1));
mappings.get(sample).add(vc);
}

View File

@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("a36414421621b377d6146d58d2fcecd0")
Arrays.asList("abe943d1aac120d7e75b9b9e5dac2399")
);
executeTest("testFunctionClassWithSnpeff", spec);
}
@ -50,7 +50,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("6a71b17c19f5914c277a99f45f5d9c39")
Arrays.asList("5fd9624c7a35ffb79d0feb1e233fc757")
);
executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec);
}
@ -70,7 +70,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("fb926edfd3d811e18b33798a43ef4379")
Arrays.asList("4a8765cd02d36e63f6d0f0c10a6c674b")
);
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
}
@ -91,7 +91,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("26b7d57e3a204ac80a28cb29485b59b7")
Arrays.asList("4106ab8f742ad1c3138c29220151503c")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@ -113,7 +113,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("1df8184062f330bea9da8bacacc5a09d")
Arrays.asList("6cee3a8d68307a118944f2df5401ac89")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}
@ -134,7 +134,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("927f26414509db9e7c0a2c067d57c949")
Arrays.asList("af5dd27354d5dfd0d2fe03149af09b55")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
}
@ -155,7 +155,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("e6fddefd95122cabc5a0f0b95bce6d34")
Arrays.asList("062a231e203671e19aa9c6507710d762")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
}
@ -176,7 +176,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("df10486dae73a9cf8c647964f51ba3e0")
Arrays.asList("75abdd2b17c0a5e04814b6969a3d4d7e")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
}
@ -197,7 +197,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("524adb0b7ff70e227b8803a88f36713e")
Arrays.asList("bdbb5f8230a4a193058750c5e506c733")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
}
@ -220,7 +220,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("ef6449789dfc032602458b7c5538a1bc")
Arrays.asList("f076120da22930294840fcc396f5f141")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
}
@ -245,7 +245,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("13b90e94fa82d72bb04a0a5addb27c3f")
Arrays.asList("69201f4a2a7a44b38805a4aeeb8830b6")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
}
@ -264,7 +264,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("8458b9d7803d75aae551fac7dbd152d6")
Arrays.asList("c3bd3cb6cfb21a8c2b4d5f69104bf6c2")
);
executeTest("testFundamentalsCountVariantsNoCompRod", spec);
}
@ -277,7 +277,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("b954dee127ec4205ed7d33c91aa3e045"));
1, Arrays.asList("861f94e3237d62bd5bc00757319241f7"));
executeTestParallel("testSelect1", spec);
}
@ -294,7 +294,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompVsEvalAC() {
    // Evaluates a small YRI-trio eval track against a comp track with fabricated
    // AC values, stratified by CpG, running only the GenotypeConcordance module.
    String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
    // Fix: the diff left both the old and new expected-MD5 lines in place,
    // declaring 'spec' twice; keep only the updated expected checksum.
    WalkerTestSpec spec = new WalkerTestSpec(extraArgs, 1, Arrays.asList("955c33365e017679047fabec0f14d5e0"));
    executeTestParallel("testCompVsEvalAC", spec);
}
@ -312,7 +312,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompOverlap() {
    // Runs only the CompOverlap evaluation module for sample NA12878 over the
    // PacBio intervals, comparing against HapMap 3.3 genotypes, stratified by Novelty.
    String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
    // Fix: the diff left both the old and new expected-MD5 lines in place,
    // declaring 'spec' twice; keep only the updated expected checksum.
    WalkerTestSpec spec = new WalkerTestSpec(extraArgs, 1, Arrays.asList("fb7d989e44bd74c5376cb5732f9f3f64"));
    executeTestParallel("testCompOverlap", spec);
}
@ -324,7 +324,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --dbsnp " + b37dbSNP132 +
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("835b44fc3004cc975c968c9f92ed25d6"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("da5bcb305c5ef207ce175821efdbdefd"));
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
}
@ -336,7 +336,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
" -noST -ST Novelty -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("f0e003f1293343c3210ae95e8936b19a"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("fde839ece1442388f21a2f0b936756a8"));
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
}
@ -353,13 +353,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" -noST -noEV -ST Novelty -EV CompOverlap" +
" -o %s";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("0b81d97f843ec4a1a4222d1f9949bfca"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("1efae6b3b88c752b771e0c8fae24464e"));
executeTestParallel("testMultipleCompTracks",spec);
}
@Test
public void testPerSampleAndSubsettedSampleHaveSameResults() {
String md5 = "7425ca5c439afd7bb33ed5cfea02c2b3";
public void testPerSampleAndSubsettedSampleHaveSameResults1() {
String md5 = "bc9bcabc3105e2515d9a2d41506d2de1";
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
@ -414,7 +414,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("924b6123edb9da540d0abc66f6f33e16")
Arrays.asList("e53546243250634fc03e83b4e61ec55f")
);
executeTest("testAlleleCountStrat", spec);
}
@ -435,7 +435,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("9794e2dba205c6929dc89899fdf0bf6b")
Arrays.asList("c8086f0525bc13e666afeb670c2e13ae")
);
executeTest("testIntervalStrat", spec);
}