diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java
index 7a3630b35..79a0a12e3 100755
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/DbSNPPercentage.java
@@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
-import java.util.ArrayList;
import java.util.List;
import java.util.Arrays;
@@ -47,7 +46,7 @@ public class DbSNPPercentage extends VariantEvaluator {
/**
* What fraction of the evaluated site variants were also found in the db?
*
- * @return
+ * @return db rate
*/
public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); }
public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); }
@@ -79,9 +78,9 @@ public class DbSNPPercentage extends VariantEvaluator {
/**
* Returns true if every allele in eval is also in dbsnp
*
- * @param eval
- * @param dbsnp
- * @return
+ * @param eval eval context
+ * @param dbsnp db context
+ * @return true if eval and db are discordant
*/
public boolean discordantP(VariantContext eval, VariantContext dbsnp ) {
for ( Allele a : eval.getAlleles() ) {
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java
new file mode 100755
index 000000000..702a23452
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/GenotypeConcordance.java
@@ -0,0 +1,207 @@
+package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2;
+
+import org.broadinstitute.sting.gatk.contexts.*;
+import org.broadinstitute.sting.gatk.refdata.*;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
+import org.broadinstitute.sting.utils.StingException;
+
+import java.util.*;
+
+/**
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ */
+public class GenotypeConcordance extends VariantEvaluator {
+
+ private static final int nGenotypeTypes = Genotype.Type.values().length;
+
+ class SampleStats {
+
+ long[][] concordance = new long[nGenotypeTypes][nGenotypeTypes];
+
+ public String toString() {
+ StringBuffer sb = new StringBuffer();
+ for (int truth = 0; truth < nGenotypeTypes; truth++ ) {
+ // don't print out when truth = no-call
+ if ( truth == Genotype.Type.NO_CALL.ordinal() )
+ continue;
+ long total = 0;
+ for (int called = 0; called < nGenotypeTypes; called++ )
+ total += concordance[truth][called];
+ sb.append(String.format("%d %d %.2f ", total, concordance[truth][truth], total == 0 ? 0.0 : (100.0 * (double)concordance[truth][truth] / (double)total)));
+ for (int called = 0; called < nGenotypeTypes; called++ ) {
+ if ( called != truth )
+ sb.append(String.format("%d ", concordance[truth][called]));
+ }
+ }
+
+ return sb.toString();
+ }
+ }
+
+ class FrequencyStats {
+ long nFound = 0;
+ long nMissed = 0;
+
+ public String toString() {
+ long total = nFound + nMissed;
+ return String.format("%d %d %.2f ", nFound, nMissed, total == 0 ? 0.0 : (100.0 * rate(nFound, total)));
+ }
+ }
+
+ // a mapping from allele count to stats
+ private HashMap alleleCountStats = new HashMap();
+
+ // a mapping from sample to stats
+ private HashMap concordanceStats = null;
+
+ // keep a list of the validation data we saw before the first eval data
+ private HashSet missedValidationData = new HashSet();
+
+
+ public GenotypeConcordance(VariantEval2Walker parent) {
+ // don't do anything
+ }
+
+ public String getName() {
+ return "genotypeConcordance";
+ }
+
+ public int getComparisonOrder() {
+ return 2; // we need to see each eval track and each comp track
+ }
+
+ public boolean enabled() { return true; }
+
+ public String toString() {
+ return getName() + ": " + getTableRows();
+ }
+
+ private static List SAMPLE_HEADER =
+ Arrays.asList("sample",
+ "total_true_ref", "n_ref/ref", "%_ref/ref",
+ "n_ref/no-call", "n_ref/het", "n_ref/hom",
+ "total_true_het", "n_het/het", "%_het/het",
+ "n_het/no-call", "n_het/ref", "n_het/hom",
+ "total_true_hom", "n_hom/hom", "%_hom/hom",
+ "n_hom/no-call", "n_hom/ref", "n_hom/het");
+
+ private static List FREQUENCY_HEADER =
+ Arrays.asList("alleleCount", "n_found", "n_missed", "%_found");
+
+ // making it a table
+ public List getTableHeader() {
+ ArrayList header = new ArrayList();
+ header.addAll(SAMPLE_HEADER);
+ header.addAll(FREQUENCY_HEADER);
+ return header;
+ }
+
+ public List> getTableRows() {
+ ArrayList> rows = new ArrayList>();
+
+ if ( concordanceStats != null ) {
+ for ( Map.Entry sample : concordanceStats.entrySet() )
+ rows.add(Arrays.asList(String.format("%s %s", sample.getKey(), sample.getValue().toString()).split(" ")));
+ }
+
+ if ( alleleCountStats != null ) {
+ for ( Map.Entry alleleCount : alleleCountStats.entrySet() )
+ rows.add(Arrays.asList(String.format("%d %s", alleleCount.getKey(), alleleCount.getValue().toString()).split(" ")));
+ }
+
+ return rows;
+ }
+
+ public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+ String interesting = null;
+
+ // sanity check that we at least have either eval or validation data
+ if ( eval == null && !isValidVC(validation) )
+ return interesting;
+
+ if ( concordanceStats == null ) {
+ if ( eval != null ) {
+ // initialize the concordance table
+ createConcordanceTable(eval);
+ for ( VariantContext vc : missedValidationData )
+ determineStats(null, vc);
+ missedValidationData = null;
+ } else {
+ missedValidationData.add(validation);
+ return interesting;
+ }
+ }
+
+ determineStats(eval, validation);
+
+ return interesting; // we don't capture any interesting sites
+ }
+
+ private void determineStats(VariantContext eval, VariantContext validation) {
+
+ // determine concordance for eval data
+ if ( eval != null ) {
+
+ for ( String sample : eval.getSampleNames() ) {
+ Genotype.Type called = eval.getGenotype(sample).getType();
+ Genotype.Type truth;
+
+ if ( !isValidVC(validation) || !validation.hasGenotype(sample) )
+ truth = Genotype.Type.NO_CALL;
+ else
+ truth = validation.getGenotype(sample).getType();
+
+ SampleStats stats = concordanceStats.get(sample);
+ if ( stats == null )
+ throw new StingException("Sample " + sample + " has not been seen in a previous eval; this analysis module assumes that all samples are present in each variant context");
+ stats.concordance[truth.ordinal()][called.ordinal()]++;
+ }
+ }
+ // otherwise, mark no-calls for all samples
+ else {
+
+ Genotype.Type called = Genotype.Type.NO_CALL;
+
+ for ( String sample : validation.getSampleNames() ) {
+ SampleStats stats = concordanceStats.get(sample);
+ if ( stats == null )
+ continue;
+
+ Genotype.Type truth = validation.getGenotype(sample).getType();
+ stats.concordance[truth.ordinal()][called.ordinal()]++;
+ }
+ }
+
+ // determine allele count concordance ()
+ if ( isValidVC(validation) && validation.isPolymorphic() ) {
+ int trueAlleleCount = 0;
+ for ( Allele a : validation.getAlternateAlleles() )
+ trueAlleleCount += validation.getChromosomeCount(a);
+
+ if ( !alleleCountStats.containsKey(trueAlleleCount) )
+ alleleCountStats.put(trueAlleleCount, new FrequencyStats());
+ FrequencyStats stats = alleleCountStats.get(trueAlleleCount);
+ if ( eval != null )
+ stats.nFound++;
+ else
+ stats.nMissed++;
+ }
+ }
+
+ private static boolean isValidVC(VariantContext vc) {
+ return (vc != null && !vc.isFiltered());
+ }
+
+ private void createConcordanceTable(VariantContext vc) {
+ concordanceStats = new HashMap();
+ for ( String sample : vc.getSampleNames() )
+ concordanceStats.put(sample, new SampleStats());
+ }
+
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java
index 02ce6058d..32039d96d 100755
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/MendelianViolationEvaluator.java
@@ -109,7 +109,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
if ( isViolation(vc, momG, dadG, childG) ) {
nViolations++;
- String label = null;
+ String label;
if ( childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar() )) {
label = "KidHomRef_ParentHomVar";
KidHomRef_ParentHomVar++;
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java
index 66272b3d1..aa600232c 100755
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/ValidationRate.java
@@ -4,9 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
-import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
-import java.util.ArrayList;
import java.util.List;
import java.util.Arrays;
@@ -84,7 +82,7 @@ public class ValidationRate extends VariantEvaluator {
String interesting = null;
if ( validation != null && validation.hasGenotypes() && validation.isNotFiltered() ) {
- SiteStats overlap = null;
+ SiteStats overlap;
if ( validation.isPolymorphic() ) {
validationStats.nPoly++;
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java
index f5ed2dfae..888900e94 100755
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEval2Walker.java
@@ -3,10 +3,7 @@ package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableVariantContext;
-import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
-import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
-import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
@@ -30,7 +27,6 @@ import java.util.*;
// todo -- write a simple column table system and have the evaluators return this instead of the list> objects
// todo -- site frequency spectrum eval (freq. of variants in eval as a function of their AC and AN numbers)
-// todo -- multiple sample concordance tool (genotypes in eval vs. genotypes in truth)
// todo -- allele freqeuncy discovery tool (FREQ in true vs. discovery counts in eval). Needs to process subset of samples in true (pools)
// todo -- clustered SNP counter
// todo -- HWEs
@@ -121,7 +117,7 @@ public class VariantEval2Walker extends RodWalker {
@Argument(shortName="rsID", fullName="rsID", doc="If provided, list of rsID and build number for capping known snps by their build date", required=false)
protected String rsIDFile = null;
- @Argument(shortName="maxRsIDBuild", fullName="maxRsIDBuild", doc="If provided, only variants with rsIDs <= maxRsIDBuild will be included in the set of knwon snps", required=false)
+ @Argument(shortName="maxRsIDBuild", fullName="maxRsIDBuild", doc="If provided, only variants with rsIDs <= maxRsIDBuild will be included in the set of known snps", required=false)
protected int maxRsIDBuild = Integer.MAX_VALUE;
Set rsIDsToExclude = null;
@@ -183,6 +179,8 @@ public class VariantEval2Walker extends RodWalker {
private static String KNOWN_SET_NAME = "known";
private static String NOVEL_SET_NAME = "novel";
+ private static String NO_COMP_NAME = "N/A";
+
private final static String CONTEXT_HEADER = "eval.comp.select.filter.novelty";
private final static int N_CONTEXT_NAME_PARTS = CONTEXT_HEADER.split("\\.").length;
private static int[] nameSizes = new int[N_CONTEXT_NAME_PARTS];
@@ -221,6 +219,10 @@ public class VariantEval2Walker extends RodWalker {
}
}
+ // if no comp rod was provided, we still want to be able to do evaluations, so use a default comp name
+ if ( compNames.size() == 0 )
+ compNames.add(NO_COMP_NAME);
+
contexts = initializeEvaluationContexts(evalNames, compNames, selectExps);
determineContextNamePartSizes();
diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java
index f12f05692..6727c59bf 100755
--- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java
+++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/varianteval2/VariantEvaluator.java
@@ -6,14 +6,15 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import java.util.List;
-import java.util.ArrayList;
/**
- * Created by IntelliJ IDEA.
- * User: depristo
- * Date: Jan 29, 2010
- * Time: 3:38:02 PM
- * To change this template use File | Settings | File Templates.
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
abstract class VariantEvaluator {
// protected boolean accumulateInterestingSites = false, printInterestingSites = false;
@@ -40,13 +41,11 @@ abstract class VariantEvaluator {
//public boolean processedAnySites() { return processedASite; }
//protected void markSiteAsProcessed() { processedASite = true; }
- /** Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2 */
+ // Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2
public abstract int getComparisonOrder();
- /** called at all sites, regardless of eval context itself; useful for counting processed bases */
- public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-
- }
+ // called at all sites, regardless of eval context itself; useful for counting processed bases
+ public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { }
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
return null;