added some sample annotations to VariantEval2 analysis modules, and some changes to the report system.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3067 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2010-03-24 05:40:10 +00:00
parent 1f451e17e5
commit 60dfba997b
10 changed files with 391 additions and 274 deletions

View File

@ -5,39 +5,61 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.utils.StingException;
import java.util.List;
import java.util.Arrays;
@Analysis(name = "Count Variants", description = "Counts different classes of variants in the sample")
public class CountVariants extends VariantEvaluator {
@DataPoint(description = "Number of processed loci")
long nProcessedLoci = 0;
@DataPoint(description = "Number of called loci")
long nCalledLoci = 0;
@DataPoint(description = "Number of variant loci")
long nVariantLoci = 0;
@DataPoint(description = "Number of reference loci")
long nRefLoci = 0;
@DataPoint(description = "Number of snp loci")
long nSNPs = 0;
@DataPoint(description = "Number of insertions")
long nInsertions = 0;
@DataPoint(description = "Number of deletions")
long nDeletions = 0;
@DataPoint(description = "Number of complex loci")
long nComplex = 0;
@DataPoint(description = "Number of no calls loci")
long nNoCalls = 0;
@DataPoint(description = "Number of het loci")
long nHets = 0;
@DataPoint(description = "Number of hom ref loci")
long nHomRef = 0;
@DataPoint(description = "Number of hom var loci")
long nHomVar = 0;
public CountVariants(VariantEval2Walker parent) {
// don't do anything
}
private double perLocusRate(long n) { return rate(n, nProcessedLoci); }
private long perLocusRInverseRate(long n) { return inverseRate(n, nProcessedLoci); }
private double perLocusRate(long n) {
return rate(n, nProcessedLoci);
}
private long perLocusRInverseRate(long n) {
return inverseRate(n, nProcessedLoci);
}
public String getName() {
return "counter";
}
public boolean enabled() { return true; }
public boolean enabled() {
return true;
}
public int getComparisonOrder() {
return 1; // we only need to see each eval track
@ -51,27 +73,43 @@ public class CountVariants extends VariantEvaluator {
//nProcessedLoci++;
nCalledLoci++;
if ( vc1.isVariant() ) nVariantLoci++;
switch ( vc1.getType() ) {
case NO_VARIATION: nRefLoci++; break;
case SNP: nSNPs++; break;
case INDEL:
if ( vc1.isInsertion() ) nInsertions++; else nDeletions++;
if (vc1.isVariant()) nVariantLoci++;
switch (vc1.getType()) {
case NO_VARIATION:
nRefLoci++;
break;
case SNP:
nSNPs++;
break;
case INDEL:
if (vc1.isInsertion()) nInsertions++;
else nDeletions++;
break;
case MIXED:
nComplex++;
break;
case MIXED: nComplex++; break;
}
for ( Genotype g : vc1.getGenotypes().values() ) {
switch ( g.getType() ) {
case NO_CALL: nNoCalls++; break;
case HOM_REF: nHomRef++; break;
case HET: nHets++; break;
case HOM_VAR: nHomVar++; break;
default: throw new StingException("BUG: Unexpected genotype type: " + g);
for (Genotype g : vc1.getGenotypes().values()) {
switch (g.getType()) {
case NO_CALL:
nNoCalls++;
break;
case HOM_REF:
nHomRef++;
break;
case HET:
nHets++;
break;
case HOM_VAR:
nHomVar++;
break;
default:
throw new StingException("BUG: Unexpected genotype type: " + g);
}
}
return null; // we don't capture any intersting sites
return null; // we don't capture any interesting sites
}
public String toString() {

View File

@ -5,6 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import java.util.List;
import java.util.Arrays;
@ -18,11 +20,16 @@ import java.util.Arrays;
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
@Analysis(name = "dbOverlap", description = "the overlap between DbSNP sites and other SNP tracks")
public class DbSNPPercentage extends VariantEvaluator {
private long nDBSNPs = 0;
private long nEvalSNPs = 0;
@DataPoint(name = "DbSNP_count", description = "number of DPSNP sites")
private long nDBSNPs = 0;
@DataPoint(name = "total_count", description = "number of total snp sites")
private long nEvalSNPs = 0;
@DataPoint(name = "number_snps_at_dbsnp", description = "number of SNP sites at DPSNP sites")
private long nSNPsAtdbSNPs = 0;
private long nConcordant = 0;
@DataPoint(name = "number_concordant", description = "number of concordant sites")
private long nConcordant = 0;
public DbSNPPercentage(VariantEval2Walker parent) {
// don't do anything
@ -36,11 +43,25 @@ public class DbSNPPercentage extends VariantEvaluator {
return 2; // we need to see each eval track and each comp track
}
public long nDBSNPs() { return nDBSNPs; }
public long nEvalSNPs() { return nEvalSNPs; }
public long nSNPsAtdbSNPs() { return nSNPsAtdbSNPs; }
public long nConcordant() { return nConcordant; }
public long nNovelSites() { return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs()); }
public long nDBSNPs() {
return nDBSNPs;
}
public long nEvalSNPs() {
return nEvalSNPs;
}
public long nSNPsAtdbSNPs() {
return nSNPsAtdbSNPs;
}
public long nConcordant() {
return nConcordant;
}
public long nNovelSites() {
return Math.abs(nEvalSNPs() - nSNPsAtdbSNPs());
}
/**
@ -48,9 +69,14 @@ public class DbSNPPercentage extends VariantEvaluator {
*
* @return db rate
*/
public double dbSNPRate() { return rate(nSNPsAtdbSNPs(), nEvalSNPs()); }
public double concordanceRate() { return rate(nConcordant(), nSNPsAtdbSNPs()); }
public double dbSNPRate() {
return rate(nSNPsAtdbSNPs(), nEvalSNPs());
}
public double concordanceRate() {
return rate(nConcordant(), nSNPsAtdbSNPs());
}
public String toString() {
return getName() + ": " + summaryLine();
}
@ -65,12 +91,15 @@ public class DbSNPPercentage extends VariantEvaluator {
"n_novel_snps", "percent_eval_in_comp", "concordance_rate");
// making it a table
public List<String> getTableHeader() {
return HEADER;
}
public boolean enabled() { return true; }
public boolean enabled() {
return true;
}
public List<List<String>> getTableRows() {
return Arrays.asList(Arrays.asList(summaryLine().split(" ")));
}
@ -78,13 +107,13 @@ public class DbSNPPercentage extends VariantEvaluator {
/**
* Returns true if every allele in eval is also in dbsnp
*
* @param eval eval context
* @param dbsnp db context
* @param eval eval context
* @param dbsnp db context
* @return true if eval and db are discordant
*/
public boolean discordantP(VariantContext eval, VariantContext dbsnp ) {
for ( Allele a : eval.getAlleles() ) {
if ( ! dbsnp.hasAllele(a, true) )
public boolean discordantP(VariantContext eval, VariantContext dbsnp) {
for (Allele a : eval.getAlleles()) {
if (!dbsnp.hasAllele(a, true))
return true;
}
@ -95,13 +124,13 @@ public class DbSNPPercentage extends VariantEvaluator {
boolean dbSNPIsGood = dbsnp != null && dbsnp.isSNP() && dbsnp.isNotFiltered();
boolean evalIsGood = eval != null && eval.isSNP();
if ( dbSNPIsGood ) nDBSNPs++; // count the number of dbSNP events
if ( evalIsGood ) nEvalSNPs++; // count the number of dbSNP events
if (dbSNPIsGood) nDBSNPs++; // count the number of dbSNP events
if (evalIsGood) nEvalSNPs++; // count the number of dbSNP events
if ( dbSNPIsGood && evalIsGood ) {
if (dbSNPIsGood && evalIsGood) {
nSNPsAtdbSNPs++;
if ( ! discordantP(eval, dbsnp) ) // count whether we're concordant or not with the dbSNP value
if (!discordantP(eval, dbsnp)) // count whether we're concordant or not with the dbSNP value
nConcordant++;
}

View File

@ -3,6 +3,10 @@ package org.broadinstitute.sting.oneoffprojects.walkers.varianteval2;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.*;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.playground.utils.report.tags.Param;
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
import org.broadinstitute.sting.utils.StingException;
import org.apache.log4j.Logger;
@ -17,29 +21,56 @@ import java.util.*;
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
@Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks")
public class GenotypeConcordance extends VariantEvaluator {
protected static Logger logger = Logger.getLogger(GenotypeConcordance.class);
// a mapping from allele count to stats
@DataPoint(description = "the frequency statistics for each allele")
private HashMap<Integer, FrequencyStats> alleleCountStats = new HashMap<Integer, FrequencyStats>();
// a mapping from sample to stats
@DataPoint(description = "the concordance statistics for each sample")
private HashMap<String, SampleStats> concordanceStats = null;
private static final int MAX_MISSED_VALIDATION_DATA = 10000;
private static final int nGenotypeTypes = Genotype.Type.values().length;
class SampleStats {
class SampleStats implements TableType {
long[][] concordance = new long[nGenotypeTypes][nGenotypeTypes];
// TableType methods
public Object[] getRowKeys() {
return Genotype.Type.values();
}
public Object[] getColumnKeys() {
return Genotype.Type.values();
}
public String getCell(int x, int y) {
return String.valueOf(concordance[x][y]);
}
public String getName() {
return "SampleStats";
}
public String toString() {
StringBuffer sb = new StringBuffer();
for (int truth = 0; truth < nGenotypeTypes; truth++ ) {
for (int truth = 0; truth < nGenotypeTypes; truth++) {
// don't print out when truth = no-call
if ( truth == Genotype.Type.NO_CALL.ordinal() )
if (truth == Genotype.Type.NO_CALL.ordinal())
continue;
long total = 0;
for (int called = 0; called < nGenotypeTypes; called++ )
for (int called = 0; called < nGenotypeTypes; called++)
total += concordance[truth][called];
sb.append(String.format("%d %d %.2f ", total, concordance[truth][truth], total == 0 ? 0.0 : (100.0 * (double)concordance[truth][truth] / (double)total)));
for (int called = 0; called < nGenotypeTypes; called++ ) {
if ( called != truth )
sb.append(String.format("%d %d %.2f ", total, concordance[truth][truth], total == 0 ? 0.0 : (100.0 * (double) concordance[truth][truth] / (double) total)));
for (int called = 0; called < nGenotypeTypes; called++) {
if (called != truth)
sb.append(String.format("%d ", concordance[truth][called]));
}
}
@ -48,22 +79,33 @@ public class GenotypeConcordance extends VariantEvaluator {
}
}
class FrequencyStats {
class FrequencyStats implements TableType {
long nFound = 0;
long nMissed = 0;
public Object[] getRowKeys() {
return new String[]{"sample"};
}
public Object[] getColumnKeys() {
return new String[]{"number_found", "number_missing"};
}
public String getName() {
return "FrequencyStats";
}
public String getCell(int x, int y) {
if (y == 0) return String.valueOf(nFound);
else return String.valueOf(nMissed);
}
public String toString() {
long total = nFound + nMissed;
return String.format("%d %d %.2f ", nFound, nMissed, total == 0 ? 0.0 : (100.0 * rate(nFound, total)));
}
}
// a mapping from allele count to stats
private HashMap<Integer, FrequencyStats> alleleCountStats = new HashMap<Integer, FrequencyStats>();
// a mapping from sample to stats
private HashMap<String, SampleStats> concordanceStats = null;
// keep a list of the validation data we saw before the first eval data
private HashSet<VariantContext> missedValidationData = new HashSet<VariantContext>();
@ -80,7 +122,9 @@ public class GenotypeConcordance extends VariantEvaluator {
return 2; // we need to see each eval track and each comp track
}
public boolean enabled() { return true; }
public boolean enabled() {
return true;
}
public String toString() {
return getName() + ": " + getTableRows();
@ -99,6 +143,7 @@ public class GenotypeConcordance extends VariantEvaluator {
Arrays.asList("alleleCount", "n_found", "n_missed", "%_found");
// making it a table
public List<String> getTableHeader() {
ArrayList<String> header = new ArrayList<String>();
header.addAll(SAMPLE_HEADER);
@ -109,13 +154,13 @@ public class GenotypeConcordance extends VariantEvaluator {
public List<List<String>> getTableRows() {
ArrayList<List<String>> rows = new ArrayList<List<String>>();
if ( concordanceStats != null ) {
for ( Map.Entry<String, SampleStats> sample : concordanceStats.entrySet() )
if (concordanceStats != null) {
for (Map.Entry<String, SampleStats> sample : concordanceStats.entrySet())
rows.add(Arrays.asList(String.format("%s %s", sample.getKey(), sample.getValue().toString()).split(" ")));
}
if ( alleleCountStats != null ) {
for ( Map.Entry<Integer, FrequencyStats> alleleCount : alleleCountStats.entrySet() )
if (alleleCountStats != null) {
for (Map.Entry<Integer, FrequencyStats> alleleCount : alleleCountStats.entrySet())
rows.add(Arrays.asList(String.format("%d %s", alleleCount.getKey(), alleleCount.getValue().toString()).split(" ")));
}
@ -123,26 +168,27 @@ public class GenotypeConcordance extends VariantEvaluator {
}
private boolean warnedAboutValidationData = false;
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
String interesting = null;
// sanity check that we at least have either eval or validation data
if ( eval == null && !isValidVC(validation) )
if (eval == null && !isValidVC(validation))
return interesting;
if ( concordanceStats == null ) {
if ( eval != null ) {
if (concordanceStats == null) {
if (eval != null) {
// initialize the concordance table
createConcordanceTable(eval);
for ( VariantContext vc : missedValidationData )
for (VariantContext vc : missedValidationData)
determineStats(null, vc);
missedValidationData = null;
} else {
// todo -- Eric, this results in a memory problem when eval is WEx data but you are using CG calls genome-wide
// todo -- perhaps you need should extend the evaluators with an initialize
// todo -- method that gets the header (or samples) for the first eval sites?
if ( missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) {
if ( ! warnedAboutValidationData ) {
if (missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) {
if (!warnedAboutValidationData) {
logger.warn("Too many genotype sites missed before eval site appeared; ignoring");
warnedAboutValidationData = true;
}
@ -154,26 +200,26 @@ public class GenotypeConcordance extends VariantEvaluator {
}
determineStats(eval, validation);
return interesting; // we don't capture any interesting sites
}
private void determineStats(VariantContext eval, VariantContext validation) {
// determine concordance for eval data
if ( eval != null ) {
if (eval != null) {
for ( String sample : eval.getSampleNames() ) {
for (String sample : eval.getSampleNames()) {
Genotype.Type called = eval.getGenotype(sample).getType();
Genotype.Type truth;
if ( !isValidVC(validation) || !validation.hasGenotype(sample) )
if (!isValidVC(validation) || !validation.hasGenotype(sample))
truth = Genotype.Type.NO_CALL;
else
truth = validation.getGenotype(sample).getType();
SampleStats stats = concordanceStats.get(sample);
if ( stats == null )
if (stats == null)
throw new StingException("Sample " + sample + " has not been seen in a previous eval; this analysis module assumes that all samples are present in each variant context");
stats.concordance[truth.ordinal()][called.ordinal()]++;
}
@ -183,9 +229,9 @@ public class GenotypeConcordance extends VariantEvaluator {
Genotype.Type called = Genotype.Type.NO_CALL;
for ( String sample : validation.getSampleNames() ) {
for (String sample : validation.getSampleNames()) {
SampleStats stats = concordanceStats.get(sample);
if ( stats == null )
if (stats == null)
continue;
Genotype.Type truth = validation.getGenotype(sample).getType();
@ -194,15 +240,15 @@ public class GenotypeConcordance extends VariantEvaluator {
}
// determine allele count concordance ()
if ( isValidVC(validation) && validation.isPolymorphic() ) {
if (isValidVC(validation) && validation.isPolymorphic()) {
int trueAlleleCount = 0;
for ( Allele a : validation.getAlternateAlleles() )
for (Allele a : validation.getAlternateAlleles())
trueAlleleCount += validation.getChromosomeCount(a);
if ( !alleleCountStats.containsKey(trueAlleleCount) )
if (!alleleCountStats.containsKey(trueAlleleCount))
alleleCountStats.put(trueAlleleCount, new FrequencyStats());
FrequencyStats stats = alleleCountStats.get(trueAlleleCount);
if ( eval != null )
if (eval != null)
stats.nFound++;
else
stats.nMissed++;
@ -215,7 +261,7 @@ public class GenotypeConcordance extends VariantEvaluator {
private void createConcordanceTable(VariantContext vc) {
concordanceStats = new HashMap<String, SampleStats>();
for ( String sample : vc.getSampleNames() )
for (String sample : vc.getSampleNames())
concordanceStats.put(sample, new SampleStats());
}

View File

@ -6,6 +6,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import org.broadinstitute.sting.utils.StingException;
import java.util.List;
@ -15,36 +17,51 @@ import java.util.regex.Matcher;
/**
* Mendelian violation detection and counting
*
* <p/>
* a violation looks like:
* Suppose dad = A/B and mom = C/D
* The child can be [A or B] / [C or D].
* If the child doesn't match this, the site is a violation
*
* <p/>
* Some examples:
*
* <p/>
* mom = A/A, dad = C/C
* child can be A/C only
*
* <p/>
* mom = A/C, dad = C/C
* child can be A/C or C/C
*
* <p/>
* mom = A/C, dad = A/C
* child can be A/A, A/C, C/C
*
* <p/>
* The easiest way to do this calculation is to:
*
* <p/>
* Get alleles for mom => A/B
* Get alleles for dad => C/D
* Make allowed genotypes for child: A/C, A/D, B/C, B/D
* Check that the child is one of these.
*/
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
public class MendelianViolationEvaluator extends VariantEvaluator {
long nVariants, nViolations;
TrioStructure trio;
@DataPoint(name = "number_variants", description = "Number of mendelian variants found")
long nVariants;
@DataPoint(name = "number_violations", description = "Number of mendelian violations found")
long nViolations;
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
long KidHomRef_ParentHomVar;
@DataPoint(description = "number of child het calls where the parent was hom ref")
long KidHet_ParentsHomRef;
@DataPoint(description = "number of child het calls where the parent was hom variant")
long KidHet_ParentsHomVar;
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
long KidHomVar_ParentHomRef;
VariantEval2Walker parent;
long KidHomRef_ParentHomVar, KidHet_ParentsHomRef, KidHet_ParentsHomVar, KidHomVar_ParentHomRef;
TrioStructure trio;
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
@ -54,7 +71,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
public static TrioStructure parseTrioDescription(String family) {
Matcher m = FAMILY_PATTERN.matcher(family);
if ( m.matches() ) {
if (m.matches()) {
TrioStructure trio = new TrioStructure();
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
trio.mom = m.group(1);
@ -69,7 +86,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
public MendelianViolationEvaluator(VariantEval2Walker parent) {
this.parent = parent;
if ( enabled() ) {
if (enabled()) {
trio = parseTrioDescription(parent.FAMILY_STRUCTURE);
parent.getLogger().debug(String.format("Found a family pattern: %s mom=%s dad=%s child=%s",
parent.FAMILY_STRUCTURE, trio.mom, trio.dad, trio.child));
@ -93,30 +110,30 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
}
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( vc.isBiallelic() && vc.hasGenotypes() ) { // todo -- currently limited to biallelic loci
Genotype momG = vc.getGenotype(trio.mom);
Genotype dadG = vc.getGenotype(trio.dad);
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
Genotype momG = vc.getGenotype(trio.mom);
Genotype dadG = vc.getGenotype(trio.dad);
Genotype childG = vc.getGenotype(trio.child);
if ( includeGenotype(momG) && includeGenotype(dadG) && includeGenotype(childG) ) {
if (includeGenotype(momG) && includeGenotype(dadG) && includeGenotype(childG)) {
nVariants++;
if ( momG == null || dadG == null || childG == null )
if (momG == null || dadG == null || childG == null)
throw new IllegalArgumentException(String.format("VariantContext didn't contain genotypes for expected trio members: mom=%s dad=%s child=%s", trio.mom, trio.dad, trio.child));
// all genotypes are good, so let's see if child is a violation
if ( isViolation(vc, momG, dadG, childG) ) {
if (isViolation(vc, momG, dadG, childG)) {
nViolations++;
String label;
if ( childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar() )) {
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
label = "KidHomRef_ParentHomVar";
KidHomRef_ParentHomVar++;
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef()) ) {
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
label = "KidHet_ParentsHomRef";
KidHet_ParentsHomRef++;
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar()) ) {
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
label = "KidHet_ParentsHomVar";
KidHet_ParentsHomVar++;
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
@ -138,20 +155,20 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
return g.getNegLog10PError() > getQThreshold() && g.isCalled();
}
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG ) {
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
}
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA ) {
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
//VariantContext momVC = vc.subContextFromGenotypes(momG);
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
int i = 0;
Genotype childG = new Genotype("kidG", childA);
for ( Allele momAllele : momA ) {
for ( Allele dadAllele : dadA ) {
if ( momAllele.isCalled() && dadAllele.isCalled() ) {
for (Allele momAllele : momA) {
for (Allele dadAllele : dadA) {
if (momAllele.isCalled() && dadAllele.isCalled()) {
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
if ( childG.sameGenotype(possibleChild, false) ) {
if (childG.sameGenotype(possibleChild, false)) {
return false;
}
}
@ -173,6 +190,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
Arrays.asList("nVariants", "nViolations", "KidHomRef_ParentHomVar", "KidHet_ParentsHomRef", "KidHet_ParentsHomVar", "KidHomVar_ParentHomRef");
// making it a table
public List<String> getTableHeader() {
return HEADER;
}

View File

@ -4,20 +4,32 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
import java.util.List;
import java.util.Arrays;
@Analysis(name = "Ti/Tv Variant Evaluator", description = "Ti/Tv Variant Evaluator")
public class TiTvVariantEvaluator extends VariantEvaluator {
long nTi = 0, nTv = 0;
long nTiInStd = 0, nTvInStd = 0;
@DataPoint(name = "ti_count", description = "number of transition loci")
long nTi = 0;
@DataPoint(name = "tv_count", description = "number of transversion loci")
long nTv = 0;
@DataPoint(name = "ti_count_std", description = "number of transition sites in the std")
long nTiInStd = 0;
@DataPoint(name = "tv_count_std", description = "number of transversion sites in the std")
long nTvInStd = 0;
public TiTvVariantEvaluator(VariantEval2Walker parent) {
// don't do anything
}
public boolean enabled() { return true; }
public boolean enabled() {
return true;
}
public String getName() {
return "titv";
}
@ -27,18 +39,20 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
}
public void updateTiTv(VariantContext vc, boolean updateStandard) {
if ( vc != null && vc.isSNP() && vc.isBiallelic() ) {
if ( vc.isTransition() ) {
if ( updateStandard ) nTiInStd++; else nTi++;
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
if (vc.isTransition()) {
if (updateStandard) nTiInStd++;
else nTi++;
} else {
if ( updateStandard ) nTvInStd++; else nTv++;
if (updateStandard) nTvInStd++;
else nTv++;
}
}
}
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( vc1 != null ) updateTiTv(vc1, false);
if ( vc2 != null ) updateTiTv(vc2, true);
if (vc1 != null) updateTiTv(vc1, false);
if (vc2 != null) updateTiTv(vc2, true);
//if ( vc1 == null && vc2 != null && vc2.isSNP() && vc2.isBiallelic() )
// System.out.printf("VC2 = %s%n", vc2);

View File

@ -4,6 +4,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import java.util.List;
import java.util.Arrays;
@ -17,6 +18,7 @@ import java.util.Arrays;
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
@Analysis(name = "Validation Rate", description = "Validation Rate")
public class ValidationRate extends VariantEvaluator {
// todo -- subset validation data by list of samples, if provided
@ -26,7 +28,9 @@ public class ValidationRate extends VariantEvaluator {
class SiteStats {
long nPoly = 0, nMono = 0, nNoCall = 0;
double polyPercent() { return 100 * rate(nPoly, nPoly + nMono + nNoCall); }
double polyPercent() {
return 100 * rate(nPoly, nPoly + nMono + nNoCall);
}
}
private SiteStats validationStats = new SiteStats();
@ -68,11 +72,14 @@ public class ValidationRate extends VariantEvaluator {
"PPV", "Sensitivity");
// making it a table
public List<String> getTableHeader() {
return HEADER;
}
public boolean enabled() { return true; }
public boolean enabled() {
return true;
}
public List<List<String>> getTableRows() {
return Arrays.asList(Arrays.asList(summaryLine().split(" ")));
@ -81,26 +88,25 @@ public class ValidationRate extends VariantEvaluator {
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
String interesting = null;
if ( validation != null && validation.hasGenotypes() && validation.isNotFiltered() ) {
if (validation != null && validation.hasGenotypes() && validation.isNotFiltered()) {
SiteStats overlap;
if ( validation.isPolymorphic() ) {
if (validation.isPolymorphic()) {
validationStats.nPoly++;
overlap = evalOverlapAtPoly;
if ( eval == null || eval.isMonomorphic() )
if (eval == null || eval.isMonomorphic())
interesting = "ValidationStatus=FN";
}
else {
} else {
validationStats.nMono++;
overlap = evalOverlapAtMono;
if ( eval != null && eval.isPolymorphic() )
if (eval != null && eval.isPolymorphic())
interesting = "ValidationStatus=FP";
}
if ( eval == null )
if (eval == null)
overlap.nNoCall++;
else if ( eval.isPolymorphic() )
else if (eval.isPolymorphic())
overlap.nPoly++;
else
overlap.nMono++;

View File

@ -33,10 +33,7 @@ import org.broadinstitute.sting.utils.StingException;
import java.io.*;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.*;
/**
@ -52,6 +49,7 @@ public class ReportMarshaller {
// the aggregation of all our analyses
private Node root;
private File writeLocation;
/**
* create a marshaled object
*
@ -64,10 +62,8 @@ public class ReportMarshaller {
}
private void init(String reportName, File filename) {
root = new Node(reportName);
Node title= new Node("title");
title.addChild(new Node(reportName));
root.addChild(title);
root = new Node("report", reportName, "the overarching report object");
root.addChild(new Node("title", reportName, "title of the report"));
this.writeLocation = filename;
}
@ -77,7 +73,7 @@ public class ReportMarshaller {
* @param reportName the report name
*/
public ReportMarshaller(String reportName, File filename) {
init(reportName,filename);
init(reportName, filename);
temp = createTemplate();
}
@ -101,12 +97,40 @@ public class ReportMarshaller {
root.addChild(analysis);
}
/**
* add an analysis module to the output source
*
* @param toMarshall the object to marshall
*/
public void write(List<Node> prependNodes, Object toMarshall) {
// Create a context to add data to
HashMap analysisMap = new HashMap();
AnalysisModuleScanner moduleScanner = new AnalysisModuleScanner(toMarshall);
Node analysis = addAnalysis(moduleScanner);
analysis.addAllChildren(getParameterNodes(toMarshall, moduleScanner));
analysis.addAllChildren(getDataPointNodes(toMarshall, moduleScanner));
// prepend the list of nodes passed in
Node currChild = null;
for (Node n : prependNodes) {
if (currChild == null) {
root.addChild(n);
currChild = n;
} else {
currChild.addChild(n);
currChild = n;
}
}
// add this analysis to the root node
if (currChild == null) root.addChild(analysis);
else currChild.addChild(analysis);
}
private Node addAnalysis(AnalysisModuleScanner moduleScanner) {
Node analysis = new Node(moduleScanner.getAnalysis().name());
Node description = new Node("description");
description.createSubNode(moduleScanner.getAnalysis().description());
analysis.addChild(description);
return analysis;
return new Node("analysis", moduleScanner.getAnalysis().name(), moduleScanner.getAnalysis().description());
}
/**
@ -119,7 +143,9 @@ public class ReportMarshaller {
private Collection<Node> getParameterNodes(Object toMarshall, AnalysisModuleScanner moduleScanner) {
Collection<Node> nodes = new ArrayList<Node>();
for (Field f : moduleScanner.getParameters().keySet()) {
Node node = new Node(moduleScanner.getParameters().get(f).name().equals("") ? f.getName() : moduleScanner.getParameters().get(f).name());
Node node = new Node("parameter",
moduleScanner.getParameters().get(f).name().equals("") ? f.getName() : moduleScanner.getParameters().get(f).name(),
moduleScanner.getParameters().get(f).description());
addChildNodeFromField(toMarshall, f, node);
nodes.add(node);
}
@ -136,14 +162,18 @@ public class ReportMarshaller {
/**
 * collect a "data_point" Node for each @DataPoint-annotated field of the module.
 *
 * @param toMarshall    the module instance whose field values are read
 * @param moduleScanner the scanner mapping annotated Fields to their @DataPoint annotations
 * @return one Node per annotated data-point field, each populated from the field's value
 */
private Collection<Node> getDataPointNodes(Object toMarshall, AnalysisModuleScanner moduleScanner) {
    Collection<Node> nodes = new ArrayList<Node>();
    for (Field f : moduleScanner.getData().keySet()) {
        // hoist the repeated map lookup (diff residue removed: the old single-arg
        // Node construction was left interleaved with the new three-arg one);
        // prefer the annotation-supplied name, falling back to the field name
        DataPoint annotation = moduleScanner.getData().get(f);
        String name = annotation.name().equals("") ? f.getName() : annotation.name();
        Node node = new Node("data_point", name, annotation.description());
        addChildNodeFromField(toMarshall, f, node);
        nodes.add(node);
    }
    return nodes;
}
/** call the method to finalize the report */
/**
* call the method to finalize the report
*/
public void close() {
Writer out = null;
try {
@ -154,7 +184,7 @@ public class ReportMarshaller {
try {
// add the data to a map
Map map = new HashMap();
map.put("root",root);
map.put("root", root);
temp.process(map, out);
out.flush();
} catch (TemplateException e) {
@ -174,7 +204,7 @@ public class ReportMarshaller {
cfg.setObjectWrapper(new DefaultObjectWrapper());
Template temp = null;
try {
temp = cfg.getTemplate("machineReadable.ftl");
temp = cfg.getTemplate("myTestTemp.ftl"); // TODO: obviously this has to be changed to a factory or something like that
} catch (IOException e) {
e.printStackTrace();
}
@ -183,9 +213,10 @@ public class ReportMarshaller {
/**
* helper method for adding a Node to the specified node, given the field
*
* @param toMarshall the object which contains the specified field
* @param f the field
* @param node the node to add a child node to
* @param f the field
* @param node the node to add a child node to
*/
private static void addChildNodeFromField(Object toMarshall, Field f, Node node) {
f.setAccessible(true);

View File

@ -1,99 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.utils.report;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
/**
 * @author aaron
 *
 * Class TableFormatter
 *
 * a helper class for formatting a table of data. Given a collection, it formats
 * it into rows and columns. If it's a Map, values are emitted as key, value pairs
 * (one pair per row), so it may make sense to have columns set to 2.
 */
public class TableFormatter {
    public Object tableEntries;  // data storage; expected to be a Collection or a Map (kept public for existing callers)
    private int columns = 2;     // row width used when chunking a Collection; overridden in the two-arg constructor

    /**
     * build a formatter over a collection with an explicit row width
     *
     * @param tbl     the collection to format
     * @param columns the number of entries per row
     */
    public TableFormatter(Collection<Object> tbl, int columns) {
        tableEntries = tbl;
        this.columns = columns;
    }

    /**
     * build a formatter with the default row width of 2
     *
     * @param tbl the Collection or Map to format
     */
    public TableFormatter(Object tbl) {
        tableEntries = tbl;
    }

    /**
     * convert the underlying object to a list of lists, for convenient output
     *
     * @return a list of list of objects (a list for each row, then a list of row members)
     * @throws UnsupportedOperationException if the underlying object is neither a Collection nor a Map
     */
    public List<List<Object>> toRows() {
        if (tableEntries instanceof Collection)
            return collectionToTable((Collection) tableEntries);
        else if (tableEntries instanceof Map)
            return mapToTable((Map) tableEntries);
        else
            throw new UnsupportedOperationException("underlying collection must be an instance of type Collection or of type Map");
    }

    /** emit one [key, valueString] row per map entry, in the map's iteration order */
    private List<List<Object>> mapToTable(Map collection) {
        List<List<Object>> list = new ArrayList<List<Object>>();
        for (Object obj : collection.keySet()) {
            List<Object> currentRow = new ArrayList<Object>();
            currentRow.add(obj);
            Object data = collection.get(obj);
            // String.valueOf is null-safe; the old data.toString() threw NPE on null map values
            currentRow.add(String.valueOf(data));
            list.add(currentRow);
        }
        return list;
    }

    /** chunk the collection into rows of at most 'columns' entries each */
    private List<List<Object>> collectionToTable(Collection collection) {
        List<List<Object>> list = new ArrayList<List<Object>>();
        List<Object> currentRow = new ArrayList<Object>();
        for (Object obj : collection) {
            // flush the current row before it would exceed the column count
            if (currentRow.size() >= columns) {
                list.add(currentRow);
                currentRow = new ArrayList<Object>();
            }
            currentRow.add(obj);
        }
        // only flush the final row if it holds data; the old code unconditionally
        // added it, producing a spurious empty row [[]] for an empty input collection
        if (!currentRow.isEmpty())
            list.add(currentRow);
        return list;
    }
}

View File

@ -1,20 +1,22 @@
package org.broadinstitute.sting.playground.utils.report.utils;
import org.broadinstitute.sting.utils.Utils;
import java.util.*;
/**
*
* @author aaron
*
* Class ComplexDataUtils
*
* This class contains methods and techniques for breaking down complex data in the output system
* @author aaron
* <p/>
* Class ComplexDataUtils
* <p/>
* This class contains methods and techniques for breaking down complex data in the output system
*/
public class ComplexDataUtils {
/**
* convert any string -> object pairing into a string keyed tree
*
* @param obj the object
* @return a mapping of the name to the associated value tree. All non-leaf nodes will be Strings
*/
@ -22,41 +24,50 @@ public class ComplexDataUtils {
Collection<Node> nodes = new ArrayList<Node>();
// the simplest case
if (obj == null) nodes.add(new Node("<null>")); // throw new IllegalStateException("object is null");
if (obj == null)
nodes.add(new Node("<null>", "<null>", "<null>")); // throw new IllegalStateException("object is null");
else if (obj instanceof TableType)
nodes.add(tableToNode((TableType)obj, ((TableType)obj).getName()));
nodes.add(tableToNode((TableType) obj, ((TableType) obj).getName()));
else if (obj instanceof Map) {
for (Object key : ((Map)obj).keySet()) {
Node keyNode = new Node(key.toString());
for (Object key : ((Map) obj).keySet()) {
Node keyNode = new Node("key", key.toString(), "map key");
nodes.add(keyNode);
keyNode.addAllChildren(resolveObjects(((Map)obj).get(key)));
keyNode.addAllChildren(resolveObjects(((Map) obj).get(key)));
}
} else if (obj instanceof Collection)
nodes.addAll(listToNode((Collection)obj, "collection"));
nodes.addAll(listToNode((Collection) obj, "collection"));
else if (obj.getClass().isArray())
nodes.addAll(listToNode(Arrays.asList(obj),"array"));
nodes.addAll(listToNode(Arrays.asList(obj), "array"));
else
nodes.add(new Node(obj.toString()));
nodes.add(new Node(obj.getClass().getSimpleName(), obj.toString(), "value"));
return nodes;
}
/**
* given a TableType object, make it into a tree using maps.
*
* @param table the table type to convert into a map
* @return
*/
private static Node tableToNode(TableType table, String name) {
Node root = new Node(name);
Node root = new Node("table", name, "Table");
Object[] rows = table.getRowKeys();
Object[] cols = table.getColumnKeys();
// add the columns names
Node col_names = new Node("col_names", "col_names", "the column names, ~ seperated", false);
for (Object col : cols)
col_names.addChild(new Node("col", col.toString(), "a column name", false));
root.addChild(col_names);
for (int x = 0; x < table.getRowKeys().length; x++) {
Node row = new Node(rows[x].toString());
Node row = new Node("row", rows[x].toString(), "a row");
root.addChild(row);
for (int y = 0; y < table.getRowKeys().length; y++) {
Node col = new Node(cols[y].toString());
for (int y = 0; y < table.getColumnKeys().length; y++) {
Node col = new Node("column", cols[y].toString(), "a column");
row.addChild(col);
col.addChild(new Node(table.getCell(x,y)));
col.addChild(new Node("cell(" + x + "," + y + ")", table.getCell(x, y), "a cell"));
}
}
return root;
@ -64,6 +75,7 @@ public class ComplexDataUtils {
/**
* given a Collection object, make it into a tree using maps.
*
* @param coll the collection to iterate, and turn into a list
* @return a mapping of String to Object
*/
@ -71,11 +83,10 @@ public class ComplexDataUtils {
Collection<Node> nodes = new ArrayList<Node>();
Iterator<Object> iter = coll.iterator();
for (int x = 0; x < coll.size(); x++) {
Node value = new Node(String.valueOf(x));
value.addChild(new Node(iter.next().toString()));
Node value = new Node("column " + x, String.valueOf(x), "column");
value.addChild(new Node("value " + x, iter.next().toString(), "value"));
nodes.add(value);
}
return nodes;
}
}

View File

@ -4,15 +4,27 @@ import java.util.Collection;
import java.util.LinkedHashSet;
/**
* a node, extracted using the new report output system.
*/
* a node, extracted using the new report output system.
*/
public class Node {
public String value;
public String description; // if one is available
public final String name;
public final String value;
public final String description;
public final boolean display; // is this node an output node, or a node for tracking internal data? true if output node
public Collection<Node> children;
public Node(String value) {
public Node(String name, String value, String description) {
this.value = value;
this.name = name;
this.description = description;
display = true;
}
public Node(String name, String value, String description, boolean display) {
this.value = value;
this.name = name;
this.description = description;
this.display = display;
}
public void addChild(Node child) {
@ -25,20 +37,27 @@ public class Node {
this.children.addAll(children);
}
public Boolean getComplex() { return (children != null && children.size() > 0); }
public Boolean getComplex() {
return (children != null && children.size() > 0);
}
/**
* a convenience method for adding a new sub-node with the specified value
*
* @param value the value of the sub-node
*/
public void createSubNode(String value) {
addChild(new Node(value));
public void createSubNode(String name, String value, String description) {
addChild(new Node(name, value, description));
}
public String getValue() {
return value;
}
public String getName() {
return name;
}
public String getDescription() {
return description;
}
@ -46,4 +65,8 @@ public class Node {
public Collection<Node> getChildren() {
return children;
}
public boolean getDisplay() {
return display;
}
}