Final version of new VariantEval infrastructure.
*** WAY FASTER *** -- 3x performance for multiple sample analysis with 1000 samples -- Analyzing 1MB of the ESP call set (3100 samples) takes 40 secs, compared to several minutes in the previous version -- According to JProfiler all of the runtime is now spent decoding genotypes, which will only get better when we move to BCF2 -- Remove the TableType system, as this was way too complex. No longer possible to embed what were effectively multiple tables in a single Evaluator. You now have to have 1 table per eval -- Replaced it with @Molten, which allows an evaluator to provide a single Map from variable -> value for analysis. IndelLengthHistogram is now a @Molten data type. GenotypeConcordance is also. -- No longer allow Evaluators to use private and protected variables as @DataPoints. You get an error if you do. -- Simplified entire IO system of VE. Refactored into VariantEvalReportWriter. -- Commented out GenotypePhasingEvaluator, as it uses the retired TableType -- Stratifications are all fully typed, so it's easy for GATKReports to format them. -- Removed old VE work around from GATKReportColumn -- General code cleanup throughout -- Updated integration tests
This commit is contained in:
parent
8c0718a7c9
commit
4b45a2c99d
|
|
@ -132,7 +132,6 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
|
|||
private static final Collection<String> RIGHT_ALIGN_STRINGS = Arrays.asList(
|
||||
"null",
|
||||
"NA",
|
||||
"unknown",
|
||||
String.valueOf(Double.POSITIVE_INFINITY),
|
||||
String.valueOf(Double.NEGATIVE_INFINITY),
|
||||
String.valueOf(Double.NaN));
|
||||
|
|
@ -214,7 +213,7 @@ public class GATKReportColumn extends LinkedHashMap<Object, Object> {
|
|||
public Object put(Object key, Object value) {
|
||||
if (value != null) {
|
||||
String formatted = formatValue(value);
|
||||
if (!formatted.equals("") && !formatted.equals("unknown")) {
|
||||
if (!formatted.equals("")) {
|
||||
updateMaxWidth(formatted);
|
||||
updateFormat(formatted);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,183 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.*;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Class for writing the GATKReport for VariantEval
|
||||
*/
|
||||
public class VariantEvalReportWriter {
|
||||
private final GATKReport report;
|
||||
private final StratificationManager<VariantStratifier, EvaluationContext> stratManager;
|
||||
|
||||
public VariantEvalReportWriter(final StratificationManager<VariantStratifier, EvaluationContext> stratManager,
|
||||
final Collection<VariantStratifier> stratifiers,
|
||||
final Collection<VariantEvaluator> evaluators) {
|
||||
this.stratManager = stratManager;
|
||||
this.report = initializeGATKReport(stratifiers, evaluators);
|
||||
}
|
||||
|
||||
public final void writeReport(final PrintStream out) {
|
||||
for ( int key = 0; key < stratManager.size(); key++ ) {
|
||||
final String stratStateString = stratManager.getStratsAndStatesForKeyString(key);
|
||||
final List<Pair<VariantStratifier, Object>> stratsAndStates = stratManager.getStratsAndStatesForKey(key);
|
||||
final EvaluationContext nec = stratManager.get(key);
|
||||
|
||||
for ( final VariantEvaluator ve : nec.getVariantEvaluators() ) {
|
||||
ve.finalizeEvaluation();
|
||||
final GATKReportTable table = report.getTable(ve.getSimpleName());
|
||||
|
||||
final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
|
||||
final Map<Field, DataPoint> datamap = scanner.getData();
|
||||
try {
|
||||
if ( scanner.hasMoltenField() ) {
|
||||
final Field field = scanner.getMoltenField();
|
||||
final Object fieldValue = field.get(ve);
|
||||
|
||||
if ( ! (fieldValue instanceof Map) )
|
||||
throw new ReviewedStingException("BUG field " + field.getName() + " must be an instance of Map in " + scanner.getAnalysis().name());
|
||||
final Map<Object, Object> map = (Map<Object, Object>)fieldValue;
|
||||
int counter = 0; // counter is used to ensure printing order is as defined by entrySet
|
||||
for ( Map.Entry<Object, Object> keyValue : map.entrySet() ) {
|
||||
// "%05d" is a terrible hack to ensure sort order
|
||||
final String moltenStratStateString = stratStateString + String.format("%05d", counter++);
|
||||
setStratificationColumns(table, moltenStratStateString, stratsAndStates);
|
||||
table.set(moltenStratStateString, "variable", keyValue.getKey());
|
||||
table.set(moltenStratStateString, "value", keyValue.getValue());
|
||||
}
|
||||
} else {
|
||||
setStratificationColumns(table, stratStateString, stratsAndStates);
|
||||
for ( final Field field : datamap.keySet()) {
|
||||
table.set(stratStateString, field.getName(), field.get(ve));
|
||||
}
|
||||
}
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new ReviewedStingException("BUG: analysis field not public: " + e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report.print(out);
|
||||
}
|
||||
|
||||
/**
|
||||
* Common utility to configure a GATKReportTable columns
|
||||
*
|
||||
* Sets the column names to the strat names in stratsAndStates for the primary key in table
|
||||
*
|
||||
* @param table
|
||||
* @param primaryKey
|
||||
* @param stratsAndStates
|
||||
*/
|
||||
private final void setStratificationColumns(final GATKReportTable table,
|
||||
final String primaryKey,
|
||||
final List<Pair<VariantStratifier, Object>> stratsAndStates) {
|
||||
for ( final Pair<VariantStratifier, Object> stratAndState : stratsAndStates ) {
|
||||
final VariantStratifier vs = stratAndState.getFirst();
|
||||
final String columnName = vs.getName();
|
||||
final Object strat = stratAndState.getSecond();
|
||||
if ( columnName == null || strat == null )
|
||||
throw new ReviewedStingException("Unexpected null variant stratifier state at " + table + " key = " + primaryKey);
|
||||
table.set(primaryKey, columnName, strat);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the output report
|
||||
*
|
||||
* We have a set of stratifiers and evaluation objects. We need to create tables that look like:
|
||||
*
|
||||
* strat1 strat2 ... stratN eval1.field1 eval1.field2 ... eval1.fieldM
|
||||
*
|
||||
* for each eval.
|
||||
*
|
||||
* Note that this procedure doesn't support the creation of the old TableType system. As the
|
||||
* VariantEvaluators are effectively tables themselves, we require authors to just create new
|
||||
* evaluation modules externally instead of allow them to embed them in other evaluation modules
|
||||
*
|
||||
* @return an initialized report object
|
||||
*/
|
||||
public GATKReport initializeGATKReport(final Collection<VariantStratifier> stratifiers,
|
||||
final Collection<VariantEvaluator> evaluators) {
|
||||
final GATKReport report = new GATKReport();
|
||||
|
||||
for (final VariantEvaluator ve : evaluators) {
|
||||
final String tableName = ve.getSimpleName();
|
||||
final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description();
|
||||
|
||||
report.addTable(tableName, tableDesc, true);
|
||||
|
||||
final GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("entry", false);
|
||||
table.addColumn(tableName, tableName);
|
||||
|
||||
// create a column to hold each startifier state
|
||||
for (final VariantStratifier vs : stratifiers) {
|
||||
final String columnName = vs.getName();
|
||||
table.addColumn(columnName, null, vs.getFormat());
|
||||
}
|
||||
|
||||
final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
|
||||
final Map<Field, DataPoint> datamap = scanner.getData();
|
||||
|
||||
// deal with the molten issue
|
||||
if ( scanner.hasMoltenField() ) {
|
||||
table.addColumn("variable", true, scanner.getMoltenAnnotation().variableFormat());
|
||||
table.addColumn("value", true, scanner.getMoltenAnnotation().valueFormat());
|
||||
} else {
|
||||
for (final Field field : datamap.keySet()) {
|
||||
try {
|
||||
field.setAccessible(true);
|
||||
|
||||
// this is an atomic value, add a column for it
|
||||
final String format = datamap.get(field).format();
|
||||
table.addColumn(field.getName(), true, format);
|
||||
} catch (SecurityException e) {
|
||||
throw new StingException("SecurityException: " + e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return report;
|
||||
}
|
||||
}
|
||||
|
|
@ -205,9 +205,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
private boolean byFilterIsEnabled = false;
|
||||
private boolean perSampleIsEnabled = false;
|
||||
|
||||
// Output report
|
||||
private GATKReport report = null;
|
||||
|
||||
// Public constants
|
||||
private static String ALL_SAMPLE_NAME = "all";
|
||||
|
||||
|
|
@ -293,9 +290,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
// Initialize the evaluation contexts
|
||||
createStratificationStates(stratificationObjects, evaluationClasses);
|
||||
|
||||
// Initialize report table
|
||||
report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationClasses);
|
||||
|
||||
// Load ancestral alignments
|
||||
if (ancestralAlignmentsFile != null) {
|
||||
try {
|
||||
|
|
@ -547,109 +541,8 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
|||
*/
|
||||
public void onTraversalDone(Integer result) {
|
||||
logger.info("Finalizing variant report");
|
||||
|
||||
for ( int key = 0; key < stratManager.size(); key++ ) {
|
||||
final String stratStateString = stratManager.getStratsAndStatesForKeyString(key);
|
||||
final List<Pair<VariantStratifier, Object>> stratsAndStates = stratManager.getStratsAndStatesForKey(key);
|
||||
final EvaluationContext nec = stratManager.get(key);
|
||||
|
||||
for ( final VariantEvaluator ve : nec.getVariantEvaluators() ) {
|
||||
ve.finalizeEvaluation();
|
||||
|
||||
AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
|
||||
Map<Field, DataPoint> datamap = scanner.getData();
|
||||
|
||||
for ( final Field field : datamap.keySet()) {
|
||||
try {
|
||||
field.setAccessible(true);
|
||||
|
||||
if (field.get(ve) instanceof TableType) {
|
||||
TableType t = (TableType) field.get(ve);
|
||||
|
||||
final String subTableName = ve.getSimpleName() + "." + field.getName();
|
||||
final DataPoint dataPointAnn = datamap.get(field);
|
||||
|
||||
if (! report.hasTable(subTableName))
|
||||
configureNewReportTable(t, subTableName, dataPointAnn);
|
||||
|
||||
final GATKReportTable table = report.getTable(subTableName);
|
||||
|
||||
for (int row = 0; row < t.getRowKeys().length; row++) {
|
||||
final String r = t.getRowKeys()[row].toString();
|
||||
final String newStratStateString = stratStateString + r;
|
||||
|
||||
setTableColumnNames(table, newStratStateString, stratsAndStates);
|
||||
|
||||
for (int col = 0; col < t.getColumnKeys().length; col++) {
|
||||
final String c = t.getColumnKeys()[col].toString();
|
||||
table.set(newStratStateString, c, t.getCell(row, col));
|
||||
table.set(newStratStateString, t.getRowName(), r);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
final GATKReportTable table = report.getTable(ve.getSimpleName());
|
||||
setTableColumnNames(table, stratStateString, stratsAndStates);
|
||||
table.set(stratStateString, field.getName(), field.get(ve));
|
||||
}
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException("IllegalAccessException: " + e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
report.print(out);
|
||||
}
|
||||
|
||||
/**
|
||||
* A common utility function to set up the GATKReportTable for an embedded TableType in
|
||||
* a VariantEvaluation
|
||||
*
|
||||
* @param t
|
||||
* @param subTableName
|
||||
* @param dataPointAnn
|
||||
*/
|
||||
@Requires({"t != null", "subTableName != null", "dataPointAnn != null", "!report.hasTable(subTableName)"})
|
||||
@Ensures({"report.hasTable(subTableName)"})
|
||||
private final void configureNewReportTable(final TableType t, final String subTableName, final DataPoint dataPointAnn) {
|
||||
// basic table configuration. Set up primary key, dummy column names
|
||||
report.addTable(subTableName, dataPointAnn.description());
|
||||
final GATKReportTable table = report.getTable(subTableName);
|
||||
|
||||
table.addPrimaryKey("entry", false);
|
||||
table.addColumn(subTableName, subTableName);
|
||||
|
||||
for ( final VariantStratifier vs : stratManager.getStratifiers() ) {
|
||||
table.addColumn(vs.getName(), "unknown");
|
||||
}
|
||||
|
||||
table.addColumn(t.getRowName(), "unknown");
|
||||
|
||||
for ( final Object o : t.getColumnKeys() ) {
|
||||
table.addColumn(o.toString(), 0.0);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Common utility to configure a GATKReportTable columns
|
||||
*
|
||||
* Sets the column names to the strat names in stratsAndStates for the primary key in table
|
||||
*
|
||||
* @param table
|
||||
* @param primaryKey
|
||||
* @param stratsAndStates
|
||||
*/
|
||||
private final void setTableColumnNames(final GATKReportTable table,
|
||||
final String primaryKey,
|
||||
final List<Pair<VariantStratifier, Object>> stratsAndStates) {
|
||||
for ( final Pair<VariantStratifier, Object> stratAndState : stratsAndStates ) {
|
||||
final VariantStratifier vs = stratAndState.getFirst();
|
||||
final String columnName = vs.getName();
|
||||
final Object strat = stratAndState.getSecond();
|
||||
if ( columnName == null || strat == null )
|
||||
throw new ReviewedStingException("Unexpected null variant stratifier state at " + table + " key = " + primaryKey);
|
||||
table.set(primaryKey, columnName, strat);
|
||||
}
|
||||
final VariantEvalReportWriter writer = new VariantEvalReportWriter(stratManager, stratManager.getStratifiers(), stratManager.get(0).getVariantEvaluators());
|
||||
writer.writeReport(out);
|
||||
}
|
||||
|
||||
// Accessors
|
||||
|
|
|
|||
|
|
@ -20,22 +20,22 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
@Analysis(description = "The overlap between eval and comp sites")
|
||||
public class CompOverlap extends VariantEvaluator implements StandardEval {
|
||||
@DataPoint(description = "number of eval variant sites", format = "%d")
|
||||
long nEvalVariants = 0;
|
||||
public long nEvalVariants = 0;
|
||||
|
||||
@DataPoint(description = "number of eval sites outside of comp sites", format = "%d")
|
||||
long novelSites = 0;
|
||||
public long novelSites = 0;
|
||||
|
||||
@DataPoint(description = "number of eval sites at comp sites", format = "%d")
|
||||
long nVariantsAtComp = 0;
|
||||
public long nVariantsAtComp = 0;
|
||||
|
||||
@DataPoint(description = "percentage of eval sites at comp sites", format = "%.2f" )
|
||||
double compRate = 0.0;
|
||||
public double compRate = 0.0;
|
||||
|
||||
@DataPoint(description = "number of concordant sites", format = "%d")
|
||||
long nConcordant = 0;
|
||||
public long nConcordant = 0;
|
||||
|
||||
@DataPoint(description = "the concordance rate", format = "%.2f")
|
||||
double concordantRate = 0.0;
|
||||
public double concordantRate = 0.0;
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 2; // we need to see each eval track and each comp track
|
||||
|
|
@ -51,10 +51,6 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
novelSites = nNovelSites();
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if every allele in eval is also in comp
|
||||
*
|
||||
|
|
@ -71,7 +67,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
return false;
|
||||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean evalIsGood = eval != null && eval.isPolymorphicInSamples();
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered();
|
||||
|
||||
|
|
@ -84,7 +80,5 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
|||
nConcordant++;
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
@Analysis(description = "Counts different classes of variants in the sample")
|
||||
public class CountVariants extends VariantEvaluator implements StandardEval {
|
||||
|
||||
// the following fields are in output order:
|
||||
|
||||
// basic counts on various rates found
|
||||
|
|
@ -81,15 +80,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
return inverseRate(n, nProcessedLoci);
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
nCalledLoci++;
|
||||
|
||||
// Note from Eric:
|
||||
|
|
@ -135,12 +131,9 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
}
|
||||
|
||||
String refStr = vc1.getReference().getBaseString().toUpperCase();
|
||||
|
||||
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null;
|
||||
// if (aaStr.equals(".")) {
|
||||
// aaStr = refStr;
|
||||
// }
|
||||
// these operations are ordered to ensure that we don't get the base string of the ref unless we need it
|
||||
final String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null;
|
||||
final String refStr = aaStr != null ? vc1.getReference().getBaseString().toUpperCase() : null;
|
||||
|
||||
// ref aa alt class
|
||||
// A C A der homozygote
|
||||
|
|
@ -183,8 +176,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
|||
throw new ReviewedStingException("BUG: Unexpected genotype type: " + g);
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
|
|
|
|||
|
|
@ -5,12 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -41,271 +37,67 @@ import java.util.*;
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks")
|
||||
/**
|
||||
* a table of sample names to genotype concordance figures
|
||||
*/
|
||||
@Analysis(name = "Genotype Concordance Detailed", description = "Determine the genotype concordance between the genotypes in difference tracks, and concordance statistics")
|
||||
public class GenotypeConcordance extends VariantEvaluator {
|
||||
private static final boolean PRINT_INTERESTING_SITES = true;
|
||||
|
||||
protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class);
|
||||
|
||||
// a mapping from sample to stats
|
||||
@DataPoint(description = "the detailed concordance statistics for each sample")
|
||||
SampleStats detailedStats = null;
|
||||
@Molten(variableFormat = "%s", valueFormat = "%s")
|
||||
public final Map<Object, Object> map = new TreeMap<Object, Object>();
|
||||
|
||||
// a mapping from sample to stats summary
|
||||
@DataPoint(description = "the simplified concordance statistics for each sample")
|
||||
SampleSummaryStats simplifiedStats = null;
|
||||
// concordance counts
|
||||
private final long[][] truthByCalledGenotypeCounts;
|
||||
|
||||
private static final int MAX_MISSED_VALIDATION_DATA = 100;
|
||||
|
||||
private boolean discordantInteresting = false;
|
||||
|
||||
static class FrequencyStats extends TableType {
|
||||
class Stats {
|
||||
public Stats(int found, int missed) { nFound = found; nMissed = missed; }
|
||||
public long nFound = 0;
|
||||
public long nMissed = 0;
|
||||
}
|
||||
public HashMap<Integer, Stats> foundMissedByAC = new HashMap<Integer, Stats>();
|
||||
|
||||
public Object[] getRowKeys() {
|
||||
String rows[] = new String[foundMissedByAC.size()];
|
||||
int index = 0;
|
||||
for (int i : foundMissedByAC.keySet()) rows[index++] = "AlleleCount_" + i;
|
||||
return rows;
|
||||
}
|
||||
|
||||
public Object[] getColumnKeys() {
|
||||
return new String[]{"number_found", "number_missing"};
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "FrequencyStats";
|
||||
}
|
||||
|
||||
public String getCell(int x, int y) {
|
||||
if (x >= foundMissedByAC.size()) throw new IllegalStateException(x + " is greater than the max index of " + (foundMissedByAC.size()-1));
|
||||
if (y == 0) return String.valueOf(foundMissedByAC.get(foundMissedByAC.keySet().toArray(new Integer[foundMissedByAC.size()])[x]).nFound);
|
||||
else return String.valueOf(foundMissedByAC.get(foundMissedByAC.keySet().toArray(new Integer[foundMissedByAC.size()])[x]).nMissed);
|
||||
}
|
||||
|
||||
public void incrementFoundCount(int alleleFreq) {
|
||||
if (!foundMissedByAC.containsKey(alleleFreq))
|
||||
foundMissedByAC.put(alleleFreq,new Stats(1,0));
|
||||
else
|
||||
foundMissedByAC.get(alleleFreq).nFound++;
|
||||
}
|
||||
|
||||
public void incrementMissedCount(int alleleFreq) {
|
||||
if (!foundMissedByAC.containsKey(alleleFreq))
|
||||
foundMissedByAC.put(alleleFreq,new Stats(0,1));
|
||||
else
|
||||
foundMissedByAC.get(alleleFreq).nMissed++;
|
||||
}
|
||||
}
|
||||
|
||||
static class QualityScoreHistograms extends TableType {
|
||||
final static int NUM_BINS = 20;
|
||||
final HashMap<Integer,Integer> truePositiveQualityScoreMap = new HashMap<Integer,Integer>(); // A HashMap holds all the quality scores until we are able to bin them appropriately
|
||||
final HashMap<Integer,Integer> falsePositiveQualityScoreMap = new HashMap<Integer,Integer>();
|
||||
final int truePositiveHist[] = new int[NUM_BINS]; // the final histograms that get reported out
|
||||
final int falsePositiveHist[] = new int[NUM_BINS];
|
||||
final String[] rowKeys = new String[]{"true_positive_hist", "false_positive_hist"};
|
||||
|
||||
public Object[] getRowKeys() {
|
||||
return rowKeys;
|
||||
}
|
||||
|
||||
public Object[] getColumnKeys() {
|
||||
final String columnKeys[] = new String[NUM_BINS];
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
columnKeys[iii] = "histBin" + iii;
|
||||
}
|
||||
return columnKeys;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "QualityScoreHistogram";
|
||||
}
|
||||
|
||||
public String getCell(int x, int y) {
|
||||
if( x == 0 ) {
|
||||
return String.valueOf(truePositiveHist[y]);
|
||||
} else if ( x == 1 ) {
|
||||
return String.valueOf(falsePositiveHist[y]);
|
||||
} else {
|
||||
throw new ReviewedStingException( "Unknown row in " + getName() + ", row = " + x );
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
String returnString = "";
|
||||
// output both histogram arrays
|
||||
returnString += "TP: ";
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
returnString += truePositiveHist[iii] + " ";
|
||||
}
|
||||
returnString += "\nFP: ";
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
returnString += falsePositiveHist[iii] + " ";
|
||||
}
|
||||
return returnString;
|
||||
}
|
||||
|
||||
public void incrValue( final double qual, final boolean isTruePositiveCall ) {
|
||||
HashMap<Integer,Integer> qualScoreMap;
|
||||
if( isTruePositiveCall ) {
|
||||
qualScoreMap = truePositiveQualityScoreMap;
|
||||
} else {
|
||||
qualScoreMap = falsePositiveQualityScoreMap;
|
||||
}
|
||||
final Integer qualKey = Math.round((float) qual);
|
||||
if( qualScoreMap.containsKey(qualKey) ) {
|
||||
qualScoreMap.put(qualKey, qualScoreMap.get(qualKey) + 1);
|
||||
} else {
|
||||
qualScoreMap.put(qualKey, 1);
|
||||
}
|
||||
}
|
||||
|
||||
public void organizeHistogramTables() {
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
truePositiveHist[iii] = 0;
|
||||
falsePositiveHist[iii] = 0;
|
||||
}
|
||||
|
||||
int maxQual = 0;
|
||||
|
||||
// Calculate the maximum quality score for both TP and FP calls in order to normalize and histogram
|
||||
for( final Integer qual : truePositiveQualityScoreMap.keySet()) {
|
||||
if( qual > maxQual ) {
|
||||
maxQual = qual;
|
||||
}
|
||||
}
|
||||
for( final Integer qual : falsePositiveQualityScoreMap.keySet()) {
|
||||
if( qual > maxQual ) {
|
||||
maxQual = qual;
|
||||
}
|
||||
}
|
||||
|
||||
final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); //BUGBUG: should be normalized max to min, not max to 0
|
||||
|
||||
for( final Integer qual : truePositiveQualityScoreMap.keySet()) {
|
||||
final int index = (int)Math.floor( ((double)qual) / binSize );
|
||||
if(index >= 0) { //BUGBUG: problem when maxQual is zero?
|
||||
truePositiveHist[ index ] += truePositiveQualityScoreMap.get(qual);
|
||||
}
|
||||
}
|
||||
for( final Integer qual : falsePositiveQualityScoreMap.keySet()) {
|
||||
final int index = (int)Math.floor( ((double)qual) / binSize );
|
||||
if(index >= 0) {
|
||||
falsePositiveHist[ index ] += falsePositiveQualityScoreMap.get(qual);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// keep a list of the validation data we saw before the first eval data
|
||||
private HashSet<VariantContext> missedValidationData = new HashSet<VariantContext>();
|
||||
|
||||
|
||||
//public GenotypeConcordance(VariantEvalWalker parent) {
|
||||
// super(parent);
|
||||
// discordantInteresting = parent.DISCORDANT_INTERESTING;
|
||||
//}
|
||||
|
||||
public String getName() {
|
||||
return "genotypeConcordance";
|
||||
/**
|
||||
* Initialize this object
|
||||
*/
|
||||
public GenotypeConcordance() {
|
||||
final int nGenotypeTypes = Genotype.Type.values().length;
|
||||
truthByCalledGenotypeCounts = new long[nGenotypeTypes][nGenotypeTypes];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getComparisonOrder() {
|
||||
return 2; // we need to see each eval track and each comp track
|
||||
return 2;
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getName() + ": <table>";
|
||||
}
|
||||
|
||||
private boolean warnedAboutValidationData = false;
|
||||
|
||||
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
String interesting = null;
|
||||
|
||||
@Override
|
||||
public void update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
// sanity check that we at least have either eval or validation data
|
||||
if ( (validation != null && !validation.hasGenotypes()) || eval == null && !isValidVC(validation)) {
|
||||
return interesting;
|
||||
}
|
||||
return;
|
||||
} else {
|
||||
final boolean validationIsValidVC = isValidVC(validation);
|
||||
|
||||
if (detailedStats == null) {
|
||||
// determine concordance for eval data
|
||||
if (eval != null) {
|
||||
// initialize the concordance table
|
||||
detailedStats = new SampleStats(eval,Genotype.Type.values().length);
|
||||
simplifiedStats = new SampleSummaryStats(eval);
|
||||
for (final VariantContext vc : missedValidationData) {
|
||||
determineStats(null, vc);
|
||||
}
|
||||
missedValidationData = null;
|
||||
} else {
|
||||
// todo -- Eric, this results in a memory problem when eval is WEx data but you are using CG calls genome-wide
|
||||
// todo -- perhaps you need should extend the evaluators with an initialize
|
||||
// todo -- method that gets the header (or samples) for the first eval sites?
|
||||
if (missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) {
|
||||
if (!warnedAboutValidationData) {
|
||||
//logger.warn("Too many genotype sites missed before eval site appeared; ignoring");
|
||||
warnedAboutValidationData = true;
|
||||
for (final Genotype g : eval.getGenotypes() ) {
|
||||
final String sample = g.getSampleName();
|
||||
final Genotype.Type called = g.getType();
|
||||
final Genotype.Type truth;
|
||||
|
||||
if (!validationIsValidVC || !validation.hasGenotype(sample)) {
|
||||
truth = Genotype.Type.NO_CALL;
|
||||
} else {
|
||||
truth = validation.getGenotype(sample).getType();
|
||||
}
|
||||
} else {
|
||||
missedValidationData.add(validation);
|
||||
|
||||
incrValue(truth, called);
|
||||
}
|
||||
return interesting;
|
||||
}
|
||||
}
|
||||
|
||||
interesting = determineStats(eval, validation);
|
||||
// otherwise, mark no-calls for all samples
|
||||
else {
|
||||
final Genotype.Type called = Genotype.Type.NO_CALL;
|
||||
|
||||
return interesting; // we don't capture any interesting sites
|
||||
}
|
||||
for (final Genotype g : validation.getGenotypes()) {
|
||||
final Genotype.Type truth = g.getType();
|
||||
incrValue(truth, called);
|
||||
|
||||
private String determineStats(final VariantContext eval, final VariantContext validation) {
|
||||
String interesting = null;
|
||||
|
||||
final boolean validationIsValidVC = isValidVC(validation);
|
||||
final String evalAC = ( vcHasGoodAC(eval) ) ? String.format("evalAC%d",getAC(eval)) : null ;
|
||||
final String validationAC = ( vcHasGoodAC(validation) ) ? String.format("compAC%d",getAC(validation)) : null;
|
||||
|
||||
// determine concordance for eval data
|
||||
if (eval != null) {
|
||||
for (final Genotype g : eval.getGenotypes() ) {
|
||||
final String sample = g.getSampleName();
|
||||
final Genotype.Type called = g.getType();
|
||||
final Genotype.Type truth;
|
||||
|
||||
if (!validationIsValidVC || !validation.hasGenotype(sample)) {
|
||||
truth = Genotype.Type.NO_CALL;
|
||||
} else {
|
||||
truth = validation.getGenotype(sample).getType();
|
||||
// interesting = "ConcordanceStatus=FP";
|
||||
if (discordantInteresting && truth.ordinal() != called.ordinal())
|
||||
{
|
||||
interesting = "ConcordanceStatus=" + truth + "/" + called;
|
||||
}
|
||||
}
|
||||
|
||||
detailedStats.incrValue(sample, truth, called);
|
||||
}
|
||||
}
|
||||
// otherwise, mark no-calls for all samples
|
||||
else {
|
||||
final Genotype.Type called = Genotype.Type.NO_CALL;
|
||||
|
||||
for (final Genotype g : validation.getGenotypes()) {
|
||||
final Genotype.Type truth = g.getType();
|
||||
detailedStats.incrValue(g.getSampleName(), truth, called);
|
||||
|
||||
// print out interesting sites
|
||||
/*
|
||||
// print out interesting sites
|
||||
/*
|
||||
if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) {
|
||||
if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) {
|
||||
super.getVEWalker().gcLog.printf("%s FN %s%n", group, validation);
|
||||
|
|
@ -315,292 +107,120 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return interesting;
|
||||
}
|
||||
|
||||
private static boolean isValidVC(final VariantContext vc) {
|
||||
return (vc != null && !vc.isFiltered());
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
if( simplifiedStats != null && detailedStats != null ) {
|
||||
simplifiedStats.generateSampleSummaryStats(detailedStats);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean vcHasGoodAC(VariantContext vc) {
|
||||
return ( vc != null && vc.getAlternateAlleles().size() == 1 && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) );
|
||||
|
||||
}
|
||||
|
||||
private int getAC(VariantContext vc) {
|
||||
if ( List.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()) ) {
|
||||
return ((List<Integer>) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY)).get(0);
|
||||
} else if ( Integer.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass())) {
|
||||
return (Integer) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY);
|
||||
} else if ( String.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()) ) {
|
||||
// two ways of parsing
|
||||
String ac = (String) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY);
|
||||
if ( ac.startsWith("[") ) {
|
||||
return Integer.parseInt(ac.replaceAll("\\[","").replaceAll("\\]",""));
|
||||
} else {
|
||||
try {
|
||||
return Integer.parseInt(ac);
|
||||
} catch ( NumberFormatException e ) {
|
||||
throw new UserException(String.format("The format of the AC field is improperly formatted: AC=%s",ac));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new UserException(String.format("The format of the AC field does not appear to be of integer-list or String format, class was %s",vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* a table of sample names to genotype concordance figures
|
||||
*/
|
||||
class SampleStats extends TableType {
|
||||
private final int nGenotypeTypes;
|
||||
|
||||
// sample to concordance stats object
|
||||
public final HashMap<String, long[][]> concordanceStats = new HashMap<String, long[][]>();
|
||||
|
||||
/**
|
||||
*
|
||||
* @return one row per sample
|
||||
*/
|
||||
public Object[] getRowKeys() {
|
||||
return concordanceStats.keySet().toArray(new String[concordanceStats.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* increment the specified value
|
||||
* @param sample the sample name
|
||||
* @param truth the truth type
|
||||
* @param called the called type
|
||||
*/
|
||||
public void incrValue(String sample, Genotype.Type truth, Genotype.Type called) {
|
||||
if ( concordanceStats.containsKey(sample) )
|
||||
concordanceStats.get(sample)[truth.ordinal()][called.ordinal()]++;
|
||||
else if ( called != Genotype.Type.NO_CALL )
|
||||
throw new UserException.CommandLineException("Sample " + sample + " has not been seen in a previous eval; this analysis module assumes that all samples are present in each variant context");
|
||||
private void incrValue(final Genotype.Type truth, final Genotype.Type called) {
|
||||
truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]++;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the column keys
|
||||
* @return a list of objects, in this case strings, that are the column names
|
||||
*/
|
||||
public Object[] getColumnKeys() {
|
||||
// return new String[]{"total_true_ref","%_ref/ref","n_ref/no-call",
|
||||
// "n_ref/ref","n_ref/het","n_ref/hom",
|
||||
// "total_true_het","%_het/het","n_het/no-call",
|
||||
// "n_het/ref","n_het/het","n_het/hom",
|
||||
// "total_true_hom","%_hom/hom","n_hom/no-call",
|
||||
// "n_hom/ref","n_hom/het","n_hom/hom"};
|
||||
return new String[]{"total_true_ref","pct_ref_vs_ref","n_ref_vs_no_call",
|
||||
"n_ref_vs_ref","n_ref_vs_het","n_ref_vs_hom",
|
||||
"total_true_het","pct_het_vs_het","n_het_vs_no_call",
|
||||
"n_het_vs_ref","n_het_vs_het","n_het_vs_hom",
|
||||
"total_true_hom","pct_hom_vs_hom","n_hom_vs_no_call",
|
||||
"n_hom_vs_ref","n_hom_vs_het","n_hom_vs_hom"};
|
||||
private long count(final Genotype.Type truth, final Genotype.Type called) {
|
||||
return truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()];
|
||||
}
|
||||
|
||||
|
||||
public SampleStats(VariantContext vc, int nGenotypeTypes) {
|
||||
this.nGenotypeTypes = nGenotypeTypes;
|
||||
for (final Genotype g : vc.getGenotypes())
|
||||
concordanceStats.put(g.getSampleName(), new long[nGenotypeTypes][nGenotypeTypes]);
|
||||
private long count(final EnumSet<Genotype.Type> truth, final Genotype.Type called) {
|
||||
return count(truth, EnumSet.of(called));
|
||||
}
|
||||
|
||||
public SampleStats(int genotypeTypes) {
|
||||
nGenotypeTypes = genotypeTypes;
|
||||
private long count(final Genotype.Type truth, final EnumSet<Genotype.Type> called) {
|
||||
return count(EnumSet.of(truth), called);
|
||||
}
|
||||
|
||||
public Object getCell(int x, int y) {
|
||||
// we have three rows of 6 right now for output (rows: ref, het, hom)
|
||||
Genotype.Type type = Genotype.Type.values()[(y/6)+1]; // get the row type
|
||||
// save some repeat work, get the total every time
|
||||
long total = 0;
|
||||
Object[] rowKeys = getRowKeys();
|
||||
for (int called = 0; called < nGenotypeTypes; called++) {
|
||||
total += concordanceStats.get(rowKeys[x])[type.ordinal()][called];
|
||||
}
|
||||
|
||||
// now get the cell they're interested in
|
||||
switch (y % 6) {
|
||||
case (0): // get the total_true for this type
|
||||
return total;
|
||||
case (1):
|
||||
return total == 0 ? 0.0 : (100.0 * (double) concordanceStats.get(rowKeys[x])[type.ordinal()][type.ordinal()] / (double) total);
|
||||
default:
|
||||
return concordanceStats.get(rowKeys[x])[type.ordinal()][(y % 6) - 2];
|
||||
private long count(final EnumSet<Genotype.Type> truth, final EnumSet<Genotype.Type> called) {
|
||||
long sum = 0;
|
||||
for ( final Genotype.Type truth1 : truth ) {
|
||||
for ( final Genotype.Type called1 : called ) {
|
||||
sum += count(truth1, called1);
|
||||
}
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "Sample Statistics";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* a table of sample names to genotype concordance summary statistics
|
||||
*/
|
||||
class SampleSummaryStats extends TableType {
|
||||
protected final static String ALL_SAMPLES_KEY = "allSamples";
|
||||
protected final static String[] COLUMN_KEYS = new String[]{
|
||||
"percent_comp_ref_called_ref",
|
||||
"percent_comp_het_called_het",
|
||||
"percent_comp_hom_called_hom",
|
||||
"percent_non_reference_sensitivity",
|
||||
"percent_overall_genotype_concordance",
|
||||
"percent_non_reference_discrepancy_rate"};
|
||||
|
||||
// sample to concordance stats object
|
||||
protected final HashMap<String, double[]> concordanceSummary = new HashMap<String, double[]>();
|
||||
|
||||
/**
|
||||
*
|
||||
* @return one row per sample
|
||||
*/
|
||||
public Object[] getRowKeys() {
|
||||
return concordanceSummary.keySet().toArray(new String[concordanceSummary.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the column keys
|
||||
* @return a list of objects, in this case strings, that are the column names
|
||||
*/
|
||||
/**
 * get the column keys
 * @return a list of objects, in this case strings, that are the column names
 */
public Object[] getColumnKeys() {
    // NOTE(review): this hands out the internal COLUMN_KEYS array itself, so a
    // caller could mutate the shared constant -- confirm all callers are read-only.
    return COLUMN_KEYS;
}
|
||||
|
||||
public SampleSummaryStats(final VariantContext vc) {
|
||||
concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]);
|
||||
for( final Genotype g : vc.getGenotypes() ) {
|
||||
concordanceSummary.put(g.getSampleName(), new double[COLUMN_KEYS.length]);
|
||||
}
|
||||
}
|
||||
|
||||
public SampleSummaryStats() {
|
||||
|
||||
}
|
||||
|
||||
public Object getCell(int x, int y) {
|
||||
final Object[] rowKeys = getRowKeys();
|
||||
return String.format("%.2f",concordanceSummary.get(rowKeys[x])[y]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper routine that sums up all columns / rows found in stats specified by all pairs in d1 x d2
|
||||
*
|
||||
* @param stats
|
||||
* @param d1
|
||||
* @param d2
|
||||
* @return
|
||||
*/
|
||||
private long sumStatsAllPairs( final long[][] stats, EnumSet<Genotype.Type> d1, EnumSet<Genotype.Type> d2 ) {
|
||||
long sum = 0L;
|
||||
private long countDiag( final EnumSet<Genotype.Type> d1 ) {
|
||||
long sum = 0;
|
||||
|
||||
for(final Genotype.Type e1 : d1 ) {
|
||||
for(final Genotype.Type e2 : d2 ) {
|
||||
sum += stats[e1.ordinal()][e2.ordinal()];
|
||||
sum += truthByCalledGenotypeCounts[e1.ordinal()][e1.ordinal()];
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
final EnumSet<Genotype.Type> allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET);
|
||||
final EnumSet<Genotype.Type> allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF);
|
||||
final EnumSet<Genotype.Type> allGenotypes = EnumSet.allOf(Genotype.Type.class);
|
||||
|
||||
// exact values of the table
|
||||
for ( final Genotype.Type truth : Genotype.Type.values() ) {
|
||||
for ( final Genotype.Type called : Genotype.Type.values() ) {
|
||||
final String field = String.format("n_true_%s_called_%s", truth, called);
|
||||
final Long value = count(truth, called);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
private long sumStatsDiag( final long[][] stats, EnumSet<Genotype.Type> d1) {
|
||||
long sum = 0L;
|
||||
|
||||
for(final Genotype.Type e1 : d1 ) {
|
||||
sum += stats[e1.ordinal()][e1.ordinal()];
|
||||
// counts of called genotypes
|
||||
for ( final Genotype.Type called : Genotype.Type.values() ) {
|
||||
final String field = String.format("total_called_%s", called);
|
||||
final Long value = count(allGenotypes, called);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
// counts of true genotypes
|
||||
for ( final Genotype.Type truth : Genotype.Type.values() ) {
|
||||
final String field = String.format("total_true_%s", truth);
|
||||
final Long value = count(truth, allGenotypes);
|
||||
map.put(field, value.toString());
|
||||
}
|
||||
|
||||
private double ratio(long numer, long denom) {
|
||||
return denom != 0L ? 100.0 * ( ((double)numer) / ((double)denom) ) : 0.0;
|
||||
}
|
||||
for ( final Genotype.Type genotype : Genotype.Type.values() ) {
|
||||
final String field = String.format("percent_%s_called_%s", genotype, genotype);
|
||||
long numer = count(genotype, genotype);
|
||||
long denom = count(EnumSet.of(genotype), allGenotypes);
|
||||
map.put(field, Utils.formattedPercent(numer, denom));
|
||||
}
|
||||
|
||||
final long[] allSamplesNumerators = new long[COLUMN_KEYS.length];
|
||||
final long[] allSamplesDenominators = new long[COLUMN_KEYS.length];
|
||||
|
||||
private void updateSummaries(int i, double[] summary, long numer, long denom ) {
|
||||
allSamplesNumerators[i] += numer;
|
||||
allSamplesDenominators[i] += denom;
|
||||
summary[i] = ratio(numer, denom);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate the five summary stats per sample
|
||||
* @param sampleStats The Map which holds concordance values per sample
|
||||
*/
|
||||
public void generateSampleSummaryStats( final SampleStats sampleStats ) {
|
||||
EnumSet<Genotype.Type> allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET);
|
||||
EnumSet<Genotype.Type> allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF);
|
||||
EnumSet<Genotype.Type> allGenotypes = EnumSet.allOf(Genotype.Type.class);
|
||||
|
||||
for( final String sample : concordanceSummary.keySet() ) {
|
||||
if ( sample.equals(ALL_SAMPLES_KEY) ) continue;
|
||||
|
||||
final long[][] stats = sampleStats.concordanceStats.get(sample);
|
||||
final double[] summary = concordanceSummary.get(sample);
|
||||
if( stats == null ) { throw new ReviewedStingException( "SampleStats and SampleSummaryStats contain different samples! sample = " + sample ); }
|
||||
|
||||
long numer, denom;
|
||||
|
||||
// Summary 0: % ref called as ref
|
||||
numer = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()];
|
||||
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_REF), allGenotypes);
|
||||
updateSummaries(0, summary, numer, denom);
|
||||
|
||||
// Summary 1: % het called as het
|
||||
numer = stats[Genotype.Type.HET.ordinal()][Genotype.Type.HET.ordinal()];
|
||||
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HET), allGenotypes);
|
||||
updateSummaries(1, summary, numer, denom);
|
||||
|
||||
// Summary 2: % homVar called as homVar
|
||||
numer = stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HOM_VAR.ordinal()];
|
||||
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_VAR), allGenotypes);
|
||||
updateSummaries(2, summary, numer, denom);
|
||||
|
||||
// Summary 3: % non-ref called as non-ref
|
||||
{
|
||||
// % non-ref called as non-ref
|
||||
// MAD: this is known as the non-reference sensitivity (# non-ref according to comp found in eval / # non-ref in comp)
|
||||
numer = sumStatsAllPairs(stats, allVariantGenotypes, allVariantGenotypes);
|
||||
denom = sumStatsAllPairs(stats, allVariantGenotypes, allGenotypes);
|
||||
updateSummaries(3, summary, numer, denom);
|
||||
final String field = "percent_non_reference_sensitivity";
|
||||
long numer = count(allVariantGenotypes, allVariantGenotypes);
|
||||
long denom = count(allVariantGenotypes, allGenotypes);
|
||||
map.put(field, Utils.formattedPercent(numer, denom));
|
||||
}
|
||||
|
||||
// Summary 4: overall genotype concordance of sites called in eval track
|
||||
{
|
||||
// overall genotype concordance of sites called in eval track
|
||||
// MAD: this is the tradition genotype concordance
|
||||
numer = sumStatsDiag(stats, allCalledGenotypes);
|
||||
denom = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes);
|
||||
updateSummaries(4, summary, numer, denom);
|
||||
|
||||
// Summary 5: overall genotype concordance of sites called non-ref in eval track
|
||||
long homrefConcords = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()];
|
||||
long diag = sumStatsDiag(stats, allVariantGenotypes);
|
||||
long allNoHomRef = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
numer = allNoHomRef - diag;
|
||||
denom = allNoHomRef;
|
||||
updateSummaries(5, summary, numer, denom);
|
||||
final String field = "percent_overall_genotype_concordance";
|
||||
long numer = countDiag(allCalledGenotypes);
|
||||
long denom = count(allCalledGenotypes, allCalledGenotypes);
|
||||
map.put(field, Utils.formattedPercent(numer, denom));
|
||||
}
|
||||
|
||||
// update the final summary stats
|
||||
final double[] allSamplesSummary = concordanceSummary.get(ALL_SAMPLES_KEY);
|
||||
for ( int i = 0; i < allSamplesSummary.length; i++) {
|
||||
allSamplesSummary[i] = ratio(allSamplesNumerators[i], allSamplesDenominators[i]);
|
||||
{
|
||||
// overall genotype concordance of sites called non-ref in eval track
|
||||
// MAD: this is the non-reference discrepancy rate
|
||||
final String field = "percent_non_reference_discrepancy_rate";
|
||||
long homrefConcords = count(Genotype.Type.HOM_REF, Genotype.Type.HOM_REF);
|
||||
long allNoHomRef = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
long numer = allNoHomRef - countDiag(allVariantGenotypes);
|
||||
long denom = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
map.put(field, Utils.formattedPercent(numer, denom));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** @return the display name of this summary table */
public String getName() {
    return "Sample Summary Statistics";
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,426 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair;
|
||||
import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
 * Evaluates how well genotype phasing agrees between a comp track and an eval
 * track. For each sample the evaluator remembers the previous het site shared
 * by both tracks; at the next shared het site it scores whether neither, only
 * one, or both tracks phased the pair -- and, when both did, whether the phase
 * orientation matches (phasesAgree) or is switched (phasesDisagree).
 *
 * NOTE(review): depends on the TableType-based SamplePhasingStatistics; this
 * evaluator was retired along with TableType -- see commit message.
 */
@Analysis(name = "Genotype Phasing Evaluation", description = "Evaluates the phasing of genotypes in different tracks")
public class GenotypePhasingEvaluator extends VariantEvaluator {
    protected final static Logger logger = Logger.getLogger(GenotypePhasingEvaluator.class);

    // a mapping from sample to stats
    @DataPoint(description = "the phasing statistics for each sample")
    SamplePhasingStatistics samplePhasingStatistics = null;

    // per-sample memory of the last shared het site (a null entry means the
    // phasing chain was broken and must restart)
    SamplePreviousGenotypes samplePrevGenotypes = null;

    // minimum PQ for a genotype to count as confidently phased.
    // NOTE(review): initialize() passes walker.getMinPhaseQuality() to the stats
    // object but never updates this field, so genotypesArePhasedAboveThreshold()
    // always uses 10.0 -- confirm this is intended.
    double minPhaseQuality = 10.0;

    public void initialize(VariantEvalWalker walker) {
        super.initialize(walker);
        this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality());
        this.samplePrevGenotypes = new SamplePreviousGenotypes();
    }

    public String getName() {
        return "GenotypePhasingEvaluator";
    }

    public int getComparisonOrder() {
        return 2; // we only need to see pairs of (comp, eval)
    }

    public boolean enabled() {
        return true;
    }

    public String toString() {
        return getName() + ": <table>";
    }

    // convenience overload: no evaluation group
    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        return update2(eval,comp,tracker,ref,context,null);
    }

    /**
     * Core per-site update: compare the phasing of eval vs. comp at this locus
     * for every sample present in either track.
     *
     * @return a string of "interesting" reasons for this site (null when none)
     */
    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, EvaluationContext group) {
        //public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
        Reasons interesting = new Reasons();
        if (ref == null)
            return interesting.toString();
        GenomeLoc curLocus = ref.getLocus();

        logger.debug("update2() locus: " + curLocus);
        logger.debug("comp = " + comp + " eval = " + eval);

        // union of samples present in either unfiltered track
        Set<String> allSamples = new HashSet<String>();

        GenotypesContext compSampGenotypes = null;
        if (isRelevantToPhasing(comp)) {
            allSamples.addAll(comp.getSampleNames());
            compSampGenotypes = comp.getGenotypes();
        }

        GenotypesContext evalSampGenotypes = null;
        if (isRelevantToPhasing(eval)) {
            allSamples.addAll(eval.getSampleNames());
            evalSampGenotypes = eval.getGenotypes();
        }

        for (String samp : allSamples) {
            logger.debug("sample = " + samp);

            Genotype compSampGt = null;
            if (compSampGenotypes != null)
                compSampGt = compSampGenotypes.get(samp);

            Genotype evalSampGt = null;
            if (evalSampGenotypes != null)
                evalSampGt = evalSampGenotypes.get(samp);

            if (compSampGt == null || evalSampGt == null || compSampGt.isNoCall() || evalSampGt.isNoCall()) { // Since either comp or eval (or both) are missing the site, the best we can do is hope to preserve phase [if the non-missing one preserves phase]
                // Having an unphased site breaks the phasing for the sample [does NOT permit "transitive phasing"] - hence, must reset phasing knowledge for both comp and eval [put a null CompEvalGenotypes]:
                if (isNonNullButUnphased(compSampGt) || isNonNullButUnphased(evalSampGt))
                    samplePrevGenotypes.put(samp, null);
            }
            else { // Both comp and eval have a non-null Genotype at this site:
                AllelePair compAllelePair = new AllelePair(compSampGt);
                AllelePair evalAllelePair = new AllelePair(evalSampGt);

                boolean breakPhasing = false;
                if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom())
                    breakPhasing = true; // since they are not both het or both hom
                else { // both are het, or both are hom:
                    boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compAllelePair, evalAllelePair) && bottomMatchesBottom(compAllelePair, evalAllelePair));
                    boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compAllelePair, evalAllelePair) && bottomMatchesTop(compAllelePair, evalAllelePair));
                    if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop)
                        breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample
                }

                if (breakPhasing) {
                    samplePrevGenotypes.put(samp, null); // nothing to do for this site, AND must remove any history for the future
                }
                else if (compSampGt.isHet() && evalSampGt.isHet()) {
                    /* comp and eval have the HET same Genotype at this site:
                       [Note that if both are hom, then nothing is done here, but the het history IS preserved].
                     */
                    CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp);
                    if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome
                        prevCompAndEval = null;

                    // Replace the previous hets with the current hets:
                    samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt);

                    if (prevCompAndEval != null) {
                        GenomeLoc prevLocus = prevCompAndEval.getLocus();
                        logger.debug("Potentially phaseable het locus: " + curLocus + " [relative to previous het locus: " + prevLocus + "]");
                        PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp);

                        boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt);
                        boolean evalSampIsPhased = genotypesArePhasedAboveThreshold(evalSampGt);
                        if (compSampIsPhased || evalSampIsPhased) {
                            if (!evalSampIsPhased) {
                                ps.onlyCompPhased++;
                                //interesting.addReason("ONLY_COMP", samp, group, prevLocus, "");
                            }
                            else if (!compSampIsPhased) {
                                ps.onlyEvalPhased++;
                                //interesting.addReason("ONLY_EVAL", samp, group, prevLocus, "");
                            }
                            else { // both comp and eval are phased:
                                AllelePair prevCompAllelePair = new AllelePair(prevCompAndEval.getCompGenotpye());
                                AllelePair prevEvalAllelePair = new AllelePair(prevCompAndEval.getEvalGenotype());

                                // Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample:
                                boolean topsMatch = (topMatchesTop(prevCompAllelePair, prevEvalAllelePair) && topMatchesTop(compAllelePair, evalAllelePair));
                                boolean topMatchesBottom = (topMatchesBottom(prevCompAllelePair, prevEvalAllelePair) && topMatchesBottom(compAllelePair, evalAllelePair));

                                if (topsMatch || topMatchesBottom) {
                                    ps.phasesAgree++;

                                    Double compPQ = getPQ(compSampGt);
                                    Double evalPQ = getPQ(evalSampGt);
                                    if (compPQ != null && evalPQ != null && MathUtils.compareDoubles(compPQ, evalPQ) != 0) {
                                        //interesting.addReason("PQ_CHANGE", samp, group, prevLocus, compPQ + " -> " + evalPQ);
                                    }
                                }
                                else {
                                    ps.phasesDisagree++;
                                    logger.debug("SWITCHED locus: " + curLocus);
                                    //interesting.addReason("SWITCH", samp, group, prevLocus, toString(prevCompAllelePair, compAllelePair) + " -> " + toString(prevEvalAllelePair, evalAllelePair));
                                }
                            }
                        }
                        else {
                            ps.neitherPhased++;
                        }
                    }
                }
            }
        }
        logger.debug("\n" + samplePhasingStatistics + "\n");

        return interesting.toString();
    }

    /** Only non-null, unfiltered contexts participate in phasing evaluation. */
    public static boolean isRelevantToPhasing(VariantContext vc) {
        return (vc != null && !vc.isFiltered());
    }

    /** True when a genotype exists, is called, but does not count as phased. */
    public boolean isNonNullButUnphased(Genotype gt) {
        return (gt != null && !gt.isNoCall() && !genotypesArePhasedAboveThreshold(gt));
    }

    /** A genotype counts as phased if hom, or phased with PQ missing or >= threshold. */
    public boolean genotypesArePhasedAboveThreshold(Genotype gt) {
        if (gt.isHom()) // Can always consider a hom site to be phased to its predecessor, since its successor will only be phased to it if it's hom or "truly" phased
            return true;

        if (!gt.isPhased())
            return false;

        Double pq = getPQ(gt);
        return (pq == null || pq >= minPhaseQuality);
    }

    /**
     * @return the genotype's PQ value, or null when absent
     *         (-1 is the sentinel default; the Double is auto-unboxed for the comparison)
     */
    public static Double getPQ(Genotype gt) {
        Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
        return d == -1 ? null : d;
    }

    public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {
        return b1.getTopAllele().equals(b2.getTopAllele());
    }

    public static boolean topMatchesBottom(AllelePair b1, AllelePair b2) {
        return b1.getTopAllele().equals(b2.getBottomAllele());
    }

    public static boolean bottomMatchesTop(AllelePair b1, AllelePair b2) {
        return topMatchesBottom(b2, b1);
    }

    public static boolean bottomMatchesBottom(AllelePair b1, AllelePair b2) {
        return b1.getBottomAllele().equals(b2.getBottomAllele());
    }

    /** Render a previous/current allele-pair transition as "t1+t2|b1+b2". */
    public String toString(AllelePair prev, AllelePair cur) {
        return prev.getTopAllele().getBaseString() + "+" + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "+" + cur.getBottomAllele().getBaseString();
    }

    public void finalizeEvaluation() {
    }

    /** Accumulates "interesting site" annotations; toString() yields null when empty. */
    private static class Reasons {
        private StringBuilder sb;

        public Reasons() {
            sb = new StringBuilder();
        }

        // public void addReason(String category, String sample, VariantEvalWalker.EvaluationContext evalGroup, GenomeLoc prevLoc, String reason) {
        //     sb.append(category + "(" + sample + ", previous: " + prevLoc + " [" + evalGroup.compTrackName + ", " + evalGroup.evalTrackName + "]): " + reason + ";");
        // }

        public String toString() {
            if (sb.length() == 0)
                return null;

            return "reasons=" + sb.toString();
        }
    }
}
|
||||
|
||||
|
||||
|
||||
class CompEvalGenotypes {
|
||||
private GenomeLoc loc;
|
||||
private Genotype compGt;
|
||||
private Genotype evalGt;
|
||||
|
||||
public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
|
||||
this.loc = loc;
|
||||
this.compGt = compGt;
|
||||
this.evalGt = evalGt;
|
||||
}
|
||||
|
||||
public GenomeLoc getLocus() {
|
||||
return loc;
|
||||
}
|
||||
|
||||
public Genotype getCompGenotpye() {
|
||||
return compGt;
|
||||
}
|
||||
public Genotype getEvalGenotype() {
|
||||
return evalGt;
|
||||
}
|
||||
}
|
||||
|
||||
class SamplePreviousGenotypes {
|
||||
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
|
||||
|
||||
public SamplePreviousGenotypes() {
|
||||
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
|
||||
}
|
||||
|
||||
public CompEvalGenotypes get(String sample) {
|
||||
return sampleGenotypes.get(sample);
|
||||
}
|
||||
|
||||
public void put(String sample, CompEvalGenotypes compEvalGts) {
|
||||
sampleGenotypes.put(sample, compEvalGts);
|
||||
}
|
||||
|
||||
public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) {
|
||||
sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt));
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Counters describing how the phasing of comp and eval genotypes compared
 * across a set of sites, plus tabular accessors used for reporting.
 */
class PhaseStats {
    public int neitherPhased;
    public int onlyCompPhased;
    public int onlyEvalPhased;
    public int phasesAgree;
    public int phasesDisagree;

    public PhaseStats() {
        // All counters start from zero.
        neitherPhased = 0;
        onlyCompPhased = 0;
        onlyEvalPhased = 0;
        phasesAgree = 0;
        phasesDisagree = 0;
    }

    @Override
    public String toString() {
        return "Neither phased: " + neitherPhased
                + "\tOnly Comp: " + onlyCompPhased
                + "\tOnly Eval: " + onlyEvalPhased
                + "\tSame phase: " + phasesAgree
                + "\tOpposite phase: " + phasesDisagree;
    }

    /** Column names, in the same order {@link #getField(int)} indexes them. */
    public static String[] getFieldNamesArray() {
        return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"};
    }

    // Sites where both comp and eval produced a phased genotype.
    private int bothPhased() {
        return phasesAgree + phasesDisagree;
    }

    /**
     * Tabular accessor: returns the value for the column at {@code index}
     * (see {@link #getFieldNamesArray()}), or -1 for an unknown index.
     */
    public Object getField(int index) {
        switch (index) {
            case 0:
                return neitherPhased + onlyCompPhased + onlyEvalPhased + bothPhased();
            case 1:
                return neitherPhased;
            case 2:
                return onlyCompPhased;
            case 3:
                return onlyEvalPhased;
            case 4:
                return bothPhased();
            case 5:
                return phasesAgree;
            case 6:
                return phasesDisagree;
            case 7:
                // Switch rate; 0.0 when there are no disagreements (boxes as Double
                // in both branches, matching the original ternary's promoted type).
                return phasesDisagree == 0 ? 0.0 : phasesDisagree / (double) bothPhased();
            default:
                return -1;
        }
    }
}
|
||||
|
||||
/**
|
||||
* a table of sample names to genotype phasing statistics
|
||||
*/
|
||||
class SamplePhasingStatistics extends TableType {
|
||||
private HashMap<String, PhaseStats> sampleStats = null;
|
||||
private double minPhaseQuality;
|
||||
|
||||
public SamplePhasingStatistics(double minPhaseQuality) {
|
||||
this.sampleStats = new HashMap<String, PhaseStats>();
|
||||
this.minPhaseQuality = minPhaseQuality;
|
||||
}
|
||||
|
||||
public PhaseStats ensureSampleStats(String samp) {
|
||||
PhaseStats ps = sampleStats.get(samp);
|
||||
if (ps == null) {
|
||||
ps = new PhaseStats();
|
||||
sampleStats.put(samp, ps);
|
||||
}
|
||||
return ps;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return one row per sample
|
||||
*/
|
||||
public String[] getRowKeys() {
|
||||
return sampleStats.keySet().toArray(new String[sampleStats.size()]);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the column keys
|
||||
*
|
||||
* @return a list of objects, in this case strings, that are the column names
|
||||
*/
|
||||
public String[] getColumnKeys() {
|
||||
return PhaseStats.getFieldNamesArray();
|
||||
}
|
||||
|
||||
public Object getCell(int x, int y) {
|
||||
String[] rowKeys = getRowKeys();
|
||||
PhaseStats ps = sampleStats.get(rowKeys[x]);
|
||||
return ps.getField(y);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "Sample Phasing Statistics (for PQ >= " + minPhaseQuality + ")";
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (Map.Entry<String, PhaseStats> sampPhaseStatsEnt : sampleStats.entrySet()) {
|
||||
String sample = sampPhaseStatsEnt.getKey();
|
||||
PhaseStats ps = sampPhaseStatsEnt.getValue();
|
||||
|
||||
sb.append(sample + "\t" + ps);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Simple utility for histogramming indel lengths
|
||||
*
|
||||
* Based on code from chartl
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 3/21/12
|
||||
*/
|
||||
@Analysis(description = "Indel length histogram", molten = true)
|
||||
public class IndelLengthHistogram extends VariantEvaluator implements StandardEval {
|
||||
private final Map<Integer, Integer> counts = new HashMap<Integer, Integer>();
|
||||
private final boolean asFrequencies;
|
||||
int nIndels = 0;
|
||||
|
||||
@Molten(variableFormat = "%d", valueFormat = "%.2f")
|
||||
public TreeMap<Object, Object> results;
|
||||
|
||||
public final static int MAX_SIZE_FOR_HISTOGRAM = 10;
|
||||
|
||||
public IndelLengthHistogram() {
|
||||
this(MAX_SIZE_FOR_HISTOGRAM, true);
|
||||
}
|
||||
|
||||
public IndelLengthHistogram(int maxSize, boolean asFrequencies) {
|
||||
this.asFrequencies = asFrequencies;
|
||||
initializeCounts(maxSize);
|
||||
}
|
||||
|
||||
private void initializeCounts(int size) {
|
||||
for ( int i = -size; i <= size; i++ ) {
|
||||
if ( i != 0 ) counts.put(i, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
if ( asFrequencies ) {
|
||||
results = new TreeMap<Object, Object>();
|
||||
for ( final int len : counts.keySet() ) {
|
||||
final double value = nIndels == 0 ? 0.0 : counts.get(len) / (1.0 * nIndels);
|
||||
results.put(len, value);
|
||||
}
|
||||
} else {
|
||||
results = new TreeMap<Object, Object>(results);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getComparisonOrder() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update1(final VariantContext eval, final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
if ( eval.isIndel() && ! eval.isComplexIndel() ) {
|
||||
for ( Allele alt : eval.getAlternateAlleles() ) {
|
||||
final int alleleSize = alt.length() - eval.getReference().length();
|
||||
if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference());
|
||||
updateLengthHistogram(eval.getReference(), alt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void updateLengthHistogram(final Allele ref, final Allele alt) {
|
||||
final int len = alt.length() - ref.length();
|
||||
if ( counts.containsKey(len) ) {
|
||||
nIndels++;
|
||||
counts.put(len, counts.get(len) + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.IndelHistogram;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
|
@ -119,9 +119,8 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
int n1bpInsertions = 0, n1bpDeletions = 0;
|
||||
int[] countByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used
|
||||
|
||||
public final static int MAX_SIZE_FOR_HISTOGRAM = 10;
|
||||
@DataPoint(description = "Histogram of indel lengths")
|
||||
IndelHistogram lengthHistogram = new IndelHistogram(MAX_SIZE_FOR_HISTOGRAM, true);
|
||||
|
||||
public final static int LARGE_INDEL_SIZE_THRESHOLD = 10;
|
||||
|
||||
@DataPoint(description = "Number of large (>10 bp) deletions")
|
||||
public int n_large_deletions = 0;
|
||||
|
|
@ -132,12 +131,11 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
@DataPoint(description = "Ratio of large (>10 bp) insertions to deletions")
|
||||
public String insertion_to_deletion_ratio_for_large_indels;
|
||||
|
||||
@Override public boolean enabled() { return true; }
|
||||
@Override public int getComparisonOrder() { return 2; }
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( eval == null || eval.isMonomorphicInSamples() )
|
||||
return null;
|
||||
return;
|
||||
|
||||
// update counts
|
||||
switch ( eval.getType() ) {
|
||||
|
|
@ -176,9 +174,6 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
if ( alleleSize == 1 ) n1bpInsertions++;
|
||||
if ( alleleSize == -1 ) n1bpDeletions++;
|
||||
|
||||
// update the length histogram
|
||||
lengthHistogram.update(eval.getReference(), alt);
|
||||
|
||||
// requires snpEFF annotations
|
||||
if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) {
|
||||
final String effect = eval.getAttributeAsString("SNPEFF_EFFECT", "missing");
|
||||
|
|
@ -191,10 +186,16 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
else
|
||||
; // lots of protein coding effects that shouldn't be counted, such as INTRON
|
||||
}
|
||||
|
||||
if ( alleleSize > LARGE_INDEL_SIZE_THRESHOLD )
|
||||
n_large_insertions++;
|
||||
else if ( alleleSize < -LARGE_INDEL_SIZE_THRESHOLD )
|
||||
n_large_deletions++;
|
||||
|
||||
// update the baby histogram
|
||||
final int absSize = Math.abs(alleleSize);
|
||||
if ( absSize < countByLength.length ) countByLength[absSize]++;
|
||||
|
||||
}
|
||||
|
||||
break;
|
||||
|
|
@ -202,29 +203,26 @@ public class IndelSummary extends VariantEvaluator implements StandardEval {
|
|||
throw new UserException.BadInput("Unexpected variant context type: " + eval);
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
return;
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
percent_of_sites_with_more_than_2_alleles = formattedRatio(nMultiIndelSites, nIndelSites);
|
||||
SNP_to_indel_ratio = formattedRatio(n_SNPs, n_indels);
|
||||
SNP_to_indel_ratio_for_singletons = formattedRatio(n_singleton_SNPs, n_singleton_indels);
|
||||
indel_novelty_rate = formattedNoveltyRate(nKnownIndels, n_indels);
|
||||
ratio_of_1_to_2_bp_indels = formattedRatio(countByLength[1], countByLength[2]);
|
||||
ratio_of_1_to_3_bp_indels = formattedRatio(countByLength[1], countByLength[3]);
|
||||
ratio_of_2_to_3_bp_indels = formattedRatio(countByLength[2], countByLength[3]);
|
||||
ratio_of_1_and_2_to_3_bp_indels = formattedRatio(countByLength[1] + countByLength[2], countByLength[3]);
|
||||
frameshift_rate_for_coding_indels = formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting);
|
||||
percent_of_sites_with_more_than_2_alleles = Utils.formattedRatio(nMultiIndelSites, nIndelSites);
|
||||
SNP_to_indel_ratio = Utils.formattedRatio(n_SNPs, n_indels);
|
||||
SNP_to_indel_ratio_for_singletons = Utils.formattedRatio(n_singleton_SNPs, n_singleton_indels);
|
||||
indel_novelty_rate = Utils.formattedNoveltyRate(nKnownIndels, n_indels);
|
||||
ratio_of_1_to_2_bp_indels = Utils.formattedRatio(countByLength[1], countByLength[2]);
|
||||
ratio_of_1_to_3_bp_indels = Utils.formattedRatio(countByLength[1], countByLength[3]);
|
||||
ratio_of_2_to_3_bp_indels = Utils.formattedRatio(countByLength[2], countByLength[3]);
|
||||
ratio_of_1_and_2_to_3_bp_indels = Utils.formattedRatio(countByLength[1] + countByLength[2], countByLength[3]);
|
||||
frameshift_rate_for_coding_indels = Utils.formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting);
|
||||
|
||||
SNP_het_to_hom_ratio = formattedRatio(nSNPHets, nSNPHoms);
|
||||
indel_het_to_hom_ratio = formattedRatio(nIndelHets, nIndelHoms);
|
||||
|
||||
n_large_deletions = lengthHistogram.getnTooBigDeletions();
|
||||
n_large_insertions = lengthHistogram.getnTooBigInsertions();
|
||||
SNP_het_to_hom_ratio = Utils.formattedRatio(nSNPHets, nSNPHoms);
|
||||
indel_het_to_hom_ratio = Utils.formattedRatio(nIndelHets, nIndelHoms);
|
||||
|
||||
insertion_to_deletion_ratio = formattedRatio(nInsertions, n_indels - nInsertions);
|
||||
insertion_to_deletion_ratio_for_1bp_indels = formattedRatio(n1bpInsertions, n1bpDeletions);
|
||||
insertion_to_deletion_ratio_for_large_indels = formattedRatio(n_large_insertions, n_large_deletions);
|
||||
insertion_to_deletion_ratio = Utils.formattedRatio(nInsertions, n_indels - nInsertions);
|
||||
insertion_to_deletion_ratio_for_1bp_indels = Utils.formattedRatio(n1bpInsertions, n1bpDeletions);
|
||||
insertion_to_deletion_ratio_for_large_indels = Utils.formattedRatio(n_large_insertions, n_large_deletions);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,63 +43,63 @@ import java.util.Set;
|
|||
public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||
|
||||
@DataPoint(description = "Number of variants found with at least one family having genotypes", format = "%d")
|
||||
long nVariants;
|
||||
public long nVariants;
|
||||
@DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall", format = "%d")
|
||||
long nSkipped;
|
||||
public long nSkipped;
|
||||
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual)", format = "%d")
|
||||
long nFamCalled;
|
||||
public long nFamCalled;
|
||||
@DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.", format = "%d")
|
||||
long nVarFamCalled;
|
||||
public long nVarFamCalled;
|
||||
@DataPoint(description="Number of variants x families discarded as low quality", format = "%d")
|
||||
long nLowQual;
|
||||
public long nLowQual;
|
||||
@DataPoint(description="Number of variants x families discarded as no call", format = "%d")
|
||||
long nNoCall;
|
||||
public long nNoCall;
|
||||
@DataPoint(description="Number of loci with mendelian violations", format = "%d")
|
||||
long nLociViolations;
|
||||
public long nLociViolations;
|
||||
@DataPoint(description = "Number of mendelian violations found", format = "%d")
|
||||
long nViolations;
|
||||
public long nViolations;
|
||||
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d")
|
||||
long mvRefRef_Var;
|
||||
public long mvRefRef_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d")
|
||||
long mvRefRef_Het;
|
||||
public long mvRefRef_Het;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR", format = "%d")
|
||||
long mvRefHet_Var;
|
||||
public long mvRefHet_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR", format = "%d")
|
||||
long mvRefVar_Var;
|
||||
public long mvRefVar_Var;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF", format = "%d")
|
||||
long mvRefVar_Ref;
|
||||
public long mvRefVar_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF", format = "%d")
|
||||
long mvVarHet_Ref;
|
||||
public long mvVarHet_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF", format = "%d")
|
||||
long mvVarVar_Ref;
|
||||
public long mvVarVar_Ref;
|
||||
@DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d")
|
||||
long mvVarVar_Het;
|
||||
public long mvVarVar_Het;
|
||||
|
||||
@DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d")
|
||||
long HomRefHomRef_HomRef;
|
||||
public long HomRefHomRef_HomRef;
|
||||
@DataPoint(description="Number of Het/Het/Het trios", format = "%d")
|
||||
long HetHet_Het;
|
||||
public long HetHet_Het;
|
||||
@DataPoint(description="Number of Het/Het/HomRef trios", format = "%d")
|
||||
long HetHet_HomRef;
|
||||
public long HetHet_HomRef;
|
||||
@DataPoint(description="Number of Het/Het/HomVar trios", format = "%d")
|
||||
long HetHet_HomVar;
|
||||
public long HetHet_HomVar;
|
||||
@DataPoint(description="Number of HomVar/HomVar/HomVar trios", format = "%d")
|
||||
long HomVarHomVar_HomVar;
|
||||
public long HomVarHomVar_HomVar;
|
||||
@DataPoint(description="Number of HomRef/HomVar/Het trios", format = "%d")
|
||||
long HomRefHomVAR_Het;
|
||||
public long HomRefHomVAR_Het;
|
||||
@DataPoint(description="Number of ref alleles inherited from het/het parents", format = "%d")
|
||||
long HetHet_inheritedRef;
|
||||
public long HetHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from het/het parents", format = "%d")
|
||||
long HetHet_inheritedVar;
|
||||
public long HetHet_inheritedVar;
|
||||
@DataPoint(description="Number of ref alleles inherited from homRef/het parents", format = "%d")
|
||||
long HomRefHet_inheritedRef;
|
||||
public long HomRefHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from homRef/het parents", format = "%d")
|
||||
long HomRefHet_inheritedVar;
|
||||
public long HomRefHet_inheritedVar;
|
||||
@DataPoint(description="Number of ref alleles inherited from homVar/het parents", format = "%d")
|
||||
long HomVarHet_inheritedRef;
|
||||
public long HomVarHet_inheritedRef;
|
||||
@DataPoint(description="Number of var alleles inherited from homVar/het parents", format = "%d")
|
||||
long HomVarHet_inheritedVar;
|
||||
public long HomVarHet_inheritedVar;
|
||||
|
||||
MendelianViolation mv;
|
||||
Map<String,Set<Sample>> families;
|
||||
|
|
@ -110,10 +110,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
families = walker.getSampleDB().getFamilies();
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "mendelian_violations";
|
||||
}
|
||||
|
|
@ -122,7 +118,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
||||
|
||||
if(mv.countViolations(families,vc)>0){
|
||||
|
|
@ -161,11 +157,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
else{
|
||||
nSkipped++;
|
||||
}
|
||||
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -88,12 +89,11 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
public String indelNoveltyRate = "NA";
|
||||
|
||||
|
||||
@Override public boolean enabled() { return true; }
|
||||
@Override public int getComparisonOrder() { return 2; }
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( eval == null || eval.isMonomorphicInSamples() )
|
||||
return null;
|
||||
return;
|
||||
|
||||
// update counts
|
||||
switch ( eval.getType() ) {
|
||||
|
|
@ -116,7 +116,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
throw new UserException.BadInput("Unexpected variant context type: " + eval);
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
return;
|
||||
}
|
||||
|
||||
private void calculatePairwiseTiTv(VariantContext vc) {
|
||||
|
|
@ -157,7 +157,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva
|
|||
|
||||
TiTvRatio = (double)nTi / (double)nTv;
|
||||
|
||||
SNPNoveltyRate = formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs);
|
||||
indelNoveltyRate = formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs);
|
||||
SNPNoveltyRate = Utils.formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs);
|
||||
indelNoveltyRate = Utils.formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,11 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
@Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites")
|
||||
public class PrintMissingComp extends VariantEvaluator {
|
||||
@DataPoint(description = "number of eval sites outside of comp sites", format = "%d")
|
||||
long nMissing = 0;
|
||||
|
||||
//public PrintMissingComp(VariantEvalWalker parent) {
|
||||
// super(parent);
|
||||
//}
|
||||
public long nMissing = 0;
|
||||
|
||||
public String getName() {
|
||||
return "PrintMissingComp";
|
||||
|
|
@ -48,20 +44,13 @@ public class PrintMissingComp extends VariantEvaluator {
|
|||
return 2; // we need to see each eval track and each comp track
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP();
|
||||
boolean evalIsGood = eval != null && eval.isSNP();
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP();
|
||||
final boolean evalIsGood = eval != null && eval.isSNP();
|
||||
|
||||
if ( compIsGood & ! evalIsGood ) {
|
||||
nMissing++;
|
||||
return "MissingFrom" + comp.getSource();
|
||||
} else {
|
||||
return null;
|
||||
super.getWalker().getLogger().info("MissingFrom" + eval.toString() + " is missing from " + comp.getSource());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -15,30 +15,26 @@ import java.util.concurrent.ConcurrentMap;
|
|||
@Analysis(description = "Computes different estimates of theta based on variant sites and genotypes")
|
||||
public class ThetaVariantEvaluator extends VariantEvaluator {
|
||||
@DataPoint(description = "Average heterozygosity at variant sites; note that missing genotypes are ignored when computing this value", format = "%.8f")
|
||||
double avgHet = 0.0;
|
||||
public double avgHet = 0.0;
|
||||
@DataPoint(description = "Average pairwise differences at aligned sequences; averaged over both number of sequeneces and number of variant sites; note that missing genotypes are ignored when computing this value", format = "%.8f")
|
||||
double avgAvgDiffs = 0.0;
|
||||
public double avgAvgDiffs = 0.0;
|
||||
@DataPoint(description = "Sum of heterozygosity over all variant sites; divide this by total target to get estimate of per base theta", format = "%.8f")
|
||||
double totalHet = 0.0;
|
||||
public double totalHet = 0.0;
|
||||
@DataPoint(description = "Sum of pairwise diffs over all variant sites; divide this by total target to get estimate of per base theta", format = "%.8f")
|
||||
double totalAvgDiffs = 0.0;
|
||||
public double totalAvgDiffs = 0.0;
|
||||
@DataPoint(description = "Theta for entire region estimated based on number of segregating sites; divide ths by total target to get estimate of per base theta", format = "%.8f")
|
||||
double thetaRegionNumSites = 0.0;
|
||||
public double thetaRegionNumSites = 0.0;
|
||||
|
||||
//helper variables
|
||||
double numSites = 0;
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphicInSamples()) {
|
||||
return null; //no interesting sites
|
||||
return;
|
||||
}
|
||||
|
||||
//this maps allele to a count
|
||||
|
|
@ -107,8 +103,6 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
|||
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -11,29 +11,24 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
|||
|
||||
@Analysis(description = "Ti/Tv Variant Evaluator")
|
||||
public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval {
|
||||
|
||||
@DataPoint(description = "number of transition loci", format = "%d")
|
||||
long nTi = 0;
|
||||
public long nTi = 0;
|
||||
@DataPoint(description = "number of transversion loci", format = "%d")
|
||||
long nTv = 0;
|
||||
public long nTv = 0;
|
||||
@DataPoint(description = "the transition to transversion ratio", format = "%.2f")
|
||||
double tiTvRatio = 0.0;
|
||||
public double tiTvRatio = 0.0;
|
||||
@DataPoint(description = "number of comp transition sites", format = "%d")
|
||||
long nTiInComp = 0;
|
||||
public long nTiInComp = 0;
|
||||
@DataPoint(description = "number of comp transversion sites", format = "%d")
|
||||
long nTvInComp = 0;
|
||||
public long nTvInComp = 0;
|
||||
@DataPoint(description = "the transition to transversion ratio for comp sites", format = "%.2f")
|
||||
double TiTvRatioStandard = 0.0;
|
||||
public double TiTvRatioStandard = 0.0;
|
||||
@DataPoint(description = "number of derived transition loci", format = "%d")
|
||||
long nTiDerived = 0;
|
||||
public long nTiDerived = 0;
|
||||
@DataPoint(description = "number of derived transversion loci", format = "%d")
|
||||
long nTvDerived = 0;
|
||||
public long nTvDerived = 0;
|
||||
@DataPoint(description = "the derived transition to transversion ratio", format = "%.2f")
|
||||
double tiTvDerivedRatio = 0.0;
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
public double tiTvDerivedRatio = 0.0;
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 2; // we only need to see each eval track
|
||||
|
|
@ -62,11 +57,9 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
}
|
||||
}
|
||||
|
||||
public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc1 != null) updateTiTv(vc1, false);
|
||||
if (vc2 != null) updateTiTv(vc2, true);
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -24,29 +24,29 @@ import java.util.Collection;
|
|||
@Analysis(description = "Assess site accuracy and sensitivity of callset against follow-up validation assay")
|
||||
public class ValidationReport extends VariantEvaluator implements StandardEval {
|
||||
// todo -- note this isn't strictly allele away. It's really focused on sites. A/T call at a validated A/G site is currently counted as a TP
|
||||
@DataPoint(description = "nComp", format = "%d") int nComp = 0;
|
||||
@DataPoint(description = "TP", format = "%d") int TP = 0;
|
||||
@DataPoint(description = "FP", format = "%d") int FP = 0;
|
||||
@DataPoint(description = "FN", format = "%d") int FN = 0;
|
||||
@DataPoint(description = "TN", format = "%d") int TN = 0;
|
||||
@DataPoint(description = "nComp", format = "%d") public int nComp = 0;
|
||||
@DataPoint(description = "TP", format = "%d") public int TP = 0;
|
||||
@DataPoint(description = "FP", format = "%d") public int FP = 0;
|
||||
@DataPoint(description = "FN", format = "%d") public int FN = 0;
|
||||
@DataPoint(description = "TN", format = "%d") public int TN = 0;
|
||||
|
||||
@DataPoint(description = "Sensitivity", format = "%.2f") double sensitivity = 0;
|
||||
@DataPoint(description = "Specificity", format = "%.2f") double specificity = 0;
|
||||
@DataPoint(description = "PPV", format = "%.2f") double PPV = 0;
|
||||
@DataPoint(description = "FDR", format = "%.2f") double FDR = 0;
|
||||
@DataPoint(description = "Sensitivity", format = "%.2f") public double sensitivity = 0;
|
||||
@DataPoint(description = "Specificity", format = "%.2f") public double specificity = 0;
|
||||
@DataPoint(description = "PPV", format = "%.2f") public double PPV = 0;
|
||||
@DataPoint(description = "FDR", format = "%.2f") public double FDR = 0;
|
||||
|
||||
@DataPoint(description = "CompMonoEvalNoCall", format = "%d") int CompMonoEvalNoCall = 0;
|
||||
@DataPoint(description = "CompMonoEvalFiltered", format = "%d") int CompMonoEvalFiltered = 0;
|
||||
@DataPoint(description = "CompMonoEvalMono", format = "%d") int CompMonoEvalMono = 0;
|
||||
@DataPoint(description = "CompMonoEvalPoly", format = "%d") int CompMonoEvalPoly = 0;
|
||||
@DataPoint(description = "CompMonoEvalNoCall", format = "%d") public int CompMonoEvalNoCall = 0;
|
||||
@DataPoint(description = "CompMonoEvalFiltered", format = "%d") public int CompMonoEvalFiltered = 0;
|
||||
@DataPoint(description = "CompMonoEvalMono", format = "%d") public int CompMonoEvalMono = 0;
|
||||
@DataPoint(description = "CompMonoEvalPoly", format = "%d") public int CompMonoEvalPoly = 0;
|
||||
|
||||
@DataPoint(description = "CompPolyEvalNoCall", format = "%d") int CompPolyEvalNoCall = 0;
|
||||
@DataPoint(description = "CompPolyEvalFiltered", format = "%d") int CompPolyEvalFiltered = 0;
|
||||
@DataPoint(description = "CompPolyEvalMono", format = "%d") int CompPolyEvalMono = 0;
|
||||
@DataPoint(description = "CompPolyEvalPoly", format = "%d") int CompPolyEvalPoly = 0;
|
||||
@DataPoint(description = "CompPolyEvalNoCall", format = "%d") public int CompPolyEvalNoCall = 0;
|
||||
@DataPoint(description = "CompPolyEvalFiltered", format = "%d") public int CompPolyEvalFiltered = 0;
|
||||
@DataPoint(description = "CompPolyEvalMono", format = "%d") public int CompPolyEvalMono = 0;
|
||||
@DataPoint(description = "CompPolyEvalPoly", format = "%d") public int CompPolyEvalPoly = 0;
|
||||
|
||||
@DataPoint(description = "CompFiltered", format = "%d") int CompFiltered = 0;
|
||||
@DataPoint(description = "Eval and comp have different alleles", format = "%d") int nDifferentAlleleSites = 0;
|
||||
@DataPoint(description = "CompFiltered", format = "%d") public int CompFiltered = 0;
|
||||
@DataPoint(description = "Eval and comp have different alleles", format = "%d") public int nDifferentAlleleSites = 0;
|
||||
|
||||
private static final boolean TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED = true;
|
||||
private static final boolean REQUIRE_IDENTICAL_ALLELES = false;
|
||||
|
|
@ -57,7 +57,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
final int[][] counts = new int[SiteStatus.values().length][SiteStatus.values().length];
|
||||
|
||||
@Override public int getComparisonOrder() { return 2; }
|
||||
@Override public boolean enabled() { return true; }
|
||||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
|
|
@ -97,7 +96,7 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( comp != null ) { // we only need to consider sites in comp
|
||||
if ( REQUIRE_IDENTICAL_ALLELES && (eval != null && haveDifferentAltAlleles(eval, comp)))
|
||||
nDifferentAlleleSites++;
|
||||
|
|
@ -107,8 +106,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
|||
counts[compStatus.ordinal()][evalStatus.ordinal()]++;
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
//
|
||||
|
|
|
|||
|
|
@ -22,8 +22,6 @@ public abstract class VariantEvaluator implements Comparable<VariantEvaluator> {
|
|||
return walker;
|
||||
}
|
||||
|
||||
public abstract boolean enabled();
|
||||
|
||||
// Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2
|
||||
public abstract int getComparisonOrder();
|
||||
|
||||
|
|
@ -31,12 +29,10 @@ public abstract class VariantEvaluator implements Comparable<VariantEvaluator> {
|
|||
// No longer available. The processed bp is kept in VEW itself for performance reasons
|
||||
// public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return null;
|
||||
public void update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return null;
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {}
|
||||
|
|
@ -63,39 +59,6 @@ public abstract class VariantEvaluator implements Comparable<VariantEvaluator> {
|
|||
return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats the novelty rate as a %.2f string
|
||||
*
|
||||
* @param known number of variants from all that are known
|
||||
* @param all number of all variants
|
||||
* @return a String novelty rate, or NA if all == 0
|
||||
*/
|
||||
protected static String formattedNoveltyRate(final int known, final int all) {
|
||||
return formattedPercent(all - known, all);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats the novelty rate as a %.2f string
|
||||
*
|
||||
* @param x number of objects part of total that meet some criteria
|
||||
* @param total count of all objects, including x
|
||||
* @return a String percent rate, or NA if total == 0
|
||||
*/
|
||||
protected static String formattedPercent(final int x, final int total) {
|
||||
return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats a ratio as a %.2f string
|
||||
*
|
||||
* @param num number of observations in the numerator
|
||||
* @param denom number of observations in the denumerator
|
||||
* @return a String formatted ratio, or NA if all == 0
|
||||
*/
|
||||
protected static String formattedRatio(final int num, final int denom) {
|
||||
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
|
||||
}
|
||||
|
||||
public String getSimpleName() {
|
||||
return simpleName;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
|
@ -44,207 +43,207 @@ import java.util.HashMap;
|
|||
* @since Apr 6, 2010
|
||||
*/
|
||||
|
||||
@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
|
||||
public class VariantQualityScore extends VariantEvaluator {
|
||||
//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
|
||||
@Deprecated
|
||||
public class VariantQualityScore {
|
||||
// TODO - this should really be a stratification
|
||||
|
||||
// a mapping from quality score histogram bin to Ti/Tv ratio
|
||||
@DataPoint(description = "the Ti/Tv ratio broken out by variant quality")
|
||||
TiTvStats titvStats = null;
|
||||
|
||||
@DataPoint(description = "average variant quality for each allele count")
|
||||
AlleleCountStats alleleCountStats = null;
|
||||
|
||||
static class TiTvStats extends TableType {
|
||||
final static int NUM_BINS = 20;
|
||||
final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
|
||||
final long transitionByQuality[] = new long[NUM_BINS];
|
||||
final long transversionByQuality[] = new long[NUM_BINS];
|
||||
final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
|
||||
|
||||
public Object[] getRowKeys() {
|
||||
return new String[]{"sample"};
|
||||
}
|
||||
|
||||
public Object[] getColumnKeys() {
|
||||
final String columnKeys[] = new String[NUM_BINS];
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
columnKeys[iii] = "titvBin" + iii;
|
||||
}
|
||||
return columnKeys;
|
||||
}
|
||||
|
||||
public String getCell(int x, int y) {
|
||||
return String.valueOf(titvByQuality[y]);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer returnString = new StringBuffer();
|
||||
// output the ti/tv array
|
||||
returnString.append("titvByQuality: ");
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
returnString.append(titvByQuality[iii]);
|
||||
returnString.append(" ");
|
||||
}
|
||||
return returnString.toString();
|
||||
}
|
||||
|
||||
public void incrValue( final double qual, final boolean isTransition ) {
|
||||
final Integer qualKey = Math.round((float) qual);
|
||||
final long numTransition = (isTransition ? 1L : 0L);
|
||||
final long numTransversion = (isTransition ? 0L : 1L);
|
||||
if( qualByIsTransition.containsKey(qualKey) ) {
|
||||
Pair<Long,Long> transitionPair = qualByIsTransition.get(qualKey);
|
||||
transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion);
|
||||
qualByIsTransition.put(qualKey, transitionPair);
|
||||
} else {
|
||||
qualByIsTransition.put(qualKey, new Pair<Long,Long>(numTransition,numTransversion));
|
||||
}
|
||||
}
|
||||
|
||||
public void organizeTiTvTables() {
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
transitionByQuality[iii] = 0L;
|
||||
transversionByQuality[iii] = 0L;
|
||||
titvByQuality[iii] = 0.0;
|
||||
}
|
||||
|
||||
int maxQual = 0;
|
||||
|
||||
// Calculate the maximum quality score in order to normalize and histogram
|
||||
for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||
if( qual > maxQual ) {
|
||||
maxQual = qual;
|
||||
}
|
||||
}
|
||||
|
||||
final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1));
|
||||
|
||||
for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||
final int index = (int)Math.floor( ((double) qual) / binSize );
|
||||
if( index >= 0 ) { // BUGBUG: why is there overflow here?
|
||||
Pair<Long,Long> transitionPair = qualByIsTransition.get(qual);
|
||||
transitionByQuality[index] += transitionPair.getFirst();
|
||||
transversionByQuality[index] += transitionPair.getSecond();
|
||||
}
|
||||
}
|
||||
|
||||
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
|
||||
titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
|
||||
} else {
|
||||
titvByQuality[iii] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
class AlleleCountStats extends TableType {
|
||||
final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
|
||||
final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
|
||||
|
||||
public Object[] getRowKeys() {
|
||||
final int NUM_BINS = qualityListMap.keySet().size();
|
||||
final String rowKeys[] = new String[NUM_BINS];
|
||||
int iii = 0;
|
||||
for( final Integer key : qualityListMap.keySet() ) {
|
||||
rowKeys[iii] = "AC" + key;
|
||||
iii++;
|
||||
}
|
||||
return rowKeys;
|
||||
|
||||
}
|
||||
|
||||
public Object[] getColumnKeys() {
|
||||
return new String[]{"alleleCount","avgQual"};
|
||||
}
|
||||
|
||||
public String getCell(int x, int y) {
|
||||
int iii = 0;
|
||||
for( final Integer key : qualityListMap.keySet() ) {
|
||||
if(iii == x) {
|
||||
if(y == 0) { return String.valueOf(key); }
|
||||
else { return String.valueOf(qualityMap.get(key)); }
|
||||
}
|
||||
iii++;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
String returnString = "";
|
||||
// output the quality map
|
||||
returnString += "AlleleCountStats: ";
|
||||
//for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// returnString += titvByQuality[iii] + " ";
|
||||
//}
|
||||
return returnString;
|
||||
}
|
||||
|
||||
public void incrValue( final double qual, final int alleleCount ) {
|
||||
ArrayList<Double> list = qualityListMap.get(alleleCount);
|
||||
if(list==null) { list = new ArrayList<Double>(); }
|
||||
list.add(qual);
|
||||
qualityListMap.put(alleleCount, list);
|
||||
}
|
||||
|
||||
public void organizeAlleleCountTables() {
|
||||
for( final Integer key : qualityListMap.keySet() ) {
|
||||
final ArrayList<Double> list = qualityListMap.get(key);
|
||||
double meanQual = 0.0;
|
||||
final double numQuals = (double)list.size();
|
||||
for( Double qual : list ) {
|
||||
meanQual += qual / numQuals;
|
||||
}
|
||||
qualityMap.put(key, meanQual);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//public VariantQualityScore(VariantEvalWalker parent) {
|
||||
//super(parent);
|
||||
//}
|
||||
|
||||
public String getName() {
|
||||
return "VariantQualityScore";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 1; // we only need to see each eval track
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getName();
|
||||
}
|
||||
|
||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final String interesting = null;
|
||||
|
||||
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||
|
||||
if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
|
||||
int alternateAlleleCount = 0;
|
||||
for (final Allele a : eval.getAlternateAlleles()) {
|
||||
alternateAlleleCount += eval.getCalledChrCount(a);
|
||||
}
|
||||
alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
|
||||
}
|
||||
|
||||
return interesting; // This module doesn't capture any interesting sites, so return null
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
if( titvStats != null ) {
|
||||
titvStats.organizeTiTvTables();
|
||||
}
|
||||
if( alleleCountStats != null ) {
|
||||
alleleCountStats.organizeAlleleCountTables();
|
||||
}
|
||||
}
|
||||
// public class VariantQualityScore extends VariantEvaluator {
|
||||
//
|
||||
// // a mapping from quality score histogram bin to Ti/Tv ratio
|
||||
// @DataPoint(description = "the Ti/Tv ratio broken out by variant quality")
|
||||
// TiTvStats titvStats = null;
|
||||
//
|
||||
// @DataPoint(description = "average variant quality for each allele count")
|
||||
// AlleleCountStats alleleCountStats = null;
|
||||
//
|
||||
// static class TiTvStats extends TableType {
|
||||
// final static int NUM_BINS = 20;
|
||||
// final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
|
||||
// final long transitionByQuality[] = new long[NUM_BINS];
|
||||
// final long transversionByQuality[] = new long[NUM_BINS];
|
||||
// final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
|
||||
//
|
||||
// public Object[] getRowKeys() {
|
||||
// return new String[]{"sample"};
|
||||
// }
|
||||
//
|
||||
// public Object[] getColumnKeys() {
|
||||
// final String columnKeys[] = new String[NUM_BINS];
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// columnKeys[iii] = "titvBin" + iii;
|
||||
// }
|
||||
// return columnKeys;
|
||||
// }
|
||||
//
|
||||
// public String getCell(int x, int y) {
|
||||
// return String.valueOf(titvByQuality[y]);
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// StringBuffer returnString = new StringBuffer();
|
||||
// // output the ti/tv array
|
||||
// returnString.append("titvByQuality: ");
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// returnString.append(titvByQuality[iii]);
|
||||
// returnString.append(" ");
|
||||
// }
|
||||
// return returnString.toString();
|
||||
// }
|
||||
//
|
||||
// public void incrValue( final double qual, final boolean isTransition ) {
|
||||
// final Integer qualKey = Math.round((float) qual);
|
||||
// final long numTransition = (isTransition ? 1L : 0L);
|
||||
// final long numTransversion = (isTransition ? 0L : 1L);
|
||||
// if( qualByIsTransition.containsKey(qualKey) ) {
|
||||
// Pair<Long,Long> transitionPair = qualByIsTransition.get(qualKey);
|
||||
// transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion);
|
||||
// qualByIsTransition.put(qualKey, transitionPair);
|
||||
// } else {
|
||||
// qualByIsTransition.put(qualKey, new Pair<Long,Long>(numTransition,numTransversion));
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// public void organizeTiTvTables() {
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// transitionByQuality[iii] = 0L;
|
||||
// transversionByQuality[iii] = 0L;
|
||||
// titvByQuality[iii] = 0.0;
|
||||
// }
|
||||
//
|
||||
// int maxQual = 0;
|
||||
//
|
||||
// // Calculate the maximum quality score in order to normalize and histogram
|
||||
// for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||
// if( qual > maxQual ) {
|
||||
// maxQual = qual;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1));
|
||||
//
|
||||
// for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||
// final int index = (int)Math.floor( ((double) qual) / binSize );
|
||||
// if( index >= 0 ) { // BUGBUG: why is there overflow here?
|
||||
// Pair<Long,Long> transitionPair = qualByIsTransition.get(qual);
|
||||
// transitionByQuality[index] += transitionPair.getFirst();
|
||||
// transversionByQuality[index] += transitionPair.getSecond();
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
|
||||
// titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
|
||||
// } else {
|
||||
// titvByQuality[iii] = 0.0;
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// class AlleleCountStats extends TableType {
|
||||
// final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
|
||||
// final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
|
||||
//
|
||||
// public Object[] getRowKeys() {
|
||||
// final int NUM_BINS = qualityListMap.keySet().size();
|
||||
// final String rowKeys[] = new String[NUM_BINS];
|
||||
// int iii = 0;
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// rowKeys[iii] = "AC" + key;
|
||||
// iii++;
|
||||
// }
|
||||
// return rowKeys;
|
||||
//
|
||||
// }
|
||||
//
|
||||
// public Object[] getColumnKeys() {
|
||||
// return new String[]{"alleleCount","avgQual"};
|
||||
// }
|
||||
//
|
||||
// public String getCell(int x, int y) {
|
||||
// int iii = 0;
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// if(iii == x) {
|
||||
// if(y == 0) { return String.valueOf(key); }
|
||||
// else { return String.valueOf(qualityMap.get(key)); }
|
||||
// }
|
||||
// iii++;
|
||||
// }
|
||||
// return null;
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// String returnString = "";
|
||||
// // output the quality map
|
||||
// returnString += "AlleleCountStats: ";
|
||||
// //for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||
// // returnString += titvByQuality[iii] + " ";
|
||||
// //}
|
||||
// return returnString;
|
||||
// }
|
||||
//
|
||||
// public void incrValue( final double qual, final int alleleCount ) {
|
||||
// ArrayList<Double> list = qualityListMap.get(alleleCount);
|
||||
// if(list==null) { list = new ArrayList<Double>(); }
|
||||
// list.add(qual);
|
||||
// qualityListMap.put(alleleCount, list);
|
||||
// }
|
||||
//
|
||||
// public void organizeAlleleCountTables() {
|
||||
// for( final Integer key : qualityListMap.keySet() ) {
|
||||
// final ArrayList<Double> list = qualityListMap.get(key);
|
||||
// double meanQual = 0.0;
|
||||
// final double numQuals = (double)list.size();
|
||||
// for( Double qual : list ) {
|
||||
// meanQual += qual / numQuals;
|
||||
// }
|
||||
// qualityMap.put(key, meanQual);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
//
|
||||
// //public VariantQualityScore(VariantEvalWalker parent) {
|
||||
// //super(parent);
|
||||
// //}
|
||||
//
|
||||
// public String getName() {
|
||||
// return "VariantQualityScore";
|
||||
// }
|
||||
//
|
||||
// public int getComparisonOrder() {
|
||||
// return 1; // we only need to see each eval track
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// return getName();
|
||||
// }
|
||||
//
|
||||
// public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
// final String interesting = null;
|
||||
//
|
||||
// if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||
// if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||
// titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||
//
|
||||
// if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
|
||||
// int alternateAlleleCount = 0;
|
||||
// for (final Allele a : eval.getAlternateAlleles()) {
|
||||
// alternateAlleleCount += eval.getCalledChrCount(a);
|
||||
// }
|
||||
// alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
|
||||
// }
|
||||
//
|
||||
// return interesting; // This module doesn't capture any interesting sites, so return null
|
||||
// }
|
||||
//
|
||||
// public void finalizeEvaluation() {
|
||||
// if( titvStats != null ) {
|
||||
// titvStats.organizeTiTvTables();
|
||||
// }
|
||||
// if( alleleCountStats != null ) {
|
||||
// alleleCountStats.organizeAlleleCountTables();
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
|||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
|
@ -169,8 +170,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
}
|
||||
}
|
||||
|
||||
@Override public boolean enabled() { return true; }
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 2; // we only need to see each eval track
|
||||
}
|
||||
|
|
@ -207,8 +206,8 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
return false;
|
||||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( eval == null || eval.isMonomorphicInSamples() ) return null;
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( eval == null || eval.isMonomorphicInSamples() ) return;
|
||||
|
||||
final Type type = getType(eval);
|
||||
|
||||
|
|
@ -243,14 +242,12 @@ public class VariantSummary extends VariantEvaluator implements StandardEval {
|
|||
depthPerSample.inc(type, g.getSampleName());
|
||||
}
|
||||
}
|
||||
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
private String noveltyRate(Type type) {
|
||||
final int all = allVariantCounts.all(type);
|
||||
final int known = knownVariantCounts.all(type);
|
||||
return formattedNoveltyRate(known, all);
|
||||
return Utils.formattedNoveltyRate(known, all);
|
||||
}
|
||||
|
||||
public void finalizeEvaluation() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,361 @@
|
|||
//package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.genotypePhasingEvaluator;
|
||||
//
|
||||
//import org.apache.log4j.Logger;
|
||||
//import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
//import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
//import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
//import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair;
|
||||
//import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker;
|
||||
//import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
//import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator;
|
||||
//import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
|
||||
//import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
|
||||
//import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext;
|
||||
//import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
//import org.broadinstitute.sting.utils.MathUtils;
|
||||
//import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
//import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
//import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
//
|
||||
//import java.util.HashMap;
|
||||
//import java.util.HashSet;
|
||||
//import java.util.Set;
|
||||
//
|
||||
///*
|
||||
// * Copyright (c) 2010 The Broad Institute
|
||||
// *
|
||||
// * Permission is hereby granted, free of charge, to any person
|
||||
// * obtaining a copy of this software and associated documentation
|
||||
// * files (the "Software"), to deal in the Software without
|
||||
// * restriction, including without limitation the rights to use,
|
||||
// * copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// * copies of the Software, and to permit persons to whom the
|
||||
// * Software is furnished to do so, subject to the following
|
||||
// * conditions:
|
||||
// *
|
||||
// * The above copyright notice and this permission notice shall be
|
||||
// * included in all copies or substantial portions of the Software.
|
||||
// *
|
||||
// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
// * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
// */
|
||||
//
|
||||
//@Analysis(name = "Genotype Phasing Evaluation", description = "Evaluates the phasing of genotypes in different tracks")
|
||||
//public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||
// protected final static Logger logger = Logger.getLogger(GenotypePhasingEvaluator.class);
|
||||
//
|
||||
// // a mapping from sample to stats
|
||||
// @DataPoint(description = "the phasing statistics for each sample")
|
||||
// public SamplePhasingStatistics samplePhasingStatistics = null;
|
||||
//
|
||||
// SamplePreviousGenotypes samplePrevGenotypes = null;
|
||||
//
|
||||
// double minPhaseQuality = 10.0;
|
||||
//
|
||||
// public void initialize(VariantEvalWalker walker) {
|
||||
// super.initialize(walker);
|
||||
// this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality());
|
||||
// this.samplePrevGenotypes = new SamplePreviousGenotypes();
|
||||
// }
|
||||
//
|
||||
// public String getName() {
|
||||
// return "GenotypePhasingEvaluator";
|
||||
// }
|
||||
//
|
||||
// public int getComparisonOrder() {
|
||||
// return 2; // we only need to see pairs of (comp, eval)
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// return getName() + ": <table>";
|
||||
// }
|
||||
//
|
||||
// public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
// update2(eval,comp,tracker,ref,context,null);
|
||||
// }
|
||||
//
|
||||
// public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, EvaluationContext group) {
|
||||
// //public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||
// Reasons interesting = new Reasons();
|
||||
// if (ref == null)
|
||||
// return interesting.toString();
|
||||
// GenomeLoc curLocus = ref.getLocus();
|
||||
//
|
||||
// logger.debug("update2() locus: " + curLocus);
|
||||
// logger.debug("comp = " + comp + " eval = " + eval);
|
||||
//
|
||||
// Set<String> allSamples = new HashSet<String>();
|
||||
//
|
||||
// GenotypesContext compSampGenotypes = null;
|
||||
// if (isRelevantToPhasing(comp)) {
|
||||
// allSamples.addAll(comp.getSampleNames());
|
||||
// compSampGenotypes = comp.getGenotypes();
|
||||
// }
|
||||
//
|
||||
// GenotypesContext evalSampGenotypes = null;
|
||||
// if (isRelevantToPhasing(eval)) {
|
||||
// allSamples.addAll(eval.getSampleNames());
|
||||
// evalSampGenotypes = eval.getGenotypes();
|
||||
// }
|
||||
//
|
||||
// for (String samp : allSamples) {
|
||||
// logger.debug("sample = " + samp);
|
||||
//
|
||||
// Genotype compSampGt = null;
|
||||
// if (compSampGenotypes != null)
|
||||
// compSampGt = compSampGenotypes.get(samp);
|
||||
//
|
||||
// Genotype evalSampGt = null;
|
||||
// if (evalSampGenotypes != null)
|
||||
// evalSampGt = evalSampGenotypes.get(samp);
|
||||
//
|
||||
// if (compSampGt == null || evalSampGt == null || compSampGt.isNoCall() || evalSampGt.isNoCall()) { // Since either comp or eval (or both) are missing the site, the best we can do is hope to preserve phase [if the non-missing one preserves phase]
|
||||
// // Having an unphased site breaks the phasing for the sample [does NOT permit "transitive phasing"] - hence, must reset phasing knowledge for both comp and eval [put a null CompEvalGenotypes]:
|
||||
// if (isNonNullButUnphased(compSampGt) || isNonNullButUnphased(evalSampGt))
|
||||
// samplePrevGenotypes.put(samp, null);
|
||||
// }
|
||||
// else { // Both comp and eval have a non-null Genotype at this site:
|
||||
// AllelePair compAllelePair = new AllelePair(compSampGt);
|
||||
// AllelePair evalAllelePair = new AllelePair(evalSampGt);
|
||||
//
|
||||
// boolean breakPhasing = false;
|
||||
// if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom())
|
||||
// breakPhasing = true; // since they are not both het or both hom
|
||||
// else { // both are het, or both are hom:
|
||||
// boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compAllelePair, evalAllelePair) && bottomMatchesBottom(compAllelePair, evalAllelePair));
|
||||
// boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compAllelePair, evalAllelePair) && bottomMatchesTop(compAllelePair, evalAllelePair));
|
||||
// if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop)
|
||||
// breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample
|
||||
// }
|
||||
//
|
||||
// if (breakPhasing) {
|
||||
// samplePrevGenotypes.put(samp, null); // nothing to do for this site, AND must remove any history for the future
|
||||
// }
|
||||
// else if (compSampGt.isHet() && evalSampGt.isHet()) {
|
||||
// /* comp and eval have the HET same Genotype at this site:
|
||||
// [Note that if both are hom, then nothing is done here, but the het history IS preserved].
|
||||
// */
|
||||
// CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp);
|
||||
// if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome
|
||||
// prevCompAndEval = null;
|
||||
//
|
||||
// // Replace the previous hets with the current hets:
|
||||
// samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt);
|
||||
//
|
||||
// if (prevCompAndEval != null) {
|
||||
// GenomeLoc prevLocus = prevCompAndEval.getLocus();
|
||||
// logger.debug("Potentially phaseable het locus: " + curLocus + " [relative to previous het locus: " + prevLocus + "]");
|
||||
// PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp);
|
||||
//
|
||||
// boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt);
|
||||
// boolean evalSampIsPhased = genotypesArePhasedAboveThreshold(evalSampGt);
|
||||
// if (compSampIsPhased || evalSampIsPhased) {
|
||||
// if (!evalSampIsPhased) {
|
||||
// ps.onlyCompPhased++;
|
||||
// //interesting.addReason("ONLY_COMP", samp, group, prevLocus, "");
|
||||
// }
|
||||
// else if (!compSampIsPhased) {
|
||||
// ps.onlyEvalPhased++;
|
||||
// //interesting.addReason("ONLY_EVAL", samp, group, prevLocus, "");
|
||||
// }
|
||||
// else { // both comp and eval are phased:
|
||||
// AllelePair prevCompAllelePair = new AllelePair(prevCompAndEval.getCompGenotpye());
|
||||
// AllelePair prevEvalAllelePair = new AllelePair(prevCompAndEval.getEvalGenotype());
|
||||
//
|
||||
// // Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample:
|
||||
// boolean topsMatch = (topMatchesTop(prevCompAllelePair, prevEvalAllelePair) && topMatchesTop(compAllelePair, evalAllelePair));
|
||||
// boolean topMatchesBottom = (topMatchesBottom(prevCompAllelePair, prevEvalAllelePair) && topMatchesBottom(compAllelePair, evalAllelePair));
|
||||
//
|
||||
// if (topsMatch || topMatchesBottom) {
|
||||
// ps.phasesAgree++;
|
||||
//
|
||||
// Double compPQ = getPQ(compSampGt);
|
||||
// Double evalPQ = getPQ(evalSampGt);
|
||||
// if (compPQ != null && evalPQ != null && MathUtils.compareDoubles(compPQ, evalPQ) != 0) {
|
||||
// //interesting.addReason("PQ_CHANGE", samp, group, prevLocus, compPQ + " -> " + evalPQ);
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
// ps.phasesDisagree++;
|
||||
// logger.debug("SWITCHED locus: " + curLocus);
|
||||
// //interesting.addReason("SWITCH", samp, group, prevLocus, toString(prevCompAllelePair, compAllelePair) + " -> " + toString(prevEvalAllelePair, evalAllelePair));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else {
|
||||
// ps.neitherPhased++;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// logger.debug("\n" + samplePhasingStatistics + "\n");
|
||||
//
|
||||
// return interesting.toString();
|
||||
// }
|
||||
//
|
||||
// public static boolean isRelevantToPhasing(VariantContext vc) {
|
||||
// return (vc != null && !vc.isFiltered());
|
||||
// }
|
||||
//
|
||||
// public boolean isNonNullButUnphased(Genotype gt) {
|
||||
// return (gt != null && !gt.isNoCall() && !genotypesArePhasedAboveThreshold(gt));
|
||||
// }
|
||||
//
|
||||
// public boolean genotypesArePhasedAboveThreshold(Genotype gt) {
|
||||
// if (gt.isHom()) // Can always consider a hom site to be phased to its predecessor, since its successor will only be phased to it if it's hom or "truly" phased
|
||||
// return true;
|
||||
//
|
||||
// if (!gt.isPhased())
|
||||
// return false;
|
||||
//
|
||||
// Double pq = getPQ(gt);
|
||||
// return (pq == null || pq >= minPhaseQuality);
|
||||
// }
|
||||
//
|
||||
// public static Double getPQ(Genotype gt) {
|
||||
// Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||
// return d == -1 ? null : d;
|
||||
// }
|
||||
//
|
||||
// public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {
|
||||
// return b1.getTopAllele().equals(b2.getTopAllele());
|
||||
// }
|
||||
//
|
||||
// public static boolean topMatchesBottom(AllelePair b1, AllelePair b2) {
|
||||
// return b1.getTopAllele().equals(b2.getBottomAllele());
|
||||
// }
|
||||
//
|
||||
// public static boolean bottomMatchesTop(AllelePair b1, AllelePair b2) {
|
||||
// return topMatchesBottom(b2, b1);
|
||||
// }
|
||||
//
|
||||
// public static boolean bottomMatchesBottom(AllelePair b1, AllelePair b2) {
|
||||
// return b1.getBottomAllele().equals(b2.getBottomAllele());
|
||||
// }
|
||||
//
|
||||
// public String toString(AllelePair prev, AllelePair cur) {
|
||||
// return prev.getTopAllele().getBaseString() + "+" + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "+" + cur.getBottomAllele().getBaseString();
|
||||
// }
|
||||
//
|
||||
// public void finalizeEvaluation() {
|
||||
// }
|
||||
//
|
||||
// private static class Reasons {
|
||||
// private StringBuilder sb;
|
||||
//
|
||||
// public Reasons() {
|
||||
// sb = new StringBuilder();
|
||||
// }
|
||||
//
|
||||
//// public void addReason(String category, String sample, VariantEvalWalker.EvaluationContext evalGroup, GenomeLoc prevLoc, String reason) {
|
||||
//// sb.append(category + "(" + sample + ", previous: " + prevLoc + " [" + evalGroup.compTrackName + ", " + evalGroup.evalTrackName + "]): " + reason + ";");
|
||||
//// }
|
||||
//
|
||||
// public String toString() {
|
||||
// if (sb.length() == 0)
|
||||
// return null;
|
||||
//
|
||||
// return "reasons=" + sb.toString();
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//class CompEvalGenotypes {
|
||||
// private GenomeLoc loc;
|
||||
// private Genotype compGt;
|
||||
// private Genotype evalGt;
|
||||
//
|
||||
// public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
|
||||
// this.loc = loc;
|
||||
// this.compGt = compGt;
|
||||
// this.evalGt = evalGt;
|
||||
// }
|
||||
//
|
||||
// public GenomeLoc getLocus() {
|
||||
// return loc;
|
||||
// }
|
||||
//
|
||||
// public Genotype getCompGenotpye() {
|
||||
// return compGt;
|
||||
// }
|
||||
// public Genotype getEvalGenotype() {
|
||||
// return evalGt;
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//class SamplePreviousGenotypes {
|
||||
// private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
|
||||
//
|
||||
// public SamplePreviousGenotypes() {
|
||||
// this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
|
||||
// }
|
||||
//
|
||||
// public CompEvalGenotypes get(String sample) {
|
||||
// return sampleGenotypes.get(sample);
|
||||
// }
|
||||
//
|
||||
// public void put(String sample, CompEvalGenotypes compEvalGts) {
|
||||
// sampleGenotypes.put(sample, compEvalGts);
|
||||
// }
|
||||
//
|
||||
// public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) {
|
||||
// sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt));
|
||||
// }
|
||||
//}
|
||||
//
|
||||
//class PhaseStats {
|
||||
// public int neitherPhased;
|
||||
// public int onlyCompPhased;
|
||||
// public int onlyEvalPhased;
|
||||
// public int phasesAgree;
|
||||
// public int phasesDisagree;
|
||||
//
|
||||
// public PhaseStats() {
|
||||
// this.neitherPhased = 0;
|
||||
// this.onlyCompPhased = 0;
|
||||
// this.onlyEvalPhased = 0;
|
||||
// this.phasesAgree = 0;
|
||||
// this.phasesDisagree = 0;
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// StringBuilder sb = new StringBuilder();
|
||||
// sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree);
|
||||
// return sb.toString();
|
||||
// }
|
||||
//
|
||||
// public static String[] getFieldNamesArray() {
|
||||
// return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"};
|
||||
// }
|
||||
//
|
||||
// public Object getField(int index) {
|
||||
// switch (index) {
|
||||
// case (0):
|
||||
// return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree);
|
||||
// case (1):
|
||||
// return neitherPhased;
|
||||
// case (2):
|
||||
// return onlyCompPhased;
|
||||
// case (3):
|
||||
// return onlyEvalPhased;
|
||||
// case (4):
|
||||
// return (phasesAgree + phasesDisagree);
|
||||
// case (5):
|
||||
// return phasesAgree;
|
||||
// case (6):
|
||||
// return phasesDisagree;
|
||||
// case (7):
|
||||
// return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree));
|
||||
// default:
|
||||
// return -1;
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
//
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
///*
|
||||
// * Copyright (c) 2012, The Broad Institute
|
||||
// *
|
||||
// * Permission is hereby granted, free of charge, to any person
|
||||
// * obtaining a copy of this software and associated documentation
|
||||
// * files (the "Software"), to deal in the Software without
|
||||
// * restriction, including without limitation the rights to use,
|
||||
// * copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// * copies of the Software, and to permit persons to whom the
|
||||
// * Software is furnished to do so, subject to the following
|
||||
// * conditions:
|
||||
// *
|
||||
// * The above copyright notice and this permission notice shall be
|
||||
// * included in all copies or substantial portions of the Software.
|
||||
// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// * OTHER DEALINGS IN THE SOFTWARE.
|
||||
// */
|
||||
//
|
||||
//package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.genotypePhasingEvaluator;
|
||||
//
|
||||
//import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
|
||||
//
|
||||
//import java.util.HashMap;
|
||||
//import java.util.Map;
|
||||
//
|
||||
///**
|
||||
// * a table of sample names to genotype phasing statistics
|
||||
// */
|
||||
//class SamplePhasingStatistics extends TableType {
|
||||
// private HashMap<String, PhaseStats> sampleStats = null;
|
||||
// private double minPhaseQuality;
|
||||
//
|
||||
// public SamplePhasingStatistics(double minPhaseQuality) {
|
||||
// this.sampleStats = new HashMap<String, PhaseStats>();
|
||||
// this.minPhaseQuality = minPhaseQuality;
|
||||
// }
|
||||
//
|
||||
// public PhaseStats ensureSampleStats(String samp) {
|
||||
// PhaseStats ps = sampleStats.get(samp);
|
||||
// if (ps == null) {
|
||||
// ps = new PhaseStats();
|
||||
// sampleStats.put(samp, ps);
|
||||
// }
|
||||
// return ps;
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * @return one row per sample
|
||||
// */
|
||||
// public String[] getRowKeys() {
|
||||
// return sampleStats.keySet().toArray(new String[sampleStats.size()]);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * get the column keys
|
||||
// *
|
||||
// * @return a list of objects, in this case strings, that are the column names
|
||||
// */
|
||||
// public String[] getColumnKeys() {
|
||||
// return PhaseStats.getFieldNamesArray();
|
||||
// }
|
||||
//
|
||||
// public Object getCell(int x, int y) {
|
||||
// String[] rowKeys = getRowKeys();
|
||||
// PhaseStats ps = sampleStats.get(rowKeys[x]);
|
||||
// return ps.getField(y);
|
||||
// }
|
||||
//
|
||||
// public String getName() {
|
||||
// return "Sample Phasing Statistics (for PQ >= " + minPhaseQuality + ")";
|
||||
// }
|
||||
//
|
||||
// public String toString() {
|
||||
// StringBuilder sb = new StringBuilder();
|
||||
// for (Map.Entry<String, PhaseStats> sampPhaseStatsEnt : sampleStats.entrySet()) {
|
||||
// String sample = sampPhaseStatsEnt.getKey();
|
||||
// PhaseStats ps = sampPhaseStatsEnt.getValue();
|
||||
//
|
||||
// sb.append(sample + "\t" + ps);
|
||||
// }
|
||||
// return sb.toString();
|
||||
// }
|
||||
//}
|
||||
|
|
@ -41,15 +41,15 @@ public class AlleleCount extends VariantStratifier {
|
|||
|
||||
public List<Object> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||
if (eval != null) {
|
||||
int AC = -1;
|
||||
int AC = 0; // by default, the site is considered monomorphic
|
||||
|
||||
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
|
||||
AC = eval.getAttributeAsInt("AC", 0);
|
||||
} else if ( eval.isVariant() ) {
|
||||
for (Allele allele : eval.getAlternateAlleles())
|
||||
AC = Math.max(AC, eval.getCalledChrCount(allele));
|
||||
} else
|
||||
// by default, the site is considered monomorphic
|
||||
AC = 0;
|
||||
}
|
||||
|
||||
return Collections.singletonList((Object) AC);
|
||||
} else {
|
||||
return Collections.emptyList();
|
||||
|
|
@ -60,4 +60,9 @@ public class AlleleCount extends VariantStratifier {
|
|||
public Set<Class<? extends VariantEvaluator>> getIncompatibleEvaluators() {
|
||||
return new HashSet<Class<? extends VariantEvaluator>>(Arrays.asList(VariantSummary.class));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getFormat() {
|
||||
return "%d";
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import java.util.List;
|
|||
*/
|
||||
public class IndelSize extends VariantStratifier {
|
||||
static final int MAX_INDEL_SIZE = 100;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
for( int a=-MAX_INDEL_SIZE; a <=MAX_INDEL_SIZE; a++ ) {
|
||||
|
|
|
|||
|
|
@ -64,7 +64,9 @@ public abstract class VariantStratifier implements Comparable<VariantStratifier>
|
|||
public final String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
|
||||
public String getFormat() { return "%s"; }
|
||||
|
||||
public final ArrayList<Object> getAllStates() {
|
||||
return states;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,4 +7,5 @@ import java.lang.annotation.RetentionPolicy;
|
|||
public @interface Analysis {
|
||||
String name() default ""; // its description, required
|
||||
String description(); // its description, required
|
||||
boolean molten() default false; // if true we'll look for a @Molten map
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,10 @@ public class AnalysisModuleScanner {
|
|||
// what we extracted from the class
|
||||
private Map<Field, DataPoint> datums = new LinkedHashMap<Field, DataPoint>(); // the data we've discovered
|
||||
private Analysis analysis; // the analysis annotation
|
||||
|
||||
|
||||
private Field moltenField = null;
|
||||
private Molten moltenAnnotation = null;
|
||||
|
||||
// private storage of the class type
|
||||
private final Class cls;
|
||||
|
||||
|
|
@ -85,14 +88,38 @@ public class AnalysisModuleScanner {
|
|||
private void scanFields() {
|
||||
// get the fields from the class, and extract
|
||||
for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) {
|
||||
for (Field f : superCls.getDeclaredFields())
|
||||
for (Field f : superCls.getDeclaredFields()) {
|
||||
for (Annotation annotation : getAnnotations(f)) {
|
||||
if (annotation.annotationType().equals(DataPoint.class))
|
||||
datums.put(f,(DataPoint) annotation);
|
||||
if ( annotation.annotationType().equals(Molten.class)) {
|
||||
if ( hasMoltenField() )
|
||||
throw new ReviewedStingException("Analysis " + analysis.name() + " has multiple @Molten fields, which is forbidden");
|
||||
moltenField = f;
|
||||
moltenAnnotation = (Molten)annotation;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( hasMoltenField() ) {
|
||||
if ( datums.size() > 0 )
|
||||
throw new ReviewedStingException("Analysis " + analysis.name() + " has an @Molten field as well as @DataPoint fields, which is forbidden");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public Field getMoltenField() {
|
||||
return moltenField;
|
||||
}
|
||||
|
||||
public Molten getMoltenAnnotation() {
|
||||
return moltenAnnotation;
|
||||
}
|
||||
|
||||
public boolean hasMoltenField() {
|
||||
return getMoltenField() != null;
|
||||
}
|
||||
|
||||
private Annotation[] getAnnotations(final Field field) {
|
||||
final String fieldName = field.toString();
|
||||
Annotation[] annotations = annotationCache.get(fieldName);
|
||||
|
|
|
|||
|
|
@ -25,9 +25,9 @@ public final class EvaluationContext {
|
|||
eval.initialize(walker);
|
||||
evaluationInstances.add(eval);
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'");
|
||||
throw new ReviewedStingException("Unable to instantiate eval module '" + c.getSimpleName() + "'", e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException("Illegal access error when trying to instantiate eval module '" + c.getSimpleName() + "'");
|
||||
throw new ReviewedStingException("Illegal access error when trying to instantiate eval module '" + c.getSimpleName() + "'", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,113 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.util;
|
||||
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Simple utility for histogramming indel lengths
|
||||
*
|
||||
* Based on code from chartl
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 3/21/12
|
||||
*/
|
||||
public class IndelHistogram extends TableType {
|
||||
private final boolean asFrequencies;
|
||||
int nIndels = 0, nTooBigDeletions = 0, nTooBigInsertions = 0;
|
||||
private final Integer[] rowKeys;
|
||||
|
||||
private Map<Integer, Double> frequencies = null;
|
||||
private final Map<Integer, Integer> counts = new HashMap<Integer, Integer>();
|
||||
|
||||
public IndelHistogram(int maxSize, boolean asFrequencies) {
|
||||
this.asFrequencies = asFrequencies;
|
||||
initializeCounts(maxSize);
|
||||
this.rowKeys = new ArrayList<Integer>(counts.keySet()).toArray(new Integer[maxSize]);
|
||||
}
|
||||
|
||||
private void initializeCounts(int size) {
|
||||
for ( int i = -size; i <= size; i++ ) {
|
||||
if ( i != 0 ) counts.put(i, 0);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getRowName() {
|
||||
return "Length";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object[] getColumnKeys() {
|
||||
return new String[]{"Count"};
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object[] getRowKeys() {
|
||||
return rowKeys;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getCell(int row, int col) {
|
||||
final int key = (Integer)getRowKeys()[row];
|
||||
if ( asFrequencies ) {
|
||||
if ( frequencies == null ) {
|
||||
frequencies = new HashMap<Integer, Double>();
|
||||
for ( final int len : counts.keySet() ) {
|
||||
final double value = nIndels == 0 ? 0.0 : counts.get(len) / (1.0 * nIndels);
|
||||
frequencies.put(len, value);
|
||||
}
|
||||
}
|
||||
return frequencies.get(key);
|
||||
}
|
||||
return counts.get(key);
|
||||
}
|
||||
|
||||
public int getnTooBigDeletions() {
|
||||
return nTooBigDeletions;
|
||||
}
|
||||
|
||||
public int getnTooBigInsertions() {
|
||||
return nTooBigInsertions;
|
||||
}
|
||||
|
||||
public void update(final Allele ref, final Allele alt) {
|
||||
final int alleleSize = alt.length() - ref.length();
|
||||
update(alleleSize);
|
||||
}
|
||||
|
||||
public void update(int len) {
|
||||
if ( counts.containsKey(len) ) {
|
||||
nIndels++;
|
||||
counts.put(len, counts.get(len) + 1);
|
||||
} else if ( len < 0 ) {
|
||||
nTooBigDeletions++;
|
||||
} else {
|
||||
nTooBigInsertions++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.varianteval.util;
|
||||
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
|
||||
/**
|
||||
* @Molten for @Analysis modules.
|
||||
*
|
||||
* If you are flagged as a molten analysis, then there must be one and
|
||||
* only one annotation in that evaluation module: @Molten which
|
||||
* must have time Map<Object, Object>. This data set will then
|
||||
* be represented in the VE output as:
|
||||
*
|
||||
* variable value
|
||||
* key1 value1
|
||||
* key2 value1
|
||||
* ...
|
||||
* keyN valueN
|
||||
*
|
||||
* in the output table
|
||||
*/
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
public @interface Molten {
|
||||
String description() default ""; // the description, optional
|
||||
String variableFormat() default "";
|
||||
String valueFormat() default "";
|
||||
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.util;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class TableType
|
||||
*
|
||||
* an interface for turning arbritary objects into tables
|
||||
*/
|
||||
public abstract class TableType {
|
||||
public abstract Object[] getRowKeys();
|
||||
public abstract Object[] getColumnKeys();
|
||||
public abstract Object getCell(int x, int y);
|
||||
public String getName() { return getClass().getSimpleName(); }
|
||||
public String getRowName() { return "row"; }
|
||||
|
||||
}
|
||||
|
|
@ -180,56 +180,6 @@ public class VariantEvalUtils {
|
|||
return evals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the output report
|
||||
*
|
||||
* @param stratificationObjects the stratifications to use
|
||||
* @param evaluationObjects the evaluations to use
|
||||
* @return an initialized report object
|
||||
*/
|
||||
public GATKReport initializeGATKReport(Collection<VariantStratifier> stratificationObjects, Set<Class<? extends VariantEvaluator>> evaluationObjects) {
|
||||
final GATKReport report = new GATKReport();
|
||||
|
||||
for (Class<? extends VariantEvaluator> ve : evaluationObjects) {
|
||||
final String tableName = ve.getSimpleName();
|
||||
final String tableDesc = ve.getAnnotation(Analysis.class).description();
|
||||
|
||||
report.addTable(tableName, tableDesc);
|
||||
|
||||
final GATKReportTable table = report.getTable(tableName);
|
||||
table.addPrimaryKey("entry", false);
|
||||
table.addColumn(tableName, tableName);
|
||||
|
||||
for (final VariantStratifier vs : stratificationObjects) {
|
||||
final String columnName = vs.getName();
|
||||
table.addColumn(columnName, "unknown");
|
||||
}
|
||||
|
||||
try {
|
||||
final VariantEvaluator vei = ve.newInstance();
|
||||
vei.initialize(variantEvalWalker);
|
||||
|
||||
AnalysisModuleScanner scanner = new AnalysisModuleScanner(vei);
|
||||
Map<Field, DataPoint> datamap = scanner.getData();
|
||||
|
||||
for (Field field : datamap.keySet()) {
|
||||
field.setAccessible(true);
|
||||
|
||||
if (!(field.get(vei) instanceof TableType)) {
|
||||
final String format = datamap.get(field).format();
|
||||
table.addColumn(field.getName(), true, format);
|
||||
}
|
||||
}
|
||||
} catch (InstantiationException e) {
|
||||
throw new StingException("InstantiationException: " + e);
|
||||
} catch (IllegalAccessException e) {
|
||||
throw new StingException("IllegalAccessException: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
return report;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subset a VariantContext to a single sample
|
||||
*
|
||||
|
|
|
|||
|
|
@ -743,4 +743,37 @@ public class Utils {
|
|||
return nStates;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats the novelty rate as a %.2f string
|
||||
*
|
||||
* @param known number of variants from all that are known
|
||||
* @param all number of all variants
|
||||
* @return a String novelty rate, or NA if all == 0
|
||||
*/
|
||||
public static String formattedNoveltyRate(final int known, final int all) {
|
||||
return formattedPercent(all - known, all);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats the novelty rate as a %.2f string
|
||||
*
|
||||
* @param x number of objects part of total that meet some criteria
|
||||
* @param total count of all objects, including x
|
||||
* @return a String percent rate, or NA if total == 0
|
||||
*/
|
||||
public static String formattedPercent(final long x, final long total) {
|
||||
return total == 0 ? "NA" : String.format("%.2f", (100.0*x) / total);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience function that formats a ratio as a %.2f string
|
||||
*
|
||||
* @param num number of observations in the numerator
|
||||
* @param denom number of observations in the denumerator
|
||||
* @return a String formatted ratio, or NA if all == 0
|
||||
*/
|
||||
public static String formattedRatio(final long num, final long denom) {
|
||||
return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public abstract class BaseTest {
|
|||
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
|
||||
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
|
||||
|
||||
public static final boolean REQUIRE_NETWORK_CONNECTION = false;
|
||||
public static final boolean REQUIRE_NETWORK_CONNECTION = true;
|
||||
public static final String networkTempDir;
|
||||
public static final File networkTempDirFile;
|
||||
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("add8b2213c091a41f5d7a2c8dd68c03a")
|
||||
Arrays.asList("e87932ffa1d310cecee49e7829a0f056")
|
||||
);
|
||||
executeTest("testFunctionClassWithSnpeff", spec);
|
||||
}
|
||||
|
|
@ -75,7 +75,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("621a712deb01e7fc7e5a13d3627b11ba")
|
||||
Arrays.asList("8279ee42a6785f9c2b3dda8d82674e00")
|
||||
);
|
||||
executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec);
|
||||
}
|
||||
|
|
@ -95,7 +95,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("7a726ecbedd722fa7cd4de3e023b7a82")
|
||||
Arrays.asList("0bac64d5615f901d3005247c6d016549")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndels", spec);
|
||||
}
|
||||
|
|
@ -116,7 +116,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("95bb4a4267a8f29dd7a8169561499f20")
|
||||
Arrays.asList("b84d8b4429116c887ceb5489c8782f00")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
|
||||
}
|
||||
|
|
@ -138,7 +138,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("9b51029083495935823fb0447a2857b9")
|
||||
Arrays.asList("e4f37642d9113a65fbe8bc1d091c206f")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
|
||||
}
|
||||
|
|
@ -159,7 +159,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("318b5fbbc61e2fc11d49369359812edd")
|
||||
Arrays.asList("c5412ee824b4815dc8eea62a4c5462ef")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec);
|
||||
}
|
||||
|
|
@ -180,7 +180,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("74c02df2ef69dda231a2aec2a948747b")
|
||||
Arrays.asList("1d42e97643afd3e7f5f8c9f6416c5883")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec);
|
||||
}
|
||||
|
|
@ -201,7 +201,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("2d97b1fe15e532e89803ba7ba347ff20")
|
||||
Arrays.asList("8c2ba70bed2f0fdb0ca371f7038819ef")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec);
|
||||
}
|
||||
|
|
@ -222,7 +222,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("474cbc231ddbc4ba299ffe61a17405b6")
|
||||
Arrays.asList("c912b4b0bf1925d042119b301c183b93")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec);
|
||||
}
|
||||
|
|
@ -245,7 +245,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("2cc9bc4bbe8b4edb6dc27642ec41f66e")
|
||||
Arrays.asList("dea3d2cc53265ff8ed2f0030c40f3747")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec);
|
||||
}
|
||||
|
|
@ -270,7 +270,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("00c94cf3e14bc2855d39bbefa27f9bb2")
|
||||
Arrays.asList("dede22b15936c38e29b850c805c7b706")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec);
|
||||
}
|
||||
|
|
@ -289,7 +289,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("a0c0d4805db1245aa30a306aa506096f")
|
||||
Arrays.asList("9a94c4c613bf69feb3d9579c353baaf2")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsNoCompRod", spec);
|
||||
}
|
||||
|
|
@ -312,7 +312,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG --eval:VCF3 " + validationDataLocation + vcfFile + " --comp:VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
|
||||
1,
|
||||
Arrays.asList("70da6a0f91a9f1052d68fc360cc99aed"));
|
||||
Arrays.asList("9bbc762f459023af0480774eb2986af4"));
|
||||
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
}
|
||||
|
||||
|
|
@ -330,7 +330,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCompVsEvalAC() {
|
||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2282523336c24d434d1cc0eb1697b4f9"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("8a7f23063fd7f3a292e5da36778e109e"));
|
||||
executeTestParallel("testCompVsEvalAC",spec);
|
||||
}
|
||||
|
||||
|
|
@ -348,7 +348,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCompOverlap() {
|
||||
String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("00241ce70476187a2f910606b9242697"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("98f9c2f5fef43dbda688d32360908615"));
|
||||
executeTestParallel("testCompOverlap",spec);
|
||||
}
|
||||
|
||||
|
|
@ -360,7 +360,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" --dbsnp " + b37dbSNP132 +
|
||||
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ec321fcc424fbad74a4a74e739173d03"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70b0e5b154f3e59e06188e876bbf083f"));
|
||||
executeTestParallel("testEvalTrackWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
|
|
@ -372,7 +372,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" +
|
||||
" --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" +
|
||||
" -noST -ST Novelty -o %s";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ccaea6245086552cd63f828eabddfaf3"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9fe68cc45d9afc5210ccfc8d555722fd"));
|
||||
executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec);
|
||||
}
|
||||
|
||||
|
|
@ -389,13 +389,13 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" -noST -noEV -ST Novelty -EV CompOverlap" +
|
||||
" -o %s";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("15f6a6ba4f7fed49c617589ce9fdcbc5"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d0218c5435c8601f2355b7d183ab032f"));
|
||||
executeTestParallel("testMultipleCompTracks",spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPerSampleAndSubsettedSampleHaveSameResults1() {
|
||||
String md5 = "bcf55537db0762b8fd68f7f02439c475";
|
||||
String md5 = "b5cd5c286d459b8edd4ca54320e560a3";
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
|
|
@ -450,7 +450,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("9954c769ef37c47d3b61481ab0807be0")
|
||||
Arrays.asList("1198bfea6183bd43219071a84c79a386")
|
||||
);
|
||||
executeTest("testAlleleCountStrat", spec);
|
||||
}
|
||||
|
|
@ -471,7 +471,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("c0d69ce7647a575d166d8bab5aa16299")
|
||||
Arrays.asList("6decba040051daafad4ecad5a411e1e1")
|
||||
);
|
||||
executeTest("testIntervalStrat", spec);
|
||||
}
|
||||
|
|
@ -488,7 +488,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("9a8ffb506118c1bde6f7bfadc4fb6f10")
|
||||
Arrays.asList("c428a76df5039e1e035e3ce45e819d4f")
|
||||
);
|
||||
executeTest("testModernVCFWithLargeIndels", spec);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue