diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java index 8b54442b0..1e798143a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java +++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumn.java @@ -132,7 +132,6 @@ public class GATKReportColumn extends LinkedHashMap { private static final Collection RIGHT_ALIGN_STRINGS = Arrays.asList( "null", "NA", - "unknown", String.valueOf(Double.POSITIVE_INFINITY), String.valueOf(Double.NEGATIVE_INFINITY), String.valueOf(Double.NaN)); @@ -214,7 +213,7 @@ public class GATKReportColumn extends LinkedHashMap { public Object put(Object key, Object value) { if (value != null) { String formatted = formatValue(value); - if (!formatted.equals("") && !formatted.equals("unknown")) { + if (!formatted.equals("")) { updateMaxWidth(formatted); updateFormat(formatted); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java new file mode 100644 index 000000000..ca659dc9e --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java @@ -0,0 +1,183 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; +import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.VariantStratifier; +import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.manager.StratificationManager; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.*; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.exceptions.StingException; + +import java.io.PrintStream; +import java.lang.reflect.Field; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +/** + * Class for writing the GATKReport for VariantEval + */ +public class VariantEvalReportWriter { + private final GATKReport report; + private final StratificationManager stratManager; + + public VariantEvalReportWriter(final StratificationManager stratManager, + final Collection stratifiers, + final Collection evaluators) { + this.stratManager = stratManager; + this.report = initializeGATKReport(stratifiers, evaluators); + } + + public final void writeReport(final PrintStream out) { + for ( int key = 0; key < stratManager.size(); key++ ) { + final String stratStateString = stratManager.getStratsAndStatesForKeyString(key); + final List> stratsAndStates = stratManager.getStratsAndStatesForKey(key); + final EvaluationContext nec = stratManager.get(key); + + for ( final VariantEvaluator ve : nec.getVariantEvaluators() ) { + ve.finalizeEvaluation(); + final GATKReportTable table = report.getTable(ve.getSimpleName()); + + final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); + final Map datamap = scanner.getData(); + try { + if ( scanner.hasMoltenField() ) { + final Field field = scanner.getMoltenField(); + final Object fieldValue = field.get(ve); + + if ( ! (fieldValue instanceof Map) ) + throw new ReviewedStingException("BUG field " + field.getName() + " must be an instance of Map in " + scanner.getAnalysis().name()); + final Map map = (Map)fieldValue; + int counter = 0; // counter is used to ensure printing order is as defined by entrySet + for ( Map.Entry keyValue : map.entrySet() ) { + // "%05d" is a terrible hack to ensure sort order + final String moltenStratStateString = stratStateString + String.format("%05d", counter++); + setStratificationColumns(table, moltenStratStateString, stratsAndStates); + table.set(moltenStratStateString, "variable", keyValue.getKey()); + table.set(moltenStratStateString, "value", keyValue.getValue()); + } + } else { + setStratificationColumns(table, stratStateString, stratsAndStates); + for ( final Field field : datamap.keySet()) { + table.set(stratStateString, field.getName(), field.get(ve)); + } + } + } catch (IllegalAccessException e) { + throw new ReviewedStingException("BUG: analysis field not public: " + e); + } + } + } + + report.print(out); + } + + /** + * Common utility to configure a GATKReportTable columns + * + * Sets the column names to the strat names in stratsAndStates for the primary key in table + * + * @param table + * @param primaryKey + * @param stratsAndStates + */ + private final void setStratificationColumns(final GATKReportTable table, + final String primaryKey, + final List> stratsAndStates) { + for ( final Pair stratAndState : stratsAndStates ) { + final VariantStratifier vs = stratAndState.getFirst(); + final String columnName = vs.getName(); + final Object strat = stratAndState.getSecond(); + if ( columnName == null || strat == null ) + throw new ReviewedStingException("Unexpected null variant stratifier state at " + table + " key = " + primaryKey); + table.set(primaryKey, columnName, strat); + } + } + + /** + * Initialize the output report + * + * We have a set of stratifiers and evaluation objects. We need to create tables that look like: + * + * strat1 strat2 ... stratN eval1.field1 eval1.field2 ... eval1.fieldM + * + * for each eval. + * + * Note that this procedure doesn't support the creation of the old TableType system. As the + * VariantEvaluators are effectively tables themselves, we require authors to just create new + * evaluation modules externally instead of allow them to embed them in other evaluation modules + * + * @return an initialized report object + */ + public GATKReport initializeGATKReport(final Collection stratifiers, + final Collection evaluators) { + final GATKReport report = new GATKReport(); + + for (final VariantEvaluator ve : evaluators) { + final String tableName = ve.getSimpleName(); + final String tableDesc = ve.getClass().getAnnotation(Analysis.class).description(); + + report.addTable(tableName, tableDesc, true); + + final GATKReportTable table = report.getTable(tableName); + table.addPrimaryKey("entry", false); + table.addColumn(tableName, tableName); + + // create a column to hold each startifier state + for (final VariantStratifier vs : stratifiers) { + final String columnName = vs.getName(); + table.addColumn(columnName, null, vs.getFormat()); + } + + final AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); + final Map datamap = scanner.getData(); + + // deal with the molten issue + if ( scanner.hasMoltenField() ) { + table.addColumn("variable", true, scanner.getMoltenAnnotation().variableFormat()); + table.addColumn("value", true, scanner.getMoltenAnnotation().valueFormat()); + } else { + for (final Field field : datamap.keySet()) { + try { + field.setAccessible(true); + + // this is an atomic value, add a column for it + final String format = datamap.get(field).format(); + table.addColumn(field.getName(), true, format); + } catch (SecurityException e) { + throw new StingException("SecurityException: " + e); + } + } + } + } + + return report; + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java index 08cc4b442..a0e76cc17 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java @@ -205,9 +205,6 @@ public class VariantEvalWalker extends RodWalker implements Tr private boolean byFilterIsEnabled = false; private boolean perSampleIsEnabled = false; - // Output report - private GATKReport report = null; - // Public constants private static String ALL_SAMPLE_NAME = "all"; @@ -293,9 +290,6 @@ public class VariantEvalWalker extends RodWalker implements Tr // Initialize the evaluation contexts createStratificationStates(stratificationObjects, evaluationClasses); - // Initialize report table - report = variantEvalUtils.initializeGATKReport(stratificationObjects, evaluationClasses); - // Load ancestral alignments if (ancestralAlignmentsFile != null) { try { @@ -547,109 +541,8 @@ public class VariantEvalWalker extends RodWalker implements Tr */ public void onTraversalDone(Integer result) { logger.info("Finalizing variant report"); - - for ( int key = 0; key < stratManager.size(); key++ ) { - final String stratStateString = stratManager.getStratsAndStatesForKeyString(key); - final List> stratsAndStates = stratManager.getStratsAndStatesForKey(key); - final EvaluationContext nec = stratManager.get(key); - - for ( final VariantEvaluator ve : nec.getVariantEvaluators() ) { - ve.finalizeEvaluation(); - - AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve); - Map datamap = scanner.getData(); - - for ( final Field field : datamap.keySet()) { - try { - field.setAccessible(true); - - if (field.get(ve) instanceof TableType) { - TableType t = (TableType) field.get(ve); - - final String subTableName = ve.getSimpleName() + "." + field.getName(); - final DataPoint dataPointAnn = datamap.get(field); - - if (! report.hasTable(subTableName)) - configureNewReportTable(t, subTableName, dataPointAnn); - - final GATKReportTable table = report.getTable(subTableName); - - for (int row = 0; row < t.getRowKeys().length; row++) { - final String r = t.getRowKeys()[row].toString(); - final String newStratStateString = stratStateString + r; - - setTableColumnNames(table, newStratStateString, stratsAndStates); - - for (int col = 0; col < t.getColumnKeys().length; col++) { - final String c = t.getColumnKeys()[col].toString(); - table.set(newStratStateString, c, t.getCell(row, col)); - table.set(newStratStateString, t.getRowName(), r); - } - } - } else { - final GATKReportTable table = report.getTable(ve.getSimpleName()); - setTableColumnNames(table, stratStateString, stratsAndStates); - table.set(stratStateString, field.getName(), field.get(ve)); - } - } catch (IllegalAccessException e) { - throw new StingException("IllegalAccessException: " + e); - } - } - } - } - - report.print(out); - } - - /** - * A common utility function to set up the GATKReportTable for an embedded TableType in - * a VariantEvaluation - * - * @param t - * @param subTableName - * @param dataPointAnn - */ - @Requires({"t != null", "subTableName != null", "dataPointAnn != null", "!report.hasTable(subTableName)"}) - @Ensures({"report.hasTable(subTableName)"}) - private final void configureNewReportTable(final TableType t, final String subTableName, final DataPoint dataPointAnn) { - // basic table configuration. Set up primary key, dummy column names - report.addTable(subTableName, dataPointAnn.description()); - final GATKReportTable table = report.getTable(subTableName); - - table.addPrimaryKey("entry", false); - table.addColumn(subTableName, subTableName); - - for ( final VariantStratifier vs : stratManager.getStratifiers() ) { - table.addColumn(vs.getName(), "unknown"); - } - - table.addColumn(t.getRowName(), "unknown"); - - for ( final Object o : t.getColumnKeys() ) { - table.addColumn(o.toString(), 0.0); - } - } - - /** - * Common utility to configure a GATKReportTable columns - * - * Sets the column names to the strat names in stratsAndStates for the primary key in table - * - * @param table - * @param primaryKey - * @param stratsAndStates - */ - private final void setTableColumnNames(final GATKReportTable table, - final String primaryKey, - final List> stratsAndStates) { - for ( final Pair stratAndState : stratsAndStates ) { - final VariantStratifier vs = stratAndState.getFirst(); - final String columnName = vs.getName(); - final Object strat = stratAndState.getSecond(); - if ( columnName == null || strat == null ) - throw new ReviewedStingException("Unexpected null variant stratifier state at " + table + " key = " + primaryKey); - table.set(primaryKey, columnName, strat); - } + final VariantEvalReportWriter writer = new VariantEvalReportWriter(stratManager, stratManager.getStratifiers(), stratManager.get(0).getVariantEvaluators()); + writer.writeReport(out); } // Accessors diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java index 8ef362ba5..c14754715 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java @@ -20,22 +20,22 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(description = "The overlap between eval and comp sites") public class CompOverlap extends VariantEvaluator implements StandardEval { @DataPoint(description = "number of eval variant sites", format = "%d") - long nEvalVariants = 0; + public long nEvalVariants = 0; @DataPoint(description = "number of eval sites outside of comp sites", format = "%d") - long novelSites = 0; + public long novelSites = 0; @DataPoint(description = "number of eval sites at comp sites", format = "%d") - long nVariantsAtComp = 0; + public long nVariantsAtComp = 0; @DataPoint(description = "percentage of eval sites at comp sites", format = "%.2f" ) - double compRate = 0.0; + public double compRate = 0.0; @DataPoint(description = "number of concordant sites", format = "%d") - long nConcordant = 0; + public long nConcordant = 0; @DataPoint(description = "the concordance rate", format = "%.2f") - double concordantRate = 0.0; + public double concordantRate = 0.0; public int getComparisonOrder() { return 2; // we need to see each eval track and each comp track @@ -51,10 +51,6 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { novelSites = nNovelSites(); } - public boolean enabled() { - return true; - } - /** * Returns true if every allele in eval is also in comp * @@ -71,7 +67,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { return false; } - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { boolean evalIsGood = eval != null && eval.isPolymorphicInSamples(); boolean compIsGood = comp != null && comp.isNotFiltered(); @@ -84,7 +80,5 @@ public class CompOverlap extends VariantEvaluator implements StandardEval { nConcordant++; } } - - return null; // we don't capture any interesting sites } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java index 3a2635121..73eb61110 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(description = "Counts different classes of variants in the sample") public class CountVariants extends VariantEvaluator implements StandardEval { - // the following fields are in output order: // basic counts on various rates found @@ -81,15 +80,12 @@ public class CountVariants extends VariantEvaluator implements StandardEval { return inverseRate(n, nProcessedLoci); } - public boolean enabled() { - return true; - } public int getComparisonOrder() { return 1; // we only need to see each eval track } - public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { nCalledLoci++; // Note from Eric: @@ -135,12 +131,9 @@ public class CountVariants extends VariantEvaluator implements StandardEval { } } - String refStr = vc1.getReference().getBaseString().toUpperCase(); - - String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null; -// if (aaStr.equals(".")) { -// aaStr = refStr; -// } + // these operations are ordered to ensure that we don't get the base string of the ref unless we need it + final String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null; + final String refStr = aaStr != null ? vc1.getReference().getBaseString().toUpperCase() : null; // ref aa alt class // A C A der homozygote @@ -183,8 +176,6 @@ public class CountVariants extends VariantEvaluator implements StandardEval { throw new ReviewedStingException("BUG: Unexpected genotype type: " + g); } } - - return null; // we don't capture any interesting sites } public void finalizeEvaluation() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java old mode 100755 new mode 100644 index 75aacf5ba..09315db73 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java @@ -5,12 +5,8 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; -import org.broadinstitute.sting.utils.exceptions.StingException; -import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.variantcontext.Genotype; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -41,271 +37,67 @@ import java.util.*; * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -@Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks") +/** + * a table of sample names to genotype concordance figures + */ +@Analysis(name = "Genotype Concordance Detailed", description = "Determine the genotype concordance between the genotypes in difference tracks, and concordance statistics") public class GenotypeConcordance extends VariantEvaluator { - private static final boolean PRINT_INTERESTING_SITES = true; - protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class); - // a mapping from sample to stats - @DataPoint(description = "the detailed concordance statistics for each sample") - SampleStats detailedStats = null; + @Molten(variableFormat = "%s", valueFormat = "%s") + public final Map map = new TreeMap(); - // a mapping from sample to stats summary - @DataPoint(description = "the simplified concordance statistics for each sample") - SampleSummaryStats simplifiedStats = null; + // concordance counts + private final long[][] truthByCalledGenotypeCounts; - private static final int MAX_MISSED_VALIDATION_DATA = 100; - - private boolean discordantInteresting = false; - - static class FrequencyStats extends TableType { - class Stats { - public Stats(int found, int missed) { nFound = found; nMissed = missed; } - public long nFound = 0; - public long nMissed = 0; - } - public HashMap foundMissedByAC = new HashMap(); - - public Object[] getRowKeys() { - String rows[] = new String[foundMissedByAC.size()]; - int index = 0; - for (int i : foundMissedByAC.keySet()) rows[index++] = "AlleleCount_" + i; - return rows; - } - - public Object[] getColumnKeys() { - return new String[]{"number_found", "number_missing"}; - } - - public String getName() { - return "FrequencyStats"; - } - - public String getCell(int x, int y) { - if (x >= foundMissedByAC.size()) throw new IllegalStateException(x + " is greater than the max index of " + (foundMissedByAC.size()-1)); - if (y == 0) return String.valueOf(foundMissedByAC.get(foundMissedByAC.keySet().toArray(new Integer[foundMissedByAC.size()])[x]).nFound); - else return String.valueOf(foundMissedByAC.get(foundMissedByAC.keySet().toArray(new Integer[foundMissedByAC.size()])[x]).nMissed); - } - - public void incrementFoundCount(int alleleFreq) { - if (!foundMissedByAC.containsKey(alleleFreq)) - foundMissedByAC.put(alleleFreq,new Stats(1,0)); - else - foundMissedByAC.get(alleleFreq).nFound++; - } - - public void incrementMissedCount(int alleleFreq) { - if (!foundMissedByAC.containsKey(alleleFreq)) - foundMissedByAC.put(alleleFreq,new Stats(0,1)); - else - foundMissedByAC.get(alleleFreq).nMissed++; - } - } - - static class QualityScoreHistograms extends TableType { - final static int NUM_BINS = 20; - final HashMap truePositiveQualityScoreMap = new HashMap(); // A HashMap holds all the quality scores until we are able to bin them appropriately - final HashMap falsePositiveQualityScoreMap = new HashMap(); - final int truePositiveHist[] = new int[NUM_BINS]; // the final histograms that get reported out - final int falsePositiveHist[] = new int[NUM_BINS]; - final String[] rowKeys = new String[]{"true_positive_hist", "false_positive_hist"}; - - public Object[] getRowKeys() { - return rowKeys; - } - - public Object[] getColumnKeys() { - final String columnKeys[] = new String[NUM_BINS]; - for( int iii = 0; iii < NUM_BINS; iii++ ) { - columnKeys[iii] = "histBin" + iii; - } - return columnKeys; - } - - public String getName() { - return "QualityScoreHistogram"; - } - - public String getCell(int x, int y) { - if( x == 0 ) { - return String.valueOf(truePositiveHist[y]); - } else if ( x == 1 ) { - return String.valueOf(falsePositiveHist[y]); - } else { - throw new ReviewedStingException( "Unknown row in " + getName() + ", row = " + x ); - } - } - - public String toString() { - String returnString = ""; - // output both histogram arrays - returnString += "TP: "; - for( int iii = 0; iii < NUM_BINS; iii++ ) { - returnString += truePositiveHist[iii] + " "; - } - returnString += "\nFP: "; - for( int iii = 0; iii < NUM_BINS; iii++ ) { - returnString += falsePositiveHist[iii] + " "; - } - return returnString; - } - - public void incrValue( final double qual, final boolean isTruePositiveCall ) { - HashMap qualScoreMap; - if( isTruePositiveCall ) { - qualScoreMap = truePositiveQualityScoreMap; - } else { - qualScoreMap = falsePositiveQualityScoreMap; - } - final Integer qualKey = Math.round((float) qual); - if( qualScoreMap.containsKey(qualKey) ) { - qualScoreMap.put(qualKey, qualScoreMap.get(qualKey) + 1); - } else { - qualScoreMap.put(qualKey, 1); - } - } - - public void organizeHistogramTables() { - for( int iii = 0; iii < NUM_BINS; iii++ ) { - truePositiveHist[iii] = 0; - falsePositiveHist[iii] = 0; - } - - int maxQual = 0; - - // Calculate the maximum quality score for both TP and FP calls in order to normalize and histogram - for( final Integer qual : truePositiveQualityScoreMap.keySet()) { - if( qual > maxQual ) { - maxQual = qual; - } - } - for( final Integer qual : falsePositiveQualityScoreMap.keySet()) { - if( qual > maxQual ) { - maxQual = qual; - } - } - - final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); //BUGBUG: should be normalized max to min, not max to 0 - - for( final Integer qual : truePositiveQualityScoreMap.keySet()) { - final int index = (int)Math.floor( ((double)qual) / binSize ); - if(index >= 0) { //BUGBUG: problem when maxQual is zero? - truePositiveHist[ index ] += truePositiveQualityScoreMap.get(qual); - } - } - for( final Integer qual : falsePositiveQualityScoreMap.keySet()) { - final int index = (int)Math.floor( ((double)qual) / binSize ); - if(index >= 0) { - falsePositiveHist[ index ] += falsePositiveQualityScoreMap.get(qual); - } - } - } - } - - // keep a list of the validation data we saw before the first eval data - private HashSet missedValidationData = new HashSet(); - - - //public GenotypeConcordance(VariantEvalWalker parent) { - // super(parent); - // discordantInteresting = parent.DISCORDANT_INTERESTING; - //} - - public String getName() { - return "genotypeConcordance"; + /** + * Initialize this object + */ + public GenotypeConcordance() { + final int nGenotypeTypes = Genotype.Type.values().length; + truthByCalledGenotypeCounts = new long[nGenotypeTypes][nGenotypeTypes]; } + @Override public int getComparisonOrder() { - return 2; // we need to see each eval track and each comp track + return 2; } - public boolean enabled() { - return true; - } - - public String toString() { - return getName() + ": "; - } - - private boolean warnedAboutValidationData = false; - - public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - String interesting = null; - + @Override + public void update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { // sanity check that we at least have either eval or validation data if ( (validation != null && !validation.hasGenotypes()) || eval == null && !isValidVC(validation)) { - return interesting; - } + return; + } else { + final boolean validationIsValidVC = isValidVC(validation); - if (detailedStats == null) { + // determine concordance for eval data if (eval != null) { - // initialize the concordance table - detailedStats = new SampleStats(eval,Genotype.Type.values().length); - simplifiedStats = new SampleSummaryStats(eval); - for (final VariantContext vc : missedValidationData) { - determineStats(null, vc); - } - missedValidationData = null; - } else { - // todo -- Eric, this results in a memory problem when eval is WEx data but you are using CG calls genome-wide - // todo -- perhaps you need should extend the evaluators with an initialize - // todo -- method that gets the header (or samples) for the first eval sites? - if (missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) { - if (!warnedAboutValidationData) { - //logger.warn("Too many genotype sites missed before eval site appeared; ignoring"); - warnedAboutValidationData = true; + for (final Genotype g : eval.getGenotypes() ) { + final String sample = g.getSampleName(); + final Genotype.Type called = g.getType(); + final Genotype.Type truth; + + if (!validationIsValidVC || !validation.hasGenotype(sample)) { + truth = Genotype.Type.NO_CALL; + } else { + truth = validation.getGenotype(sample).getType(); } - } else { - missedValidationData.add(validation); + + incrValue(truth, called); } - return interesting; } - } - interesting = determineStats(eval, validation); + // otherwise, mark no-calls for all samples + else { + final Genotype.Type called = Genotype.Type.NO_CALL; - return interesting; // we don't capture any interesting sites - } + for (final Genotype g : validation.getGenotypes()) { + final Genotype.Type truth = g.getType(); + incrValue(truth, called); - private String determineStats(final VariantContext eval, final VariantContext validation) { - String interesting = null; - - final boolean validationIsValidVC = isValidVC(validation); - final String evalAC = ( vcHasGoodAC(eval) ) ? String.format("evalAC%d",getAC(eval)) : null ; - final String validationAC = ( vcHasGoodAC(validation) ) ? String.format("compAC%d",getAC(validation)) : null; - - // determine concordance for eval data - if (eval != null) { - for (final Genotype g : eval.getGenotypes() ) { - final String sample = g.getSampleName(); - final Genotype.Type called = g.getType(); - final Genotype.Type truth; - - if (!validationIsValidVC || !validation.hasGenotype(sample)) { - truth = Genotype.Type.NO_CALL; - } else { - truth = validation.getGenotype(sample).getType(); - // interesting = "ConcordanceStatus=FP"; - if (discordantInteresting && truth.ordinal() != called.ordinal()) - { - interesting = "ConcordanceStatus=" + truth + "/" + called; - } - } - - detailedStats.incrValue(sample, truth, called); - } - } - // otherwise, mark no-calls for all samples - else { - final Genotype.Type called = Genotype.Type.NO_CALL; - - for (final Genotype g : validation.getGenotypes()) { - final Genotype.Type truth = g.getType(); - detailedStats.incrValue(g.getSampleName(), truth, called); - - // print out interesting sites - /* + // print out interesting sites + /* if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) { if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) { super.getVEWalker().gcLog.printf("%s FN %s%n", group, validation); @@ -315,292 +107,120 @@ public class GenotypeConcordance extends VariantEvaluator { } } */ + } } } - - return interesting; } private static boolean isValidVC(final VariantContext vc) { return (vc != null && !vc.isFiltered()); } - public void finalizeEvaluation() { - if( simplifiedStats != null && detailedStats != null ) { - simplifiedStats.generateSampleSummaryStats(detailedStats); - } - } - - private boolean vcHasGoodAC(VariantContext vc) { - return ( vc != null && vc.getAlternateAlleles().size() == 1 && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ); - - } - - private int getAC(VariantContext vc) { - if ( List.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()) ) { - return ((List) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY)).get(0); - } else if ( Integer.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass())) { - return (Integer) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY); - } else if ( String.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()) ) { - // two ways of parsing - String ac = (String) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY); - if ( ac.startsWith("[") ) { - return Integer.parseInt(ac.replaceAll("\\[","").replaceAll("\\]","")); - } else { - try { - return Integer.parseInt(ac); - } catch ( NumberFormatException e ) { - throw new UserException(String.format("The format of the AC field is improperly formatted: AC=%s",ac)); - } - } - } else { - throw new UserException(String.format("The format of the AC field does not appear to be of integer-list or String format, class was %s",vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass())); - } - } -} - -/** - * a table of sample names to genotype concordance figures - */ -class SampleStats extends TableType { - private final int nGenotypeTypes; - - // sample to concordance stats object - public final HashMap concordanceStats = new HashMap(); - - /** - * - * @return one row per sample - */ - public Object[] getRowKeys() { - return concordanceStats.keySet().toArray(new String[concordanceStats.size()]); - } - /** * increment the specified value - * @param sample the sample name * @param truth the truth type * @param called the called type */ - public void incrValue(String sample, Genotype.Type truth, Genotype.Type called) { - if ( concordanceStats.containsKey(sample) ) - concordanceStats.get(sample)[truth.ordinal()][called.ordinal()]++; - else if ( called != Genotype.Type.NO_CALL ) - throw new UserException.CommandLineException("Sample " + sample + " has not been seen in a previous eval; this analysis module assumes that all samples are present in each variant context"); + private void incrValue(final Genotype.Type truth, final Genotype.Type called) { + truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]++; } - /** - * get the column keys - * @return a list of objects, in this case strings, that are the column names - */ - public Object[] getColumnKeys() { -// return new String[]{"total_true_ref","%_ref/ref","n_ref/no-call", -// "n_ref/ref","n_ref/het","n_ref/hom", -// "total_true_het","%_het/het","n_het/no-call", -// "n_het/ref","n_het/het","n_het/hom", -// "total_true_hom","%_hom/hom","n_hom/no-call", -// "n_hom/ref","n_hom/het","n_hom/hom"}; - return new String[]{"total_true_ref","pct_ref_vs_ref","n_ref_vs_no_call", - "n_ref_vs_ref","n_ref_vs_het","n_ref_vs_hom", - "total_true_het","pct_het_vs_het","n_het_vs_no_call", - "n_het_vs_ref","n_het_vs_het","n_het_vs_hom", - "total_true_hom","pct_hom_vs_hom","n_hom_vs_no_call", - "n_hom_vs_ref","n_hom_vs_het","n_hom_vs_hom"}; + private long count(final Genotype.Type truth, final Genotype.Type called) { + return truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]; } - - public SampleStats(VariantContext vc, int nGenotypeTypes) { - this.nGenotypeTypes = nGenotypeTypes; - for (final Genotype g : vc.getGenotypes()) - concordanceStats.put(g.getSampleName(), new long[nGenotypeTypes][nGenotypeTypes]); + private long count(final EnumSet truth, final Genotype.Type called) { + return count(truth, EnumSet.of(called)); } - public SampleStats(int genotypeTypes) { - nGenotypeTypes = genotypeTypes; + private long count(final Genotype.Type truth, final EnumSet called) { + return count(EnumSet.of(truth), called); } - public Object getCell(int x, int y) { - // we have three rows of 6 right now for output (rows: ref, het, hom) - Genotype.Type type = Genotype.Type.values()[(y/6)+1]; // get the row type - // save some repeat work, get the total every time - long total = 0; - Object[] rowKeys = getRowKeys(); - for (int called = 0; called < nGenotypeTypes; called++) { - total += concordanceStats.get(rowKeys[x])[type.ordinal()][called]; - } - - // now get the cell they're interested in - switch (y % 6) { - case (0): // get the total_true for this type - return total; - case (1): - return total == 0 ? 0.0 : (100.0 * (double) concordanceStats.get(rowKeys[x])[type.ordinal()][type.ordinal()] / (double) total); - default: - return concordanceStats.get(rowKeys[x])[type.ordinal()][(y % 6) - 2]; + private long count(final EnumSet truth, final EnumSet called) { + long sum = 0; + for ( final Genotype.Type truth1 : truth ) { + for ( final Genotype.Type called1 : called ) { + sum += count(truth1, called1); + } } + return sum; } - public String getName() { - return "Sample Statistics"; - } -} - -/** - * a table of sample names to genotype concordance summary statistics - */ -class SampleSummaryStats extends TableType { - protected final static String ALL_SAMPLES_KEY = "allSamples"; - protected final static String[] COLUMN_KEYS = new String[]{ - "percent_comp_ref_called_ref", - "percent_comp_het_called_het", - "percent_comp_hom_called_hom", - "percent_non_reference_sensitivity", - "percent_overall_genotype_concordance", - "percent_non_reference_discrepancy_rate"}; - - // sample to concordance stats object - protected final HashMap concordanceSummary = new HashMap(); - - /** - * - * @return one row per sample - */ - public Object[] getRowKeys() { - return concordanceSummary.keySet().toArray(new String[concordanceSummary.size()]); - } - - /** - * get the column keys - * @return a list of objects, in this case strings, that are the column names - */ - public Object[] getColumnKeys() { - return COLUMN_KEYS; - } - - public SampleSummaryStats(final VariantContext vc) { - concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]); - for( final Genotype g : vc.getGenotypes() ) { - concordanceSummary.put(g.getSampleName(), new double[COLUMN_KEYS.length]); - } - } - - public SampleSummaryStats() { - - } - - public Object getCell(int x, int y) { - final Object[] rowKeys = getRowKeys(); - return String.format("%.2f",concordanceSummary.get(rowKeys[x])[y]); - } - - /** - * Helper routine that sums up all columns / rows found in stats specified by all pairs in d1 x d2 - * - * @param stats - * @param d1 - * @param d2 - * @return - */ - private long sumStatsAllPairs( final long[][] stats, EnumSet d1, EnumSet d2 ) { - long sum = 0L; + private long countDiag( final EnumSet d1 ) { + long sum = 0; for(final Genotype.Type e1 : d1 ) { - for(final Genotype.Type e2 : d2 ) { - sum += stats[e1.ordinal()][e2.ordinal()]; + sum += truthByCalledGenotypeCounts[e1.ordinal()][e1.ordinal()]; + } + + return sum; + } + + @Override + public void finalizeEvaluation() { + final EnumSet allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET); + final EnumSet allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF); + final EnumSet allGenotypes = EnumSet.allOf(Genotype.Type.class); + + // exact values of the table + for ( final Genotype.Type truth : Genotype.Type.values() ) { + for ( final Genotype.Type called : Genotype.Type.values() ) { + final String field = String.format("n_true_%s_called_%s", truth, called); + final Long value = count(truth, called); + map.put(field, value.toString()); } } - return sum; - } - - private long sumStatsDiag( final long[][] stats, EnumSet d1) { - long sum = 0L; - - for(final Genotype.Type e1 : d1 ) { - sum += stats[e1.ordinal()][e1.ordinal()]; + // counts of called genotypes + for ( final Genotype.Type called : Genotype.Type.values() ) { + final String field = String.format("total_called_%s", called); + final Long value = count(allGenotypes, called); + map.put(field, value.toString()); } - return sum; - } + // counts of true genotypes + for ( final Genotype.Type truth : Genotype.Type.values() ) { + final String field = String.format("total_true_%s", truth); + final Long value = count(truth, allGenotypes); + map.put(field, value.toString()); + } - private double ratio(long numer, long denom) { - return denom != 0L ? 100.0 * ( ((double)numer) / ((double)denom) ) : 0.0; - } + for ( final Genotype.Type genotype : Genotype.Type.values() ) { + final String field = String.format("percent_%s_called_%s", genotype, genotype); + long numer = count(genotype, genotype); + long denom = count(EnumSet.of(genotype), allGenotypes); + map.put(field, Utils.formattedPercent(numer, denom)); + } - final long[] allSamplesNumerators = new long[COLUMN_KEYS.length]; - final long[] allSamplesDenominators = new long[COLUMN_KEYS.length]; - - private void updateSummaries(int i, double[] summary, long numer, long denom ) { - allSamplesNumerators[i] += numer; - allSamplesDenominators[i] += denom; - summary[i] = ratio(numer, denom); - } - - - /** - * Calculate the five summary stats per sample - * @param sampleStats The Map which holds concordance values per sample - */ - public void generateSampleSummaryStats( final SampleStats sampleStats ) { - EnumSet allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET); - EnumSet allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF); - EnumSet allGenotypes = EnumSet.allOf(Genotype.Type.class); - - for( final String sample : concordanceSummary.keySet() ) { - if ( sample.equals(ALL_SAMPLES_KEY) ) continue; - - final long[][] stats = sampleStats.concordanceStats.get(sample); - final double[] summary = concordanceSummary.get(sample); - if( stats == null ) { throw new ReviewedStingException( "SampleStats and SampleSummaryStats contain different samples! sample = " + sample ); } - - long numer, denom; - - // Summary 0: % ref called as ref - numer = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()]; - denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_REF), allGenotypes); - updateSummaries(0, summary, numer, denom); - - // Summary 1: % het called as het - numer = stats[Genotype.Type.HET.ordinal()][Genotype.Type.HET.ordinal()]; - denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HET), allGenotypes); - updateSummaries(1, summary, numer, denom); - - // Summary 2: % homVar called as homVar - numer = stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HOM_VAR.ordinal()]; - denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_VAR), allGenotypes); - updateSummaries(2, summary, numer, denom); - - // Summary 3: % non-ref called as non-ref + { + // % non-ref called as non-ref // MAD: this is known as the non-reference sensitivity (# non-ref according to comp found in eval / # non-ref in comp) - numer = sumStatsAllPairs(stats, allVariantGenotypes, allVariantGenotypes); - denom = sumStatsAllPairs(stats, allVariantGenotypes, allGenotypes); - updateSummaries(3, summary, numer, denom); + final String field = "percent_non_reference_sensitivity"; + long numer = count(allVariantGenotypes, allVariantGenotypes); + long denom = count(allVariantGenotypes, allGenotypes); + map.put(field, Utils.formattedPercent(numer, denom)); + } - // Summary 4: overall genotype concordance of sites called in eval track + { + // overall genotype concordance of sites called in eval track // MAD: this is the tradition genotype concordance - numer = sumStatsDiag(stats, allCalledGenotypes); - denom = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes); - updateSummaries(4, summary, numer, denom); - - // Summary 5: overall genotype concordance of sites called non-ref in eval track - long homrefConcords = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()]; - long diag = sumStatsDiag(stats, allVariantGenotypes); - long allNoHomRef = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes) - homrefConcords; - numer = allNoHomRef - diag; - denom = allNoHomRef; - updateSummaries(5, summary, numer, denom); + final String field = "percent_overall_genotype_concordance"; + long numer = countDiag(allCalledGenotypes); + long denom = count(allCalledGenotypes, allCalledGenotypes); + map.put(field, Utils.formattedPercent(numer, denom)); } - // update the final summary stats - final double[] allSamplesSummary = concordanceSummary.get(ALL_SAMPLES_KEY); - for ( int i = 0; i < allSamplesSummary.length; i++) { - allSamplesSummary[i] = ratio(allSamplesNumerators[i], allSamplesDenominators[i]); + { + // overall genotype concordance of sites called non-ref in eval track + // MAD: this is the non-reference discrepancy rate + final String field = "percent_non_reference_discrepancy_rate"; + long homrefConcords = count(Genotype.Type.HOM_REF, Genotype.Type.HOM_REF); + long allNoHomRef = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords; + long numer = allNoHomRef - countDiag(allVariantGenotypes); + long denom = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords; + map.put(field, Utils.formattedPercent(numer, denom)); } - - } - - public String getName() { - return "Sample Summary Statistics"; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java deleted file mode 100755 index 266c4fa89..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java +++ /dev/null @@ -1,426 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair; -import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.variantcontext.Genotype; -import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; -import org.broadinstitute.sting.utils.variantcontext.VariantContext; - -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -@Analysis(name = "Genotype Phasing Evaluation", description = "Evaluates the phasing of genotypes in different tracks") -public class GenotypePhasingEvaluator extends VariantEvaluator { - protected final static Logger logger = Logger.getLogger(GenotypePhasingEvaluator.class); - - // a mapping from sample to stats - @DataPoint(description = "the phasing statistics for each sample") - SamplePhasingStatistics samplePhasingStatistics = null; - - SamplePreviousGenotypes samplePrevGenotypes = null; - - double minPhaseQuality = 10.0; - - public void initialize(VariantEvalWalker walker) { - super.initialize(walker); - this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality()); - this.samplePrevGenotypes = new SamplePreviousGenotypes(); - } - - public String getName() { - return "GenotypePhasingEvaluator"; - } - - public int getComparisonOrder() { - return 2; // we only need to see pairs of (comp, eval) - } - - public boolean enabled() { - return true; - } - - public String toString() { - return getName() + ":
"; - } - - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return update2(eval,comp,tracker,ref,context,null); - } - - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, EvaluationContext group) { - //public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) { - Reasons interesting = new Reasons(); - if (ref == null) - return interesting.toString(); - GenomeLoc curLocus = ref.getLocus(); - - logger.debug("update2() locus: " + curLocus); - logger.debug("comp = " + comp + " eval = " + eval); - - Set allSamples = new HashSet(); - - GenotypesContext compSampGenotypes = null; - if (isRelevantToPhasing(comp)) { - allSamples.addAll(comp.getSampleNames()); - compSampGenotypes = comp.getGenotypes(); - } - - GenotypesContext evalSampGenotypes = null; - if (isRelevantToPhasing(eval)) { - allSamples.addAll(eval.getSampleNames()); - evalSampGenotypes = eval.getGenotypes(); - } - - for (String samp : allSamples) { - logger.debug("sample = " + samp); - - Genotype compSampGt = null; - if (compSampGenotypes != null) - compSampGt = compSampGenotypes.get(samp); - - Genotype evalSampGt = null; - if (evalSampGenotypes != null) - evalSampGt = evalSampGenotypes.get(samp); - - if (compSampGt == null || evalSampGt == null || compSampGt.isNoCall() || evalSampGt.isNoCall()) { // Since either comp or eval (or both) are missing the site, the best we can do is hope to preserve phase [if the non-missing one preserves phase] - // Having an unphased site breaks the phasing for the sample [does NOT permit "transitive phasing"] - hence, must reset phasing knowledge for both comp and eval [put a null CompEvalGenotypes]: - if (isNonNullButUnphased(compSampGt) || isNonNullButUnphased(evalSampGt)) - samplePrevGenotypes.put(samp, null); - } - else { // Both comp and eval have a non-null Genotype at this site: - AllelePair compAllelePair = new AllelePair(compSampGt); - AllelePair evalAllelePair = new AllelePair(evalSampGt); - - boolean breakPhasing = false; - if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom()) - breakPhasing = true; // since they are not both het or both hom - else { // both are het, or both are hom: - boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compAllelePair, evalAllelePair) && bottomMatchesBottom(compAllelePair, evalAllelePair)); - boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compAllelePair, evalAllelePair) && bottomMatchesTop(compAllelePair, evalAllelePair)); - if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop) - breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample - } - - if (breakPhasing) { - samplePrevGenotypes.put(samp, null); // nothing to do for this site, AND must remove any history for the future - } - else if (compSampGt.isHet() && evalSampGt.isHet()) { - /* comp and eval have the HET same Genotype at this site: - [Note that if both are hom, then nothing is done here, but the het history IS preserved]. - */ - CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp); - if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome - prevCompAndEval = null; - - // Replace the previous hets with the current hets: - samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt); - - if (prevCompAndEval != null) { - GenomeLoc prevLocus = prevCompAndEval.getLocus(); - logger.debug("Potentially phaseable het locus: " + curLocus + " [relative to previous het locus: " + prevLocus + "]"); - PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp); - - boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt); - boolean evalSampIsPhased = genotypesArePhasedAboveThreshold(evalSampGt); - if (compSampIsPhased || evalSampIsPhased) { - if (!evalSampIsPhased) { - ps.onlyCompPhased++; - //interesting.addReason("ONLY_COMP", samp, group, prevLocus, ""); - } - else if (!compSampIsPhased) { - ps.onlyEvalPhased++; - //interesting.addReason("ONLY_EVAL", samp, group, prevLocus, ""); - } - else { // both comp and eval are phased: - AllelePair prevCompAllelePair = new AllelePair(prevCompAndEval.getCompGenotpye()); - AllelePair prevEvalAllelePair = new AllelePair(prevCompAndEval.getEvalGenotype()); - - // Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample: - boolean topsMatch = (topMatchesTop(prevCompAllelePair, prevEvalAllelePair) && topMatchesTop(compAllelePair, evalAllelePair)); - boolean topMatchesBottom = (topMatchesBottom(prevCompAllelePair, prevEvalAllelePair) && topMatchesBottom(compAllelePair, evalAllelePair)); - - if (topsMatch || topMatchesBottom) { - ps.phasesAgree++; - - Double compPQ = getPQ(compSampGt); - Double evalPQ = getPQ(evalSampGt); - if (compPQ != null && evalPQ != null && MathUtils.compareDoubles(compPQ, evalPQ) != 0) { - //interesting.addReason("PQ_CHANGE", samp, group, prevLocus, compPQ + " -> " + evalPQ); - } - } - else { - ps.phasesDisagree++; - logger.debug("SWITCHED locus: " + curLocus); - //interesting.addReason("SWITCH", samp, group, prevLocus, toString(prevCompAllelePair, compAllelePair) + " -> " + toString(prevEvalAllelePair, evalAllelePair)); - } - } - } - else { - ps.neitherPhased++; - } - } - } - } - } - logger.debug("\n" + samplePhasingStatistics + "\n"); - - return interesting.toString(); - } - - public static boolean isRelevantToPhasing(VariantContext vc) { - return (vc != null && !vc.isFiltered()); - } - - public boolean isNonNullButUnphased(Genotype gt) { - return (gt != null && !gt.isNoCall() && !genotypesArePhasedAboveThreshold(gt)); - } - - public boolean genotypesArePhasedAboveThreshold(Genotype gt) { - if (gt.isHom()) // Can always consider a hom site to be phased to its predecessor, since its successor will only be phased to it if it's hom or "truly" phased - return true; - - if (!gt.isPhased()) - return false; - - Double pq = getPQ(gt); - return (pq == null || pq >= minPhaseQuality); - } - - public static Double getPQ(Genotype gt) { - Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1); - return d == -1 ? null : d; - } - - public static boolean topMatchesTop(AllelePair b1, AllelePair b2) { - return b1.getTopAllele().equals(b2.getTopAllele()); - } - - public static boolean topMatchesBottom(AllelePair b1, AllelePair b2) { - return b1.getTopAllele().equals(b2.getBottomAllele()); - } - - public static boolean bottomMatchesTop(AllelePair b1, AllelePair b2) { - return topMatchesBottom(b2, b1); - } - - public static boolean bottomMatchesBottom(AllelePair b1, AllelePair b2) { - return b1.getBottomAllele().equals(b2.getBottomAllele()); - } - - public String toString(AllelePair prev, AllelePair cur) { - return prev.getTopAllele().getBaseString() + "+" + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "+" + cur.getBottomAllele().getBaseString(); - } - - public void finalizeEvaluation() { - } - - private static class Reasons { - private StringBuilder sb; - - public Reasons() { - sb = new StringBuilder(); - } - -// public void addReason(String category, String sample, VariantEvalWalker.EvaluationContext evalGroup, GenomeLoc prevLoc, String reason) { -// sb.append(category + "(" + sample + ", previous: " + prevLoc + " [" + evalGroup.compTrackName + ", " + evalGroup.evalTrackName + "]): " + reason + ";"); -// } - - public String toString() { - if (sb.length() == 0) - return null; - - return "reasons=" + sb.toString(); - } - } -} - - - -class CompEvalGenotypes { - private GenomeLoc loc; - private Genotype compGt; - private Genotype evalGt; - - public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) { - this.loc = loc; - this.compGt = compGt; - this.evalGt = evalGt; - } - - public GenomeLoc getLocus() { - return loc; - } - - public Genotype getCompGenotpye() { - return compGt; - } - public Genotype getEvalGenotype() { - return evalGt; - } -} - -class SamplePreviousGenotypes { - private HashMap sampleGenotypes = null; - - public SamplePreviousGenotypes() { - this.sampleGenotypes = new HashMap(); - } - - public CompEvalGenotypes get(String sample) { - return sampleGenotypes.get(sample); - } - - public void put(String sample, CompEvalGenotypes compEvalGts) { - sampleGenotypes.put(sample, compEvalGts); - } - - public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) { - sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt)); - } -} - -class PhaseStats { - public int neitherPhased; - public int onlyCompPhased; - public int onlyEvalPhased; - public int phasesAgree; - public int phasesDisagree; - - public PhaseStats() { - this.neitherPhased = 0; - this.onlyCompPhased = 0; - this.onlyEvalPhased = 0; - this.phasesAgree = 0; - this.phasesDisagree = 0; - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree); - return sb.toString(); - } - - public static String[] getFieldNamesArray() { - return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"}; - } - - public Object getField(int index) { - switch (index) { - case (0): - return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree); - case (1): - return neitherPhased; - case (2): - return onlyCompPhased; - case (3): - return onlyEvalPhased; - case (4): - return (phasesAgree + phasesDisagree); - case (5): - return phasesAgree; - case (6): - return phasesDisagree; - case (7): - return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree)); - default: - return -1; - } - } -} - -/** - * a table of sample names to genotype phasing statistics - */ -class SamplePhasingStatistics extends TableType { - private HashMap sampleStats = null; - private double minPhaseQuality; - - public SamplePhasingStatistics(double minPhaseQuality) { - this.sampleStats = new HashMap(); - this.minPhaseQuality = minPhaseQuality; - } - - public PhaseStats ensureSampleStats(String samp) { - PhaseStats ps = sampleStats.get(samp); - if (ps == null) { - ps = new PhaseStats(); - sampleStats.put(samp, ps); - } - return ps; - } - - /** - * @return one row per sample - */ - public String[] getRowKeys() { - return sampleStats.keySet().toArray(new String[sampleStats.size()]); - } - - /** - * get the column keys - * - * @return a list of objects, in this case strings, that are the column names - */ - public String[] getColumnKeys() { - return PhaseStats.getFieldNamesArray(); - } - - public Object getCell(int x, int y) { - String[] rowKeys = getRowKeys(); - PhaseStats ps = sampleStats.get(rowKeys[x]); - return ps.getField(y); - } - - public String getName() { - return "Sample Phasing Statistics (for PQ >= " + minPhaseQuality + ")"; - } - - public String toString() { - StringBuilder sb = new StringBuilder(); - for (Map.Entry sampPhaseStatsEnt : sampleStats.entrySet()) { - String sample = sampPhaseStatsEnt.getKey(); - PhaseStats ps = sampPhaseStatsEnt.getValue(); - - sb.append(sample + "\t" + ps); - } - return sb.toString(); - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java new file mode 100644 index 000000000..9c6fb2344 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; + +/** + * Simple utility for histogramming indel lengths + * + * Based on code from chartl + * + * @author Mark DePristo + * @since 3/21/12 + */ +@Analysis(description = "Indel length histogram", molten = true) +public class IndelLengthHistogram extends VariantEvaluator implements StandardEval { + private final Map counts = new HashMap(); + private final boolean asFrequencies; + int nIndels = 0; + + @Molten(variableFormat = "%d", valueFormat = "%.2f") + public TreeMap results; + + public final static int MAX_SIZE_FOR_HISTOGRAM = 10; + + public IndelLengthHistogram() { + this(MAX_SIZE_FOR_HISTOGRAM, true); + } + + public IndelLengthHistogram(int maxSize, boolean asFrequencies) { + this.asFrequencies = asFrequencies; + initializeCounts(maxSize); + } + + private void initializeCounts(int size) { + for ( int i = -size; i <= size; i++ ) { + if ( i != 0 ) counts.put(i, 0); + } + } + + @Override + public void finalizeEvaluation() { + if ( asFrequencies ) { + results = new TreeMap(); + for ( final int len : counts.keySet() ) { + final double value = nIndels == 0 ? 0.0 : counts.get(len) / (1.0 * nIndels); + results.put(len, value); + } + } else { + results = new TreeMap(results); + } + } + + @Override + public int getComparisonOrder() { + return 1; + } + + @Override + public void update1(final VariantContext eval, final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { + if ( eval.isIndel() && ! eval.isComplexIndel() ) { + for ( Allele alt : eval.getAlternateAlleles() ) { + final int alleleSize = alt.length() - eval.getReference().length(); + if ( alleleSize == 0 ) throw new ReviewedStingException("Allele size not expected to be zero for indel: alt = " + alt + " ref = " + eval.getReference()); + updateLengthHistogram(eval.getReference(), alt); + } + } + } + + public void updateLengthHistogram(final Allele ref, final Allele alt) { + final int len = alt.length() - ref.length(); + if ( counts.containsKey(len) ) { + nIndels++; + counts.put(len, counts.get(len) + 1); + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java index 51cf2bb6a..9ee5c73ab 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java @@ -30,7 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.IndelHistogram; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; @@ -119,9 +119,8 @@ public class IndelSummary extends VariantEvaluator implements StandardEval { int n1bpInsertions = 0, n1bpDeletions = 0; int[] countByLength = new int[]{0, 0, 0, 0}; // note that the first element isn't used - public final static int MAX_SIZE_FOR_HISTOGRAM = 10; - @DataPoint(description = "Histogram of indel lengths") - IndelHistogram lengthHistogram = new IndelHistogram(MAX_SIZE_FOR_HISTOGRAM, true); + + public final static int LARGE_INDEL_SIZE_THRESHOLD = 10; @DataPoint(description = "Number of large (>10 bp) deletions") public int n_large_deletions = 0; @@ -132,12 +131,11 @@ public class IndelSummary extends VariantEvaluator implements StandardEval { @DataPoint(description = "Ratio of large (>10 bp) insertions to deletions") public String insertion_to_deletion_ratio_for_large_indels; - @Override public boolean enabled() { return true; } @Override public int getComparisonOrder() { return 2; } - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( eval == null || eval.isMonomorphicInSamples() ) - return null; + return; // update counts switch ( eval.getType() ) { @@ -176,9 +174,6 @@ public class IndelSummary extends VariantEvaluator implements StandardEval { if ( alleleSize == 1 ) n1bpInsertions++; if ( alleleSize == -1 ) n1bpDeletions++; - // update the length histogram - lengthHistogram.update(eval.getReference(), alt); - // requires snpEFF annotations if ( eval.getAttributeAsString("SNPEFF_GENE_BIOTYPE", "missing").equals("protein_coding") ) { final String effect = eval.getAttributeAsString("SNPEFF_EFFECT", "missing"); @@ -191,10 +186,16 @@ public class IndelSummary extends VariantEvaluator implements StandardEval { else ; // lots of protein coding effects that shouldn't be counted, such as INTRON } + + if ( alleleSize > LARGE_INDEL_SIZE_THRESHOLD ) + n_large_insertions++; + else if ( alleleSize < -LARGE_INDEL_SIZE_THRESHOLD ) + n_large_deletions++; // update the baby histogram final int absSize = Math.abs(alleleSize); if ( absSize < countByLength.length ) countByLength[absSize]++; + } break; @@ -202,29 +203,26 @@ public class IndelSummary extends VariantEvaluator implements StandardEval { throw new UserException.BadInput("Unexpected variant context type: " + eval); } - return null; // we don't capture any interesting sites + return; } public void finalizeEvaluation() { - percent_of_sites_with_more_than_2_alleles = formattedRatio(nMultiIndelSites, nIndelSites); - SNP_to_indel_ratio = formattedRatio(n_SNPs, n_indels); - SNP_to_indel_ratio_for_singletons = formattedRatio(n_singleton_SNPs, n_singleton_indels); - indel_novelty_rate = formattedNoveltyRate(nKnownIndels, n_indels); - ratio_of_1_to_2_bp_indels = formattedRatio(countByLength[1], countByLength[2]); - ratio_of_1_to_3_bp_indels = formattedRatio(countByLength[1], countByLength[3]); - ratio_of_2_to_3_bp_indels = formattedRatio(countByLength[2], countByLength[3]); - ratio_of_1_and_2_to_3_bp_indels = formattedRatio(countByLength[1] + countByLength[2], countByLength[3]); - frameshift_rate_for_coding_indels = formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting); + percent_of_sites_with_more_than_2_alleles = Utils.formattedRatio(nMultiIndelSites, nIndelSites); + SNP_to_indel_ratio = Utils.formattedRatio(n_SNPs, n_indels); + SNP_to_indel_ratio_for_singletons = Utils.formattedRatio(n_singleton_SNPs, n_singleton_indels); + indel_novelty_rate = Utils.formattedNoveltyRate(nKnownIndels, n_indels); + ratio_of_1_to_2_bp_indels = Utils.formattedRatio(countByLength[1], countByLength[2]); + ratio_of_1_to_3_bp_indels = Utils.formattedRatio(countByLength[1], countByLength[3]); + ratio_of_2_to_3_bp_indels = Utils.formattedRatio(countByLength[2], countByLength[3]); + ratio_of_1_and_2_to_3_bp_indels = Utils.formattedRatio(countByLength[1] + countByLength[2], countByLength[3]); + frameshift_rate_for_coding_indels = Utils.formattedPercent(n_coding_indels_frameshifting, n_coding_indels_in_frame + n_coding_indels_frameshifting); - SNP_het_to_hom_ratio = formattedRatio(nSNPHets, nSNPHoms); - indel_het_to_hom_ratio = formattedRatio(nIndelHets, nIndelHoms); - - n_large_deletions = lengthHistogram.getnTooBigDeletions(); - n_large_insertions = lengthHistogram.getnTooBigInsertions(); + SNP_het_to_hom_ratio = Utils.formattedRatio(nSNPHets, nSNPHoms); + indel_het_to_hom_ratio = Utils.formattedRatio(nIndelHets, nIndelHoms); - insertion_to_deletion_ratio = formattedRatio(nInsertions, n_indels - nInsertions); - insertion_to_deletion_ratio_for_1bp_indels = formattedRatio(n1bpInsertions, n1bpDeletions); - insertion_to_deletion_ratio_for_large_indels = formattedRatio(n_large_insertions, n_large_deletions); + insertion_to_deletion_ratio = Utils.formattedRatio(nInsertions, n_indels - nInsertions); + insertion_to_deletion_ratio_for_1bp_indels = Utils.formattedRatio(n1bpInsertions, n1bpDeletions); + insertion_to_deletion_ratio_for_large_indels = Utils.formattedRatio(n_large_insertions, n_large_deletions); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java index db2bf61c6..ff3bf66f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java @@ -43,63 +43,63 @@ import java.util.Set; public class MendelianViolationEvaluator extends VariantEvaluator { @DataPoint(description = "Number of variants found with at least one family having genotypes", format = "%d") - long nVariants; + public long nVariants; @DataPoint(description = "Number of variants found with no family having genotypes -- these sites do not count in the nNoCall", format = "%d") - long nSkipped; + public long nSkipped; @DataPoint(description="Number of variants x families called (no missing genotype or lowqual)", format = "%d") - long nFamCalled; + public long nFamCalled; @DataPoint(description="Number of variants x families called (no missing genotype or lowqual) that contain at least one var allele.", format = "%d") - long nVarFamCalled; + public long nVarFamCalled; @DataPoint(description="Number of variants x families discarded as low quality", format = "%d") - long nLowQual; + public long nLowQual; @DataPoint(description="Number of variants x families discarded as no call", format = "%d") - long nNoCall; + public long nNoCall; @DataPoint(description="Number of loci with mendelian violations", format = "%d") - long nLociViolations; + public long nLociViolations; @DataPoint(description = "Number of mendelian violations found", format = "%d") - long nViolations; + public long nViolations; @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HOM_VAR", format = "%d") - long mvRefRef_Var; + public long mvRefRef_Var; @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_REF -> HET", format = "%d") - long mvRefRef_Het; + public long mvRefRef_Het; @DataPoint(description="Number of mendelian violations of the type HOM_REF/HET -> HOM_VAR", format = "%d") - long mvRefHet_Var; + public long mvRefHet_Var; @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_VAR", format = "%d") - long mvRefVar_Var; + public long mvRefVar_Var; @DataPoint(description="Number of mendelian violations of the type HOM_REF/HOM_VAR -> HOM_REF", format = "%d") - long mvRefVar_Ref; + public long mvRefVar_Ref; @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HET -> HOM_REF", format = "%d") - long mvVarHet_Ref; + public long mvVarHet_Ref; @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HOM_REF", format = "%d") - long mvVarVar_Ref; + public long mvVarVar_Ref; @DataPoint(description="Number of mendelian violations of the type HOM_VAR/HOM_VAR -> HET", format = "%d") - long mvVarVar_Het; + public long mvVarVar_Het; @DataPoint(description="Number of HomRef/HomRef/HomRef trios", format = "%d") - long HomRefHomRef_HomRef; + public long HomRefHomRef_HomRef; @DataPoint(description="Number of Het/Het/Het trios", format = "%d") - long HetHet_Het; + public long HetHet_Het; @DataPoint(description="Number of Het/Het/HomRef trios", format = "%d") - long HetHet_HomRef; + public long HetHet_HomRef; @DataPoint(description="Number of Het/Het/HomVar trios", format = "%d") - long HetHet_HomVar; + public long HetHet_HomVar; @DataPoint(description="Number of HomVar/HomVar/HomVar trios", format = "%d") - long HomVarHomVar_HomVar; + public long HomVarHomVar_HomVar; @DataPoint(description="Number of HomRef/HomVar/Het trios", format = "%d") - long HomRefHomVAR_Het; + public long HomRefHomVAR_Het; @DataPoint(description="Number of ref alleles inherited from het/het parents", format = "%d") - long HetHet_inheritedRef; + public long HetHet_inheritedRef; @DataPoint(description="Number of var alleles inherited from het/het parents", format = "%d") - long HetHet_inheritedVar; + public long HetHet_inheritedVar; @DataPoint(description="Number of ref alleles inherited from homRef/het parents", format = "%d") - long HomRefHet_inheritedRef; + public long HomRefHet_inheritedRef; @DataPoint(description="Number of var alleles inherited from homRef/het parents", format = "%d") - long HomRefHet_inheritedVar; + public long HomRefHet_inheritedVar; @DataPoint(description="Number of ref alleles inherited from homVar/het parents", format = "%d") - long HomVarHet_inheritedRef; + public long HomVarHet_inheritedRef; @DataPoint(description="Number of var alleles inherited from homVar/het parents", format = "%d") - long HomVarHet_inheritedVar; + public long HomVarHet_inheritedVar; MendelianViolation mv; Map> families; @@ -110,10 +110,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator { families = walker.getSampleDB().getFamilies(); } - public boolean enabled() { - return true; - } - public String getName() { return "mendelian_violations"; } @@ -122,7 +118,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator { return 1; // we only need to see each eval track } - public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci if(mv.countViolations(families,vc)>0){ @@ -161,11 +157,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator { else{ nSkipped++; } - - - return null; } - - return null; // we don't capture any interesting sites } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java index 90c2def0b..efc8d42f8 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java @@ -30,6 +30,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -88,12 +89,11 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva public String indelNoveltyRate = "NA"; - @Override public boolean enabled() { return true; } @Override public int getComparisonOrder() { return 2; } - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( eval == null || eval.isMonomorphicInSamples() ) - return null; + return; // update counts switch ( eval.getType() ) { @@ -116,7 +116,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva throw new UserException.BadInput("Unexpected variant context type: " + eval); } - return null; // we don't capture any interesting sites + return; } private void calculatePairwiseTiTv(VariantContext vc) { @@ -157,7 +157,7 @@ public class MultiallelicSummary extends VariantEvaluator implements StandardEva TiTvRatio = (double)nTi / (double)nTv; - SNPNoveltyRate = formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs); - indelNoveltyRate = formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs); + SNPNoveltyRate = Utils.formattedNoveltyRate(knownSNPsPartial + knownSNPsComplete, nMultiSNPs); + indelNoveltyRate = Utils.formattedNoveltyRate(knownIndelsPartial + knownIndelsComplete, nMultiSNPs); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java index ed8909f19..a0cb662e0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java @@ -34,11 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; @Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites") public class PrintMissingComp extends VariantEvaluator { @DataPoint(description = "number of eval sites outside of comp sites", format = "%d") - long nMissing = 0; - - //public PrintMissingComp(VariantEvalWalker parent) { - // super(parent); - //} + public long nMissing = 0; public String getName() { return "PrintMissingComp"; @@ -48,20 +44,13 @@ public class PrintMissingComp extends VariantEvaluator { return 2; // we need to see each eval track and each comp track } - public boolean enabled() { - return true; - } - - - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP(); - boolean evalIsGood = eval != null && eval.isSNP(); + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + final boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP(); + final boolean evalIsGood = eval != null && eval.isSNP(); if ( compIsGood & ! evalIsGood ) { nMissing++; - return "MissingFrom" + comp.getSource(); - } else { - return null; + super.getWalker().getLogger().info("MissingFrom" + eval.toString() + " is missing from " + comp.getSource()); } } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java index ce4349717..106ac330d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java @@ -15,30 +15,26 @@ import java.util.concurrent.ConcurrentMap; @Analysis(description = "Computes different estimates of theta based on variant sites and genotypes") public class ThetaVariantEvaluator extends VariantEvaluator { @DataPoint(description = "Average heterozygosity at variant sites; note that missing genotypes are ignored when computing this value", format = "%.8f") - double avgHet = 0.0; + public double avgHet = 0.0; @DataPoint(description = "Average pairwise differences at aligned sequences; averaged over both number of sequeneces and number of variant sites; note that missing genotypes are ignored when computing this value", format = "%.8f") - double avgAvgDiffs = 0.0; + public double avgAvgDiffs = 0.0; @DataPoint(description = "Sum of heterozygosity over all variant sites; divide this by total target to get estimate of per base theta", format = "%.8f") - double totalHet = 0.0; + public double totalHet = 0.0; @DataPoint(description = "Sum of pairwise diffs over all variant sites; divide this by total target to get estimate of per base theta", format = "%.8f") - double totalAvgDiffs = 0.0; + public double totalAvgDiffs = 0.0; @DataPoint(description = "Theta for entire region estimated based on number of segregating sites; divide ths by total target to get estimate of per base theta", format = "%.8f") - double thetaRegionNumSites = 0.0; + public double thetaRegionNumSites = 0.0; //helper variables double numSites = 0; - public boolean enabled() { - return true; - } - public int getComparisonOrder() { return 1; } - public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphicInSamples()) { - return null; //no interesting sites + return; } //this maps allele to a count @@ -107,8 +103,6 @@ public class ThetaVariantEvaluator extends VariantEvaluator { this.totalAvgDiffs += numDiffs / numPairwise; } } - - return null; } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java index edb2b6ca6..6c4fcd26d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java @@ -11,29 +11,24 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; @Analysis(description = "Ti/Tv Variant Evaluator") public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEval { - @DataPoint(description = "number of transition loci", format = "%d") - long nTi = 0; + public long nTi = 0; @DataPoint(description = "number of transversion loci", format = "%d") - long nTv = 0; + public long nTv = 0; @DataPoint(description = "the transition to transversion ratio", format = "%.2f") - double tiTvRatio = 0.0; + public double tiTvRatio = 0.0; @DataPoint(description = "number of comp transition sites", format = "%d") - long nTiInComp = 0; + public long nTiInComp = 0; @DataPoint(description = "number of comp transversion sites", format = "%d") - long nTvInComp = 0; + public long nTvInComp = 0; @DataPoint(description = "the transition to transversion ratio for comp sites", format = "%.2f") - double TiTvRatioStandard = 0.0; + public double TiTvRatioStandard = 0.0; @DataPoint(description = "number of derived transition loci", format = "%d") - long nTiDerived = 0; + public long nTiDerived = 0; @DataPoint(description = "number of derived transversion loci", format = "%d") - long nTvDerived = 0; + public long nTvDerived = 0; @DataPoint(description = "the derived transition to transversion ratio", format = "%.2f") - double tiTvDerivedRatio = 0.0; - - public boolean enabled() { - return true; - } + public double tiTvDerivedRatio = 0.0; public int getComparisonOrder() { return 2; // we only need to see each eval track @@ -62,11 +57,9 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv } } - public String update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if (vc1 != null) updateTiTv(vc1, false); if (vc2 != null) updateTiTv(vc2, true); - - return null; // we don't capture any interesting sites } @Override diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java index 8ce8ec799..bf457f5c0 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java @@ -24,29 +24,29 @@ import java.util.Collection; @Analysis(description = "Assess site accuracy and sensitivity of callset against follow-up validation assay") public class ValidationReport extends VariantEvaluator implements StandardEval { // todo -- note this isn't strictly allele away. It's really focused on sites. A/T call at a validated A/G site is currently counted as a TP - @DataPoint(description = "nComp", format = "%d") int nComp = 0; - @DataPoint(description = "TP", format = "%d") int TP = 0; - @DataPoint(description = "FP", format = "%d") int FP = 0; - @DataPoint(description = "FN", format = "%d") int FN = 0; - @DataPoint(description = "TN", format = "%d") int TN = 0; + @DataPoint(description = "nComp", format = "%d") public int nComp = 0; + @DataPoint(description = "TP", format = "%d") public int TP = 0; + @DataPoint(description = "FP", format = "%d") public int FP = 0; + @DataPoint(description = "FN", format = "%d") public int FN = 0; + @DataPoint(description = "TN", format = "%d") public int TN = 0; - @DataPoint(description = "Sensitivity", format = "%.2f") double sensitivity = 0; - @DataPoint(description = "Specificity", format = "%.2f") double specificity = 0; - @DataPoint(description = "PPV", format = "%.2f") double PPV = 0; - @DataPoint(description = "FDR", format = "%.2f") double FDR = 0; + @DataPoint(description = "Sensitivity", format = "%.2f") public double sensitivity = 0; + @DataPoint(description = "Specificity", format = "%.2f") public double specificity = 0; + @DataPoint(description = "PPV", format = "%.2f") public double PPV = 0; + @DataPoint(description = "FDR", format = "%.2f") public double FDR = 0; - @DataPoint(description = "CompMonoEvalNoCall", format = "%d") int CompMonoEvalNoCall = 0; - @DataPoint(description = "CompMonoEvalFiltered", format = "%d") int CompMonoEvalFiltered = 0; - @DataPoint(description = "CompMonoEvalMono", format = "%d") int CompMonoEvalMono = 0; - @DataPoint(description = "CompMonoEvalPoly", format = "%d") int CompMonoEvalPoly = 0; + @DataPoint(description = "CompMonoEvalNoCall", format = "%d") public int CompMonoEvalNoCall = 0; + @DataPoint(description = "CompMonoEvalFiltered", format = "%d") public int CompMonoEvalFiltered = 0; + @DataPoint(description = "CompMonoEvalMono", format = "%d") public int CompMonoEvalMono = 0; + @DataPoint(description = "CompMonoEvalPoly", format = "%d") public int CompMonoEvalPoly = 0; - @DataPoint(description = "CompPolyEvalNoCall", format = "%d") int CompPolyEvalNoCall = 0; - @DataPoint(description = "CompPolyEvalFiltered", format = "%d") int CompPolyEvalFiltered = 0; - @DataPoint(description = "CompPolyEvalMono", format = "%d") int CompPolyEvalMono = 0; - @DataPoint(description = "CompPolyEvalPoly", format = "%d") int CompPolyEvalPoly = 0; + @DataPoint(description = "CompPolyEvalNoCall", format = "%d") public int CompPolyEvalNoCall = 0; + @DataPoint(description = "CompPolyEvalFiltered", format = "%d") public int CompPolyEvalFiltered = 0; + @DataPoint(description = "CompPolyEvalMono", format = "%d") public int CompPolyEvalMono = 0; + @DataPoint(description = "CompPolyEvalPoly", format = "%d") public int CompPolyEvalPoly = 0; - @DataPoint(description = "CompFiltered", format = "%d") int CompFiltered = 0; - @DataPoint(description = "Eval and comp have different alleles", format = "%d") int nDifferentAlleleSites = 0; + @DataPoint(description = "CompFiltered", format = "%d") public int CompFiltered = 0; + @DataPoint(description = "Eval and comp have different alleles", format = "%d") public int nDifferentAlleleSites = 0; private static final boolean TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED = true; private static final boolean REQUIRE_IDENTICAL_ALLELES = false; @@ -57,7 +57,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { final int[][] counts = new int[SiteStatus.values().length][SiteStatus.values().length]; @Override public int getComparisonOrder() { return 2; } - @Override public boolean enabled() { return true; } @Override public void finalizeEvaluation() { @@ -97,7 +96,7 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { } @Override - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( comp != null ) { // we only need to consider sites in comp if ( REQUIRE_IDENTICAL_ALLELES && (eval != null && haveDifferentAltAlleles(eval, comp))) nDifferentAlleleSites++; @@ -107,8 +106,6 @@ public class ValidationReport extends VariantEvaluator implements StandardEval { counts[compStatus.ordinal()][evalStatus.ordinal()]++; } } - - return null; // we don't capture any interesting sites } // diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java index 35a100bd9..039b155da 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java @@ -22,8 +22,6 @@ public abstract class VariantEvaluator implements Comparable { return walker; } - public abstract boolean enabled(); - // Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2 public abstract int getComparisonOrder(); @@ -31,12 +29,10 @@ public abstract class VariantEvaluator implements Comparable { // No longer available. The processed bp is kept in VEW itself for performance reasons // public void update0(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return null; + public void update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - return null; + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { } public void finalizeEvaluation() {} @@ -63,39 +59,6 @@ public abstract class VariantEvaluator implements Comparable { return eval.getAttributeAsBoolean(VariantEvalWalker.IS_SINGLETON_KEY, false); } - /** - * Convenience function that formats the novelty rate as a %.2f string - * - * @param known number of variants from all that are known - * @param all number of all variants - * @return a String novelty rate, or NA if all == 0 - */ - protected static String formattedNoveltyRate(final int known, final int all) { - return formattedPercent(all - known, all); - } - - /** - * Convenience function that formats the novelty rate as a %.2f string - * - * @param x number of objects part of total that meet some criteria - * @param total count of all objects, including x - * @return a String percent rate, or NA if total == 0 - */ - protected static String formattedPercent(final int x, final int total) { - return total == 0 ? "NA" : String.format("%.2f", x / (1.0*total)); - } - - /** - * Convenience function that formats a ratio as a %.2f string - * - * @param num number of observations in the numerator - * @param denom number of observations in the denumerator - * @return a String formatted ratio, or NA if all == 0 - */ - protected static String formattedRatio(final int num, final int denom) { - return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); - } - public String getSimpleName() { return simpleName; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java index 8417faf5f..347ca56b8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java @@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.VariantContext; @@ -44,207 +43,207 @@ import java.util.HashMap; * @since Apr 6, 2010 */ -@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score") -public class VariantQualityScore extends VariantEvaluator { +//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score") +@Deprecated +public class VariantQualityScore { + // TODO - this should really be a stratification - // a mapping from quality score histogram bin to Ti/Tv ratio - @DataPoint(description = "the Ti/Tv ratio broken out by variant quality") - TiTvStats titvStats = null; - - @DataPoint(description = "average variant quality for each allele count") - AlleleCountStats alleleCountStats = null; - - static class TiTvStats extends TableType { - final static int NUM_BINS = 20; - final HashMap> qualByIsTransition = new HashMap>(); // A hashMap holds all the qualities until we are able to bin them appropriately - final long transitionByQuality[] = new long[NUM_BINS]; - final long transversionByQuality[] = new long[NUM_BINS]; - final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out - - public Object[] getRowKeys() { - return new String[]{"sample"}; - } - - public Object[] getColumnKeys() { - final String columnKeys[] = new String[NUM_BINS]; - for( int iii = 0; iii < NUM_BINS; iii++ ) { - columnKeys[iii] = "titvBin" + iii; - } - return columnKeys; - } - - public String getCell(int x, int y) { - return String.valueOf(titvByQuality[y]); - } - - public String toString() { - StringBuffer returnString = new StringBuffer(); - // output the ti/tv array - returnString.append("titvByQuality: "); - for( int iii = 0; iii < NUM_BINS; iii++ ) { - returnString.append(titvByQuality[iii]); - returnString.append(" "); - } - return returnString.toString(); - } - - public void incrValue( final double qual, final boolean isTransition ) { - final Integer qualKey = Math.round((float) qual); - final long numTransition = (isTransition ? 1L : 0L); - final long numTransversion = (isTransition ? 0L : 1L); - if( qualByIsTransition.containsKey(qualKey) ) { - Pair transitionPair = qualByIsTransition.get(qualKey); - transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion); - qualByIsTransition.put(qualKey, transitionPair); - } else { - qualByIsTransition.put(qualKey, new Pair(numTransition,numTransversion)); - } - } - - public void organizeTiTvTables() { - for( int iii = 0; iii < NUM_BINS; iii++ ) { - transitionByQuality[iii] = 0L; - transversionByQuality[iii] = 0L; - titvByQuality[iii] = 0.0; - } - - int maxQual = 0; - - // Calculate the maximum quality score in order to normalize and histogram - for( final Integer qual : qualByIsTransition.keySet() ) { - if( qual > maxQual ) { - maxQual = qual; - } - } - - final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); - - for( final Integer qual : qualByIsTransition.keySet() ) { - final int index = (int)Math.floor( ((double) qual) / binSize ); - if( index >= 0 ) { // BUGBUG: why is there overflow here? - Pair transitionPair = qualByIsTransition.get(qual); - transitionByQuality[index] += transitionPair.getFirst(); - transversionByQuality[index] += transitionPair.getSecond(); - } - } - - for( int iii = 0; iii < NUM_BINS; iii++ ) { - if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio - titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]); - } else { - titvByQuality[iii] = 0.0; - } - } - - } - } - - class AlleleCountStats extends TableType { - final HashMap> qualityListMap = new HashMap>(); - final HashMap qualityMap = new HashMap(); - - public Object[] getRowKeys() { - final int NUM_BINS = qualityListMap.keySet().size(); - final String rowKeys[] = new String[NUM_BINS]; - int iii = 0; - for( final Integer key : qualityListMap.keySet() ) { - rowKeys[iii] = "AC" + key; - iii++; - } - return rowKeys; - - } - - public Object[] getColumnKeys() { - return new String[]{"alleleCount","avgQual"}; - } - - public String getCell(int x, int y) { - int iii = 0; - for( final Integer key : qualityListMap.keySet() ) { - if(iii == x) { - if(y == 0) { return String.valueOf(key); } - else { return String.valueOf(qualityMap.get(key)); } - } - iii++; - } - return null; - } - - public String toString() { - String returnString = ""; - // output the quality map - returnString += "AlleleCountStats: "; - //for( int iii = 0; iii < NUM_BINS; iii++ ) { - // returnString += titvByQuality[iii] + " "; - //} - return returnString; - } - - public void incrValue( final double qual, final int alleleCount ) { - ArrayList list = qualityListMap.get(alleleCount); - if(list==null) { list = new ArrayList(); } - list.add(qual); - qualityListMap.put(alleleCount, list); - } - - public void organizeAlleleCountTables() { - for( final Integer key : qualityListMap.keySet() ) { - final ArrayList list = qualityListMap.get(key); - double meanQual = 0.0; - final double numQuals = (double)list.size(); - for( Double qual : list ) { - meanQual += qual / numQuals; - } - qualityMap.put(key, meanQual); - } - } - } - - //public VariantQualityScore(VariantEvalWalker parent) { - //super(parent); - //} - - public String getName() { - return "VariantQualityScore"; - } - - public int getComparisonOrder() { - return 1; // we only need to see each eval track - } - - public boolean enabled() { - return true; - } - - public String toString() { - return getName(); - } - - public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - final String interesting = null; - - if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) - if( titvStats == null ) { titvStats = new TiTvStats(); } - titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); - - if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); } - int alternateAlleleCount = 0; - for (final Allele a : eval.getAlternateAlleles()) { - alternateAlleleCount += eval.getCalledChrCount(a); - } - alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount); - } - - return interesting; // This module doesn't capture any interesting sites, so return null - } - - public void finalizeEvaluation() { - if( titvStats != null ) { - titvStats.organizeTiTvTables(); - } - if( alleleCountStats != null ) { - alleleCountStats.organizeAlleleCountTables(); - } - } +// public class VariantQualityScore extends VariantEvaluator { +// +// // a mapping from quality score histogram bin to Ti/Tv ratio +// @DataPoint(description = "the Ti/Tv ratio broken out by variant quality") +// TiTvStats titvStats = null; +// +// @DataPoint(description = "average variant quality for each allele count") +// AlleleCountStats alleleCountStats = null; +// +// static class TiTvStats extends TableType { +// final static int NUM_BINS = 20; +// final HashMap> qualByIsTransition = new HashMap>(); // A hashMap holds all the qualities until we are able to bin them appropriately +// final long transitionByQuality[] = new long[NUM_BINS]; +// final long transversionByQuality[] = new long[NUM_BINS]; +// final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out +// +// public Object[] getRowKeys() { +// return new String[]{"sample"}; +// } +// +// public Object[] getColumnKeys() { +// final String columnKeys[] = new String[NUM_BINS]; +// for( int iii = 0; iii < NUM_BINS; iii++ ) { +// columnKeys[iii] = "titvBin" + iii; +// } +// return columnKeys; +// } +// +// public String getCell(int x, int y) { +// return String.valueOf(titvByQuality[y]); +// } +// +// public String toString() { +// StringBuffer returnString = new StringBuffer(); +// // output the ti/tv array +// returnString.append("titvByQuality: "); +// for( int iii = 0; iii < NUM_BINS; iii++ ) { +// returnString.append(titvByQuality[iii]); +// returnString.append(" "); +// } +// return returnString.toString(); +// } +// +// public void incrValue( final double qual, final boolean isTransition ) { +// final Integer qualKey = Math.round((float) qual); +// final long numTransition = (isTransition ? 1L : 0L); +// final long numTransversion = (isTransition ? 0L : 1L); +// if( qualByIsTransition.containsKey(qualKey) ) { +// Pair transitionPair = qualByIsTransition.get(qualKey); +// transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion); +// qualByIsTransition.put(qualKey, transitionPair); +// } else { +// qualByIsTransition.put(qualKey, new Pair(numTransition,numTransversion)); +// } +// } +// +// public void organizeTiTvTables() { +// for( int iii = 0; iii < NUM_BINS; iii++ ) { +// transitionByQuality[iii] = 0L; +// transversionByQuality[iii] = 0L; +// titvByQuality[iii] = 0.0; +// } +// +// int maxQual = 0; +// +// // Calculate the maximum quality score in order to normalize and histogram +// for( final Integer qual : qualByIsTransition.keySet() ) { +// if( qual > maxQual ) { +// maxQual = qual; +// } +// } +// +// final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); +// +// for( final Integer qual : qualByIsTransition.keySet() ) { +// final int index = (int)Math.floor( ((double) qual) / binSize ); +// if( index >= 0 ) { // BUGBUG: why is there overflow here? +// Pair transitionPair = qualByIsTransition.get(qual); +// transitionByQuality[index] += transitionPair.getFirst(); +// transversionByQuality[index] += transitionPair.getSecond(); +// } +// } +// +// for( int iii = 0; iii < NUM_BINS; iii++ ) { +// if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio +// titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]); +// } else { +// titvByQuality[iii] = 0.0; +// } +// } +// +// } +// } +// +// class AlleleCountStats extends TableType { +// final HashMap> qualityListMap = new HashMap>(); +// final HashMap qualityMap = new HashMap(); +// +// public Object[] getRowKeys() { +// final int NUM_BINS = qualityListMap.keySet().size(); +// final String rowKeys[] = new String[NUM_BINS]; +// int iii = 0; +// for( final Integer key : qualityListMap.keySet() ) { +// rowKeys[iii] = "AC" + key; +// iii++; +// } +// return rowKeys; +// +// } +// +// public Object[] getColumnKeys() { +// return new String[]{"alleleCount","avgQual"}; +// } +// +// public String getCell(int x, int y) { +// int iii = 0; +// for( final Integer key : qualityListMap.keySet() ) { +// if(iii == x) { +// if(y == 0) { return String.valueOf(key); } +// else { return String.valueOf(qualityMap.get(key)); } +// } +// iii++; +// } +// return null; +// } +// +// public String toString() { +// String returnString = ""; +// // output the quality map +// returnString += "AlleleCountStats: "; +// //for( int iii = 0; iii < NUM_BINS; iii++ ) { +// // returnString += titvByQuality[iii] + " "; +// //} +// return returnString; +// } +// +// public void incrValue( final double qual, final int alleleCount ) { +// ArrayList list = qualityListMap.get(alleleCount); +// if(list==null) { list = new ArrayList(); } +// list.add(qual); +// qualityListMap.put(alleleCount, list); +// } +// +// public void organizeAlleleCountTables() { +// for( final Integer key : qualityListMap.keySet() ) { +// final ArrayList list = qualityListMap.get(key); +// double meanQual = 0.0; +// final double numQuals = (double)list.size(); +// for( Double qual : list ) { +// meanQual += qual / numQuals; +// } +// qualityMap.put(key, meanQual); +// } +// } +// } +// +// //public VariantQualityScore(VariantEvalWalker parent) { +// //super(parent); +// //} +// +// public String getName() { +// return "VariantQualityScore"; +// } +// +// public int getComparisonOrder() { +// return 1; // we only need to see each eval track +// } +// +// public String toString() { +// return getName(); +// } +// +// public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { +// final String interesting = null; +// +// if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites) +// if( titvStats == null ) { titvStats = new TiTvStats(); } +// titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval)); +// +// if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); } +// int alternateAlleleCount = 0; +// for (final Allele a : eval.getAlternateAlleles()) { +// alternateAlleleCount += eval.getCalledChrCount(a); +// } +// alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount); +// } +// +// return interesting; // This module doesn't capture any interesting sites, so return null +// } +// +// public void finalizeEvaluation() { +// if( titvStats != null ) { +// titvStats.organizeTiTvTables(); +// } +// if( alleleCountStats != null ) { +// alleleCountStats.organizeAlleleCountTables(); +// } +// } } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java index 64161ac34..7b11704c7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.interval.IntervalUtils; @@ -169,8 +170,6 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { } } - @Override public boolean enabled() { return true; } - public int getComparisonOrder() { return 2; // we only need to see each eval track } @@ -207,8 +206,8 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { return false; } - public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - if ( eval == null || eval.isMonomorphicInSamples() ) return null; + public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( eval == null || eval.isMonomorphicInSamples() ) return; final Type type = getType(eval); @@ -243,14 +242,12 @@ public class VariantSummary extends VariantEvaluator implements StandardEval { depthPerSample.inc(type, g.getSampleName()); } } - - return null; // we don't capture any interesting sites } private String noveltyRate(Type type) { final int all = allVariantCounts.all(type); final int known = knownVariantCounts.all(type); - return formattedNoveltyRate(known, all); + return Utils.formattedNoveltyRate(known, all); } public void finalizeEvaluation() { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/GenotypePhasingEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/GenotypePhasingEvaluator.java new file mode 100755 index 000000000..500ab8e65 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/GenotypePhasingEvaluator.java @@ -0,0 +1,361 @@ +//package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.genotypePhasingEvaluator; +// +//import org.apache.log4j.Logger; +//import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +//import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +//import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +//import org.broadinstitute.sting.gatk.walkers.phasing.AllelePair; +//import org.broadinstitute.sting.gatk.walkers.phasing.ReadBackedPhasingWalker; +//import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker; +//import org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.VariantEvaluator; +//import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; +//import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint; +//import org.broadinstitute.sting.gatk.walkers.varianteval.util.EvaluationContext; +//import org.broadinstitute.sting.utils.GenomeLoc; +//import org.broadinstitute.sting.utils.MathUtils; +//import org.broadinstitute.sting.utils.variantcontext.Genotype; +//import org.broadinstitute.sting.utils.variantcontext.GenotypesContext; +//import org.broadinstitute.sting.utils.variantcontext.VariantContext; +// +//import java.util.HashMap; +//import java.util.HashSet; +//import java.util.Set; +// +///* +// * Copyright (c) 2010 The Broad Institute +// * +// * Permission is hereby granted, free of charge, to any person +// * obtaining a copy of this software and associated documentation +// * files (the "Software"), to deal in the Software without +// * restriction, including without limitation the rights to use, +// * copy, modify, merge, publish, distribute, sublicense, and/or sell +// * copies of the Software, and to permit persons to whom the +// * Software is furnished to do so, subject to the following +// * conditions: +// * +// * The above copyright notice and this permission notice shall be +// * included in all copies or substantial portions of the Software. +// * +// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +// * THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// */ +// +//@Analysis(name = "Genotype Phasing Evaluation", description = "Evaluates the phasing of genotypes in different tracks") +//public class GenotypePhasingEvaluator extends VariantEvaluator { +// protected final static Logger logger = Logger.getLogger(GenotypePhasingEvaluator.class); +// +// // a mapping from sample to stats +// @DataPoint(description = "the phasing statistics for each sample") +// public SamplePhasingStatistics samplePhasingStatistics = null; +// +// SamplePreviousGenotypes samplePrevGenotypes = null; +// +// double minPhaseQuality = 10.0; +// +// public void initialize(VariantEvalWalker walker) { +// super.initialize(walker); +// this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality()); +// this.samplePrevGenotypes = new SamplePreviousGenotypes(); +// } +// +// public String getName() { +// return "GenotypePhasingEvaluator"; +// } +// +// public int getComparisonOrder() { +// return 2; // we only need to see pairs of (comp, eval) +// } +// +// public String toString() { +// return getName() + ":
"; +// } +// +// public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { +// update2(eval,comp,tracker,ref,context,null); +// } +// +// public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, EvaluationContext group) { +// //public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) { +// Reasons interesting = new Reasons(); +// if (ref == null) +// return interesting.toString(); +// GenomeLoc curLocus = ref.getLocus(); +// +// logger.debug("update2() locus: " + curLocus); +// logger.debug("comp = " + comp + " eval = " + eval); +// +// Set allSamples = new HashSet(); +// +// GenotypesContext compSampGenotypes = null; +// if (isRelevantToPhasing(comp)) { +// allSamples.addAll(comp.getSampleNames()); +// compSampGenotypes = comp.getGenotypes(); +// } +// +// GenotypesContext evalSampGenotypes = null; +// if (isRelevantToPhasing(eval)) { +// allSamples.addAll(eval.getSampleNames()); +// evalSampGenotypes = eval.getGenotypes(); +// } +// +// for (String samp : allSamples) { +// logger.debug("sample = " + samp); +// +// Genotype compSampGt = null; +// if (compSampGenotypes != null) +// compSampGt = compSampGenotypes.get(samp); +// +// Genotype evalSampGt = null; +// if (evalSampGenotypes != null) +// evalSampGt = evalSampGenotypes.get(samp); +// +// if (compSampGt == null || evalSampGt == null || compSampGt.isNoCall() || evalSampGt.isNoCall()) { // Since either comp or eval (or both) are missing the site, the best we can do is hope to preserve phase [if the non-missing one preserves phase] +// // Having an unphased site breaks the phasing for the sample [does NOT permit "transitive phasing"] - hence, must reset phasing knowledge for both comp and eval [put a null CompEvalGenotypes]: +// if (isNonNullButUnphased(compSampGt) || isNonNullButUnphased(evalSampGt)) +// samplePrevGenotypes.put(samp, null); +// } +// else { // Both comp and eval have a non-null Genotype at this site: +// AllelePair compAllelePair = new AllelePair(compSampGt); +// AllelePair evalAllelePair = new AllelePair(evalSampGt); +// +// boolean breakPhasing = false; +// if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom()) +// breakPhasing = true; // since they are not both het or both hom +// else { // both are het, or both are hom: +// boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compAllelePair, evalAllelePair) && bottomMatchesBottom(compAllelePair, evalAllelePair)); +// boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compAllelePair, evalAllelePair) && bottomMatchesTop(compAllelePair, evalAllelePair)); +// if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop) +// breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample +// } +// +// if (breakPhasing) { +// samplePrevGenotypes.put(samp, null); // nothing to do for this site, AND must remove any history for the future +// } +// else if (compSampGt.isHet() && evalSampGt.isHet()) { +// /* comp and eval have the HET same Genotype at this site: +// [Note that if both are hom, then nothing is done here, but the het history IS preserved]. +// */ +// CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp); +// if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome +// prevCompAndEval = null; +// +// // Replace the previous hets with the current hets: +// samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt); +// +// if (prevCompAndEval != null) { +// GenomeLoc prevLocus = prevCompAndEval.getLocus(); +// logger.debug("Potentially phaseable het locus: " + curLocus + " [relative to previous het locus: " + prevLocus + "]"); +// PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp); +// +// boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt); +// boolean evalSampIsPhased = genotypesArePhasedAboveThreshold(evalSampGt); +// if (compSampIsPhased || evalSampIsPhased) { +// if (!evalSampIsPhased) { +// ps.onlyCompPhased++; +// //interesting.addReason("ONLY_COMP", samp, group, prevLocus, ""); +// } +// else if (!compSampIsPhased) { +// ps.onlyEvalPhased++; +// //interesting.addReason("ONLY_EVAL", samp, group, prevLocus, ""); +// } +// else { // both comp and eval are phased: +// AllelePair prevCompAllelePair = new AllelePair(prevCompAndEval.getCompGenotpye()); +// AllelePair prevEvalAllelePair = new AllelePair(prevCompAndEval.getEvalGenotype()); +// +// // Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample: +// boolean topsMatch = (topMatchesTop(prevCompAllelePair, prevEvalAllelePair) && topMatchesTop(compAllelePair, evalAllelePair)); +// boolean topMatchesBottom = (topMatchesBottom(prevCompAllelePair, prevEvalAllelePair) && topMatchesBottom(compAllelePair, evalAllelePair)); +// +// if (topsMatch || topMatchesBottom) { +// ps.phasesAgree++; +// +// Double compPQ = getPQ(compSampGt); +// Double evalPQ = getPQ(evalSampGt); +// if (compPQ != null && evalPQ != null && MathUtils.compareDoubles(compPQ, evalPQ) != 0) { +// //interesting.addReason("PQ_CHANGE", samp, group, prevLocus, compPQ + " -> " + evalPQ); +// } +// } +// else { +// ps.phasesDisagree++; +// logger.debug("SWITCHED locus: " + curLocus); +// //interesting.addReason("SWITCH", samp, group, prevLocus, toString(prevCompAllelePair, compAllelePair) + " -> " + toString(prevEvalAllelePair, evalAllelePair)); +// } +// } +// } +// else { +// ps.neitherPhased++; +// } +// } +// } +// } +// } +// logger.debug("\n" + samplePhasingStatistics + "\n"); +// +// return interesting.toString(); +// } +// +// public static boolean isRelevantToPhasing(VariantContext vc) { +// return (vc != null && !vc.isFiltered()); +// } +// +// public boolean isNonNullButUnphased(Genotype gt) { +// return (gt != null && !gt.isNoCall() && !genotypesArePhasedAboveThreshold(gt)); +// } +// +// public boolean genotypesArePhasedAboveThreshold(Genotype gt) { +// if (gt.isHom()) // Can always consider a hom site to be phased to its predecessor, since its successor will only be phased to it if it's hom or "truly" phased +// return true; +// +// if (!gt.isPhased()) +// return false; +// +// Double pq = getPQ(gt); +// return (pq == null || pq >= minPhaseQuality); +// } +// +// public static Double getPQ(Genotype gt) { +// Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1); +// return d == -1 ? null : d; +// } +// +// public static boolean topMatchesTop(AllelePair b1, AllelePair b2) { +// return b1.getTopAllele().equals(b2.getTopAllele()); +// } +// +// public static boolean topMatchesBottom(AllelePair b1, AllelePair b2) { +// return b1.getTopAllele().equals(b2.getBottomAllele()); +// } +// +// public static boolean bottomMatchesTop(AllelePair b1, AllelePair b2) { +// return topMatchesBottom(b2, b1); +// } +// +// public static boolean bottomMatchesBottom(AllelePair b1, AllelePair b2) { +// return b1.getBottomAllele().equals(b2.getBottomAllele()); +// } +// +// public String toString(AllelePair prev, AllelePair cur) { +// return prev.getTopAllele().getBaseString() + "+" + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "+" + cur.getBottomAllele().getBaseString(); +// } +// +// public void finalizeEvaluation() { +// } +// +// private static class Reasons { +// private StringBuilder sb; +// +// public Reasons() { +// sb = new StringBuilder(); +// } +// +//// public void addReason(String category, String sample, VariantEvalWalker.EvaluationContext evalGroup, GenomeLoc prevLoc, String reason) { +//// sb.append(category + "(" + sample + ", previous: " + prevLoc + " [" + evalGroup.compTrackName + ", " + evalGroup.evalTrackName + "]): " + reason + ";"); +//// } +// +// public String toString() { +// if (sb.length() == 0) +// return null; +// +// return "reasons=" + sb.toString(); +// } +// } +//} +// +//class CompEvalGenotypes { +// private GenomeLoc loc; +// private Genotype compGt; +// private Genotype evalGt; +// +// public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) { +// this.loc = loc; +// this.compGt = compGt; +// this.evalGt = evalGt; +// } +// +// public GenomeLoc getLocus() { +// return loc; +// } +// +// public Genotype getCompGenotpye() { +// return compGt; +// } +// public Genotype getEvalGenotype() { +// return evalGt; +// } +//} +// +//class SamplePreviousGenotypes { +// private HashMap sampleGenotypes = null; +// +// public SamplePreviousGenotypes() { +// this.sampleGenotypes = new HashMap(); +// } +// +// public CompEvalGenotypes get(String sample) { +// return sampleGenotypes.get(sample); +// } +// +// public void put(String sample, CompEvalGenotypes compEvalGts) { +// sampleGenotypes.put(sample, compEvalGts); +// } +// +// public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) { +// sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt)); +// } +//} +// +//class PhaseStats { +// public int neitherPhased; +// public int onlyCompPhased; +// public int onlyEvalPhased; +// public int phasesAgree; +// public int phasesDisagree; +// +// public PhaseStats() { +// this.neitherPhased = 0; +// this.onlyCompPhased = 0; +// this.onlyEvalPhased = 0; +// this.phasesAgree = 0; +// this.phasesDisagree = 0; +// } +// +// public String toString() { +// StringBuilder sb = new StringBuilder(); +// sb.append("Neither phased: " + neitherPhased + "\tOnly Comp: " + onlyCompPhased + "\tOnly Eval: " + onlyEvalPhased + "\tSame phase: " + phasesAgree + "\tOpposite phase: " + phasesDisagree); +// return sb.toString(); +// } +// +// public static String[] getFieldNamesArray() { +// return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"}; +// } +// +// public Object getField(int index) { +// switch (index) { +// case (0): +// return (neitherPhased + onlyCompPhased + onlyEvalPhased + phasesAgree + phasesDisagree); +// case (1): +// return neitherPhased; +// case (2): +// return onlyCompPhased; +// case (3): +// return onlyEvalPhased; +// case (4): +// return (phasesAgree + phasesDisagree); +// case (5): +// return phasesAgree; +// case (6): +// return phasesDisagree; +// case (7): +// return ((phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / (phasesAgree + phasesDisagree)); +// default: +// return -1; +// } +// } +//} +// diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/SamplePhasingStatistics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/SamplePhasingStatistics.java new file mode 100644 index 000000000..6b81ce14c --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/genotypePhasingEvaluator/SamplePhasingStatistics.java @@ -0,0 +1,89 @@ +///* +// * Copyright (c) 2012, The Broad Institute +// * +// * Permission is hereby granted, free of charge, to any person +// * obtaining a copy of this software and associated documentation +// * files (the "Software"), to deal in the Software without +// * restriction, including without limitation the rights to use, +// * copy, modify, merge, publish, distribute, sublicense, and/or sell +// * copies of the Software, and to permit persons to whom the +// * Software is furnished to do so, subject to the following +// * conditions: +// * +// * The above copyright notice and this permission notice shall be +// * included in all copies or substantial portions of the Software. +// * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// * OTHER DEALINGS IN THE SOFTWARE. +// */ +// +//package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators.genotypePhasingEvaluator; +// +//import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType; +// +//import java.util.HashMap; +//import java.util.Map; +// +///** +// * a table of sample names to genotype phasing statistics +// */ +//class SamplePhasingStatistics extends TableType { +// private HashMap sampleStats = null; +// private double minPhaseQuality; +// +// public SamplePhasingStatistics(double minPhaseQuality) { +// this.sampleStats = new HashMap(); +// this.minPhaseQuality = minPhaseQuality; +// } +// +// public PhaseStats ensureSampleStats(String samp) { +// PhaseStats ps = sampleStats.get(samp); +// if (ps == null) { +// ps = new PhaseStats(); +// sampleStats.put(samp, ps); +// } +// return ps; +// } +// +// /** +// * @return one row per sample +// */ +// public String[] getRowKeys() { +// return sampleStats.keySet().toArray(new String[sampleStats.size()]); +// } +// +// /** +// * get the column keys +// * +// * @return a list of objects, in this case strings, that are the column names +// */ +// public String[] getColumnKeys() { +// return PhaseStats.getFieldNamesArray(); +// } +// +// public Object getCell(int x, int y) { +// String[] rowKeys = getRowKeys(); +// PhaseStats ps = sampleStats.get(rowKeys[x]); +// return ps.getField(y); +// } +// +// public String getName() { +// return "Sample Phasing Statistics (for PQ >= " + minPhaseQuality + ")"; +// } +// +// public String toString() { +// StringBuilder sb = new StringBuilder(); +// for (Map.Entry sampPhaseStatsEnt : sampleStats.entrySet()) { +// String sample = sampPhaseStatsEnt.getKey(); +// PhaseStats ps = sampPhaseStatsEnt.getValue(); +// +// sb.append(sample + "\t" + ps); +// } +// return sb.toString(); +// } +//} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 319ab96b2..072962436 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -41,15 +41,15 @@ public class AlleleCount extends VariantStratifier { public List getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) { if (eval != null) { - int AC = -1; + int AC = 0; // by default, the site is considered monomorphic + if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) { AC = eval.getAttributeAsInt("AC", 0); } else if ( eval.isVariant() ) { for (Allele allele : eval.getAlternateAlleles()) AC = Math.max(AC, eval.getCalledChrCount(allele)); - } else - // by default, the site is considered monomorphic - AC = 0; + } + return Collections.singletonList((Object) AC); } else { return Collections.emptyList(); @@ -60,4 +60,9 @@ public class AlleleCount extends VariantStratifier { public Set> getIncompatibleEvaluators() { return new HashSet>(Arrays.asList(VariantSummary.class)); } + + @Override + public String getFormat() { + return "%d"; + } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java index 9c70ef00f..01b10c502 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java @@ -16,6 +16,7 @@ import java.util.List; */ public class IndelSize extends VariantStratifier { static final int MAX_INDEL_SIZE = 100; + @Override public void initialize() { for( int a=-MAX_INDEL_SIZE; a <=MAX_INDEL_SIZE; a++ ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java index ec902704e..07ba424a2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java @@ -64,7 +64,9 @@ public abstract class VariantStratifier implements Comparable public final String getName() { return name; } - + + public String getFormat() { return "%s"; } + public final ArrayList getAllStates() { return states; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java index 2b37ce210..7f66aad39 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java @@ -7,4 +7,5 @@ import java.lang.annotation.RetentionPolicy; public @interface Analysis { String name() default ""; // its description, required String description(); // its description, required + boolean molten() default false; // if true we'll look for a @Molten map } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java index 793bafdd0..d4e9afd64 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java @@ -46,7 +46,10 @@ public class AnalysisModuleScanner { // what we extracted from the class private Map datums = new LinkedHashMap(); // the data we've discovered private Analysis analysis; // the analysis annotation - + + private Field moltenField = null; + private Molten moltenAnnotation = null; + // private storage of the class type private final Class cls; @@ -85,14 +88,38 @@ public class AnalysisModuleScanner { private void scanFields() { // get the fields from the class, and extract for ( Class superCls = cls; superCls != null; superCls=superCls.getSuperclass() ) { - for (Field f : superCls.getDeclaredFields()) + for (Field f : superCls.getDeclaredFields()) { for (Annotation annotation : getAnnotations(f)) { if (annotation.annotationType().equals(DataPoint.class)) datums.put(f,(DataPoint) annotation); + if ( annotation.annotationType().equals(Molten.class)) { + if ( hasMoltenField() ) + throw new ReviewedStingException("Analysis " + analysis.name() + " has multiple @Molten fields, which is forbidden"); + moltenField = f; + moltenAnnotation = (Molten)annotation; + } } + } + } + + if ( hasMoltenField() ) { + if ( datums.size() > 0 ) + throw new ReviewedStingException("Analysis " + analysis.name() + " has an @Molten field as well as @DataPoint fields, which is forbidden"); } } - + + public Field getMoltenField() { + return moltenField; + } + + public Molten getMoltenAnnotation() { + return moltenAnnotation; + } + + public boolean hasMoltenField() { + return getMoltenField() != null; + } + private Annotation[] getAnnotations(final Field field) { final String fieldName = field.toString(); Annotation[] annotations = annotationCache.get(fieldName); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java index 5679299e2..9363bbd79 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java @@ -25,9 +25,9 @@ public final class EvaluationContext { eval.initialize(walker); evaluationInstances.add(eval); } catch (InstantiationException e) { - throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'"); + throw new ReviewedStingException("Unable to instantiate eval module '" + c.getSimpleName() + "'", e); } catch (IllegalAccessException e) { - throw new StingException("Illegal access error when trying to instantiate eval module '" + c.getSimpleName() + "'"); + throw new ReviewedStingException("Illegal access error when trying to instantiate eval module '" + c.getSimpleName() + "'", e); } } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java deleted file mode 100644 index a6c86d3da..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/IndelHistogram.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2012, The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.gatk.walkers.varianteval.util; - -import org.broadinstitute.sting.utils.variantcontext.Allele; - -import java.util.*; - -/** - * Simple utility for histogramming indel lengths - * - * Based on code from chartl - * - * @author Mark DePristo - * @since 3/21/12 - */ -public class IndelHistogram extends TableType { - private final boolean asFrequencies; - int nIndels = 0, nTooBigDeletions = 0, nTooBigInsertions = 0; - private final Integer[] rowKeys; - - private Map frequencies = null; - private final Map counts = new HashMap(); - - public IndelHistogram(int maxSize, boolean asFrequencies) { - this.asFrequencies = asFrequencies; - initializeCounts(maxSize); - this.rowKeys = new ArrayList(counts.keySet()).toArray(new Integer[maxSize]); - } - - private void initializeCounts(int size) { - for ( int i = -size; i <= size; i++ ) { - if ( i != 0 ) counts.put(i, 0); - } - } - - @Override - public String getRowName() { - return "Length"; - } - - @Override - public Object[] getColumnKeys() { - return new String[]{"Count"}; - } - - @Override - public Object[] getRowKeys() { - return rowKeys; - } - - @Override - public Object getCell(int row, int col) { - final int key = (Integer)getRowKeys()[row]; - if ( asFrequencies ) { - if ( frequencies == null ) { - frequencies = new HashMap(); - for ( final int len : counts.keySet() ) { - final double value = nIndels == 0 ? 0.0 : counts.get(len) / (1.0 * nIndels); - frequencies.put(len, value); - } - } - return frequencies.get(key); - } - return counts.get(key); - } - - public int getnTooBigDeletions() { - return nTooBigDeletions; - } - - public int getnTooBigInsertions() { - return nTooBigInsertions; - } - - public void update(final Allele ref, final Allele alt) { - final int alleleSize = alt.length() - ref.length(); - update(alleleSize); - } - - public void update(int len) { - if ( counts.containsKey(len) ) { - nIndels++; - counts.put(len, counts.get(len) + 1); - } else if ( len < 0 ) { - nTooBigDeletions++; - } else { - nTooBigInsertions++; - } - } -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Molten.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Molten.java new file mode 100755 index 000000000..4d9b74912 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Molten.java @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2012, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.varianteval.util; + +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; + +/** + * @Molten for @Analysis modules. + * + * If you are flagged as a molten analysis, then there must be one and + * only one annotation in that evaluation module: @Molten which + * must have time Map. This data set will then + * be represented in the VE output as: + * + * variable value + * key1 value1 + * key2 value1 + * ... + * keyN valueN + * + * in the output table + */ +@Retention(RetentionPolicy.RUNTIME) +public @interface Molten { + String description() default ""; // the description, optional + String variableFormat() default ""; + String valueFormat() default ""; +} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java deleted file mode 100644 index 6ab7d1af3..000000000 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/TableType.java +++ /dev/null @@ -1,19 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.varianteval.util; - - -/** - * - * @author aaron - * - * Class TableType - * - * an interface for turning arbritary objects into tables - */ -public abstract class TableType { - public abstract Object[] getRowKeys(); - public abstract Object[] getColumnKeys(); - public abstract Object getCell(int x, int y); - public String getName() { return getClass().getSimpleName(); } - public String getRowName() { return "row"; } - -} diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index 965a18118..8a62bd032 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -180,56 +180,6 @@ public class VariantEvalUtils { return evals; } - /** - * Initialize the output report - * - * @param stratificationObjects the stratifications to use - * @param evaluationObjects the evaluations to use - * @return an initialized report object - */ - public GATKReport initializeGATKReport(Collection stratificationObjects, Set> evaluationObjects) { - final GATKReport report = new GATKReport(); - - for (Class ve : evaluationObjects) { - final String tableName = ve.getSimpleName(); - final String tableDesc = ve.getAnnotation(Analysis.class).description(); - - report.addTable(tableName, tableDesc); - - final GATKReportTable table = report.getTable(tableName); - table.addPrimaryKey("entry", false); - table.addColumn(tableName, tableName); - - for (final VariantStratifier vs : stratificationObjects) { - final String columnName = vs.getName(); - table.addColumn(columnName, "unknown"); - } - - try { - final VariantEvaluator vei = ve.newInstance(); - vei.initialize(variantEvalWalker); - - AnalysisModuleScanner scanner = new AnalysisModuleScanner(vei); - Map datamap = scanner.getData(); - - for (Field field : datamap.keySet()) { - field.setAccessible(true); - - if (!(field.get(vei) instanceof TableType)) { - final String format = datamap.get(field).format(); - table.addColumn(field.getName(), true, format); - } - } - } catch (InstantiationException e) { - throw new StingException("InstantiationException: " + e); - } catch (IllegalAccessException e) { - throw new StingException("IllegalAccessException: " + e); - } - } - - return report; - } - /** * Subset a VariantContext to a single sample * diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java index f91066b0c..4817966fe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java @@ -743,4 +743,37 @@ public class Utils { return nStates; } } + + /** + * Convenience function that formats the novelty rate as a %.2f string + * + * @param known number of variants from all that are known + * @param all number of all variants + * @return a String novelty rate, or NA if all == 0 + */ + public static String formattedNoveltyRate(final int known, final int all) { + return formattedPercent(all - known, all); + } + + /** + * Convenience function that formats the novelty rate as a %.2f string + * + * @param x number of objects part of total that meet some criteria + * @param total count of all objects, including x + * @return a String percent rate, or NA if total == 0 + */ + public static String formattedPercent(final long x, final long total) { + return total == 0 ? "NA" : String.format("%.2f", (100.0*x) / total); + } + + /** + * Convenience function that formats a ratio as a %.2f string + * + * @param num number of observations in the numerator + * @param denom number of observations in the denumerator + * @return a String formatted ratio, or NA if all == 0 + */ + public static String formattedRatio(final long num, final long denom) { + return denom == 0 ? "NA" : String.format("%.2f", num / (1.0 * denom)); + } } diff --git a/public/java/test/org/broadinstitute/sting/BaseTest.java b/public/java/test/org/broadinstitute/sting/BaseTest.java index a415481fd..c49adf805 100755 --- a/public/java/test/org/broadinstitute/sting/BaseTest.java +++ b/public/java/test/org/broadinstitute/sting/BaseTest.java @@ -84,7 +84,7 @@ public abstract class BaseTest { public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list"; public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list"; - public static final boolean REQUIRE_NETWORK_CONNECTION = false; + public static final boolean REQUIRE_NETWORK_CONNECTION = true; public static final String networkTempDir; public static final File networkTempDirFile; diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 14bf24b29..a796f2214 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -55,7 +55,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("add8b2213c091a41f5d7a2c8dd68c03a") + Arrays.asList("e87932ffa1d310cecee49e7829a0f056") ); executeTest("testFunctionClassWithSnpeff", spec); } @@ -75,7 +75,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("621a712deb01e7fc7e5a13d3627b11ba") + Arrays.asList("8279ee42a6785f9c2b3dda8d82674e00") ); executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec); } @@ -95,7 +95,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("7a726ecbedd722fa7cd4de3e023b7a82") + Arrays.asList("0bac64d5615f901d3005247c6d016549") ); executeTest("testFundamentalsCountVariantsSNPsandIndels", spec); } @@ -116,7 +116,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("95bb4a4267a8f29dd7a8169561499f20") + Arrays.asList("b84d8b4429116c887ceb5489c8782f00") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec); } @@ -138,7 +138,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9b51029083495935823fb0447a2857b9") + Arrays.asList("e4f37642d9113a65fbe8bc1d091c206f") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec); } @@ -159,7 +159,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("318b5fbbc61e2fc11d49369359812edd") + Arrays.asList("c5412ee824b4815dc8eea62a4c5462ef") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithCpG", spec); } @@ -180,7 +180,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("74c02df2ef69dda231a2aec2a948747b") + Arrays.asList("1d42e97643afd3e7f5f8c9f6416c5883") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithFunctionalClass", spec); } @@ -201,7 +201,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("2d97b1fe15e532e89803ba7ba347ff20") + Arrays.asList("8c2ba70bed2f0fdb0ca371f7038819ef") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithDegeneracy", spec); } @@ -222,7 +222,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("474cbc231ddbc4ba299ffe61a17405b6") + Arrays.asList("c912b4b0bf1925d042119b301c183b93") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithSample", spec); } @@ -245,7 +245,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("2cc9bc4bbe8b4edb6dc27642ec41f66e") + Arrays.asList("dea3d2cc53265ff8ed2f0030c40f3747") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithJexlExpression", spec); } @@ -270,7 +270,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("00c94cf3e14bc2855d39bbefa27f9bb2") + Arrays.asList("dede22b15936c38e29b850c805c7b706") ); executeTest("testFundamentalsCountVariantsSNPsandIndelsWithMultipleJexlExpressions", spec); } @@ -289,7 +289,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("a0c0d4805db1245aa30a306aa506096f") + Arrays.asList("9a94c4c613bf69feb3d9579c353baaf2") ); executeTest("testFundamentalsCountVariantsNoCompRod", spec); } @@ -312,7 +312,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG --eval:VCF3 " + validationDataLocation + vcfFile + " --comp:VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", 1, - Arrays.asList("70da6a0f91a9f1052d68fc360cc99aed")); + Arrays.asList("9bbc762f459023af0480774eb2986af4")); executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); } @@ -330,7 +330,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("2282523336c24d434d1cc0eb1697b4f9")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("8a7f23063fd7f3a292e5da36778e109e")); executeTestParallel("testCompVsEvalAC",spec); } @@ -348,7 +348,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompOverlap() { String extraArgs = "-T VariantEval -R " + b37KGReference + " -L " + validationDataLocation + "VariantEval/pacbio.hg19.intervals --comp:comphapmap " + comparisonDataLocation + "Validated/HapMap/3.3/genotypes_r27_nr.b37_fwd.vcf --eval " + validationDataLocation + "VariantEval/pacbio.ts.recalibrated.vcf -noEV -EV CompOverlap -sn NA12878 -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("00241ce70476187a2f910606b9242697")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("98f9c2f5fef43dbda688d32360908615")); executeTestParallel("testCompOverlap",spec); } @@ -360,7 +360,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --dbsnp " + b37dbSNP132 + " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ec321fcc424fbad74a4a74e739173d03")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("70b0e5b154f3e59e06188e876bbf083f")); executeTestParallel("testEvalTrackWithoutGenotypes",spec); } @@ -372,7 +372,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval:evalBI " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bi.sites.vcf" + " --eval:evalBC " + validationDataLocation + "VariantEval/ALL.20100201.chr20.bc.sites.vcf" + " -noST -ST Novelty -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("ccaea6245086552cd63f828eabddfaf3")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("9fe68cc45d9afc5210ccfc8d555722fd")); executeTestParallel("testMultipleEvalTracksWithoutGenotypes",spec); } @@ -389,13 +389,13 @@ public class VariantEvalIntegrationTest extends WalkerTest { " -noST -noEV -ST Novelty -EV CompOverlap" + " -o %s"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("15f6a6ba4f7fed49c617589ce9fdcbc5")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("d0218c5435c8601f2355b7d183ab032f")); executeTestParallel("testMultipleCompTracks",spec); } @Test public void testPerSampleAndSubsettedSampleHaveSameResults1() { - String md5 = "bcf55537db0762b8fd68f7f02439c475"; + String md5 = "b5cd5c286d459b8edd4ca54320e560a3"; WalkerTestSpec spec = new WalkerTestSpec( buildCommandLine( @@ -450,7 +450,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9954c769ef37c47d3b61481ab0807be0") + Arrays.asList("1198bfea6183bd43219071a84c79a386") ); executeTest("testAlleleCountStrat", spec); } @@ -471,7 +471,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("c0d69ce7647a575d166d8bab5aa16299") + Arrays.asList("6decba040051daafad4ecad5a411e1e1") ); executeTest("testIntervalStrat", spec); } @@ -488,7 +488,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9a8ffb506118c1bde6f7bfadc4fb6f10") + Arrays.asList("c428a76df5039e1e035e3ce45e819d4f") ); executeTest("testModernVCFWithLargeIndels", spec); }