Fixed output report to properly handle evaluation modules with TableType objects. Promoted CpG to a standard stratification. Demoted Filter to a non-standard stratification. Now, if the filter stratification is not specified, VariantEval only evaluates PASSing sites.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5084 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2dcce58279
commit
22e599ec76
|
|
@ -132,7 +132,7 @@ public class GATKReportTable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a primary key column. This becomes the unique identifier for every column in the table, and will always be printed as the first column.
|
* Add a primary key column. This becomes the unique identifier for every column in the table.
|
||||||
*
|
*
|
||||||
* @param primaryKeyName the name of the primary key column
|
* @param primaryKeyName the name of the primary key column
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -79,6 +80,10 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
@Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)")
|
@Argument(fullName="doNotUseAllStandardModules", shortName="noEV", doc="Do not use the standard modules by default (instead, only those that are specified with the -E option)")
|
||||||
protected Boolean NO_STANDARD_MODULES = false;
|
protected Boolean NO_STANDARD_MODULES = false;
|
||||||
|
|
||||||
|
// Other arguments
|
||||||
|
@Argument(fullName="minPhaseQuality", shortName="mpq", doc="Minimum phasing quality", required=false)
|
||||||
|
protected double MIN_PHASE_QUALITY = 10.0;
|
||||||
|
|
||||||
// Variables
|
// Variables
|
||||||
private Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions = new TreeSet<VariantContextUtils.JexlVCMatchExp>();
|
private Set<VariantContextUtils.JexlVCMatchExp> jexlExpressions = new TreeSet<VariantContextUtils.JexlVCMatchExp>();
|
||||||
private Set<String> compNames = new TreeSet<String>();
|
private Set<String> compNames = new TreeSet<String>();
|
||||||
|
|
@ -258,16 +263,16 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
} else {
|
} else {
|
||||||
HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();
|
HashMap<StateKey, NewEvaluationContext> necs = new HashMap<StateKey, NewEvaluationContext>();
|
||||||
|
|
||||||
StateKey statekey = new StateKey();
|
StateKey stateKey = new StateKey();
|
||||||
for ( VariantStratifier vs : ec.keySet() ) {
|
for ( VariantStratifier vs : ec.keySet() ) {
|
||||||
String state = ec.get(vs);
|
String state = ec.get(vs);
|
||||||
|
|
||||||
statekey.put(vs.getClass().getSimpleName(), state);
|
stateKey.put(vs.getClass().getSimpleName(), state);
|
||||||
}
|
}
|
||||||
|
|
||||||
ec.addEvaluationClassList(evaluationObjects);
|
ec.addEvaluationClassList(this, stateKey, evaluationObjects);
|
||||||
|
|
||||||
necs.put(statekey, ec);
|
necs.put(stateKey, ec);
|
||||||
|
|
||||||
return necs;
|
return necs;
|
||||||
}
|
}
|
||||||
|
|
@ -301,13 +306,33 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
table.addColumn(columnName, "unknown");
|
table.addColumn(columnName, "unknown");
|
||||||
}
|
}
|
||||||
|
|
||||||
AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
|
try {
|
||||||
|
VariantEvaluator vei = ve.newInstance();
|
||||||
|
vei.initialize(this);
|
||||||
|
|
||||||
|
AnalysisModuleScanner scanner = new AnalysisModuleScanner(vei);
|
||||||
Map<Field, DataPoint> datamap = scanner.getData();
|
Map<Field, DataPoint> datamap = scanner.getData();
|
||||||
|
|
||||||
for (Field field : datamap.keySet()) {
|
for (Field field : datamap.keySet()) {
|
||||||
|
field.setAccessible(true);
|
||||||
|
|
||||||
|
if (field.get(vei) instanceof TableType) {
|
||||||
|
TableType t = (TableType) field.get(vei);
|
||||||
|
|
||||||
|
for ( Object o : t.getColumnKeys() ) {
|
||||||
|
String c = (String) o;
|
||||||
|
table.addColumn(c, 0.0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
table.addColumn(field.getName(), 0.0);
|
table.addColumn(field.getName(), 0.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch (InstantiationException e) {
|
||||||
|
throw new StingException("InstantiationException: " + e);
|
||||||
|
} catch (IllegalAccessException e) {
|
||||||
|
throw new StingException("IllegalAccessException: " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return report;
|
return report;
|
||||||
}
|
}
|
||||||
|
|
@ -385,6 +410,13 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
return allowableTypes;
|
return allowableTypes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Subset a VariantContext to a single sample
|
||||||
|
*
|
||||||
|
* @param vc the VariantContext object containing multiple samples
|
||||||
|
* @param sampleName the sample to pull out of the VariantContext
|
||||||
|
* @return a new VariantContext with just the requested sample
|
||||||
|
*/
|
||||||
private VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
|
private VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
|
||||||
ArrayList<String> sampleNames = new ArrayList<String>();
|
ArrayList<String> sampleNames = new ArrayList<String>();
|
||||||
sampleNames.add(sampleName);
|
sampleNames.add(sampleName);
|
||||||
|
|
@ -392,6 +424,13 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
return getSubsetOfVariantContext(vc, sampleNames);
|
return getSubsetOfVariantContext(vc, sampleNames);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Subset a VariantContext to a set of samples
|
||||||
|
*
|
||||||
|
* @param vc the VariantContext object containing multiple samples
|
||||||
|
* @param sampleNames the samples to pull out of the VariantContext
|
||||||
|
* @return a new VariantContext with just the requested samples
|
||||||
|
*/
|
||||||
private VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
|
private VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
|
||||||
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
|
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
|
||||||
|
|
||||||
|
|
@ -424,7 +463,7 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
* @param allowableTypes a set of allowable variation types
|
* @param allowableTypes a set of allowable variation types
|
||||||
* @return a mapping of track names to a list of VariantContext objects
|
* @return a mapping of track names to a list of VariantContext objects
|
||||||
*/
|
*/
|
||||||
private HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, Set<String> sampleNames, EnumSet<VariantContext.Type> allowableTypes) {
|
private HashMap<String, HashMap<String, VariantContext>> bindVariantContexts(RefMetaDataTracker tracker, ReferenceContext ref, Set<String> trackNames, Set<String> sampleNames, EnumSet<VariantContext.Type> allowableTypes, boolean byFilter) {
|
||||||
HashMap<String, HashMap<String, VariantContext>> bindings = new HashMap<String, HashMap<String, VariantContext>>();
|
HashMap<String, HashMap<String, VariantContext>> bindings = new HashMap<String, HashMap<String, VariantContext>>();
|
||||||
|
|
||||||
for ( String trackName : trackNames ) {
|
for ( String trackName : trackNames ) {
|
||||||
|
|
@ -445,15 +484,18 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
sampleNamesMinusAll.add(sampleName);
|
sampleNamesMinusAll.add(sampleName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (byFilter || !vcsub.isFiltered()) {
|
||||||
vcs.put(sampleName, vcsub);
|
vcs.put(sampleName, vcsub);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if ( trackName.contains("eval") ) {
|
if ( trackName.contains("eval") ) {
|
||||||
//logger.info(sampleNamesMinusAll);
|
|
||||||
vc = getSubsetOfVariantContext(vc, sampleNamesMinusAll);
|
vc = getSubsetOfVariantContext(vc, sampleNamesMinusAll);
|
||||||
//logger.info(vc);
|
|
||||||
|
if (byFilter || !vc.isFiltered()) {
|
||||||
vcs.put(ALL_SAMPLE_NAME, vc);
|
vcs.put(ALL_SAMPLE_NAME, vc);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bindings.put(trackName, vcs);
|
bindings.put(trackName, vcs);
|
||||||
}
|
}
|
||||||
|
|
@ -481,24 +523,25 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
|
|
||||||
EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, evalNames);
|
EnumSet<VariantContext.Type> allowableTypes = getAllowableVariationTypes(tracker, ref, evalNames);
|
||||||
|
|
||||||
HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allSamplesList, allowableTypes);
|
|
||||||
|
|
||||||
HashMap<String, HashMap<String, VariantContext>> evalBindings;
|
|
||||||
|
|
||||||
boolean perSampleIsEnabled = false;
|
boolean perSampleIsEnabled = false;
|
||||||
|
boolean byFilter = false;
|
||||||
for (VariantStratifier vs : stratificationObjects) {
|
for (VariantStratifier vs : stratificationObjects) {
|
||||||
if (vs.getClass().getSimpleName().equals("Sample")) {
|
if (vs.getClass().getSimpleName().equals("Sample")) {
|
||||||
perSampleIsEnabled = true;
|
perSampleIsEnabled = true;
|
||||||
break;
|
} else if (vs.getClass().getSimpleName().equals("Filter")) {
|
||||||
|
byFilter = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HashMap<String, HashMap<String, VariantContext>> evalBindings;
|
||||||
if (perSampleIsEnabled) {
|
if (perSampleIsEnabled) {
|
||||||
evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes);
|
evalBindings = bindVariantContexts(tracker, ref, evalNames, sampleNames, allowableTypes, byFilter);
|
||||||
} else {
|
} else {
|
||||||
evalBindings = bindVariantContexts(tracker, ref, evalNames, allSamplesList, allowableTypes);
|
evalBindings = bindVariantContexts(tracker, ref, evalNames, allSamplesList, allowableTypes, byFilter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HashMap<String, HashMap<String, VariantContext>> compBindings = bindVariantContexts(tracker, ref, compNames, allSamplesList, allowableTypes, byFilter);
|
||||||
|
|
||||||
vcs.putAll(compBindings);
|
vcs.putAll(compBindings);
|
||||||
vcs.putAll(evalBindings);
|
vcs.putAll(evalBindings);
|
||||||
|
|
||||||
|
|
@ -552,6 +595,22 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
return stateKeys;
|
return stateKeys;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the number of samples being used
|
||||||
|
* @return the number of samples
|
||||||
|
*/
|
||||||
|
public int getNumSamples() {
|
||||||
|
return sampleNames.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the minimum phasing quality to be used with the GenotypePhasingEvaluator module
|
||||||
|
* @return the minimum phasing quality
|
||||||
|
*/
|
||||||
|
public double getMinPhaseQuality() {
|
||||||
|
return MIN_PHASE_QUALITY;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Collect relevant information from each variant in the supplied VCFs
|
* Collect relevant information from each variant in the supplied VCFs
|
||||||
*/
|
*/
|
||||||
|
|
@ -638,11 +697,40 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
|
|
||||||
for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
|
for ( VariantEvaluator ve : nec.getEvaluationClassList().values() ) {
|
||||||
ve.finalizeEvaluation();
|
ve.finalizeEvaluation();
|
||||||
|
GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
|
||||||
|
|
||||||
AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
|
AnalysisModuleScanner scanner = new AnalysisModuleScanner(ve);
|
||||||
Map<Field, DataPoint> datamap = scanner.getData();
|
Map<Field, DataPoint> datamap = scanner.getData();
|
||||||
|
|
||||||
GATKReportTable table = report.getTable(ve.getClass().getSimpleName());
|
// handle TableTypes
|
||||||
|
for (Field field : datamap.keySet()) {
|
||||||
|
try {
|
||||||
|
field.setAccessible(true);
|
||||||
|
|
||||||
|
if (field.get(ve) instanceof TableType) {
|
||||||
|
TableType t = (TableType) field.get(ve);
|
||||||
|
|
||||||
|
for (int row = 0; row < t.getRowKeys().length; row++) {
|
||||||
|
String r = (String) t.getRowKeys()[row];
|
||||||
|
|
||||||
|
for ( VariantStratifier vs : stratificationObjects ) {
|
||||||
|
String columnName = vs.getClass().getSimpleName();
|
||||||
|
|
||||||
|
table.set(stateKey.toString() + r, columnName, stateKey.get(vs.getClass().getSimpleName()));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int col = 0; col < t.getColumnKeys().length; col++) {
|
||||||
|
String c = (String) t.getColumnKeys()[col];
|
||||||
|
|
||||||
|
String newStateKey = stateKey.toString() + r;
|
||||||
|
table.set(newStateKey, c, t.getCell(row, col));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IllegalAccessException e) {
|
||||||
|
throw new StingException("IllegalAccessException: " + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for ( VariantStratifier vs : stratificationObjects ) {
|
for ( VariantStratifier vs : stratificationObjects ) {
|
||||||
String columnName = vs.getClass().getSimpleName();
|
String columnName = vs.getClass().getSimpleName();
|
||||||
|
|
@ -653,7 +741,10 @@ public class NewVariantEvalWalker extends RodWalker<Integer, Integer> implements
|
||||||
for (Field field : datamap.keySet()) {
|
for (Field field : datamap.keySet()) {
|
||||||
try {
|
try {
|
||||||
field.setAccessible(true);
|
field.setAccessible(true);
|
||||||
|
|
||||||
|
if ( !(field.get(ve) instanceof TableType) ) {
|
||||||
table.set(stateKey.toString(), field.getName(), field.get(ve));
|
table.set(stateKey.toString(), field.getName(), field.get(ve));
|
||||||
|
}
|
||||||
} catch (IllegalAccessException e) {
|
} catch (IllegalAccessException e) {
|
||||||
throw new ReviewedStingException("Unable to access a data field in a VariantEval analysis module: " + e);
|
throw new ReviewedStingException("Unable to access a data field in a VariantEval analysis module: " + e);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,12 +3,12 @@ package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluato
|
||||||
import org.broad.tribble.util.variantcontext.Genotype;
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
class CompEvalGenotypes {
|
class NewCompEvalGenotypes {
|
||||||
private GenomeLoc loc;
|
private GenomeLoc loc;
|
||||||
private Genotype compGt;
|
private Genotype compGt;
|
||||||
private Genotype evalGt;
|
private Genotype evalGt;
|
||||||
|
|
||||||
public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
|
public NewCompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
|
||||||
this.loc = loc;
|
this.loc = loc;
|
||||||
this.compGt = compGt;
|
this.compGt = compGt;
|
||||||
this.evalGt = evalGt;
|
this.evalGt = evalGt;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,793 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.*;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.StandardEval;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Analysis(name = "Genotype Concordance", description = "Determine the genotype concordance between the genotypes in difference tracks")
|
||||||
|
public class GenotypeConcordance extends org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator implements StandardEval {
|
||||||
|
private static final boolean PRINT_INTERESTING_SITES = true;
|
||||||
|
|
||||||
|
protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class);
|
||||||
|
|
||||||
|
// a mapping from allele count to stats
|
||||||
|
@DataPoint(description = "the frequency statistics for each allele")
|
||||||
|
FrequencyStats alleleFreqStats = new FrequencyStats();
|
||||||
|
|
||||||
|
// a mapping from sample to stats
|
||||||
|
@DataPoint(description = "the concordance statistics for each sample")
|
||||||
|
SampleStats sampleStats = null;
|
||||||
|
|
||||||
|
// a mapping from sample to stats summary
|
||||||
|
@DataPoint(description = "the concordance statistics summary for each sample")
|
||||||
|
SampleSummaryStats sampleSummaryStats = null;
|
||||||
|
|
||||||
|
// two histograms of variant quality scores, for true positive and false positive calls
|
||||||
|
@DataPoint(description = "the variant quality score histograms for true positive and false positive calls")
|
||||||
|
QualityScoreHistograms qualityScoreHistograms = null;
|
||||||
|
|
||||||
|
@DataPoint(description = "the concordance statistics summary by allele count")
|
||||||
|
ACSummaryStats alleleCountSummary = null;
|
||||||
|
|
||||||
|
@DataPoint(description = "the concordance statistics by allele count")
|
||||||
|
ACStats alleleCountStats = null;
|
||||||
|
|
||||||
|
private static final int MAX_MISSED_VALIDATION_DATA = 100;
|
||||||
|
|
||||||
|
private boolean discordantInteresting = false;
|
||||||
|
|
||||||
|
private VariantEvalWalker.EvaluationContext group = null;
|
||||||
|
|
||||||
|
static class FrequencyStats implements TableType {
|
||||||
|
class Stats {
|
||||||
|
public Stats(int found, int missed) { nFound = found; nMissed = missed; }
|
||||||
|
public long nFound = 0;
|
||||||
|
public long nMissed = 0;
|
||||||
|
}
|
||||||
|
public HashMap<Integer, Stats> foundMissedByAC = new HashMap<Integer, Stats>();
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
String rows[] = new String[foundMissedByAC.size()];
|
||||||
|
int index = 0;
|
||||||
|
for (int i : foundMissedByAC.keySet()) rows[index++] = "Allele Count " + i;
|
||||||
|
return rows;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return new String[]{"number_found", "number_missing"};
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "FrequencyStats";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCell(int x, int y) {
|
||||||
|
if (x >= foundMissedByAC.size()) throw new IllegalStateException(x + " is greater than the max index of " + (foundMissedByAC.size()-1));
|
||||||
|
if (y == 0) return String.valueOf(foundMissedByAC.get(foundMissedByAC.keySet().toArray(new Integer[foundMissedByAC.size()])[x]).nFound);
|
||||||
|
else return String.valueOf(foundMissedByAC.get(foundMissedByAC.keySet().toArray(new Integer[foundMissedByAC.size()])[x]).nMissed);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrementFoundCount(int alleleFreq) {
|
||||||
|
if (!foundMissedByAC.containsKey(alleleFreq))
|
||||||
|
foundMissedByAC.put(alleleFreq,new Stats(1,0));
|
||||||
|
else
|
||||||
|
foundMissedByAC.get(alleleFreq).nFound++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrementMissedCount(int alleleFreq) {
|
||||||
|
if (!foundMissedByAC.containsKey(alleleFreq))
|
||||||
|
foundMissedByAC.put(alleleFreq,new Stats(0,1));
|
||||||
|
else
|
||||||
|
foundMissedByAC.get(alleleFreq).nMissed++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static class QualityScoreHistograms implements TableType {
|
||||||
|
final static int NUM_BINS = 20;
|
||||||
|
final HashMap<Integer,Integer> truePositiveQualityScoreMap = new HashMap<Integer,Integer>(); // A HashMap holds all the quality scores until we are able to bin them appropriately
|
||||||
|
final HashMap<Integer,Integer> falsePositiveQualityScoreMap = new HashMap<Integer,Integer>();
|
||||||
|
final int truePositiveHist[] = new int[NUM_BINS]; // the final histograms that get reported out
|
||||||
|
final int falsePositiveHist[] = new int[NUM_BINS];
|
||||||
|
final String[] rowKeys = new String[]{"true_positive_hist", "false_positive_hist"};
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
return rowKeys;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
final String columnKeys[] = new String[NUM_BINS];
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
columnKeys[iii] = "histBin" + iii;
|
||||||
|
}
|
||||||
|
return columnKeys;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "QualityScoreHistogram";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCell(int x, int y) {
|
||||||
|
if( x == 0 ) {
|
||||||
|
return String.valueOf(truePositiveHist[y]);
|
||||||
|
} else if ( x == 1 ) {
|
||||||
|
return String.valueOf(falsePositiveHist[y]);
|
||||||
|
} else {
|
||||||
|
throw new ReviewedStingException( "Unknown row in " + getName() + ", row = " + x );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
String returnString = "";
|
||||||
|
// output both histogram arrays
|
||||||
|
returnString += "TP: ";
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
returnString += truePositiveHist[iii] + " ";
|
||||||
|
}
|
||||||
|
returnString += "\nFP: ";
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
returnString += falsePositiveHist[iii] + " ";
|
||||||
|
}
|
||||||
|
return returnString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrValue( final double qual, final boolean isTruePositiveCall ) {
|
||||||
|
HashMap<Integer,Integer> qualScoreMap;
|
||||||
|
if( isTruePositiveCall ) {
|
||||||
|
qualScoreMap = truePositiveQualityScoreMap;
|
||||||
|
} else {
|
||||||
|
qualScoreMap = falsePositiveQualityScoreMap;
|
||||||
|
}
|
||||||
|
final Integer qualKey = Math.round((float) qual);
|
||||||
|
if( qualScoreMap.containsKey(qualKey) ) {
|
||||||
|
qualScoreMap.put(qualKey, qualScoreMap.get(qualKey) + 1);
|
||||||
|
} else {
|
||||||
|
qualScoreMap.put(qualKey, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void organizeHistogramTables() {
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
truePositiveHist[iii] = 0;
|
||||||
|
falsePositiveHist[iii] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int maxQual = 0;
|
||||||
|
|
||||||
|
// Calculate the maximum quality score for both TP and FP calls in order to normalize and histogram
|
||||||
|
for( final Integer qual : truePositiveQualityScoreMap.keySet()) {
|
||||||
|
if( qual > maxQual ) {
|
||||||
|
maxQual = qual;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for( final Integer qual : falsePositiveQualityScoreMap.keySet()) {
|
||||||
|
if( qual > maxQual ) {
|
||||||
|
maxQual = qual;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1)); //BUGBUG: should be normalized max to min, not max to 0
|
||||||
|
|
||||||
|
for( final Integer qual : truePositiveQualityScoreMap.keySet()) {
|
||||||
|
final int index = (int)Math.floor( ((double)qual) / binSize );
|
||||||
|
if(index >= 0) { //BUGBUG: problem when maxQual is zero?
|
||||||
|
truePositiveHist[ index ] += truePositiveQualityScoreMap.get(qual);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for( final Integer qual : falsePositiveQualityScoreMap.keySet()) {
|
||||||
|
final int index = (int)Math.floor( ((double)qual) / binSize );
|
||||||
|
if(index >= 0) {
|
||||||
|
falsePositiveHist[ index ] += falsePositiveQualityScoreMap.get(qual);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// keep a list of the validation data we saw before the first eval data
|
||||||
|
private HashSet<VariantContext> missedValidationData = new HashSet<VariantContext>();
|
||||||
|
|
||||||
|
|
||||||
|
//public GenotypeConcordance(VariantEvalWalker parent) {
|
||||||
|
// super(parent);
|
||||||
|
// discordantInteresting = parent.DISCORDANT_INTERESTING;
|
||||||
|
//}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "genotypeConcordance";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 2; // we need to see each eval track and each comp track
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName() + ": <table>";
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean warnedAboutValidationData = false;
|
||||||
|
|
||||||
|
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||||
|
this.group = group;
|
||||||
|
|
||||||
|
String interesting = null;
|
||||||
|
|
||||||
|
// sanity check that we at least have either eval or validation data
|
||||||
|
if (eval == null && !isValidVC(validation)) {
|
||||||
|
return interesting;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( qualityScoreHistograms == null ) {
|
||||||
|
qualityScoreHistograms = new QualityScoreHistograms();
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( alleleCountStats == null && eval != null && validation != null ) {
|
||||||
|
alleleCountStats = new ACStats(eval,validation,Genotype.Type.values().length);
|
||||||
|
alleleCountSummary = new ACSummaryStats(eval, validation);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sampleStats == null) {
|
||||||
|
if (eval != null) {
|
||||||
|
// initialize the concordance table
|
||||||
|
sampleStats = new SampleStats(eval,Genotype.Type.values().length);
|
||||||
|
sampleSummaryStats = new SampleSummaryStats(eval);
|
||||||
|
for (final VariantContext vc : missedValidationData) {
|
||||||
|
determineStats(null, vc);
|
||||||
|
}
|
||||||
|
missedValidationData = null;
|
||||||
|
} else {
|
||||||
|
// todo -- Eric, this results in a memory problem when eval is WEx data but you are using CG calls genome-wide
|
||||||
|
// todo -- perhaps you need should extend the evaluators with an initialize
|
||||||
|
// todo -- method that gets the header (or samples) for the first eval sites?
|
||||||
|
if (missedValidationData.size() > MAX_MISSED_VALIDATION_DATA) {
|
||||||
|
if (!warnedAboutValidationData) {
|
||||||
|
//logger.warn("Too many genotype sites missed before eval site appeared; ignoring");
|
||||||
|
warnedAboutValidationData = true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
missedValidationData.add(validation);
|
||||||
|
}
|
||||||
|
return interesting;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
interesting = determineStats(eval, validation);
|
||||||
|
|
||||||
|
return interesting; // we don't capture any interesting sites
|
||||||
|
}
|
||||||
|
|
||||||
|
private String determineStats(final VariantContext eval, final VariantContext validation) {
|
||||||
|
|
||||||
|
String interesting = null;
|
||||||
|
|
||||||
|
final boolean validationIsValidVC = isValidVC(validation);
|
||||||
|
final String evalAC = ( vcHasGoodAC(eval) ) ? String.format("evalAC%d",getAC(eval)) : null ;
|
||||||
|
final String validationAC = ( vcHasGoodAC(validation) ) ? String.format("compAC%d",getAC(validation)) : null;
|
||||||
|
|
||||||
|
// determine concordance for eval data
|
||||||
|
if (eval != null) {
|
||||||
|
for (final String sample : eval.getGenotypes().keySet()) {
|
||||||
|
final Genotype.Type called = eval.getGenotype(sample).getType();
|
||||||
|
final Genotype.Type truth;
|
||||||
|
|
||||||
|
if (!validationIsValidVC || !validation.hasGenotype(sample)) {
|
||||||
|
truth = Genotype.Type.NO_CALL;
|
||||||
|
} else {
|
||||||
|
truth = validation.getGenotype(sample).getType();
|
||||||
|
// interesting = "ConcordanceStatus=FP";
|
||||||
|
if (discordantInteresting && truth.ordinal() != called.ordinal())
|
||||||
|
{
|
||||||
|
interesting = "ConcordanceStatus=" + truth + "/" + called;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sampleStats.incrValue(sample, truth, called);
|
||||||
|
if ( evalAC != null && validationAC != null) {
|
||||||
|
alleleCountStats.incrValue(evalAC,truth,called);
|
||||||
|
alleleCountStats.incrValue(validationAC,truth,called);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// otherwise, mark no-calls for all samples
|
||||||
|
else {
|
||||||
|
final Genotype.Type called = Genotype.Type.NO_CALL;
|
||||||
|
|
||||||
|
for (final String sample : validation.getGenotypes().keySet()) {
|
||||||
|
final Genotype.Type truth = validation.getGenotype(sample).getType();
|
||||||
|
sampleStats.incrValue(sample, truth, called);
|
||||||
|
if ( evalAC != null ) {
|
||||||
|
alleleCountStats.incrValue(evalAC,truth,called);
|
||||||
|
}
|
||||||
|
// print out interesting sites
|
||||||
|
/*
|
||||||
|
if ( PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) {
|
||||||
|
if ( (truth == Genotype.Type.HOM_VAR || truth == Genotype.Type.HET) && called == Genotype.Type.NO_CALL ) {
|
||||||
|
super.getVEWalker().gcLog.printf("%s FN %s%n", group, validation);
|
||||||
|
}
|
||||||
|
if ( (called == Genotype.Type.HOM_VAR || called == Genotype.Type.HET) && truth == Genotype.Type.HOM_REF ) {
|
||||||
|
super.getVEWalker().gcLog.printf("%s FP %s%n", group, validation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// determine allele count concordance () // this is really a FN rate estimate -- CH
|
||||||
|
if (validationIsValidVC && validation.isPolymorphic()) {
|
||||||
|
int trueAlleleCount = 0;
|
||||||
|
for (final Allele a : validation.getAlternateAlleles()) {
|
||||||
|
trueAlleleCount += validation.getChromosomeCount(a);
|
||||||
|
}
|
||||||
|
if (eval != null) {
|
||||||
|
alleleFreqStats.incrementFoundCount(trueAlleleCount);
|
||||||
|
} else {
|
||||||
|
alleleFreqStats.incrementMissedCount(trueAlleleCount);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TP & FP quality score histograms
|
||||||
|
if( eval != null && eval.isPolymorphic() && validationIsValidVC ) {
|
||||||
|
if( eval.getGenotypes().keySet().size() == 1 ) { // single sample calls
|
||||||
|
for( final String sample : eval.getGenotypes().keySet() ) { // only one sample
|
||||||
|
if( validation.hasGenotype(sample) ) {
|
||||||
|
final Genotype truth = validation.getGenotype(sample);
|
||||||
|
qualityScoreHistograms.incrValue( eval.getPhredScaledQual(), !truth.isHomRef() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else { // multi sample calls
|
||||||
|
qualityScoreHistograms.incrValue( eval.getPhredScaledQual(), validation.isPolymorphic() );
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return interesting;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isValidVC(final VariantContext vc) {
|
||||||
|
return (vc != null && !vc.isFiltered());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void finalizeEvaluation() {
|
||||||
|
if( qualityScoreHistograms != null ) {
|
||||||
|
qualityScoreHistograms.organizeHistogramTables();
|
||||||
|
}
|
||||||
|
|
||||||
|
if( sampleSummaryStats != null && sampleStats != null ) {
|
||||||
|
sampleSummaryStats.generateSampleSummaryStats( sampleStats );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( alleleCountSummary != null && alleleCountStats != null ) {
|
||||||
|
alleleCountSummary.generateSampleSummaryStats( alleleCountStats );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean vcHasGoodAC(VariantContext vc) {
|
||||||
|
return ( vc != null && vc.getAlternateAlleles().size() == 1 && vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) );
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getAC(VariantContext vc) {
|
||||||
|
if ( List.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()) ) {
|
||||||
|
return ((List<Integer>) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY)).get(0);
|
||||||
|
} else if ( Integer.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass())) {
|
||||||
|
return (Integer) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY);
|
||||||
|
} else if ( String.class.isAssignableFrom(vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()) ) {
|
||||||
|
// two ways of parsing
|
||||||
|
String ac = (String) vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY);
|
||||||
|
if ( ac.startsWith("[") ) {
|
||||||
|
return Integer.parseInt(ac.replaceAll("\\[","").replaceAll("\\]",""));
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
return Integer.parseInt(ac);
|
||||||
|
} catch ( NumberFormatException e ) {
|
||||||
|
throw new UserException(String.format("The format of the AC field is improperly formatted: AC=%s",ac));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw new UserException(String.format("The format of the AC field does not appear to be of integer-list or String format, class was %s",vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY).getClass()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a table of sample names to genotype concordance figures
|
||||||
|
*/
|
||||||
|
class SampleStats implements TableType {
|
||||||
|
private final int nGenotypeTypes;
|
||||||
|
|
||||||
|
// sample to concordance stats object
|
||||||
|
public final HashMap<String, long[][]> concordanceStats = new HashMap<String, long[][]>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return one row per sample
|
||||||
|
*/
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
return concordanceStats.keySet().toArray(new String[concordanceStats.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* increment the specified value
|
||||||
|
* @param sample the sample name
|
||||||
|
* @param truth the truth type
|
||||||
|
* @param called the called type
|
||||||
|
*/
|
||||||
|
public void incrValue(String sample, Genotype.Type truth, Genotype.Type called) {
|
||||||
|
if ( concordanceStats.containsKey(sample) )
|
||||||
|
concordanceStats.get(sample)[truth.ordinal()][called.ordinal()]++;
|
||||||
|
else if ( called != Genotype.Type.NO_CALL )
|
||||||
|
throw new UserException.CommandLineException("Sample " + sample + " has not been seen in a previous eval; this analysis module assumes that all samples are present in each variant context");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the column keys
|
||||||
|
* @return a list of objects, in this case strings, that are the column names
|
||||||
|
*/
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return new String[]{"total_true_ref","%_ref/ref","n_ref/no-call",
|
||||||
|
"n_ref/ref","n_ref/het","n_ref/hom",
|
||||||
|
"total_true_het","%_het/het","n_het/no-call",
|
||||||
|
"n_het/ref","n_het/het","n_het/hom",
|
||||||
|
"total_true_hom","%_hom/hom","n_hom/no-call",
|
||||||
|
"n_hom/ref","n_hom/het","n_hom/hom"};
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public SampleStats(VariantContext vc, int nGenotypeTypes) {
|
||||||
|
this.nGenotypeTypes = nGenotypeTypes;
|
||||||
|
for (String sample : vc.getGenotypes().keySet())
|
||||||
|
concordanceStats.put(sample, new long[nGenotypeTypes][nGenotypeTypes]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SampleStats(int genotypeTypes) {
|
||||||
|
nGenotypeTypes = genotypeTypes;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCell(int x, int y) {
|
||||||
|
// we have three rows of 6 right now for output (rows: ref, het, hom)
|
||||||
|
Genotype.Type type = Genotype.Type.values()[(y/6)+1]; // get the row type
|
||||||
|
// save some repeat work, get the total every time
|
||||||
|
long total = 0;
|
||||||
|
Object[] rowKeys = getRowKeys();
|
||||||
|
for (int called = 0; called < nGenotypeTypes; called++) {
|
||||||
|
total += concordanceStats.get(rowKeys[x])[type.ordinal()][called];
|
||||||
|
}
|
||||||
|
|
||||||
|
// now get the cell they're interested in
|
||||||
|
switch (y % 6) {
|
||||||
|
case (0): // get the total_true for this type
|
||||||
|
return total;
|
||||||
|
case (1):
|
||||||
|
return total == 0 ? 0.0 : (100.0 * (double) concordanceStats.get(rowKeys[x])[type.ordinal()][type.ordinal()] / (double) total);
|
||||||
|
default:
|
||||||
|
return concordanceStats.get(rowKeys[x])[type.ordinal()][(y % 6) - 2];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Sample Statistics";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sample stats, but for AC
|
||||||
|
*/
|
||||||
|
class ACStats extends SampleStats {
|
||||||
|
private String[] rowKeys;
|
||||||
|
|
||||||
|
public ACStats(VariantContext evalvc, VariantContext compvc, int nGenotypeTypes) {
|
||||||
|
super(nGenotypeTypes);
|
||||||
|
rowKeys = new String[1+2*evalvc.getGenotypes().size()+1+2*compvc.getGenotypes().size()];
|
||||||
|
for ( int i = 0; i <= 2*evalvc.getGenotypes().size(); i++ ) { // todo -- assuming ploidy 2 here...
|
||||||
|
concordanceStats.put(String.format("evalAC%d",i),new long[nGenotypeTypes][nGenotypeTypes]);
|
||||||
|
rowKeys[i] = String.format("evalAC%d",i);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int i = 0; i <= 2*compvc.getGenotypes().size(); i++ ) {
|
||||||
|
concordanceStats.put(String.format("compAC%d",i), new long[nGenotypeTypes][nGenotypeTypes]);
|
||||||
|
rowKeys[1+2*evalvc.getGenotypes().size()+i] = String.format("compAC%d",i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Allele Count Statistics";
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
if ( rowKeys == null ) {
|
||||||
|
throw new StingException("RowKeys is null!");
|
||||||
|
}
|
||||||
|
return rowKeys;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a table of sample names to genotype concordance summary statistics
|
||||||
|
*/
|
||||||
|
class SampleSummaryStats implements TableType {
|
||||||
|
protected final static String ALL_SAMPLES_KEY = "allSamples";
|
||||||
|
protected final static String[] COLUMN_KEYS = new String[]{
|
||||||
|
"percent_comp_ref_called_var",
|
||||||
|
"percent_comp_het_called_het",
|
||||||
|
"percent_comp_het_called_var",
|
||||||
|
"percent_comp_hom_called_hom",
|
||||||
|
"percent_comp_hom_called_var",
|
||||||
|
"percent_non-reference_sensitivity",
|
||||||
|
"percent_overall_genotype_concordance",
|
||||||
|
"percent_non-reference_discrepancy_rate"};
|
||||||
|
|
||||||
|
// sample to concordance stats object
|
||||||
|
protected final HashMap<String, double[]> concordanceSummary = new HashMap<String, double[]>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return one row per sample
|
||||||
|
*/
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
return concordanceSummary.keySet().toArray(new String[concordanceSummary.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the column keys
|
||||||
|
* @return a list of objects, in this case strings, that are the column names
|
||||||
|
*/
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return COLUMN_KEYS;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SampleSummaryStats(final VariantContext vc) {
|
||||||
|
concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]);
|
||||||
|
for( final String sample : vc.getGenotypes().keySet() ) {
|
||||||
|
concordanceSummary.put(sample, new double[COLUMN_KEYS.length]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SampleSummaryStats() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCell(int x, int y) {
|
||||||
|
final Object[] rowKeys = getRowKeys();
|
||||||
|
return String.format("%.2f",concordanceSummary.get(rowKeys[x])[y]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper routine that sums up all columns / rows found in stats specified by all pairs in d1 x d2
|
||||||
|
*
|
||||||
|
* @param stats
|
||||||
|
* @param d1
|
||||||
|
* @param d2
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
private long sumStatsAllPairs( final long[][] stats, EnumSet<Genotype.Type> d1, EnumSet<Genotype.Type> d2 ) {
|
||||||
|
long sum = 0L;
|
||||||
|
|
||||||
|
for(final Genotype.Type e1 : d1 ) {
|
||||||
|
for(final Genotype.Type e2 : d2 ) {
|
||||||
|
sum += stats[e1.ordinal()][e2.ordinal()];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long sumStatsDiag( final long[][] stats, EnumSet<Genotype.Type> d1) {
|
||||||
|
long sum = 0L;
|
||||||
|
|
||||||
|
for(final Genotype.Type e1 : d1 ) {
|
||||||
|
sum += stats[e1.ordinal()][e1.ordinal()];
|
||||||
|
}
|
||||||
|
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double ratio(long numer, long denom) {
|
||||||
|
return denom != 0L ? 100.0 * ( ((double)numer) / ((double)denom) ) : 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
final long[] allSamplesNumerators = new long[COLUMN_KEYS.length];
|
||||||
|
final long[] allSamplesDenominators = new long[COLUMN_KEYS.length];
|
||||||
|
|
||||||
|
private void updateSummaries(int i, double[] summary, long numer, long denom ) {
|
||||||
|
allSamplesNumerators[i] += numer;
|
||||||
|
allSamplesDenominators[i] += denom;
|
||||||
|
summary[i] = ratio(numer, denom);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate the five summary stats per sample
|
||||||
|
* @param sampleStats The Map which holds concordance values per sample
|
||||||
|
*/
|
||||||
|
public void generateSampleSummaryStats( final SampleStats sampleStats ) {
|
||||||
|
EnumSet<Genotype.Type> allVariantGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET);
|
||||||
|
EnumSet<Genotype.Type> allCalledGenotypes = EnumSet.of(Genotype.Type.HOM_VAR, Genotype.Type.HET, Genotype.Type.HOM_REF);
|
||||||
|
EnumSet<Genotype.Type> allGenotypes = EnumSet.allOf(Genotype.Type.class);
|
||||||
|
|
||||||
|
for( final String sample : concordanceSummary.keySet() ) {
|
||||||
|
if ( sample.equals(ALL_SAMPLES_KEY) ) continue;
|
||||||
|
|
||||||
|
final long[][] stats = sampleStats.concordanceStats.get(sample);
|
||||||
|
final double[] summary = concordanceSummary.get(sample);
|
||||||
|
if( stats == null ) { throw new ReviewedStingException( "SampleStats and SampleSummaryStats contain different samples! sample = " + sample ); }
|
||||||
|
|
||||||
|
long numer, denom;
|
||||||
|
|
||||||
|
// Summary 0: % ref called as var
|
||||||
|
numer = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_REF), allVariantGenotypes);
|
||||||
|
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_REF), allGenotypes);
|
||||||
|
updateSummaries(0, summary, numer, denom);
|
||||||
|
|
||||||
|
// Summary 1: % het called as het
|
||||||
|
numer = stats[Genotype.Type.HET.ordinal()][Genotype.Type.HET.ordinal()];
|
||||||
|
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HET), allGenotypes);
|
||||||
|
updateSummaries(1, summary, numer, denom);
|
||||||
|
|
||||||
|
// Summary 2: % het called as var
|
||||||
|
numer = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HET), allVariantGenotypes);
|
||||||
|
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HET), allGenotypes);
|
||||||
|
updateSummaries(2, summary, numer, denom);
|
||||||
|
|
||||||
|
// Summary 3: % homVar called as homVar
|
||||||
|
numer = stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HOM_VAR.ordinal()];
|
||||||
|
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_VAR), allGenotypes);
|
||||||
|
updateSummaries(3, summary, numer, denom);
|
||||||
|
|
||||||
|
// Summary 4: % homVars called as var
|
||||||
|
numer = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_VAR), allVariantGenotypes);
|
||||||
|
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_VAR), allGenotypes);
|
||||||
|
updateSummaries(4, summary, numer, denom);
|
||||||
|
|
||||||
|
// Summary 5: % non-ref called as non-ref
|
||||||
|
// MAD: this is known as the non-reference sensitivity (# non-ref according to comp found in eval / # non-ref in comp)
|
||||||
|
numer = sumStatsAllPairs(stats, allVariantGenotypes, allVariantGenotypes);
|
||||||
|
denom = sumStatsAllPairs(stats, allVariantGenotypes, allGenotypes);
|
||||||
|
updateSummaries(5, summary, numer, denom);
|
||||||
|
|
||||||
|
// Summary 6: overall genotype concordance of sites called in eval track
|
||||||
|
// MAD: this is the tradition genotype concordance
|
||||||
|
numer = sumStatsDiag(stats, allCalledGenotypes);
|
||||||
|
denom = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes);
|
||||||
|
updateSummaries(6, summary, numer, denom);
|
||||||
|
|
||||||
|
// Summary 7: overall genotype concordance of sites called non-ref in eval track
|
||||||
|
long homrefConcords = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()];
|
||||||
|
long diag = sumStatsDiag(stats, allVariantGenotypes);
|
||||||
|
long allNoHomRef = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||||
|
numer = allNoHomRef - diag;
|
||||||
|
denom = allNoHomRef;
|
||||||
|
updateSummaries(7, summary, numer, denom);
|
||||||
|
}
|
||||||
|
|
||||||
|
// update the final summary stats
|
||||||
|
final double[] allSamplesSummary = concordanceSummary.get(ALL_SAMPLES_KEY);
|
||||||
|
for ( int i = 0; i < allSamplesSummary.length; i++) {
|
||||||
|
allSamplesSummary[i] = ratio(allSamplesNumerators[i], allSamplesDenominators[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Sample Summary Statistics";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SampleSummaryStats .. but for allele counts
|
||||||
|
*/
|
||||||
|
class ACSummaryStats extends SampleSummaryStats {
|
||||||
|
private String[] rowKeys;
|
||||||
|
|
||||||
|
public ACSummaryStats (final VariantContext evalvc, final VariantContext compvc) {
|
||||||
|
concordanceSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]);
|
||||||
|
rowKeys = new String[3+2*evalvc.getGenotypes().size() + 2*compvc.getGenotypes().size()];
|
||||||
|
rowKeys[0] = ALL_SAMPLES_KEY;
|
||||||
|
for( int i = 0; i <= 2*evalvc.getGenotypes().size() ; i ++ ) {
|
||||||
|
concordanceSummary.put(String.format("evalAC%d",i), new double[COLUMN_KEYS.length]);
|
||||||
|
rowKeys[i+1] = String.format("evalAC%d",i);
|
||||||
|
}
|
||||||
|
for( int i = 0; i <= 2*compvc.getGenotypes().size() ; i ++ ) {
|
||||||
|
concordanceSummary.put(String.format("compAC%d",i), new double[COLUMN_KEYS.length]);
|
||||||
|
rowKeys[2+2*evalvc.getGenotypes().size()+i] = String.format("compAC%d",i);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Allele Count Summary Statistics";
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
if ( rowKeys == null) {
|
||||||
|
throw new StingException("rowKeys is null!!");
|
||||||
|
}
|
||||||
|
return rowKeys;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class CompACNames implements Comparator{
|
||||||
|
|
||||||
|
final Logger myLogger;
|
||||||
|
private boolean info = true;
|
||||||
|
|
||||||
|
public CompACNames(Logger l) {
|
||||||
|
myLogger = l;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
return ( o.getClass() == CompACNames.class );
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compare(Object o1, Object o2) {
|
||||||
|
if ( info ) {
|
||||||
|
myLogger.info("Sorting AC names");
|
||||||
|
info = false;
|
||||||
|
}
|
||||||
|
//System.out.printf("Objects %s %s get ranks %d %d%n",o1.toString(),o2.toString(),getRank(o1),getRank(o2));
|
||||||
|
return getRank(o1) - getRank(o2);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getRank(Object o) {
|
||||||
|
if ( o.getClass() != String.class ) {
|
||||||
|
return Integer.MIN_VALUE/4;
|
||||||
|
} else {
|
||||||
|
String s = (String) o;
|
||||||
|
if ( s.startsWith("eval") ) {
|
||||||
|
return Integer.MIN_VALUE/4 + 1 + parseAC(s);
|
||||||
|
} else if ( s.startsWith("comp") ) {
|
||||||
|
return 1+ parseAC(s);
|
||||||
|
} else {
|
||||||
|
return Integer.MIN_VALUE/4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int parseAC(String s) {
|
||||||
|
String[] g = s.split("AC");
|
||||||
|
return Integer.parseInt(g[1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,422 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.*;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.phasing.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.*;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Analysis(name = "Genotype Phasing Evaluation", description = "Evaluates the phasing of genotypes in different tracks")
|
||||||
|
public class GenotypePhasingEvaluator extends org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvaluator {
|
||||||
|
protected final static Logger logger = Logger.getLogger(GenotypePhasingEvaluator.class);
|
||||||
|
|
||||||
|
// a mapping from sample to stats
|
||||||
|
@DataPoint(description = "the phasing statistics for each sample")
|
||||||
|
SamplePhasingStatistics samplePhasingStatistics = null;
|
||||||
|
|
||||||
|
SamplePreviousGenotypes samplePrevGenotypes = null;
|
||||||
|
|
||||||
|
double minPhaseQuality = 10.0;
|
||||||
|
|
||||||
|
public void initialize(NewVariantEvalWalker walker) {
|
||||||
|
this.samplePhasingStatistics = new SamplePhasingStatistics(walker.getMinPhaseQuality());
|
||||||
|
this.samplePrevGenotypes = new SamplePreviousGenotypes();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "GenotypePhasingEvaluator";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 2; // we only need to see pairs of (comp, eval)
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName() + ": <table>";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||||
|
Reasons interesting = new Reasons();
|
||||||
|
if (ref == null)
|
||||||
|
return interesting.toString();
|
||||||
|
GenomeLoc curLocus = ref.getLocus();
|
||||||
|
|
||||||
|
logger.debug("update2() locus: " + curLocus);
|
||||||
|
logger.debug("comp = " + comp + " eval = " + eval);
|
||||||
|
|
||||||
|
Set<String> allSamples = new HashSet<String>();
|
||||||
|
|
||||||
|
Map<String, Genotype> compSampGenotypes = null;
|
||||||
|
if (isRelevantToPhasing(comp)) {
|
||||||
|
allSamples.addAll(comp.getSampleNames());
|
||||||
|
compSampGenotypes = comp.getGenotypes();
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, Genotype> evalSampGenotypes = null;
|
||||||
|
if (isRelevantToPhasing(eval)) {
|
||||||
|
allSamples.addAll(eval.getSampleNames());
|
||||||
|
evalSampGenotypes = eval.getGenotypes();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String samp : allSamples) {
|
||||||
|
logger.debug("sample = " + samp);
|
||||||
|
|
||||||
|
Genotype compSampGt = null;
|
||||||
|
if (compSampGenotypes != null)
|
||||||
|
compSampGt = compSampGenotypes.get(samp);
|
||||||
|
|
||||||
|
Genotype evalSampGt = null;
|
||||||
|
if (evalSampGenotypes != null)
|
||||||
|
evalSampGt = evalSampGenotypes.get(samp);
|
||||||
|
|
||||||
|
if (compSampGt == null || evalSampGt == null) { // Since either comp or eval (or both) are missing the site, the best we can do is hope to preserve phase [if the non-missing one preserves phase]
|
||||||
|
// Having an unphased site breaks the phasing for the sample [does NOT permit "transitive phasing"] - hence, must reset phasing knowledge for both comp and eval [put a null CompEvalGenotypes]:
|
||||||
|
if (isNonNullButUnphased(compSampGt) || isNonNullButUnphased(evalSampGt))
|
||||||
|
samplePrevGenotypes.put(samp, null);
|
||||||
|
}
|
||||||
|
else { // Both comp and eval have a non-null Genotype at this site:
|
||||||
|
AllelePair compAllelePair = new AllelePair(compSampGt);
|
||||||
|
AllelePair evalAllelePair = new AllelePair(evalSampGt);
|
||||||
|
|
||||||
|
boolean breakPhasing = false;
|
||||||
|
if (compSampGt.isHet() != evalSampGt.isHet() || compSampGt.isHom() != evalSampGt.isHom())
|
||||||
|
breakPhasing = true; // since they are not both het or both hom
|
||||||
|
else { // both are het, or both are hom:
|
||||||
|
boolean topMatchesTopAndBottomMatchesBottom = (topMatchesTop(compAllelePair, evalAllelePair) && bottomMatchesBottom(compAllelePair, evalAllelePair));
|
||||||
|
boolean topMatchesBottomAndBottomMatchesTop = (topMatchesBottom(compAllelePair, evalAllelePair) && bottomMatchesTop(compAllelePair, evalAllelePair));
|
||||||
|
if (!topMatchesTopAndBottomMatchesBottom && !topMatchesBottomAndBottomMatchesTop)
|
||||||
|
breakPhasing = true; // since the 2 VCFs have different diploid genotypes for this sample
|
||||||
|
}
|
||||||
|
|
||||||
|
if (breakPhasing) {
|
||||||
|
samplePrevGenotypes.put(samp, null); // nothing to do for this site, AND must remove any history for the future
|
||||||
|
}
|
||||||
|
else if (compSampGt.isHet() && evalSampGt.isHet()) {
|
||||||
|
/* comp and eval have the HET same Genotype at this site:
|
||||||
|
[Note that if both are hom, then nothing is done here, but the het history IS preserved].
|
||||||
|
*/
|
||||||
|
CompEvalGenotypes prevCompAndEval = samplePrevGenotypes.get(samp);
|
||||||
|
if (prevCompAndEval != null && !prevCompAndEval.getLocus().onSameContig(curLocus)) // exclude curLocus if it is "phased" relative to a different chromosome
|
||||||
|
prevCompAndEval = null;
|
||||||
|
|
||||||
|
// Replace the previous hets with the current hets:
|
||||||
|
samplePrevGenotypes.put(samp, curLocus, compSampGt, evalSampGt);
|
||||||
|
|
||||||
|
if (prevCompAndEval != null) {
|
||||||
|
GenomeLoc prevLocus = prevCompAndEval.getLocus();
|
||||||
|
logger.debug("Potentially phaseable het locus: " + curLocus + " [relative to previous het locus: " + prevLocus + "]");
|
||||||
|
PhaseStats ps = samplePhasingStatistics.ensureSampleStats(samp);
|
||||||
|
|
||||||
|
boolean compSampIsPhased = genotypesArePhasedAboveThreshold(compSampGt);
|
||||||
|
boolean evalSampIsPhased = genotypesArePhasedAboveThreshold(evalSampGt);
|
||||||
|
if (compSampIsPhased || evalSampIsPhased) {
|
||||||
|
if (!evalSampIsPhased) {
|
||||||
|
ps.onlyCompPhased++;
|
||||||
|
interesting.addReason("ONLY_COMP", samp, group, prevLocus, "");
|
||||||
|
}
|
||||||
|
else if (!compSampIsPhased) {
|
||||||
|
ps.onlyEvalPhased++;
|
||||||
|
interesting.addReason("ONLY_EVAL", samp, group, prevLocus, "");
|
||||||
|
}
|
||||||
|
else { // both comp and eval are phased:
|
||||||
|
AllelePair prevCompAllelePair = new AllelePair(prevCompAndEval.getCompGenotpye());
|
||||||
|
AllelePair prevEvalAllelePair = new AllelePair(prevCompAndEval.getEvalGenotype());
|
||||||
|
|
||||||
|
// Sufficient to check only the top of comp, since we ensured that comp and eval have the same diploid genotypes for this sample:
|
||||||
|
boolean topsMatch = (topMatchesTop(prevCompAllelePair, prevEvalAllelePair) && topMatchesTop(compAllelePair, evalAllelePair));
|
||||||
|
boolean topMatchesBottom = (topMatchesBottom(prevCompAllelePair, prevEvalAllelePair) && topMatchesBottom(compAllelePair, evalAllelePair));
|
||||||
|
|
||||||
|
if (topsMatch || topMatchesBottom) {
|
||||||
|
ps.phasesAgree++;
|
||||||
|
|
||||||
|
Double compPQ = getPQ(compSampGt);
|
||||||
|
Double evalPQ = getPQ(evalSampGt);
|
||||||
|
if (compPQ != null && evalPQ != null && MathUtils.compareDoubles(compPQ, evalPQ) != 0)
|
||||||
|
interesting.addReason("PQ_CHANGE", samp, group, prevLocus, compPQ + " -> " + evalPQ);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ps.phasesDisagree++;
|
||||||
|
logger.debug("SWITCHED locus: " + curLocus);
|
||||||
|
interesting.addReason("SWITCH", samp, group, prevLocus, toString(prevCompAllelePair, compAllelePair) + " -> " + toString(prevEvalAllelePair, evalAllelePair));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ps.neitherPhased++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
logger.debug("\n" + samplePhasingStatistics + "\n");
|
||||||
|
|
||||||
|
return interesting.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isRelevantToPhasing(VariantContext vc) {
|
||||||
|
return (vc != null && !vc.isFiltered());
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isNonNullButUnphased(Genotype gt) {
|
||||||
|
return (gt != null && !genotypesArePhasedAboveThreshold(gt));
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean genotypesArePhasedAboveThreshold(Genotype gt) {
|
||||||
|
if (gt.isHom()) // Can always consider a hom site to be phased to its predecessor, since its successor will only be phased to it if it's hom or "truly" phased
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (!gt.isPhased())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
Double pq = getPQ(gt);
|
||||||
|
return (pq == null || pq >= minPhaseQuality);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Double getPQ(Genotype gt) {
|
||||||
|
return gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean topMatchesTop(AllelePair b1, AllelePair b2) {
|
||||||
|
return b1.getTopAllele().equals(b2.getTopAllele());
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean topMatchesBottom(AllelePair b1, AllelePair b2) {
|
||||||
|
return b1.getTopAllele().equals(b2.getBottomAllele());
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean bottomMatchesTop(AllelePair b1, AllelePair b2) {
|
||||||
|
return topMatchesBottom(b2, b1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean bottomMatchesBottom(AllelePair b1, AllelePair b2) {
|
||||||
|
return b1.getBottomAllele().equals(b2.getBottomAllele());
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString(AllelePair prev, AllelePair cur) {
|
||||||
|
return prev.getTopAllele().getBaseString() + "+" + cur.getTopAllele().getBaseString() + "|" + prev.getBottomAllele().getBaseString() + "+" + cur.getBottomAllele().getBaseString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void finalizeEvaluation() {
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class Reasons {
|
||||||
|
private StringBuilder sb;
|
||||||
|
|
||||||
|
public Reasons() {
|
||||||
|
sb = new StringBuilder();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addReason(String category, String sample, VariantEvalWalker.EvaluationContext evalGroup, GenomeLoc prevLoc, String reason) {
|
||||||
|
sb.append(category + "(" + sample + ", previous: " + prevLoc + " [" + evalGroup.compTrackName + ", " + evalGroup.evalTrackName + "]): " + reason + ";");
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
if (sb.length() == 0)
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return "reasons=" + sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class CompEvalGenotypes {
|
||||||
|
private GenomeLoc loc;
|
||||||
|
private Genotype compGt;
|
||||||
|
private Genotype evalGt;
|
||||||
|
|
||||||
|
public CompEvalGenotypes(GenomeLoc loc, Genotype compGt, Genotype evalGt) {
|
||||||
|
this.loc = loc;
|
||||||
|
this.compGt = compGt;
|
||||||
|
this.evalGt = evalGt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GenomeLoc getLocus() {
|
||||||
|
return loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Genotype getCompGenotpye() {
|
||||||
|
return compGt;
|
||||||
|
}
|
||||||
|
public Genotype getEvalGenotype() {
|
||||||
|
return evalGt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setCompGenotype(Genotype compGt) {
|
||||||
|
this.compGt = compGt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setEvalGenotype(Genotype evalGt) {
|
||||||
|
this.evalGt = evalGt;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class SamplePreviousGenotypes {
|
||||||
|
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
|
||||||
|
|
||||||
|
public SamplePreviousGenotypes() {
|
||||||
|
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
|
||||||
|
}
|
||||||
|
|
||||||
|
public CompEvalGenotypes get(String sample) {
|
||||||
|
return sampleGenotypes.get(sample);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void put(String sample, CompEvalGenotypes compEvalGts) {
|
||||||
|
sampleGenotypes.put(sample, compEvalGts);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void put(String sample, GenomeLoc locus, Genotype compGt, Genotype evalGt) {
|
||||||
|
sampleGenotypes.put(sample, new CompEvalGenotypes(locus, compGt, evalGt));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Tallies of phasing agreement between a comp genotype and an eval genotype.
 * The public fields are counters incremented directly by the evaluator.
 */
class PhaseStats {
    public int neitherPhased;
    public int onlyCompPhased;
    public int onlyEvalPhased;
    public int phasesAgree;
    public int phasesDisagree;

    public PhaseStats() {
        neitherPhased = 0;
        onlyCompPhased = 0;
        onlyEvalPhased = 0;
        phasesAgree = 0;
        phasesDisagree = 0;
    }

    public String toString() {
        return String.format("Neither phased: %d\tOnly Comp: %d\tOnly Eval: %d\tSame phase: %d\tOpposite phase: %d",
                neitherPhased, onlyCompPhased, onlyEvalPhased, phasesAgree, phasesDisagree);
    }

    /** Column headers matching the indices accepted by getField(). */
    public static String[] getFieldNamesArray() {
        return new String[]{"total", "neither", "only_comp", "only_eval", "both", "match", "switch", "switch_rate"};
    }

    /**
     * Returns the value of the column at {@code index} (see getFieldNamesArray()).
     * Integer counts are boxed as Integer; the switch rate (index 7) is always
     * boxed as Double because of the conditional operator's numeric promotion.
     *
     * @return column value, or Integer -1 for an unknown index
     */
    public Object getField(int index) {
        final int bothPhased = phasesAgree + phasesDisagree;
        switch (index) {
            case (0):
                return neitherPhased + onlyCompPhased + onlyEvalPhased + bothPhased;
            case (1):
                return neitherPhased;
            case (2):
                return onlyCompPhased;
            case (3):
                return onlyEvalPhased;
            case (4):
                return bothPhased;
            case (5):
                return phasesAgree;
            case (6):
                return phasesDisagree;
            case (7):
                return (phasesDisagree == 0) ? 0 : ((double) phasesDisagree) / bothPhased;
            default:
                return -1;
        }
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a table of sample names to genotype phasing statistics
|
||||||
|
*/
|
||||||
|
class SamplePhasingStatistics implements TableType {
|
||||||
|
private HashMap<String, PhaseStats> sampleStats = null;
|
||||||
|
private double minPhaseQuality;
|
||||||
|
|
||||||
|
public SamplePhasingStatistics(double minPhaseQuality) {
|
||||||
|
this.sampleStats = new HashMap<String, PhaseStats>();
|
||||||
|
this.minPhaseQuality = minPhaseQuality;
|
||||||
|
}
|
||||||
|
|
||||||
|
public PhaseStats ensureSampleStats(String samp) {
|
||||||
|
PhaseStats ps = sampleStats.get(samp);
|
||||||
|
if (ps == null) {
|
||||||
|
ps = new PhaseStats();
|
||||||
|
sampleStats.put(samp, ps);
|
||||||
|
}
|
||||||
|
return ps;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return one row per sample
|
||||||
|
*/
|
||||||
|
public String[] getRowKeys() {
|
||||||
|
return sampleStats.keySet().toArray(new String[sampleStats.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the column keys
|
||||||
|
*
|
||||||
|
* @return a list of objects, in this case strings, that are the column names
|
||||||
|
*/
|
||||||
|
public String[] getColumnKeys() {
|
||||||
|
return PhaseStats.getFieldNamesArray();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object getCell(int x, int y) {
|
||||||
|
String[] rowKeys = getRowKeys();
|
||||||
|
PhaseStats ps = sampleStats.get(rowKeys[x]);
|
||||||
|
return ps.getField(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "Sample Phasing Statistics (for PQ >= " + minPhaseQuality + ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
for (Map.Entry<String, PhaseStats> sampPhaseStatsEnt : sampleStats.entrySet()) {
|
||||||
|
String sample = sampPhaseStatsEnt.getKey();
|
||||||
|
PhaseStats ps = sampPhaseStatsEnt.getValue();
|
||||||
|
|
||||||
|
sb.append(sample + "\t" + ps);
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,112 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
|
||||||
|
/**
 * Tabulates the distribution of insertion/deletion event lengths observed in
 * the eval track (negative lengths are deletions, positive are insertions).
 *
 * @author chartl
 * @since May 26, 2010
 */
|
||||||
|
@Analysis(name = "Indel length histograms", description = "Shows the distrbution of insertion/deletion event lengths (negative for deletion, positive for insertion)")
public class IndelLengthHistogram extends VariantEvaluator {
    // maximum tracked event length in bp; lengths beyond this throw in len2index()
    private static final int SIZE_LIMIT = 50;
    @DataPoint(description="Histogram of indel lengths")
    IndelHistogram indelHistogram = new IndelHistogram(SIZE_LIMIT);

    /*
     * Indel length histogram table object: a single-row table whose columns are
     * event lengths from -limit to +limit.
     */

    static class IndelHistogram implements TableType {
        private Integer[] colKeys;          // column labels: -limit .. +limit
        private int limit;                  // half-width of the histogram
        private String[] rowKeys = {"EventLength"};
        private Integer[] indelHistogram;   // counts, indexed by len + limit

        public IndelHistogram(int limit) {
            colKeys = initColKeys(limit);
            indelHistogram = initHistogram(limit);
            this.limit = limit;
        }

        public Object[] getColumnKeys() {
            return colKeys;
        }

        public Object[] getRowKeys() {
            return rowKeys;
        }

        // single-row table: the row argument is intentionally ignored
        public Object getCell(int row, int col) {
            return indelHistogram[col];
        }

        // builds the column labels -size..+size, in order
        private Integer[] initColKeys(int size) {
            Integer[] cK = new Integer[size*2+1];
            int index = 0;
            for ( int i = -size; i <= size; i ++ ) {
                cK[index] = i;
                index++;
            }

            return cK;
        }

        // allocates the histogram with every bin explicitly zeroed
        private Integer[] initHistogram(int size) {
            Integer[] hist = new Integer[size*2+1];
            for ( int i = 0; i < 2*size+1; i ++ ) {
                hist[i] = 0;
            }

            return hist;
        }

        public String getName() { return "indelHistTable"; }

        // records one event of the given signed length
        public void update(int eLength) {
            indelHistogram[len2index(eLength)]++;
        }

        // maps a signed length to its bin; lengths beyond +/-limit are an error here
        private int len2index(int len) {
            if ( len > limit || len < -limit ) {
                throw new ReviewedStingException("Indel length exceeds limit of "+limit+" please increase indel limit size");
            }
            return len + limit;
        }
    }

    // module is disabled by default; must be requested explicitly
    public boolean enabled() { return false; }

    public String getName() { return "IndelLengthHistogram"; }

    public int getComparisonOrder() { return 1; } // need only the evals

    /**
     * Updates the histogram for each biallelic indel; non-biallelic indels are
     * skipped and returned as "interesting" sites instead of being counted.
     */
    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
            // non-biallelic indels are not counted; flag them as interesting
            return vc1.toString();
        }

        if ( vc1.isIndel() ) {
            if ( vc1.isInsertion() ) {
                indelHistogram.update(vc1.getAlternateAllele(0).length());
            } else if ( vc1.isDeletion() ) {
                indelHistogram.update(-vc1.getReference().length());
            } else {
                throw new ReviewedStingException("Indel type that is not insertion or deletion.");
            }
        }

        return null;
    }
}
|
||||||
|
|
@ -0,0 +1,222 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author delangel
|
||||||
|
* @since Apr 11, 2010
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Analysis(name = "Indel Metrics by allele count", description = "Shows various stats binned by allele count")
|
||||||
|
public class IndelMetricsByAC extends VariantEvaluator {
|
||||||
|
// a mapping from quality score histogram bin to Ti/Tv ratio
|
||||||
|
@DataPoint(description = "Indel Metrics by allele count")
|
||||||
|
IndelMetricsByAc metrics = null;
|
||||||
|
|
||||||
|
int numSamples = 0;
|
||||||
|
|
||||||
|
public void initialize(NewVariantEvalWalker walker) {
|
||||||
|
numSamples = walker.getNumSamples();
|
||||||
|
}
|
||||||
|
|
||||||
|
//@DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count")
|
||||||
|
//AlleleCountStats alleleCountStats = null;
|
||||||
|
private static final int INDEL_SIZE_LIMIT = 100;
|
||||||
|
private static final int NUM_SCALAR_COLUMNS = 6;
|
||||||
|
static int len2Index(int ind) {
|
||||||
|
return ind+INDEL_SIZE_LIMIT+NUM_SCALAR_COLUMNS;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int index2len(int ind) {
|
||||||
|
return ind-INDEL_SIZE_LIMIT-NUM_SCALAR_COLUMNS;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected final static String[] METRIC_COLUMNS;
|
||||||
|
static {
|
||||||
|
METRIC_COLUMNS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1];
|
||||||
|
METRIC_COLUMNS[0] = "AC";
|
||||||
|
METRIC_COLUMNS[1] = "nIns";
|
||||||
|
METRIC_COLUMNS[2] = "nDels";
|
||||||
|
METRIC_COLUMNS[3] = "n";
|
||||||
|
METRIC_COLUMNS[4] = "nComplex";
|
||||||
|
METRIC_COLUMNS[5] = "nLong";
|
||||||
|
|
||||||
|
for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++)
|
||||||
|
METRIC_COLUMNS[k] = "indel_size_len"+Integer.valueOf(index2len(k));
|
||||||
|
}
|
||||||
|
|
||||||
|
class IndelMetricsAtAC {
|
||||||
|
public int ac = -1, nIns =0, nDel = 0, nComplex = 0, nLong;
|
||||||
|
public int sizeCount[] = new int[2*INDEL_SIZE_LIMIT+1];
|
||||||
|
|
||||||
|
public IndelMetricsAtAC(int ac) { this.ac = ac; }
|
||||||
|
|
||||||
|
public void update(VariantContext eval) {
|
||||||
|
int eventLength = 0;
|
||||||
|
if ( eval.isInsertion() ) {
|
||||||
|
eventLength = eval.getAlternateAllele(0).length();
|
||||||
|
nIns++;
|
||||||
|
} else if ( eval.isDeletion() ) {
|
||||||
|
eventLength = -eval.getReference().length();
|
||||||
|
nDel++;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
nComplex++;
|
||||||
|
}
|
||||||
|
if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
|
||||||
|
sizeCount[len2Index(eventLength)]++;
|
||||||
|
else
|
||||||
|
nLong++;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// corresponding to METRIC_COLUMNS
|
||||||
|
public String getColumn(int i) {
|
||||||
|
if (i >= NUM_SCALAR_COLUMNS && i <=NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT)
|
||||||
|
return String.valueOf(sizeCount[i-NUM_SCALAR_COLUMNS]);
|
||||||
|
|
||||||
|
switch (i) {
|
||||||
|
case 0: return String.valueOf(ac);
|
||||||
|
case 1: return String.valueOf(nIns);
|
||||||
|
case 2: return String.valueOf(nDel);
|
||||||
|
case 3: return String.valueOf(nIns + nDel);
|
||||||
|
case 4: return String.valueOf(nComplex);
|
||||||
|
case 5: return String.valueOf(nLong);
|
||||||
|
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("Unexpected column " + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class IndelMetricsByAc implements TableType {
|
||||||
|
ArrayList<IndelMetricsAtAC> metrics = new ArrayList<IndelMetricsAtAC>();
|
||||||
|
Object[] rows = null;
|
||||||
|
|
||||||
|
public IndelMetricsByAc( int nchromosomes ) {
|
||||||
|
rows = new Object[nchromosomes+1];
|
||||||
|
metrics = new ArrayList<IndelMetricsAtAC>(nchromosomes+1);
|
||||||
|
for ( int i = 0; i < nchromosomes + 1; i++ ) {
|
||||||
|
metrics.add(new IndelMetricsAtAC(i));
|
||||||
|
rows[i] = "ac" + i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
return rows;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return METRIC_COLUMNS;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "IndelMetricsByAc";
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
public String getCell(int ac, int y) {
|
||||||
|
return metrics.get(ac).getColumn(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
String returnString = "";
|
||||||
|
return returnString;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrValue( VariantContext eval ) {
|
||||||
|
int ac = -1;
|
||||||
|
|
||||||
|
if ( eval.hasGenotypes() )
|
||||||
|
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
||||||
|
else if ( eval.hasAttribute("AC") ) {
|
||||||
|
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ac != -1 )
|
||||||
|
metrics.get(ac).update(eval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//public IndelMetricsByAC(VariantEvalWalker parent) {
|
||||||
|
//super(parent);
|
||||||
|
// don't do anything
|
||||||
|
//}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "IndelMetricsByAC";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 1; // we only need to see each eval track
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
final String interesting = null;
|
||||||
|
|
||||||
|
if (eval != null ) {
|
||||||
|
if ( metrics == null ) {
|
||||||
|
int nSamples = numSamples;
|
||||||
|
//int nSamples = 2;
|
||||||
|
if ( nSamples != -1 )
|
||||||
|
metrics = new IndelMetricsByAc(2 * nSamples);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( eval.isIndel() && eval.isBiallelic() &&
|
||||||
|
metrics != null ) {
|
||||||
|
metrics.incrValue(eval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return interesting; // This module doesn't capture any interesting sites, so return null
|
||||||
|
}
|
||||||
|
|
||||||
|
//public void finalizeEvaluation() {
|
||||||
|
//
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,514 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Analysis(name = "IndelStatistics", description = "Shows various indel metrics and statistics")
|
||||||
|
public class IndelStatistics extends VariantEvaluator {
|
||||||
|
@DataPoint(description = "Indel Statistics")
|
||||||
|
IndelStats indelStats = null;
|
||||||
|
|
||||||
|
@DataPoint(description = "Indel Classification")
|
||||||
|
IndelClasses indelClasses = null;
|
||||||
|
|
||||||
|
int numSamples = 0;
|
||||||
|
|
||||||
|
/**
 * Caches the walker's sample count for use when sizing per-sample tables.
 */
public void initialize(NewVariantEvalWalker walker) {
    numSamples = walker.getNumSamples();
}
|
||||||
|
|
||||||
|
private static final int INDEL_SIZE_LIMIT = 100;
|
||||||
|
private static final int NUM_SCALAR_COLUMNS = 10;
|
||||||
|
|
||||||
|
// Maps a signed indel length to its column index in the summary arrays
// (the scalar columns come first, then lengths -LIMIT..+LIMIT).
static int len2Index(int ind) {
    return ind+INDEL_SIZE_LIMIT+NUM_SCALAR_COLUMNS;
}
|
||||||
|
|
||||||
|
// Inverse of len2Index: recovers the signed indel length from a column index.
static int index2len(int ind) {
    return ind-INDEL_SIZE_LIMIT-NUM_SCALAR_COLUMNS;
}
|
||||||
|
|
||||||
|
/**
 * Per-sample indel summary table: a pooled "allSamples" row plus one row per
 * sample. Columns 0-9 are scalar counts; the remaining columns form a
 * histogram over indel lengths from -INDEL_SIZE_LIMIT to +INDEL_SIZE_LIMIT.
 */
static class IndelStats implements TableType {
    protected final static String ALL_SAMPLES_KEY = "allSamples";
    protected final static String[] COLUMN_KEYS;
    static {
        COLUMN_KEYS= new String[NUM_SCALAR_COLUMNS+2*INDEL_SIZE_LIMIT+1];
        // NOTE(review): column 0 is never incremented by incrValue() below —
        // confirm whether heterozygosity was meant to be filled in elsewhere
        COLUMN_KEYS[0] = "heterozygosity";
        COLUMN_KEYS[1] = "number_of_insertions";
        COLUMN_KEYS[2] = "number_of_deletions";
        COLUMN_KEYS[3] = "number_het_insertions";
        COLUMN_KEYS[4] = "number_homozygous_insertions";
        COLUMN_KEYS[5] = "number_het_deletions";
        COLUMN_KEYS[6] = "number_homozygous_deletions";
        COLUMN_KEYS[7] = "number of homozygous reference sites";
        COLUMN_KEYS[8] = "number of complex events";
        COLUMN_KEYS[9] = "number of long indels";

        for (int k=NUM_SCALAR_COLUMNS; k < NUM_SCALAR_COLUMNS+ 2*INDEL_SIZE_LIMIT+1; k++)
            COLUMN_KEYS[k] = "indel_size_len"+Integer.valueOf(index2len(k));
    }

    // map of sample to statistics: one double[] per row, indexed by COLUMN_KEYS
    protected final HashMap<String, double[]> indelSummary = new HashMap<String, double[]>();

    /**
     * Allocates the pooled row and one row per sample present in the first
     * variant context seen.
     */
    public IndelStats(final VariantContext vc) {
        indelSummary.put(ALL_SAMPLES_KEY, new double[COLUMN_KEYS.length]);
        for( final String sample : vc.getGenotypes().keySet() ) {
            indelSummary.put(sample, new double[COLUMN_KEYS.length]);
        }
    }

    /**
     *
     * @return one row per sample
     */
    public Object[] getRowKeys() {
        return indelSummary.keySet().toArray(new String[indelSummary.size()]);
    }
    // Formats the (x, y) entry; rows resolve through getRowKeys() so ordering matches
    public Object getCell(int x, int y) {
        final Object[] rowKeys = getRowKeys();
        return String.format("%4.2f",indelSummary.get(rowKeys[x])[y]);
    }

    /**
     * get the column keys
     * @return a list of objects, in this case strings, that are the column names
     */
    public Object[] getColumnKeys() {
        return COLUMN_KEYS;
    }

    public String getName() {
        return "IndelStats";
    }

    public int getComparisonOrder() {
        return 1; // we only need to see each eval track
    }

    public String toString() {
        return getName();
    }

    /*
     * Classifies one indel event and increments the pooled row and every
     * sample row whose genotype at this site is a called, non-hom-ref variant.
     */
    public void incrValue(VariantContext vc) {
        int eventLength = 0;
        boolean isInsertion = false, isDeletion = false;

        if ( vc.isInsertion() ) {
            eventLength = vc.getAlternateAllele(0).length();
            indelSummary.get(ALL_SAMPLES_KEY)[1]++;
            isInsertion = true;
        } else if ( vc.isDeletion() ) {
            indelSummary.get(ALL_SAMPLES_KEY)[2]++;
            eventLength = -vc.getReference().length();
            isDeletion = true;
        }
        else {
            // neither insertion nor deletion: complex event
            indelSummary.get(ALL_SAMPLES_KEY)[8]++;
        }

        // make sure event doesn't overstep array boundaries
        if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
            indelSummary.get(ALL_SAMPLES_KEY)[len2Index(eventLength)]++;
        else
            indelSummary.get(ALL_SAMPLES_KEY)[9]++;

        for( final String sample : vc.getGenotypes().keySet() ) {
            if ( indelSummary.containsKey(sample) ) {
                Genotype g = vc.getGenotype(sample);
                boolean isVariant = (g.isCalled() && !g.isHomRef());
                if (isVariant) {
                    // update ins/del count
                    if (isInsertion) {
                        indelSummary.get(sample)[1]++;
                    }
                    else if (isDeletion)
                        indelSummary.get(sample)[2]++;
                    else
                        indelSummary.get(sample)[8]++;

                    // update histogram
                    if (Math.abs(eventLength) < INDEL_SIZE_LIMIT)
                        indelSummary.get(sample)[len2Index(eventLength)]++;
                    else
                        indelSummary.get(sample)[9]++;

                    // het/hom zygosity split
                    // NOTE(review): complex (non-ins, non-del) events fall into the
                    // deletion columns [5]/[6] here — confirm that's intended
                    if (g.isHet())
                        if (isInsertion)
                            indelSummary.get(sample)[3]++;
                        else
                            indelSummary.get(sample)[5]++;
                    else
                        if (isInsertion)
                            indelSummary.get(sample)[4]++;
                        else
                            indelSummary.get(sample)[6]++;


                }
                else
                    // no-call or hom-ref genotypes both land here
                    indelSummary.get(sample)[7]++;
            }
        }

    }
}
|
||||||
|
|
||||||
|
static class IndelClasses implements TableType {
|
||||||
|
protected final static String ALL_SAMPLES_KEY = "allSamples";
|
||||||
|
protected final static String[] COLUMN_KEYS;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static {
|
||||||
|
COLUMN_KEYS= new String[41];
|
||||||
|
COLUMN_KEYS[0] = "Novel_A";
|
||||||
|
COLUMN_KEYS[1] = "Novel_C";
|
||||||
|
COLUMN_KEYS[2] = "Novel_G";
|
||||||
|
COLUMN_KEYS[3] = "Novel_T";
|
||||||
|
COLUMN_KEYS[4] = "NOVEL_1";
|
||||||
|
COLUMN_KEYS[5] = "NOVEL_2";
|
||||||
|
COLUMN_KEYS[6] = "NOVEL_3";
|
||||||
|
COLUMN_KEYS[7] = "NOVEL_4";
|
||||||
|
COLUMN_KEYS[8] = "NOVEL_5";
|
||||||
|
COLUMN_KEYS[9] = "NOVEL_6";
|
||||||
|
COLUMN_KEYS[10] = "NOVEL_7";
|
||||||
|
COLUMN_KEYS[11] = "NOVEL_8";
|
||||||
|
COLUMN_KEYS[12] = "NOVEL_9";
|
||||||
|
COLUMN_KEYS[13] = "NOVEL_10orMore";
|
||||||
|
COLUMN_KEYS[14] = "RepeatExpansion_A";
|
||||||
|
COLUMN_KEYS[15] = "RepeatExpansion_C";
|
||||||
|
COLUMN_KEYS[16] = "RepeatExpansion_G";
|
||||||
|
COLUMN_KEYS[17] = "RepeatExpansion_T";
|
||||||
|
COLUMN_KEYS[18] = "RepeatExpansion_AC";
|
||||||
|
COLUMN_KEYS[19] = "RepeatExpansion_AG";
|
||||||
|
COLUMN_KEYS[20] = "RepeatExpansion_AT";
|
||||||
|
COLUMN_KEYS[21] = "RepeatExpansion_CA";
|
||||||
|
COLUMN_KEYS[22] = "RepeatExpansion_CG";
|
||||||
|
COLUMN_KEYS[23] = "RepeatExpansion_CT";
|
||||||
|
COLUMN_KEYS[24] = "RepeatExpansion_GA";
|
||||||
|
COLUMN_KEYS[25] = "RepeatExpansion_GC";
|
||||||
|
COLUMN_KEYS[26] = "RepeatExpansion_GT";
|
||||||
|
COLUMN_KEYS[27] = "RepeatExpansion_TA";
|
||||||
|
COLUMN_KEYS[28] = "RepeatExpansion_TC";
|
||||||
|
COLUMN_KEYS[29] = "RepeatExpansion_TG";
|
||||||
|
COLUMN_KEYS[30] = "RepeatExpansion_1";
|
||||||
|
COLUMN_KEYS[31] = "RepeatExpansion_2";
|
||||||
|
COLUMN_KEYS[32] = "RepeatExpansion_3";
|
||||||
|
COLUMN_KEYS[33] = "RepeatExpansion_4";
|
||||||
|
COLUMN_KEYS[34] = "RepeatExpansion_5";
|
||||||
|
COLUMN_KEYS[35] = "RepeatExpansion_6";
|
||||||
|
COLUMN_KEYS[36] = "RepeatExpansion_7";
|
||||||
|
COLUMN_KEYS[37] = "RepeatExpansion_8";
|
||||||
|
COLUMN_KEYS[38] = "RepeatExpansion_9";
|
||||||
|
COLUMN_KEYS[39] = "RepeatExpansion_10orMore";
|
||||||
|
COLUMN_KEYS[40] = "Other";
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int START_IND_NOVEL = 4;
|
||||||
|
private static final int STOP_IND_NOVEL = 13;
|
||||||
|
private static final int START_IND_FOR_REPEAT_EXPANSION_1 = 14;
|
||||||
|
private static final int STOP_IND_FOR_REPEAT_EXPANSION_2 = 29;
|
||||||
|
private static final int START_IND_FOR_REPEAT_EXPANSION_COUNTS = 30;
|
||||||
|
private static final int STOP_IND_FOR_REPEAT_EXPANSION_COUNTS = 39;
|
||||||
|
private static final int IND_FOR_OTHER_EVENT = 40;
|
||||||
|
private static final int START_IND_NOVEL_PER_BASE = 0;
|
||||||
|
private static final int STOP_IND_NOVEL_PER_BASE = 3;
|
||||||
|
|
||||||
|
|
||||||
|
// map of sample to statistics
|
||||||
|
protected final HashMap<String, int[]> indelClassSummary = new HashMap<String, int[]>();
|
||||||
|
|
||||||
|
/**
 * Allocates the pooled "allSamples" counter row and one counter row per
 * sample present in the triggering variant context.
 */
public IndelClasses(final VariantContext vc) {
    indelClassSummary.put(ALL_SAMPLES_KEY, new int[COLUMN_KEYS.length]);
    for( final String sample : vc.getGenotypes().keySet() ) {
        indelClassSummary.put(sample, new int[COLUMN_KEYS.length]);
    }
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return one row per sample
|
||||||
|
*/
|
||||||
|
public Object[] getRowKeys() {
    // row order follows HashMap iteration order; getCell() resolves rows the same way
    return indelClassSummary.keySet().toArray(new String[indelClassSummary.size()]);
}
|
||||||
|
// Formats the integer count at (row x, column y) as a decimal string
public Object getCell(int x, int y) {
    final Object[] rowKeys = getRowKeys();
    return String.format("%d",indelClassSummary.get(rowKeys[x])[y]);
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the column keys
|
||||||
|
* @return a list of objects, in this case strings, that are the column names
|
||||||
|
*/
|
||||||
|
public Object[] getColumnKeys() {
    // shared, statically-initialized column headers
    return COLUMN_KEYS;
}
|
||||||
|
|
||||||
|
// Table name used by the report output
public String getName() {
    return "IndelClasses";
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
    return 1; // we only need to see each eval track
}
|
||||||
|
|
||||||
|
// String form is just the table name
public String toString() {
    return getName();
}
|
||||||
|
|
||||||
|
private void incrementSampleStat(VariantContext vc, int index) {
|
||||||
|
indelClassSummary.get(ALL_SAMPLES_KEY)[index]++;
|
||||||
|
for( final String sample : vc.getGenotypes().keySet() ) {
|
||||||
|
if ( indelClassSummary.containsKey(sample) ) {
|
||||||
|
Genotype g = vc.getGenotype(sample);
|
||||||
|
boolean isVariant = (g.isCalled() && !g.isHomRef());
|
||||||
|
if (isVariant)
|
||||||
|
// update count
|
||||||
|
indelClassSummary.get(sample)[index]++;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
/*
 * Helpers for classifying indel events into the table columns above
 */
|
||||||
|
private String findMinimalEvent(String eventString) {
|
||||||
|
|
||||||
|
// for each length up to given string length, see if event string is a repetition of units of size N
|
||||||
|
boolean foundSubstring = false;
|
||||||
|
String minEvent = eventString;
|
||||||
|
for (int k=1; k < eventString.length(); k++) {
|
||||||
|
if (eventString.length() % k > 0)
|
||||||
|
continue;
|
||||||
|
String str = eventString.substring(0,k);
|
||||||
|
// now see if event string is a repetition of str
|
||||||
|
int numReps = eventString.length() / k;
|
||||||
|
String r = "";
|
||||||
|
for (int j=0; j < numReps; j++)
|
||||||
|
r = r.concat(str);
|
||||||
|
|
||||||
|
if (r.matches(eventString)) {
|
||||||
|
foundSubstring = true;
|
||||||
|
minEvent = str;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return minEvent;
|
||||||
|
}
|
||||||
|
public void incrValue(VariantContext vc, ReferenceContext ref) {
|
||||||
|
int eventLength = 0;
|
||||||
|
boolean isInsertion = false, isDeletion = false;
|
||||||
|
String indelAlleleString;
|
||||||
|
|
||||||
|
if ( vc.isInsertion() ) {
|
||||||
|
isInsertion = true;
|
||||||
|
indelAlleleString = vc.getAlternateAllele(0).getDisplayString();
|
||||||
|
} else if ( vc.isDeletion() ) {
|
||||||
|
isDeletion = true;
|
||||||
|
indelAlleleString = vc.getReference().getDisplayString();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
incrementSampleStat(vc, IND_FOR_OTHER_EVENT);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] refBases = ref.getBases();
|
||||||
|
|
||||||
|
indelAlleleString = findMinimalEvent(indelAlleleString);
|
||||||
|
eventLength = indelAlleleString.length();
|
||||||
|
|
||||||
|
// See first if indel is a repetition of bases before current
|
||||||
|
int indStart = refBases.length/2-eventLength+1;
|
||||||
|
|
||||||
|
boolean done = false;
|
||||||
|
int numRepetitions = 0;
|
||||||
|
while (!done) {
|
||||||
|
if (indStart < 0)
|
||||||
|
done = true;
|
||||||
|
else {
|
||||||
|
String refPiece = new String(Arrays.copyOfRange(refBases,indStart,indStart+eventLength));
|
||||||
|
if (refPiece.matches(indelAlleleString))
|
||||||
|
{
|
||||||
|
numRepetitions++;
|
||||||
|
indStart = indStart - eventLength;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
done = true;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// now do it forward
|
||||||
|
done = false;
|
||||||
|
indStart = refBases.length/2+1;
|
||||||
|
while (!done) {
|
||||||
|
if (indStart + eventLength >= refBases.length)
|
||||||
|
break;
|
||||||
|
else {
|
||||||
|
String refPiece = new String(Arrays.copyOfRange(refBases,indStart,indStart+eventLength));
|
||||||
|
if (refPiece.matches(indelAlleleString))
|
||||||
|
{
|
||||||
|
numRepetitions++;
|
||||||
|
indStart = indStart + eventLength;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
done = true;
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (numRepetitions == 0) {
|
||||||
|
//unrepeated sequence from surroundings
|
||||||
|
int ind = START_IND_NOVEL + (eventLength-1);
|
||||||
|
if (ind > STOP_IND_NOVEL)
|
||||||
|
ind = STOP_IND_NOVEL;
|
||||||
|
incrementSampleStat(vc, ind);
|
||||||
|
|
||||||
|
if (eventLength == 1) {
|
||||||
|
// log single base indels additionally by base
|
||||||
|
String keyStr = "Novel_" + indelAlleleString;
|
||||||
|
int k;
|
||||||
|
for (k=START_IND_NOVEL_PER_BASE; k <= STOP_IND_NOVEL_PER_BASE; k++) {
|
||||||
|
if (keyStr.matches(COLUMN_KEYS[k]))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// log now event
|
||||||
|
incrementSampleStat(vc, k);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int ind = START_IND_FOR_REPEAT_EXPANSION_COUNTS + (numRepetitions-1);
|
||||||
|
if (ind > STOP_IND_FOR_REPEAT_EXPANSION_COUNTS)
|
||||||
|
ind = STOP_IND_FOR_REPEAT_EXPANSION_COUNTS;
|
||||||
|
incrementSampleStat(vc, ind);
|
||||||
|
|
||||||
|
if (eventLength<=2) {
|
||||||
|
// for single or dinucleotide indels, we further log the base in which they occurred
|
||||||
|
String keyStr = "RepeatExpansion_" + indelAlleleString;
|
||||||
|
int k;
|
||||||
|
for (k=START_IND_FOR_REPEAT_EXPANSION_1; k <= STOP_IND_FOR_REPEAT_EXPANSION_2; k++) {
|
||||||
|
if (keyStr.matches(COLUMN_KEYS[k]))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// log now event
|
||||||
|
incrementSampleStat(vc, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
//g+
|
||||||
|
/*
|
||||||
|
System.out.format("RefBefore: %s\n",new String(refBefore));
|
||||||
|
System.out.format("RefAfter: %s\n",new String(refAfter));
|
||||||
|
System.out.format("Indel Allele: %s\n", indelAlleleString);
|
||||||
|
System.out.format("Num Repetitions: %d\n", numRepetitions);
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//public IndelStatistics(VariantEvalWalker parent) {
|
||||||
|
//super(parent);
|
||||||
|
// don't do anything
|
||||||
|
//}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "IndelStatistics";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 1; // we only need to see each eval track
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
//public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context, VariantEvalWalker.EvaluationContext group) {
|
||||||
|
//return null;
|
||||||
|
//}
|
||||||
|
|
||||||
|
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
||||||
|
if (eval != null ) {
|
||||||
|
if ( indelStats == null ) {
|
||||||
|
int nSamples = numSamples;
|
||||||
|
|
||||||
|
if ( nSamples != -1 )
|
||||||
|
indelStats = new IndelStats(eval);
|
||||||
|
}
|
||||||
|
if ( indelClasses == null ) {
|
||||||
|
indelClasses = new IndelClasses(eval);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( eval.isIndel() && eval.isBiallelic() ) {
|
||||||
|
if (indelStats != null )
|
||||||
|
indelStats.incrValue(eval);
|
||||||
|
|
||||||
|
if (indelClasses != null)
|
||||||
|
indelClasses.incrValue(eval, ref);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null; // This module doesn't capture any interesting sites, so return null
|
||||||
|
}
|
||||||
|
public String update0(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
public void finalizeEvaluation() {
|
||||||
|
//
|
||||||
|
int k=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,186 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.Genotype;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mendelian violation detection and counting
|
||||||
|
* <p/>
|
||||||
|
* a violation looks like:
|
||||||
|
* Suppose dad = A/B and mom = C/D
|
||||||
|
* The child can be [A or B] / [C or D].
|
||||||
|
* If the child doesn't match this, the site is a violation
|
||||||
|
* <p/>
|
||||||
|
* Some examples:
|
||||||
|
* <p/>
|
||||||
|
* mom = A/A, dad = C/C
|
||||||
|
* child can be A/C only
|
||||||
|
* <p/>
|
||||||
|
* mom = A/C, dad = C/C
|
||||||
|
* child can be A/C or C/C
|
||||||
|
* <p/>
|
||||||
|
* mom = A/C, dad = A/C
|
||||||
|
* child can be A/A, A/C, C/C
|
||||||
|
* <p/>
|
||||||
|
* The easiest way to do this calculation is to:
|
||||||
|
* <p/>
|
||||||
|
* Get alleles for mom => A/B
|
||||||
|
* Get alleles for dad => C/D
|
||||||
|
* Make allowed genotypes for child: A/C, A/D, B/C, B/D
|
||||||
|
* Check that the child is one of these.
|
||||||
|
*/
|
||||||
|
@Analysis(name = "Mendelian Violation Evaluator", description = "Mendelian Violation Evaluator")
|
||||||
|
public class MendelianViolationEvaluator extends VariantEvaluator {
|
||||||
|
|
||||||
|
@DataPoint(description = "Number of mendelian variants found")
|
||||||
|
long nVariants;
|
||||||
|
|
||||||
|
@DataPoint(description = "Number of mendelian violations found")
|
||||||
|
long nViolations;
|
||||||
|
|
||||||
|
@DataPoint(description = "number of child hom ref calls where the parent was hom variant")
|
||||||
|
long KidHomRef_ParentHomVar;
|
||||||
|
@DataPoint(description = "number of child het calls where the parent was hom ref")
|
||||||
|
long KidHet_ParentsHomRef;
|
||||||
|
@DataPoint(description = "number of child het calls where the parent was hom variant")
|
||||||
|
long KidHet_ParentsHomVar;
|
||||||
|
@DataPoint(description = "number of child hom variant calls where the parent was hom ref")
|
||||||
|
long KidHomVar_ParentHomRef;
|
||||||
|
|
||||||
|
TrioStructure trio;
|
||||||
|
|
||||||
|
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||||
|
|
||||||
|
public static class TrioStructure {
|
||||||
|
public String mom, dad, child;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static TrioStructure parseTrioDescription(String family) {
|
||||||
|
Matcher m = FAMILY_PATTERN.matcher(family);
|
||||||
|
if (m.matches()) {
|
||||||
|
TrioStructure trio = new TrioStructure();
|
||||||
|
//System.out.printf("Found a family pattern: %s%n", parent.FAMILY_STRUCTURE);
|
||||||
|
trio.mom = m.group(1);
|
||||||
|
trio.dad = m.group(2);
|
||||||
|
trio.child = m.group(3);
|
||||||
|
return trio;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Malformatted family structure string: " + family + " required format is mom+dad=child");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// todo: fix
|
||||||
|
|
||||||
|
//public MendelianViolationEvaluator(VariantEvalWalker parent) {
|
||||||
|
//super(parent);
|
||||||
|
|
||||||
|
//if (enabled()) {
|
||||||
|
//trio = parseTrioDescription(parent.FAMILY_STRUCTURE);
|
||||||
|
//parent.getLogger().debug(String.format("Found a family pattern: %s mom=%s dad=%s child=%s",
|
||||||
|
//parent.FAMILY_STRUCTURE, trio.mom, trio.dad, trio.child));
|
||||||
|
//}
|
||||||
|
//}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
//return getVEWalker().FAMILY_STRUCTURE != null;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private double getQThreshold() {
|
||||||
|
//return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
||||||
|
return 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "mendelian_violations";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 1; // we only need to see each eval track
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if (vc.isBiallelic() && vc.hasGenotypes()) { // todo -- currently limited to biallelic loci
|
||||||
|
Genotype momG = vc.getGenotype(trio.mom);
|
||||||
|
Genotype dadG = vc.getGenotype(trio.dad);
|
||||||
|
Genotype childG = vc.getGenotype(trio.child);
|
||||||
|
|
||||||
|
if (includeGenotype(momG) && includeGenotype(dadG) && includeGenotype(childG)) {
|
||||||
|
nVariants++;
|
||||||
|
|
||||||
|
if (momG == null || dadG == null || childG == null)
|
||||||
|
throw new IllegalArgumentException(String.format("VariantContext didn't contain genotypes for expected trio members: mom=%s dad=%s child=%s", trio.mom, trio.dad, trio.child));
|
||||||
|
|
||||||
|
// all genotypes are good, so let's see if child is a violation
|
||||||
|
|
||||||
|
if (isViolation(vc, momG, dadG, childG)) {
|
||||||
|
nViolations++;
|
||||||
|
|
||||||
|
String label;
|
||||||
|
if (childG.isHomRef() && (momG.isHomVar() || dadG.isHomVar())) {
|
||||||
|
label = "KidHomRef_ParentHomVar";
|
||||||
|
KidHomRef_ParentHomVar++;
|
||||||
|
} else if (childG.isHet() && (momG.isHomRef() && dadG.isHomRef())) {
|
||||||
|
label = "KidHet_ParentsHomRef";
|
||||||
|
KidHet_ParentsHomRef++;
|
||||||
|
} else if (childG.isHet() && (momG.isHomVar() && dadG.isHomVar())) {
|
||||||
|
label = "KidHet_ParentsHomVar";
|
||||||
|
KidHet_ParentsHomVar++;
|
||||||
|
} else if (childG.isHomVar() && (momG.isHomRef() || dadG.isHomRef())) {
|
||||||
|
label = "KidHomVar_ParentHomRef";
|
||||||
|
KidHomVar_ParentHomRef++;
|
||||||
|
} else {
|
||||||
|
throw new ReviewedStingException("BUG: unexpected child genotype class " + childG);
|
||||||
|
}
|
||||||
|
|
||||||
|
return "MendelViolation=" + label;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null; // we don't capture any intersting sites
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean includeGenotype(Genotype g) {
|
||||||
|
return g.getNegLog10PError() > getQThreshold() && g.isCalled();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isViolation(VariantContext vc, Genotype momG, Genotype dadG, Genotype childG) {
|
||||||
|
return isViolation(vc, momG.getAlleles(), dadG.getAlleles(), childG.getAlleles());
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isViolation(VariantContext vc, TrioStructure trio ) {
|
||||||
|
return isViolation(vc, vc.getGenotype(trio.mom), vc.getGenotype(trio.dad), vc.getGenotype(trio.child) );
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isViolation(VariantContext vc, List<Allele> momA, List<Allele> dadA, List<Allele> childA) {
|
||||||
|
//VariantContext momVC = vc.subContextFromGenotypes(momG);
|
||||||
|
//VariantContext dadVC = vc.subContextFromGenotypes(dadG);
|
||||||
|
int i = 0;
|
||||||
|
Genotype childG = new Genotype("kidG", childA);
|
||||||
|
for (Allele momAllele : momA) {
|
||||||
|
for (Allele dadAllele : dadA) {
|
||||||
|
if (momAllele.isCalled() && dadAllele.isCalled()) {
|
||||||
|
Genotype possibleChild = new Genotype("possibleGenotype" + i, Arrays.asList(momAllele, dadAllele));
|
||||||
|
if (childG.sameGenotype(possibleChild, false)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -4,14 +4,14 @@ package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluato
|
||||||
* Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings
|
* Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings
|
||||||
* | File Templates.
|
* | File Templates.
|
||||||
*/
|
*/
|
||||||
class PhaseStats {
|
class NewPhaseStats {
|
||||||
public int neitherPhased;
|
public int neitherPhased;
|
||||||
public int onlyCompPhased;
|
public int onlyCompPhased;
|
||||||
public int onlyEvalPhased;
|
public int onlyEvalPhased;
|
||||||
public int phasesAgree;
|
public int phasesAgree;
|
||||||
public int phasesDisagree;
|
public int phasesDisagree;
|
||||||
|
|
||||||
public PhaseStats() {
|
public NewPhaseStats() {
|
||||||
this.neitherPhased = 0;
|
this.neitherPhased = 0;
|
||||||
this.onlyCompPhased = 0;
|
this.onlyCompPhased = 0;
|
||||||
this.onlyEvalPhased = 0;
|
this.onlyEvalPhased = 0;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,67 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
|
||||||
|
@Analysis(name = "PrintMissingComp", description = "the overlap between eval and comp sites")
|
||||||
|
public class PrintMissingComp extends VariantEvaluator {
|
||||||
|
@DataPoint(description = "number of eval sites outside of comp sites")
|
||||||
|
long nMissing = 0;
|
||||||
|
|
||||||
|
//public PrintMissingComp(VariantEvalWalker parent) {
|
||||||
|
// super(parent);
|
||||||
|
//}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "PrintMissingComp";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 2; // we need to see each eval track and each comp track
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
boolean compIsGood = comp != null && comp.isNotFiltered() && comp.isSNP();
|
||||||
|
boolean evalIsGood = eval != null && eval.isSNP();
|
||||||
|
|
||||||
|
if ( compIsGood & ! evalIsGood ) {
|
||||||
|
nMissing++;
|
||||||
|
return "MissingFrom" + comp.getSource();
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -9,10 +9,10 @@ import java.util.HashMap;
|
||||||
* Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings
|
* Created by IntelliJ IDEA. User: kiran Date: Nov 29, 2010 Time: 3:25:59 PM To change this template use File | Settings
|
||||||
* | File Templates.
|
* | File Templates.
|
||||||
*/
|
*/
|
||||||
class SamplePreviousGenotypes {
|
class NewSamplePreviousGenotypes {
|
||||||
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
|
private HashMap<String, CompEvalGenotypes> sampleGenotypes = null;
|
||||||
|
|
||||||
public SamplePreviousGenotypes() {
|
public NewSamplePreviousGenotypes() {
|
||||||
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
|
this.sampleGenotypes = new HashMap<String, CompEvalGenotypes>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,178 @@
|
||||||
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.Sample;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.StateKey;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2010 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author depristo
|
||||||
|
* @since Apr 11, 2010
|
||||||
|
*/
|
||||||
|
|
||||||
|
@Analysis(name = "Quality Metrics by allele count", description = "Shows various stats binned by allele count")
|
||||||
|
public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval {
|
||||||
|
// a mapping from quality score histogram bin to Ti/Tv ratio
|
||||||
|
@DataPoint(description = "TiTv by allele count")
|
||||||
|
MetricsByAc metrics = null;
|
||||||
|
|
||||||
|
private final static Object[] METRIC_COLUMNS = {"AC", "nTi", "nTv", "n", "TiTv"};
|
||||||
|
private int numSamples;
|
||||||
|
|
||||||
|
class MetricsAtAC {
|
||||||
|
public int ac = -1, nTi = 0, nTv = 0;
|
||||||
|
|
||||||
|
public MetricsAtAC(int ac) { this.ac = ac; }
|
||||||
|
|
||||||
|
public void update(VariantContext eval) {
|
||||||
|
if ( VariantContextUtils.isTransition(eval) )
|
||||||
|
nTi++;
|
||||||
|
else
|
||||||
|
nTv++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// corresponding to METRIC_COLUMNS
|
||||||
|
public String getColumn(int i) {
|
||||||
|
switch (i) {
|
||||||
|
case 0: return String.valueOf(ac);
|
||||||
|
case 1: return String.valueOf(nTi);
|
||||||
|
case 2: return String.valueOf(nTv);
|
||||||
|
case 3: return String.valueOf(nTi + nTv);
|
||||||
|
case 4: return String.valueOf(ratio(nTi, nTv));
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("Unexpected column " + i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class MetricsByAc implements TableType {
|
||||||
|
ArrayList<MetricsAtAC> metrics = new ArrayList<MetricsAtAC>();
|
||||||
|
Object[] rows = null;
|
||||||
|
|
||||||
|
public MetricsByAc( int nchromosomes ) {
|
||||||
|
rows = new Object[nchromosomes+1];
|
||||||
|
metrics = new ArrayList<MetricsAtAC>(nchromosomes+1);
|
||||||
|
for ( int i = 0; i < nchromosomes + 1; i++ ) {
|
||||||
|
metrics.add(new MetricsAtAC(i));
|
||||||
|
rows[i] = "ac" + i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
return rows;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return METRIC_COLUMNS;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "MetricsByAc";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCell(int ac, int y) {
|
||||||
|
return metrics.get(ac).getColumn(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrValue( VariantContext eval ) {
|
||||||
|
int ac = -1;
|
||||||
|
|
||||||
|
if ( eval.hasGenotypes() )
|
||||||
|
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
||||||
|
else if ( eval.hasAttribute("AC") ) {
|
||||||
|
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ac != -1 )
|
||||||
|
metrics.get(ac).update(eval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize(NewVariantEvalWalker walker) {
|
||||||
|
numSamples = walker.getNumSamples();
|
||||||
|
metrics = new MetricsByAc(2*numSamples);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "SimpleMetricsByAC";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 1; // we only need to see each eval track
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
final String interesting = null;
|
||||||
|
|
||||||
|
if (eval != null ) {
|
||||||
|
if ( metrics == null ) {
|
||||||
|
int nSamples = numSamples;
|
||||||
|
|
||||||
|
if ( nSamples != -1 )
|
||||||
|
metrics = new MetricsByAc(2 * nSamples);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( eval.isSNP() &&
|
||||||
|
eval.isBiallelic() &&
|
||||||
|
metrics != null ) {
|
||||||
|
metrics.incrValue(eval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return interesting; // This module doesn't capture any interesting sites, so return null
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean stateIsApplicable(StateKey stateKey) {
|
||||||
|
String className = Sample.class.getSimpleName();
|
||||||
|
|
||||||
|
return !(stateKey.containsKey(className) && !stateKey.get(className).equalsIgnoreCase("all"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -4,9 +4,13 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.NewEvaluationContext;
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.NewEvaluationContext;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.util.StateKey;
|
||||||
|
|
||||||
public abstract class VariantEvaluator {
|
public abstract class VariantEvaluator {
|
||||||
|
public void initialize(NewVariantEvalWalker walker) {}
|
||||||
|
|
||||||
public abstract boolean enabled();
|
public abstract boolean enabled();
|
||||||
|
|
||||||
// Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2
|
// Should return the number of VariantContexts expected as inputs to update. Can be 1 or 2
|
||||||
|
|
@ -46,4 +50,8 @@ public abstract class VariantEvaluator {
|
||||||
return ((double)num) / (Math.max(denom, 1));
|
return ((double)num) / (Math.max(denom, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean stateIsApplicable(StateKey stateKey) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,221 +25,234 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
package org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.report.utils.TableType;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author rpoplin
|
* @author rpoplin
|
||||||
* @since Apr 6, 2010
|
* @since Apr 6, 2010
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
|
@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
|
||||||
//public class VariantQualityScore extends VariantEvaluator {
|
public class VariantQualityScore extends VariantEvaluator {
|
||||||
//
|
|
||||||
// // a mapping from quality score histogram bin to Ti/Tv ratio
|
// a mapping from quality score histogram bin to Ti/Tv ratio
|
||||||
// @DataPoint(name="TiTv by Quality", description = "the Ti/Tv ratio broken out by variant quality")
|
@DataPoint(description = "the Ti/Tv ratio broken out by variant quality")
|
||||||
// TiTvStats titvStats;
|
TiTvStats titvStats = null;
|
||||||
//
|
|
||||||
// @DataPoint(name="Quality by Allele Count", description = "average variant quality for each allele count")
|
@DataPoint(description = "average variant quality for each allele count")
|
||||||
// AlleleCountStats alleleCountStats = null;
|
AlleleCountStats alleleCountStats = null;
|
||||||
//
|
|
||||||
// static class TiTvStats implements TableType {
|
static class TiTvStats implements TableType {
|
||||||
// final static int NUM_BINS = 20;
|
final static int NUM_BINS = 20;
|
||||||
// final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
|
final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
|
||||||
// final long transitionByQuality[] = new long[NUM_BINS];
|
final long transitionByQuality[] = new long[NUM_BINS];
|
||||||
// final long transversionByQuality[] = new long[NUM_BINS];
|
final long transversionByQuality[] = new long[NUM_BINS];
|
||||||
// final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
|
final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
|
||||||
//
|
|
||||||
// public Object[] getRowKeys() {
|
public Object[] getRowKeys() {
|
||||||
// return new String[]{"sample"};
|
return new String[]{"sample"};
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// public Object[] getColumnKeys() {
|
public Object[] getColumnKeys() {
|
||||||
// final String columnKeys[] = new String[NUM_BINS];
|
final String columnKeys[] = new String[NUM_BINS];
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
columnKeys[iii] = "titvBin" + iii;
|
||||||
|
}
|
||||||
|
return columnKeys;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "TiTvStats";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCell(int x, int y) {
|
||||||
|
return String.valueOf(titvByQuality[y]);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer returnString = new StringBuffer();
|
||||||
|
// output the ti/tv array
|
||||||
|
returnString.append("titvByQuality: ");
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
returnString.append(titvByQuality[iii]);
|
||||||
|
returnString.append(" ");
|
||||||
|
}
|
||||||
|
return returnString.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void incrValue( final double qual, final boolean isTransition ) {
|
||||||
|
final Integer qualKey = Math.round((float) qual);
|
||||||
|
final long numTransition = (isTransition ? 1L : 0L);
|
||||||
|
final long numTransversion = (isTransition ? 0L : 1L);
|
||||||
|
if( qualByIsTransition.containsKey(qualKey) ) {
|
||||||
|
Pair<Long,Long> transitionPair = qualByIsTransition.get(qualKey);
|
||||||
|
transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion);
|
||||||
|
qualByIsTransition.put(qualKey, transitionPair);
|
||||||
|
} else {
|
||||||
|
qualByIsTransition.put(qualKey, new Pair<Long,Long>(numTransition,numTransversion));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void organizeTiTvTables() {
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
transitionByQuality[iii] = 0L;
|
||||||
|
transversionByQuality[iii] = 0L;
|
||||||
|
titvByQuality[iii] = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int maxQual = 0;
|
||||||
|
|
||||||
|
// Calculate the maximum quality score in order to normalize and histogram
|
||||||
|
for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||||
|
if( qual > maxQual ) {
|
||||||
|
maxQual = qual;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1));
|
||||||
|
|
||||||
|
for( final Integer qual : qualByIsTransition.keySet() ) {
|
||||||
|
final int index = (int)Math.floor( ((double) qual) / binSize );
|
||||||
|
if( index >= 0 ) { // BUGBUG: why is there overflow here?
|
||||||
|
Pair<Long,Long> transitionPair = qualByIsTransition.get(qual);
|
||||||
|
transitionByQuality[index] += transitionPair.getFirst();
|
||||||
|
transversionByQuality[index] += transitionPair.getSecond();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
|
if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
|
||||||
|
titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
|
||||||
|
} else {
|
||||||
|
titvByQuality[iii] = 0.0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class AlleleCountStats implements TableType {
|
||||||
|
final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
|
||||||
|
final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
|
||||||
|
|
||||||
|
public Object[] getRowKeys() {
|
||||||
|
final int NUM_BINS = qualityListMap.keySet().size();
|
||||||
|
final String rowKeys[] = new String[NUM_BINS];
|
||||||
|
int iii = 0;
|
||||||
|
for( final Integer key : qualityListMap.keySet() ) {
|
||||||
|
rowKeys[iii] = "AC" + key;
|
||||||
|
iii++;
|
||||||
|
}
|
||||||
|
return rowKeys;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object[] getColumnKeys() {
|
||||||
|
return new String[]{"alleleCount","avgQual"};
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "AlleleCountStats";
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCell(int x, int y) {
|
||||||
|
int iii = 0;
|
||||||
|
for( final Integer key : qualityListMap.keySet() ) {
|
||||||
|
if(iii == x) {
|
||||||
|
if(y == 0) { return String.valueOf(key); }
|
||||||
|
else { return String.valueOf(qualityMap.get(key)); }
|
||||||
|
}
|
||||||
|
iii++;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
String returnString = "";
|
||||||
|
// output the quality map
|
||||||
|
returnString += "AlleleCountStats: ";
|
||||||
//for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
//for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
||||||
// columnKeys[iii] = "titvBin" + iii;
|
// returnString += titvByQuality[iii] + " ";
|
||||||
//}
|
//}
|
||||||
// return columnKeys;
|
return returnString;
|
||||||
// }
|
}
|
||||||
//
|
|
||||||
// public String getName() {
|
public void incrValue( final double qual, final int alleleCount ) {
|
||||||
// return "TiTvStats";
|
ArrayList<Double> list = qualityListMap.get(alleleCount);
|
||||||
// }
|
if(list==null) { list = new ArrayList<Double>(); }
|
||||||
//
|
list.add(qual);
|
||||||
// public String getCell(int x, int y) {
|
qualityListMap.put(alleleCount, list);
|
||||||
// return String.valueOf(titvByQuality[y]);
|
}
|
||||||
// }
|
|
||||||
//
|
public void organizeAlleleCountTables() {
|
||||||
// public String toString() {
|
for( final Integer key : qualityListMap.keySet() ) {
|
||||||
// StringBuffer returnString = new StringBuffer();
|
final ArrayList<Double> list = qualityListMap.get(key);
|
||||||
// // output the ti/tv array
|
double meanQual = 0.0;
|
||||||
// returnString.append("titvByQuality: ");
|
final double numQuals = (double)list.size();
|
||||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
for( Double qual : list ) {
|
||||||
// returnString.append(titvByQuality[iii]);
|
meanQual += qual / numQuals;
|
||||||
// returnString.append(" ");
|
}
|
||||||
// }
|
qualityMap.put(key, meanQual);
|
||||||
// return returnString.toString();
|
}
|
||||||
// }
|
}
|
||||||
//
|
}
|
||||||
// public void incrValue( final double qual, final boolean isTransition ) {
|
|
||||||
// final Integer qualKey = Math.round((float) qual);
|
|
||||||
// final long numTransition = (isTransition ? 1L : 0L);
|
|
||||||
// final long numTransversion = (isTransition ? 0L : 1L);
|
|
||||||
// if( qualByIsTransition.containsKey(qualKey) ) {
|
|
||||||
// Pair<Long,Long> transitionPair = qualByIsTransition.get(qualKey);
|
|
||||||
// transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion);
|
|
||||||
// qualByIsTransition.put(qualKey, transitionPair);
|
|
||||||
// } else {
|
|
||||||
// qualByIsTransition.put(qualKey, new Pair<Long,Long>(numTransition,numTransversion));
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public void organizeTiTvTables() {
|
|
||||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
|
||||||
// transitionByQuality[iii] = 0L;
|
|
||||||
// transversionByQuality[iii] = 0L;
|
|
||||||
// titvByQuality[iii] = 0.0;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// int maxQual = 0;
|
|
||||||
//
|
|
||||||
// // Calculate the maximum quality score in order to normalize and histogram
|
|
||||||
// for( final Integer qual : qualByIsTransition.keySet() ) {
|
|
||||||
// if( qual > maxQual ) {
|
|
||||||
// maxQual = qual;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1));
|
|
||||||
//
|
|
||||||
// for( final Integer qual : qualByIsTransition.keySet() ) {
|
|
||||||
// final int index = (int)Math.floor( ((double) qual) / binSize );
|
|
||||||
// if( index >= 0 ) { // BUGBUG: why is there overflow here?
|
|
||||||
// Pair<Long,Long> transitionPair = qualByIsTransition.get(qual);
|
|
||||||
// transitionByQuality[index] += transitionPair.getFirst();
|
|
||||||
// transversionByQuality[index] += transitionPair.getSecond();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
|
||||||
// if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
|
|
||||||
// titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
|
|
||||||
// } else {
|
|
||||||
// titvByQuality[iii] = 0.0;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// class AlleleCountStats implements TableType {
|
|
||||||
// final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
|
|
||||||
// final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
|
|
||||||
//
|
|
||||||
// public Object[] getRowKeys() {
|
|
||||||
// final int NUM_BINS = qualityListMap.keySet().size();
|
|
||||||
// final String rowKeys[] = new String[NUM_BINS];
|
|
||||||
// int iii = 0;
|
|
||||||
// for( final Integer key : qualityListMap.keySet() ) {
|
|
||||||
// rowKeys[iii] = "AC" + key;
|
|
||||||
// iii++;
|
|
||||||
// }
|
|
||||||
// return rowKeys;
|
|
||||||
//
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public Object[] getColumnKeys() {
|
|
||||||
// return new String[]{"alleleCount","avgQual"};
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public String getName() {
|
|
||||||
// return "AlleleCountStats";
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public String getCell(int x, int y) {
|
|
||||||
// int iii = 0;
|
|
||||||
// for( final Integer key : qualityListMap.keySet() ) {
|
|
||||||
// if(iii == x) {
|
|
||||||
// if(y == 0) { return String.valueOf(key); }
|
|
||||||
// else { return String.valueOf(qualityMap.get(key)); }
|
|
||||||
// }
|
|
||||||
// iii++;
|
|
||||||
// }
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public String toString() {
|
|
||||||
// String returnString = "";
|
|
||||||
// // output the quality map
|
|
||||||
// returnString += "AlleleCountStats: ";
|
|
||||||
// //for( int iii = 0; iii < NUM_BINS; iii++ ) {
|
|
||||||
// // returnString += titvByQuality[iii] + " ";
|
|
||||||
// //}
|
|
||||||
// return returnString;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public void incrValue( final double qual, final int alleleCount ) {
|
|
||||||
// ArrayList<Double> list = qualityListMap.get(alleleCount);
|
|
||||||
// if(list==null) { list = new ArrayList<Double>(); }
|
|
||||||
// list.add(qual);
|
|
||||||
// qualityListMap.put(alleleCount, list);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public void organizeAlleleCountTables() {
|
|
||||||
// for( final Integer key : qualityListMap.keySet() ) {
|
|
||||||
// final ArrayList<Double> list = qualityListMap.get(key);
|
|
||||||
// double meanQual = 0.0;
|
|
||||||
// final double numQuals = (double)list.size();
|
|
||||||
// for( Double qual : list ) {
|
|
||||||
// meanQual += qual / numQuals;
|
|
||||||
// }
|
|
||||||
// qualityMap.put(key, meanQual);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
//public VariantQualityScore(VariantEvalWalker parent) {
|
//public VariantQualityScore(VariantEvalWalker parent) {
|
||||||
//super(parent);
|
//super(parent);
|
||||||
// titvStats = null;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public String getName() {
|
|
||||||
// return "VariantQualityScore";
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public int getComparisonOrder() {
|
|
||||||
// return 1; // we only need to see each eval track
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public boolean enabled() {
|
|
||||||
// return true;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public String toString() {
|
|
||||||
// return getName();
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
|
||||||
// final String interesting = null;
|
|
||||||
//
|
|
||||||
// if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
|
||||||
// if( titvStats == null ) { titvStats = new TiTvStats(); }
|
|
||||||
// titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
|
||||||
//
|
|
||||||
// if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
|
|
||||||
// int alternateAlleleCount = 0;
|
|
||||||
// for (final Allele a : eval.getAlternateAlleles()) {
|
|
||||||
// alternateAlleleCount += eval.getChromosomeCount(a);
|
|
||||||
// }
|
|
||||||
// alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return interesting; // This module doesn't capture any interesting sites, so return null
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// public void finalizeEvaluation() {
|
|
||||||
// if( titvStats != null ) {
|
|
||||||
// titvStats.organizeTiTvTables();
|
|
||||||
// }
|
|
||||||
// if( alleleCountStats != null ) {
|
|
||||||
// alleleCountStats.organizeAlleleCountTables();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//}
|
//}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return "VariantQualityScore";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getComparisonOrder() {
|
||||||
|
return 1; // we only need to see each eval track
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean enabled() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
return getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
final String interesting = null;
|
||||||
|
|
||||||
|
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||||
|
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||||
|
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||||
|
|
||||||
|
if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
|
||||||
|
int alternateAlleleCount = 0;
|
||||||
|
for (final Allele a : eval.getAlternateAlleles()) {
|
||||||
|
alternateAlleleCount += eval.getChromosomeCount(a);
|
||||||
|
}
|
||||||
|
alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
|
||||||
|
}
|
||||||
|
|
||||||
|
return interesting; // This module doesn't capture any interesting sites, so return null
|
||||||
|
}
|
||||||
|
|
||||||
|
public void finalizeEvaluation() {
|
||||||
|
if( titvStats != null ) {
|
||||||
|
titvStats.organizeTiTvTables();
|
||||||
|
}
|
||||||
|
if( alleleCountStats != null ) {
|
||||||
|
alleleCountStats.organizeAlleleCountTables();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
public class CpG extends VariantStratifier {
|
public class CpG extends VariantStratifier implements StandardStratification {
|
||||||
private ArrayList<String> states;
|
private ArrayList<String> states;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
public class Filter extends VariantStratifier implements StandardStratification {
|
public class Filter extends VariantStratifier {
|
||||||
// needs to know the variant context
|
// needs to know the variant context
|
||||||
private ArrayList<String> states;
|
private ArrayList<String> states;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.NewVariantEvalWalker;
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator;
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.evaluators.VariantEvaluator;
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.VariantStratifier;
|
import org.broadinstitute.sting.playground.gatk.walkers.newvarianteval.stratifications.VariantStratifier;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
@ -26,13 +27,17 @@ public class NewEvaluationContext extends HashMap<VariantStratifier, String> {
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addEvaluationClassList(Set<Class<? extends VariantEvaluator>> evaluationClasses) {
|
public void addEvaluationClassList(NewVariantEvalWalker walker, StateKey stateKey, Set<Class<? extends VariantEvaluator>> evaluationClasses) {
|
||||||
evaluationInstances = new TreeMap<String, VariantEvaluator>();
|
evaluationInstances = new TreeMap<String, VariantEvaluator>();
|
||||||
|
|
||||||
for ( Class<? extends VariantEvaluator> c : evaluationClasses ) {
|
for ( Class<? extends VariantEvaluator> c : evaluationClasses ) {
|
||||||
try {
|
try {
|
||||||
VariantEvaluator eval = c.newInstance();
|
VariantEvaluator eval = c.newInstance();
|
||||||
|
eval.initialize(walker);
|
||||||
|
|
||||||
|
if (eval.stateIsApplicable(stateKey)) {
|
||||||
evaluationInstances.put(c.getSimpleName(), eval);
|
evaluationInstances.put(c.getSimpleName(), eval);
|
||||||
|
}
|
||||||
} catch (InstantiationException e) {
|
} catch (InstantiationException e) {
|
||||||
throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'");
|
throw new StingException("Unable to instantiate eval module '" + c.getSimpleName() + "'");
|
||||||
} catch (IllegalAccessException e) {
|
} catch (IllegalAccessException e) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue