Added support for -nSamples to varianteval (and getNSamplesForEval function). Allows you to calculate AC based metrics for files without genotypes
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3350 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
2c55ac1374
commit
3f07611187
|
|
@ -42,7 +42,7 @@ public class CompOverlap extends VariantEvaluator {
|
|||
double concordantRate = 0.0;
|
||||
|
||||
public CompOverlap(VariantEvalWalker parent) {
|
||||
// don't do anything
|
||||
super(parent);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ public class CountVariants extends VariantEvaluator {
|
|||
double deletionInsertionRatio = 0;
|
||||
|
||||
public CountVariants(VariantEvalWalker parent) {
|
||||
// don't do anything
|
||||
super(parent);
|
||||
}
|
||||
|
||||
private double perLocusRate(long n) {
|
||||
|
|
|
|||
|
|
@ -208,7 +208,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
|
||||
|
||||
public GenotypeConcordance(VariantEvalWalker parent) {
|
||||
// don't do anything
|
||||
super(parent);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
|
|||
|
|
@ -59,8 +59,6 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
@DataPoint(name="KHV->PHR",description = "number of child hom variant calls where the parent was hom ref")
|
||||
long KidHomVar_ParentHomRef;
|
||||
|
||||
VariantEvalWalker parent;
|
||||
|
||||
TrioStructure trio;
|
||||
|
||||
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
|
||||
|
|
@ -84,7 +82,7 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public MendelianViolationEvaluator(VariantEvalWalker parent) {
|
||||
this.parent = parent;
|
||||
super(parent);
|
||||
|
||||
if (enabled()) {
|
||||
trio = parseTrioDescription(parent.FAMILY_STRUCTURE);
|
||||
|
|
@ -94,11 +92,11 @@ public class MendelianViolationEvaluator extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return parent.FAMILY_STRUCTURE != null;
|
||||
return getVEWalker().FAMILY_STRUCTURE != null;
|
||||
}
|
||||
|
||||
private double getQThreshold() {
|
||||
return parent.MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
return getVEWalker().MENDELIAN_VIOLATION_QUAL_THRESHOLD / 10; // we aren't 10x scaled in the GATK a la phred
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
|||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
import org.broadinstitute.sting.playground.utils.report.utils.TableType;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broad.tribble.vcf.VCFGenotypeRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/*
|
||||
|
|
@ -113,12 +115,21 @@ public class SimpleMetricsByAC extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public void incrValue( VariantContext eval ) {
|
||||
int ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
||||
metrics.get(ac).update(eval);
|
||||
int ac = -1;
|
||||
|
||||
if ( eval.hasGenotypes() )
|
||||
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
||||
else if ( eval.hasAttribute("AC") ) {
|
||||
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
|
||||
}
|
||||
|
||||
if ( ac != -1 )
|
||||
metrics.get(ac).update(eval);
|
||||
}
|
||||
}
|
||||
|
||||
public SimpleMetricsByAC(VariantEvalWalker parent) {
|
||||
super(parent);
|
||||
// don't do anything
|
||||
}
|
||||
|
||||
|
|
@ -141,13 +152,18 @@ public class SimpleMetricsByAC extends VariantEvaluator {
|
|||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final String interesting = null;
|
||||
|
||||
if (eval != null &&
|
||||
eval.isSNP() &&
|
||||
eval.isBiallelic() &&
|
||||
eval.hasGenotypes()) {
|
||||
if ( metrics == null )
|
||||
metrics = new MetricsByAc(2 * eval.getNSamples());
|
||||
metrics.incrValue(eval);
|
||||
if (eval != null ) {
|
||||
if ( metrics == null ) {
|
||||
int nSamples = this.getVEWalker().getNSamplesForEval(eval);
|
||||
if ( nSamples != -1 )
|
||||
metrics = new MetricsByAc(2 * nSamples);
|
||||
}
|
||||
|
||||
if ( eval.isSNP() &&
|
||||
eval.isBiallelic() &&
|
||||
metrics != null ) {
|
||||
metrics.incrValue(eval);
|
||||
}
|
||||
}
|
||||
|
||||
return interesting; // This module doesn't capture any interesting sites, so return null
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
|
|||
double TiTvRatioStandard = 0.0;
|
||||
|
||||
public TiTvVariantEvaluator(VariantEvalWalker parent) {
|
||||
// don't do anything
|
||||
super(parent);
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ public class ValidationRate extends VariantEvaluator {
|
|||
private SiteStats evalOverlapAtPoly = new SiteStats();
|
||||
|
||||
public ValidationRate(VariantEvalWalker parent) {
|
||||
// don't do anything
|
||||
super(parent);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
@ -126,7 +126,8 @@ public class ValidationRate extends VariantEvaluator {
|
|||
|
||||
if (eval == null)
|
||||
overlap.nNoCall++;
|
||||
else if (eval.isPolymorphic())
|
||||
//else if (eval.isPolymorphic()) // requires genotypes
|
||||
else if (eval.isSNP())
|
||||
overlap.nPoly++;
|
||||
else
|
||||
overlap.nMono++;
|
||||
|
|
|
|||
|
|
@ -169,6 +169,9 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
@Argument(shortName="reportLocation", fullName="reportLocation", doc="If provided, set the base file for reports (Required for output formats with more than one file per analysis)", required=false)
|
||||
protected File outputLocation = null;
|
||||
|
||||
@Argument(shortName="nSamples", fullName="nSamples", doc="If provided, analyses that need the number of samples in an eval track that has no genotype information will receive this number as the number of samples", required=false)
|
||||
protected int nSamples = -1;
|
||||
|
||||
Set<String> rsIDsToExclude = null;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -702,5 +705,21 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> {
|
|||
return Arrays.asList(evalNode,compNode,jexlNode,filterNode,noveltyNode);
|
||||
}
|
||||
|
||||
//
|
||||
// utility functions
|
||||
//
|
||||
|
||||
/**
|
||||
* Takes an eval generated VariantContext and attempts to return the number of samples in the
|
||||
* VC. If there are genotypes, it returns that value first. Otherwise it returns nSamples, if
|
||||
* provided. Returns -1 if no sample counts can be obtained.
|
||||
*
|
||||
* @param eval
|
||||
* @return
|
||||
*/
|
||||
public int getNSamplesForEval( VariantContext eval ) {
|
||||
return eval.hasGenotypes() ? eval.getNSamples() : nSamples;
|
||||
}
|
||||
|
||||
protected Logger getLogger() { return logger; }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,16 @@ abstract class VariantEvaluator {
|
|||
// System.out.printf("%40s %s%n", interestingSitePrefix, why);
|
||||
// }
|
||||
|
||||
protected VariantEvalWalker veWalker = null;
|
||||
public VariantEvaluator(VariantEvalWalker parent) {
|
||||
veWalker = parent;
|
||||
// don't do anything
|
||||
}
|
||||
|
||||
protected VariantEvalWalker getVEWalker() {
|
||||
return veWalker;
|
||||
}
|
||||
|
||||
public abstract boolean enabled();
|
||||
//public boolean processedAnySites() { return processedASite; }
|
||||
//protected void markSiteAsProcessed() { processedASite = true; }
|
||||
|
|
|
|||
|
|
@ -208,7 +208,7 @@ public class VariantQualityScore extends VariantEvaluator {
|
|||
}
|
||||
|
||||
public VariantQualityScore(VariantEvalWalker parent) {
|
||||
// don't do anything
|
||||
super(parent);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
|
|
|
|||
Loading…
Reference in New Issue