Hidden, unsupported ability of VariantEval to run AlleleCount stratification on sites-only VCFs. I'll expose it and add tests for it if people think this is generally useful. The user needs to specify the total number of samples as a command-line argument, since genotypes are not available.

Also, fixes to the large-scale validation script: lower the -minIndelFrac threshold, or else we'll kill most indels since the default of 0.25 is too high for pools; also fix the VE stratifications and add one VE run where eval=1KG, comp=pool data, and the AC stratification is based on the 1KG annotation.
This commit is contained in:
Guillermo del Angel 2012-10-25 10:35:43 -04:00
parent f838815343
commit 596c1723ae
2 changed files with 13 additions and 1 deletions

View File

@ -183,6 +183,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
@Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
private boolean keepSitesWithAC0 = false;
/**
 * Sample count supplied on the command line. It is returned by
 * getNumberOfSamplesForEvaluation() only when no sample names are available for evaluation
 * (e.g. a sites-only eval VCF), so that AlleleCount stratification can still size itself.
 * Hidden and unsupported; defaults to 0, meaning "not provided".
 */
@Hidden
@Argument(fullName="numSamples", shortName="numSamples", doc="Total number of samples to assume when the input eval has no genotypes (e.g. a sites-only VCF); used in place of the genotype-derived sample count, for example by AlleleCount stratification", required=false)
private int numSamplesFromArgument = 0;
/**
* If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying
* variant set, and evaluate the union of the results. Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc.
@ -589,6 +593,14 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
/** Returns the value of the isSubsettingSamples flag. */
public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; }
/** Returns the sampleNamesForEvaluation set as stored (no copy is made); callers should handle a null or empty set. */
public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
/**
 * Returns the number of samples to use for evaluation.
 *
 * If sampleNamesForEvaluation is non-null and non-empty, its size is returned; otherwise the
 * method falls back to numSamplesFromArgument.
 *
 * @return the size of the evaluation sample-name set, or numSamplesFromArgument when that set
 *         is null or empty
 */
public int getNumberOfSamplesForEvaluation() {
    final boolean noSampleNames = sampleNamesForEvaluation == null || sampleNamesForEvaluation.isEmpty();
    return noSampleNames ? numSamplesFromArgument : sampleNamesForEvaluation.size();
}
/** Returns the sampleNamesForStratification set as stored (no copy is made). */
public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }
/** Returns the comps list of variant-context ROD bindings as stored (presumably the comparison tracks; confirm against the field declaration). */
public List<RodBinding<VariantContext>> getComps() { return comps; }

View File

@ -29,7 +29,7 @@ public class AlleleCount extends VariantStratifier {
// There are ploidy x n sample chromosomes
// TODO -- generalize to handle multiple ploidy
nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * getVariantEvalWalker().getSamplePloidy();
nchrom = getVariantEvalWalker().getNumberOfSamplesForEvaluation() * getVariantEvalWalker().getSamplePloidy();
if ( nchrom < 2 )
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");