Bugfix for incorrect allele counting in IndelSummary

-- Previous version would count all alt alleles as present in a sample, even if only 1 were present, because of the way VariantEval subsetted VCs
-- Updated code for subsetting VCs by sample to be clearer about how it handles rederiving alleles
-- Update a few pieces of code to get previous correct behavior
-- Updated a few MD5s as now ref calls at sites in dbSNP are counted as having a comp sites, and therefore show up in known sites when Novelty strat is on (which I think is correct)
-- Walkers that used old subsetting function with true are now using clearer version that does rederive alleles by default
This commit is contained in:
Mark DePristo 2012-08-01 11:57:16 -04:00
parent 2b25df3d53
commit ccac77d888
10 changed files with 38 additions and 22 deletions

View File

@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
// for ( String sample : samplesToPhase )
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
// logger.debug("original VC = " + vc);
// logger.debug("sub VC = " + subvc);
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);

View File

@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
return true;
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
// first subset to the samples
VariantContext subContext = vc.subContextFromSamples(samples, true);
VariantContext subContext = vc.subContextFromSamples(samples);
// now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec?

View File

@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
if ( samples == null || samples.isEmpty() )
return true;
VariantContext subContext = vc.subContextFromSamples(samples, false);
VariantContext subContext = vc.subContextFromSamples(samples);
if ( subContext.isPolymorphicInSamples() ) {
return true;
}

View File

@ -500,7 +500,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
@Requires({"eval != null", "comp != null"})
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
// find all of the matching comps
if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
// if either of these are NO_VARIATION they are LENIENT matches
return EvalCompMatchType.LENIENT;
if ( comp.getType() != eval.getType() )
return EvalCompMatchType.NO_MATCH;

View File

@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
}
}
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc1 != null) updateTiTv(vc1, false);
if (vc2 != null) updateTiTv(vc2, true);
@Override
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (eval != null)
updateTiTv(eval, false);
if (comp != null)
updateTiTv(comp, true);
}
@Override

View File

@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
for ( final VariantContext c : knownComps ) {
// loop over sites, looking for something that matches the type eval
if ( eval.getType() == c.getType() ) {
if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
return KNOWN_STATES;
}
}

View File

@ -197,7 +197,9 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
// if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
}
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
@ -262,12 +264,8 @@ public class VariantEvalUtils {
// First, filter the VariantContext to represent only the samples for evaluation
VariantContext vcsub = vc;
if (subsetBySample && vc.hasGenotypes()) {
if ( variantEvalWalker.isSubsettingToSpecificSamples() )
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
else
vcsub = ensureAnnotations(vc, vc);
}
if (subsetBySample && vc.hasGenotypes())
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
if ((byFilter || !vcsub.isFiltered())) {
addMapping(mapping, VariantEval.getAllSampleName(), vcsub);

View File

@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
* in this VC is returned as the set of alleles in the subContext, even if
* some of those alleles aren't in the samples
*
* WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING?
*
* @param sampleNames the sample names
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default
* @return new VariantContext subsetting to just the given samples
*/
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
if ( sampleNames.containsAll(getSampleNames()) ) {
if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
return this; // fast path when you don't have any work to do
} else {
VariantContextBuilder builder = new VariantContextBuilder(this);
@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
}
/**
* @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true
*
* @param sampleNames
* @return
*/
public VariantContext subContextFromSamples(final Set<String> sampleNames) {
return subContextFromSamples(sampleNames, true);
}
public VariantContext subContextFromSample(String sampleName) {
return subContextFromSamples(Collections.singleton(sampleName), true);
return subContextFromSamples(Collections.singleton(sampleName));
}
/**

View File

@ -34,7 +34,7 @@ import java.util.Arrays;
import java.util.List;
public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
Arrays.asList("40abbc9be663aed8ee1158f832463ca8")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
}
@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9")
);
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
}

View File

@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
public void run(final VariantContext vc) {
if ( samples == null )
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
VariantContext sub = vc.subContextFromSamples(samples, true);
VariantContext sub = vc.subContextFromSamples(samples);
sub.getNSamples();
}
};