Bugfix for incorrect allele counting in IndelSummary
-- The previous version would count all alt alleles as present in a sample, even if only one was present, because of the way VariantEval subsetted VCs
-- Updated the code for subsetting VCs by sample to be clearer about how it handles rederiving alleles
-- Updated a few pieces of code to restore the previous correct behavior
-- Updated a few MD5s, as ref calls at sites in dbSNP are now counted as having a comp site and therefore show up in known sites when the Novelty stratification is on (which I think is correct)
-- Walkers that used the old subsetting function with true now use the clearer version that rederives alleles by default
This commit is contained in:
parent
2b25df3d53
commit
ccac77d888
|
|
@ -288,7 +288,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
|
|||
private VariantContext reduceVCToSamples(VariantContext vc, Set<String> samplesToPhase) {
|
||||
// for ( String sample : samplesToPhase )
|
||||
// logger.debug(String.format(" Sample %s has genotype %s, het = %s", sample, vc.getGenotype(sample), vc.getGenotype(sample).isHet() ));
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase, true);
|
||||
VariantContext subvc = vc.subContextFromSamples(samplesToPhase);
|
||||
// logger.debug("original VC = " + vc);
|
||||
// logger.debug("sub VC = " + subvc);
|
||||
return VariantContextUtils.pruneVariantContext(subvc, KEYS_TO_KEEP_IN_REDUCED_VCF);
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
return true;
|
||||
// want to include a site in the given samples if it is *likely* to be variant (via the EXACT model)
|
||||
// first subset to the samples
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, true);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
|
||||
// now check to see (using EXACT model) whether this should be variant
|
||||
// do we want to apply a prior? maybe user-spec?
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class GTBasedSampleSelector extends SampleSelector{
|
|||
if ( samples == null || samples.isEmpty() )
|
||||
return true;
|
||||
|
||||
VariantContext subContext = vc.subContextFromSamples(samples, false);
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
if ( subContext.isPolymorphicInSamples() ) {
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -500,7 +500,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
|
|||
|
||||
@Requires({"eval != null", "comp != null"})
|
||||
private EvalCompMatchType doEvalAndCompMatch(final VariantContext eval, final VariantContext comp, boolean requireStrictAlleleMatch) {
|
||||
// find all of the matching comps
|
||||
if ( comp.getType() == VariantContext.Type.NO_VARIATION || eval.getType() == VariantContext.Type.NO_VARIATION )
|
||||
// if either of these are NO_VARIATION they are LENIENT matches
|
||||
return EvalCompMatchType.LENIENT;
|
||||
|
||||
if ( comp.getType() != eval.getType() )
|
||||
return EvalCompMatchType.NO_MATCH;
|
||||
|
||||
|
|
|
|||
|
|
@ -57,9 +57,12 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
|||
}
|
||||
}
|
||||
|
||||
public void update2(VariantContext vc1, VariantContext vc2, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (vc1 != null) updateTiTv(vc1, false);
|
||||
if (vc2 != null) updateTiTv(vc2, true);
|
||||
@Override
|
||||
public void update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if (eval != null)
|
||||
updateTiTv(eval, false);
|
||||
if (comp != null)
|
||||
updateTiTv(comp, true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ public class Novelty extends VariantStratifier implements StandardStratification
|
|||
final Collection<VariantContext> knownComps = tracker.getValues(knowns, ref.getLocus());
|
||||
for ( final VariantContext c : knownComps ) {
|
||||
// loop over sites, looking for something that matches the type eval
|
||||
if ( eval.getType() == c.getType() ) {
|
||||
if ( eval.getType() == c.getType() || eval.getType() == VariantContext.Type.NO_VARIATION ) {
|
||||
return KNOWN_STATES;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -197,7 +197,9 @@ public class VariantEvalUtils {
|
|||
* @return a new VariantContext with just the requested samples
|
||||
*/
|
||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Set<String> sampleNames) {
|
||||
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, false));
|
||||
// if we want to preserve AC0 sites as polymorphic we need to not rederive alleles
|
||||
final boolean deriveAlleles = variantEvalWalker.ignoreAC0Sites();
|
||||
return ensureAnnotations(vc, vc.subContextFromSamples(sampleNames, deriveAlleles));
|
||||
}
|
||||
|
||||
public VariantContext ensureAnnotations(final VariantContext vc, final VariantContext vcsub) {
|
||||
|
|
@ -262,12 +264,8 @@ public class VariantEvalUtils {
|
|||
// First, filter the VariantContext to represent only the samples for evaluation
|
||||
VariantContext vcsub = vc;
|
||||
|
||||
if (subsetBySample && vc.hasGenotypes()) {
|
||||
if ( variantEvalWalker.isSubsettingToSpecificSamples() )
|
||||
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
|
||||
else
|
||||
vcsub = ensureAnnotations(vc, vc);
|
||||
}
|
||||
if (subsetBySample && vc.hasGenotypes())
|
||||
vcsub = getSubsetOfVariantContext(vc, variantEvalWalker.getSampleNamesForEvaluation());
|
||||
|
||||
if ((byFilter || !vcsub.isFiltered())) {
|
||||
addMapping(mapping, VariantEval.getAllSampleName(), vcsub);
|
||||
|
|
|
|||
|
|
@ -334,12 +334,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
* in this VC is returned as the set of alleles in the subContext, even if
|
||||
* some of those alleles aren't in the samples
|
||||
*
|
||||
* WARNING: BE CAREFUL WITH rederiveAllelesFromGenotypes UNLESS YOU KNOW WHAT YOU ARE DOING.
|
||||
*
|
||||
* @param sampleNames the sample names
|
||||
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples
|
||||
* @param rederiveAllelesFromGenotypes if true, returns the alleles to just those in use by the samples, true should be default
|
||||
* @return new VariantContext subsetting to just the given samples
|
||||
*/
|
||||
public VariantContext subContextFromSamples(Set<String> sampleNames, final boolean rederiveAllelesFromGenotypes ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) ) {
|
||||
if ( sampleNames.containsAll(getSampleNames()) && ! rederiveAllelesFromGenotypes ) {
|
||||
return this; // fast path when you don't have any work to do
|
||||
} else {
|
||||
VariantContextBuilder builder = new VariantContextBuilder(this);
|
||||
|
|
@ -355,8 +357,18 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #subContextFromSamples(java.util.Set, boolean) with rederiveAllelesFromGenotypes = true
|
||||
*
|
||||
* @param sampleNames
|
||||
* @return
|
||||
*/
|
||||
public VariantContext subContextFromSamples(final Set<String> sampleNames) {
|
||||
return subContextFromSamples(sampleNames, true);
|
||||
}
|
||||
|
||||
public VariantContext subContextFromSample(String sampleName) {
|
||||
return subContextFromSamples(Collections.singleton(sampleName), true);
|
||||
return subContextFromSamples(Collections.singleton(sampleName));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import java.util.Arrays;
|
|||
import java.util.List;
|
||||
|
||||
public class VariantEvalIntegrationTest extends WalkerTest {
|
||||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval/";
|
||||
private static String variantEvalTestDataRoot = privateTestDir + "VariantEval/";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||
private static String fundamentalTestSNPsWithMLEVCF = variantEvalTestDataRoot + "FundamentalsTest.annotated.db.subset.final.withMLE.vcf";
|
||||
|
|
@ -122,7 +122,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("e62a3bd9914d48e2bb2fb4f5dfc5ebc0")
|
||||
Arrays.asList("40abbc9be663aed8ee1158f832463ca8")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNovelty", spec);
|
||||
}
|
||||
|
|
@ -144,7 +144,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("087a2d9943c53e7f49663667c3305c7e")
|
||||
Arrays.asList("106a0e8753e839c0a2c030eb4b165fa9")
|
||||
);
|
||||
executeTest("testFundamentalsCountVariantsSNPsandIndelsWithNoveltyAndFilter", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -152,7 +152,7 @@ public class VariantContextBenchmark extends SimpleBenchmark {
|
|||
public void run(final VariantContext vc) {
|
||||
if ( samples == null )
|
||||
samples = new HashSet<String>(new ArrayList<String>(vc.getSampleNames()).subList(0, nSamplesToTake));
|
||||
VariantContext sub = vc.subContextFromSamples(samples, true);
|
||||
VariantContext sub = vc.subContextFromSamples(samples);
|
||||
sub.getNSamples();
|
||||
}
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue