Bugfix: records that get paired up during the resolution of multiple-records-per-site were not going into genotype-level filtering. Caught via testing.

Testing for moltenized output, and for genotype-level filtering. This tool is now fully functional. There are three todo items:

1) Docs
2) An additional output table that gives concordance proportions normalized by records in both eval and comp (not just total in eval or total in comp)
3) Code cleanup for table creation (putting a table together the way I do takes -way- too many lines of code)
This commit is contained in:
Chris Hartl 2013-01-18 09:49:48 -05:00
parent e15d4ad278
commit 91030e9afa
2 changed files with 50 additions and 9 deletions

View File

@ -94,9 +94,8 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
private List<VariantContextUtils.JexlVCMatchExp> evalJexls = null;
private List<VariantContextUtils.JexlVCMatchExp> compJexls = null;
// todo -- table with "proportion of overlapping sites" (not just eval/comp margins)
// todo -- moltenize
// todo -- table with "proportion of overlapping sites" (not just eval/comp margins) [e.g. drop no-calls]
// (this will break all the integration tests of course, due to new formatting)
public void initialize() {
evalJexls = initializeJexl(genotypeFilterExpressionsEval);
@ -201,7 +200,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
}
if ( pairedComp != null ) {
compList.remove(pairedComp);
resolvedPairs.add(new Pair<VariantContext, VariantContext>(eval,pairedComp));
resolvedPairs.add(new Pair<VariantContext, VariantContext>(filterGenotypes(eval,ignoreFilters,evalJexls),filterGenotypes(pairedComp,ignoreFilters,compJexls)));
pairedEval.add(eval);
if ( compList.size() < 1 )
break;
@ -209,11 +208,11 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
}
evalList.removeAll(pairedEval);
for ( VariantContext unpairedEval : evalList ) {
resolvedPairs.add(new Pair<VariantContext, VariantContext>(unpairedEval,createEmptyContext(unpairedEval,compSamples)));
resolvedPairs.add(new Pair<VariantContext, VariantContext>(filterGenotypes(unpairedEval,ignoreFilters,evalJexls),createEmptyContext(unpairedEval,compSamples)));
}
for ( VariantContext unpairedComp : compList ) {
resolvedPairs.add(new Pair<VariantContext, VariantContext>(createEmptyContext(unpairedComp,evalSamples),unpairedComp));
resolvedPairs.add(new Pair<VariantContext, VariantContext>(createEmptyContext(unpairedComp,evalSamples),filterGenotypes(unpairedComp,ignoreFilters,compJexls)));
}
return resolvedPairs;
@ -233,6 +232,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
}
public void onTraversalDone(ConcordanceMetrics metrics) {
// todo -- this is over 200 lines of code just to format the output and could use some serious cleanup
GATKReport report = new GATKReport();
GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length);
@ -460,9 +460,6 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
}
public VariantContext filterGenotypes(VariantContext context, boolean ignoreSiteFilter, List<VariantContextUtils.JexlVCMatchExp> exps) {
// placeholder method for genotype-level filtering. However if the site itself is filtered,
// and such filters are not ignored, the genotype-level data should be altered to reflect this
if ( ! context.isFiltered() || ignoreSiteFilter ) {
List<Genotype> filteredGenotypes = new ArrayList<Genotype>(context.getNSamples());
for ( Genotype g : context.getGenotypes() ) {

View File

@ -61,6 +61,17 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
executeTest("test non-overlapping samples", spec);
}
@Test
public void testNonoverlappingSamplesMoltenized() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"),
0,
Arrays.asList("")
);
executeTest("Test moltenized output",spec);
}
@Test
public void testMultipleRecordsPerSite() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -71,4 +82,37 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
executeTest("test multiple records per site",spec);
}
@Test
public void testGQFilteringEval() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'",
0,
Arrays.asList("b7b495ccfa6d50a6be3e095d3f6d3c52")
);
executeTest("Test filtering on the EVAL rod",spec);
}
@Test
public void testFloatFilteringComp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'",
0,
Arrays.asList("6406b16cde7960b8943edf594303afd6")
);
executeTest("Test filtering on the COMP rod", spec);
}
@Test
public void testCombinedFilters() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'",
0,
Arrays.asList("26ffd06215b6177acce0ea9f35d73d31")
);
executeTest("Test filtering on both rods",spec);
}
}