A better output for the Genotype Concordance summary. It now reports only the % of comp hom-ref called hom-ref, het called het, and hom-var called hom-var, which are the quantities we typically show in slides. Updated integration tests to reflect this change.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5429 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
93de326066
commit
ee8f2871f7
|
|
@ -555,11 +555,9 @@ class ACStats extends SampleStats {
|
|||
class SampleSummaryStats implements TableType {
|
||||
protected final static String ALL_SAMPLES_KEY = "allSamples";
|
||||
protected final static String[] COLUMN_KEYS = new String[]{
|
||||
"percent_comp_ref_called_var",
|
||||
"percent_comp_ref_called_ref",
|
||||
"percent_comp_het_called_het",
|
||||
"percent_comp_het_called_var",
|
||||
"percent_comp_hom_called_hom",
|
||||
"percent_comp_hom_called_var",
|
||||
"percent_non_reference_sensitivity",
|
||||
"percent_overall_genotype_concordance",
|
||||
"percent_non_reference_discrepancy_rate"};
|
||||
|
|
@ -661,8 +659,8 @@ class SampleSummaryStats implements TableType {
|
|||
|
||||
long numer, denom;
|
||||
|
||||
// Summary 0: % ref called as var
|
||||
numer = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_REF), allVariantGenotypes);
|
||||
// Summary 0: % ref called as ref
|
||||
numer = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()];
|
||||
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_REF), allGenotypes);
|
||||
updateSummaries(0, summary, numer, denom);
|
||||
|
||||
|
|
@ -671,40 +669,30 @@ class SampleSummaryStats implements TableType {
|
|||
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HET), allGenotypes);
|
||||
updateSummaries(1, summary, numer, denom);
|
||||
|
||||
// Summary 2: % het called as var
|
||||
numer = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HET), allVariantGenotypes);
|
||||
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HET), allGenotypes);
|
||||
updateSummaries(2, summary, numer, denom);
|
||||
|
||||
// Summary 3: % homVar called as homVar
|
||||
// Summary 2: % homVar called as homVar
|
||||
numer = stats[Genotype.Type.HOM_VAR.ordinal()][Genotype.Type.HOM_VAR.ordinal()];
|
||||
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_VAR), allGenotypes);
|
||||
updateSummaries(3, summary, numer, denom);
|
||||
updateSummaries(2, summary, numer, denom);
|
||||
|
||||
// Summary 4: % homVars called as var
|
||||
numer = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_VAR), allVariantGenotypes);
|
||||
denom = sumStatsAllPairs(stats, EnumSet.of(Genotype.Type.HOM_VAR), allGenotypes);
|
||||
updateSummaries(4, summary, numer, denom);
|
||||
|
||||
// Summary 5: % non-ref called as non-ref
|
||||
// Summary 3: % non-ref called as non-ref
|
||||
// MAD: this is known as the non-reference sensitivity (# non-ref according to comp found in eval / # non-ref in comp)
|
||||
numer = sumStatsAllPairs(stats, allVariantGenotypes, allVariantGenotypes);
|
||||
denom = sumStatsAllPairs(stats, allVariantGenotypes, allGenotypes);
|
||||
updateSummaries(5, summary, numer, denom);
|
||||
updateSummaries(3, summary, numer, denom);
|
||||
|
||||
// Summary 6: overall genotype concordance of sites called in eval track
|
||||
// Summary 4: overall genotype concordance of sites called in eval track
|
||||
// MAD: this is the traditional genotype concordance
|
||||
numer = sumStatsDiag(stats, allCalledGenotypes);
|
||||
denom = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes);
|
||||
updateSummaries(6, summary, numer, denom);
|
||||
updateSummaries(4, summary, numer, denom);
|
||||
|
||||
// Summary 7: overall genotype concordance of sites called non-ref in eval track
|
||||
// Summary 5: non-reference discrepancy rate -- of all called pairs excluding concordant hom-ref, the fraction not counted by sumStatsDiag over the variant genotypes
|
||||
long homrefConcords = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()];
|
||||
long diag = sumStatsDiag(stats, allVariantGenotypes);
|
||||
long allNoHomRef = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
numer = allNoHomRef - diag;
|
||||
denom = allNoHomRef;
|
||||
updateSummaries(7, summary, numer, denom);
|
||||
updateSummaries(5, summary, numer, denom);
|
||||
}
|
||||
|
||||
// update the final summary stats
|
||||
|
|
|
|||
|
|
@ -355,7 +355,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
for (String vcfFile : vcfFiles) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG -B:eval,VCF " + validationDataLocation + vcfFile + " -B:comp,VCF " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s",
|
||||
1,
|
||||
Arrays.asList("86de930ab857e27717c80b11594808f3"));
|
||||
Arrays.asList("732d32997b19d9c4f0291287858c56d2"));
|
||||
executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
//executeTest("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
}
|
||||
|
|
@ -409,7 +409,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCompVsEvalAC() {
|
||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance -B:evalYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.ug.very.few.lines.vcf -B:compYRI,VCF /humgen/gsa-hpprojects/GATK/data/Validation_Data/yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("3199177faa347fe3a78de17b83afd909"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("06cd5618ec23fe83a0ddb2a3e8622a16"));
|
||||
executeTestParallel("testCompVsEvalAC",spec);
|
||||
//executeTest("testCompVsEvalAC",spec);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue