Merge pull request #214 from broadinstitute/chartl_genotype_concordance_diploid_and_OGC

Add overall genotype concordance to the genotype concordance tool. In ad...
This commit is contained in:
Yossi Farjoun 2013-05-14 14:19:54 -07:00
commit 409a202492
4 changed files with 61 additions and 8 deletions

View File

@ -567,8 +567,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
table[5] = new int[] {12, 0, 34, 20, 10, 0}; table[5] = new int[] {12, 0, 34, 20, 10, 0};
double EXPEC_NRS = 0.8969957; double EXPEC_NRS = 0.8969957;
double EXPEC_NRD = 0.1071429; double EXPEC_NRD = 0.1071429;
double EXPEC_OGC = 0.92592592; // (100+150+50)/(100+5+1+150+7+3+50+2+6)
Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7); Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7);
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7); Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7);
Assert.assertEquals(EXPEC_OGC,metrics.getOverallOGC(),1e-7);
int EXPEC_EVAL_REF = 124; int EXPEC_EVAL_REF = 124;
int EXPEC_EVAL_HET = 169; int EXPEC_EVAL_HET = 169;
int EXPEC_EVAL_VAR = 62; int EXPEC_EVAL_VAR = 62;

View File

@ -64,7 +64,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"), baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"),
0, 0,
Arrays.asList("0f29a0c6dc44066228c8cb204fd53ec0") Arrays.asList("6fe03c63a76cb61a76e550137ebf8c5e")
); );
executeTest("test indel concordance", spec); executeTest("test indel concordance", spec);
@ -75,7 +75,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"), baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"),
0, 0,
Arrays.asList("fc725022d47b4b5f8a6ef87f0f1ffe89") Arrays.asList("6246d81b25a9a96e379c47056177a65d")
); );
executeTest("test non-overlapping samples", spec); executeTest("test non-overlapping samples", spec);
@ -86,7 +86,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize", baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize",
0, 0,
Arrays.asList("3993709e38b033e89017dfbb63226e94") Arrays.asList("ee1da9b0119ce7869b2d05d81cef255e")
); );
executeTest("Test moltenized output",spec); executeTest("Test moltenized output",spec);
@ -97,7 +97,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"), baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"),
0, 0,
Arrays.asList("352d59c4ac0cee5eb8ddbc9404b19ce9") Arrays.asList("a1c48b041b0f0b8bf9387d5db337e5a1")
); );
executeTest("test multiple records per site",spec); executeTest("test multiple records per site",spec);
@ -108,7 +108,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'", baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'",
0, 0,
Arrays.asList("b7b495ccfa6d50a6be3e095d3f6d3c52") Arrays.asList("7f52e70482c30031bedf2fcc6bd359b2")
); );
executeTest("Test filtering on the EVAL rod",spec); executeTest("Test filtering on the EVAL rod",spec);
@ -119,7 +119,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'", baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'",
0, 0,
Arrays.asList("6406b16cde7960b8943edf594303afd6") Arrays.asList("1402712d1ab18bafa5bac130af2f974c")
); );
executeTest("Test filtering on the COMP rod", spec); executeTest("Test filtering on the COMP rod", spec);
@ -130,7 +130,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec( WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'", baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'",
0, 0,
Arrays.asList("26ffd06215b6177acce0ea9f35d73d31") Arrays.asList("6b83695122481d2dcbe3c792caf743a1")
); );
executeTest("Test filtering on both rods",spec); executeTest("Test filtering on both rods",spec);

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*; import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader; import org.broadinstitute.variant.vcf.VCFHeader;
@ -81,10 +82,23 @@ public class ConcordanceMetrics {
return Collections.unmodifiableMap(nrd); return Collections.unmodifiableMap(nrd);
} }
public Map<String,Double> getPerSampleOGC() {
Map<String,Double> ogc = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
ogc.put(sampleTable.getKey(),calculateOGC(sampleTable.getValue()));
}
return Collections.unmodifiableMap(ogc);
}
public Double getOverallNRD() { public Double getOverallNRD() {
return calculateNRD(overallGenotypeConcordance); return calculateNRD(overallGenotypeConcordance);
} }
public Double getOverallOGC() {
return calculateOGC(overallGenotypeConcordance);
}
public Map<String,Double> getPerSampleNRS() { public Map<String,Double> getPerSampleNRS() {
Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size()); Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) { for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
@ -110,6 +124,11 @@ public class ConcordanceMetrics {
for ( String sample : perSampleGenotypeConcordance.keySet() ) { for ( String sample : perSampleGenotypeConcordance.keySet() ) {
Genotype evalGenotype = eval.getGenotype(sample); Genotype evalGenotype = eval.getGenotype(sample);
Genotype truthGenotype = truth.getGenotype(sample); Genotype truthGenotype = truth.getGenotype(sample);
// ensure genotypes are either no-call ("."), missing (empty alleles), or diploid
if ( ( ! evalGenotype.isNoCall() && evalGenotype.getPloidy() != 2 && evalGenotype.getPloidy() > 0) ||
( ! truthGenotype.isNoCall() && truthGenotype.getPloidy() != 2 && truthGenotype.getPloidy() > 0) ) {
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
}
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef); perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef); overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
} }
@ -136,10 +155,32 @@ public class ConcordanceMetrics {
return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total); return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total);
} }
private static double calculateOGC(int[][] concordanceCounts) {
int correct = 0;
int total = 0;
correct += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_REF.ordinal()];
correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()];
correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += correct;
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()];
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()];
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()];
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()];
// OGC is by definition correct/total
// note: if there are no observations (so the ratio is NaN), set this to 100%
return total == 0 ? 1.0 : ( (double) correct)/( (double) total);
}
private static double calculateNRS(GenotypeConcordanceTable table) { private static double calculateNRS(GenotypeConcordanceTable table) {
return calculateNRS(table.getTable()); return calculateNRS(table.getTable());
} }
private static double calculateOGC(GenotypeConcordanceTable table) {
return calculateOGC(table.getTable());
}
private static double calculateNRS(int[][] concordanceCounts) { private static double calculateNRS(int[][] concordanceCounts) {
long confirmedVariant = 0; long confirmedVariant = 0;
long unconfirmedVariant = 0; long unconfirmedVariant = 0;

View File

@ -272,7 +272,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length); GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length); GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length); GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS and NRD",2); GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS, NRD, and OGC",2);
GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length); GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length);
if ( moltenize ) { if ( moltenize ) {
concordanceCompProportions.addColumn("Sample","%s"); concordanceCompProportions.addColumn("Sample","%s");
@ -293,6 +293,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
concordanceCompProportions.addColumn("Proportion","%.3f"); concordanceCompProportions.addColumn("Proportion","%.3f");
concordanceCounts.addColumn("Count","%d"); concordanceCounts.addColumn("Count","%d");
concordanceEvalProportions.addColumn("Proportion","%.3f"); concordanceEvalProportions.addColumn("Proportion","%.3f");
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) { for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) {
ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue(); ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue();
@ -378,9 +379,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) { for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue()); concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue());
} }
for ( Map.Entry<String,Double> ogcEntry : metrics.getPerSampleOGC().entrySet() ) {
concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue());
}
concordanceSummary.set("ALL_NRS_NRD","Sample","ALL"); concordanceSummary.set("ALL_NRS_NRD","Sample","ALL");
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS()); concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS());
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Discrepancy",metrics.getOverallNRD()); concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Discrepancy",metrics.getOverallNRD());
concordanceSummary.set("ALL_NRS_NRD","Overall_Genotype_Concordance",metrics.getOverallOGC());
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
@ -411,6 +416,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
concordanceCounts.addColumn("Mismatching_Alleles","%d"); concordanceCounts.addColumn("Mismatching_Alleles","%d");
concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f"); concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f");
concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f"); concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f");
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
siteConcordance.addColumn(type.toString(),"%d"); siteConcordance.addColumn(type.toString(),"%d");
} }
@ -463,9 +469,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) { for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue()); concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue());
} }
for ( Map.Entry<String,Double> ogcEntry : metrics.getPerSampleOGC().entrySet() ) {
concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue());
}
concordanceSummary.set("ALL","Sample","ALL"); concordanceSummary.set("ALL","Sample","ALL");
concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS()); concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS());
concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD()); concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD());
concordanceSummary.set("ALL","Overall_Genotype_Concordance",metrics.getOverallOGC());
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type)); siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type));