Merge pull request #214 from broadinstitute/chartl_genotype_concordance_diploid_and_OGC

Add overall genotype concordance to the genotype concordance tool. In ad...
This commit is contained in:
Yossi Farjoun 2013-05-14 14:19:54 -07:00
commit 409a202492
4 changed files with 61 additions and 8 deletions

View File

@ -567,8 +567,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
table[5] = new int[] {12, 0, 34, 20, 10, 0};
double EXPEC_NRS = 0.8969957;
double EXPEC_NRD = 0.1071429;
double EXPEC_OGC = 0.92592592; // (100+150+50)/(100+5+1+150+7+3+50+2+6)
Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7);
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7);
Assert.assertEquals(EXPEC_OGC,metrics.getOverallOGC(),1e-7);
int EXPEC_EVAL_REF = 124;
int EXPEC_EVAL_HET = 169;
int EXPEC_EVAL_VAR = 62;

View File

@ -64,7 +64,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"),
0,
Arrays.asList("0f29a0c6dc44066228c8cb204fd53ec0")
Arrays.asList("6fe03c63a76cb61a76e550137ebf8c5e")
);
executeTest("test indel concordance", spec);
@ -75,7 +75,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"),
0,
Arrays.asList("fc725022d47b4b5f8a6ef87f0f1ffe89")
Arrays.asList("6246d81b25a9a96e379c47056177a65d")
);
executeTest("test non-overlapping samples", spec);
@ -86,7 +86,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize",
0,
Arrays.asList("3993709e38b033e89017dfbb63226e94")
Arrays.asList("ee1da9b0119ce7869b2d05d81cef255e")
);
executeTest("Test moltenized output",spec);
@ -97,7 +97,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"),
0,
Arrays.asList("352d59c4ac0cee5eb8ddbc9404b19ce9")
Arrays.asList("a1c48b041b0f0b8bf9387d5db337e5a1")
);
executeTest("test multiple records per site",spec);
@ -108,7 +108,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'",
0,
Arrays.asList("b7b495ccfa6d50a6be3e095d3f6d3c52")
Arrays.asList("7f52e70482c30031bedf2fcc6bd359b2")
);
executeTest("Test filtering on the EVAL rod",spec);
@ -119,7 +119,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'",
0,
Arrays.asList("6406b16cde7960b8943edf594303afd6")
Arrays.asList("1402712d1ab18bafa5bac130af2f974c")
);
executeTest("Test filtering on the COMP rod", spec);
@ -130,7 +130,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'",
0,
Arrays.asList("26ffd06215b6177acce0ea9f35d73d31")
Arrays.asList("6b83695122481d2dcbe3c792caf743a1")
);
executeTest("Test filtering on both rods",spec);

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader;
@ -81,10 +82,23 @@ public class ConcordanceMetrics {
return Collections.unmodifiableMap(nrd);
}
public Map<String,Double> getPerSampleOGC() {
Map<String,Double> ogc = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
ogc.put(sampleTable.getKey(),calculateOGC(sampleTable.getValue()));
}
return Collections.unmodifiableMap(ogc);
}
public Double getOverallNRD() {
return calculateNRD(overallGenotypeConcordance);
}
public Double getOverallOGC() {
return calculateOGC(overallGenotypeConcordance);
}
public Map<String,Double> getPerSampleNRS() {
Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
@ -110,6 +124,11 @@ public class ConcordanceMetrics {
for ( String sample : perSampleGenotypeConcordance.keySet() ) {
Genotype evalGenotype = eval.getGenotype(sample);
Genotype truthGenotype = truth.getGenotype(sample);
// ensure genotypes are either no-call ("."), missing (empty alleles), or diploid
if ( ( ! evalGenotype.isNoCall() && evalGenotype.getPloidy() != 2 && evalGenotype.getPloidy() > 0) ||
( ! truthGenotype.isNoCall() && truthGenotype.getPloidy() != 2 && truthGenotype.getPloidy() > 0) ) {
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
}
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
}
@ -136,10 +155,32 @@ public class ConcordanceMetrics {
return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total);
}
private static double calculateOGC(int[][] concordanceCounts) {
int correct = 0;
int total = 0;
correct += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_REF.ordinal()];
correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()];
correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += correct;
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()];
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()];
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()];
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()];
// OGC is by definition correct/total
// note: if there are no observations (so the ratio is NaN), set this to 100%
return total == 0 ? 1.0 : ( (double) correct)/( (double) total);
}
private static double calculateNRS(GenotypeConcordanceTable table) {
return calculateNRS(table.getTable());
}
private static double calculateOGC(GenotypeConcordanceTable table) {
return calculateOGC(table.getTable());
}
private static double calculateNRS(int[][] concordanceCounts) {
long confirmedVariant = 0;
long unconfirmedVariant = 0;

View File

@ -272,7 +272,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS and NRD",2);
GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS, NRD, and OGC",2);
GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length);
if ( moltenize ) {
concordanceCompProportions.addColumn("Sample","%s");
@ -293,6 +293,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
concordanceCompProportions.addColumn("Proportion","%.3f");
concordanceCounts.addColumn("Count","%d");
concordanceEvalProportions.addColumn("Proportion","%.3f");
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) {
ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue();
@ -378,9 +379,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue());
}
for ( Map.Entry<String,Double> ogcEntry : metrics.getPerSampleOGC().entrySet() ) {
concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue());
}
concordanceSummary.set("ALL_NRS_NRD","Sample","ALL");
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS());
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Discrepancy",metrics.getOverallNRD());
concordanceSummary.set("ALL_NRS_NRD","Overall_Genotype_Concordance",metrics.getOverallOGC());
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
@ -411,6 +416,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
concordanceCounts.addColumn("Mismatching_Alleles","%d");
concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f");
concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f");
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
siteConcordance.addColumn(type.toString(),"%d");
}
@ -463,9 +469,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue());
}
for ( Map.Entry<String,Double> ogcEntry : metrics.getPerSampleOGC().entrySet() ) {
concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue());
}
concordanceSummary.set("ALL","Sample","ALL");
concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS());
concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD());
concordanceSummary.set("ALL","Overall_Genotype_Concordance",metrics.getOverallOGC());
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type));