Merge pull request #214 from broadinstitute/chartl_genotype_concordance_diploid_and_OGC
Add overall genotype concordance to the genotype concordance tool. In ad...
This commit is contained in:
commit
409a202492
|
|
@ -567,8 +567,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
table[5] = new int[] {12, 0, 34, 20, 10, 0};
|
||||
double EXPEC_NRS = 0.8969957;
|
||||
double EXPEC_NRD = 0.1071429;
|
||||
double EXPEC_OGC = 0.92592592; // (100+150+50)/(100+5+1+150+7+3+50+2+6)
|
||||
Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7);
|
||||
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7);
|
||||
Assert.assertEquals(EXPEC_OGC,metrics.getOverallOGC(),1e-7);
|
||||
int EXPEC_EVAL_REF = 124;
|
||||
int EXPEC_EVAL_HET = 169;
|
||||
int EXPEC_EVAL_VAR = 62;
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"),
|
||||
0,
|
||||
Arrays.asList("0f29a0c6dc44066228c8cb204fd53ec0")
|
||||
Arrays.asList("6fe03c63a76cb61a76e550137ebf8c5e")
|
||||
);
|
||||
|
||||
executeTest("test indel concordance", spec);
|
||||
|
|
@ -75,7 +75,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"),
|
||||
0,
|
||||
Arrays.asList("fc725022d47b4b5f8a6ef87f0f1ffe89")
|
||||
Arrays.asList("6246d81b25a9a96e379c47056177a65d")
|
||||
);
|
||||
|
||||
executeTest("test non-overlapping samples", spec);
|
||||
|
|
@ -86,7 +86,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize",
|
||||
0,
|
||||
Arrays.asList("3993709e38b033e89017dfbb63226e94")
|
||||
Arrays.asList("ee1da9b0119ce7869b2d05d81cef255e")
|
||||
);
|
||||
|
||||
executeTest("Test moltenized output",spec);
|
||||
|
|
@ -97,7 +97,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"),
|
||||
0,
|
||||
Arrays.asList("352d59c4ac0cee5eb8ddbc9404b19ce9")
|
||||
Arrays.asList("a1c48b041b0f0b8bf9387d5db337e5a1")
|
||||
);
|
||||
|
||||
executeTest("test multiple records per site",spec);
|
||||
|
|
@ -108,7 +108,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'",
|
||||
0,
|
||||
Arrays.asList("b7b495ccfa6d50a6be3e095d3f6d3c52")
|
||||
Arrays.asList("7f52e70482c30031bedf2fcc6bd359b2")
|
||||
);
|
||||
|
||||
executeTest("Test filtering on the EVAL rod",spec);
|
||||
|
|
@ -119,7 +119,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'",
|
||||
0,
|
||||
Arrays.asList("6406b16cde7960b8943edf594303afd6")
|
||||
Arrays.asList("1402712d1ab18bafa5bac130af2f974c")
|
||||
);
|
||||
|
||||
executeTest("Test filtering on the COMP rod", spec);
|
||||
|
|
@ -130,7 +130,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'",
|
||||
0,
|
||||
Arrays.asList("26ffd06215b6177acce0ea9f35d73d31")
|
||||
Arrays.asList("6b83695122481d2dcbe3c792caf743a1")
|
||||
);
|
||||
|
||||
executeTest("Test filtering on both rods",spec);
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
|
@ -81,10 +82,23 @@ public class ConcordanceMetrics {
|
|||
return Collections.unmodifiableMap(nrd);
|
||||
}
|
||||
|
||||
public Map<String,Double> getPerSampleOGC() {
|
||||
Map<String,Double> ogc = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
||||
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
||||
ogc.put(sampleTable.getKey(),calculateOGC(sampleTable.getValue()));
|
||||
}
|
||||
|
||||
return Collections.unmodifiableMap(ogc);
|
||||
}
|
||||
|
||||
public Double getOverallNRD() {
|
||||
return calculateNRD(overallGenotypeConcordance);
|
||||
}
|
||||
|
||||
public Double getOverallOGC() {
|
||||
return calculateOGC(overallGenotypeConcordance);
|
||||
}
|
||||
|
||||
public Map<String,Double> getPerSampleNRS() {
|
||||
Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
||||
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
||||
|
|
@ -110,6 +124,11 @@ public class ConcordanceMetrics {
|
|||
for ( String sample : perSampleGenotypeConcordance.keySet() ) {
|
||||
Genotype evalGenotype = eval.getGenotype(sample);
|
||||
Genotype truthGenotype = truth.getGenotype(sample);
|
||||
// ensure genotypes are either no-call ("."), missing (empty alleles), or diploid
|
||||
if ( ( ! evalGenotype.isNoCall() && evalGenotype.getPloidy() != 2 && evalGenotype.getPloidy() > 0) ||
|
||||
( ! truthGenotype.isNoCall() && truthGenotype.getPloidy() != 2 && truthGenotype.getPloidy() > 0) ) {
|
||||
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
|
||||
}
|
||||
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
}
|
||||
|
|
@ -136,10 +155,32 @@ public class ConcordanceMetrics {
|
|||
return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total);
|
||||
}
|
||||
|
||||
private static double calculateOGC(int[][] concordanceCounts) {
|
||||
int correct = 0;
|
||||
int total = 0;
|
||||
correct += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||
correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()];
|
||||
correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += correct;
|
||||
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()];
|
||||
// OGC is by definition correct/total
|
||||
// note: if there are no observations (so the ratio is NaN), set this to 100%
|
||||
return total == 0 ? 1.0 : ( (double) correct)/( (double) total);
|
||||
}
|
||||
|
||||
private static double calculateNRS(GenotypeConcordanceTable table) {
|
||||
return calculateNRS(table.getTable());
|
||||
}
|
||||
|
||||
private static double calculateOGC(GenotypeConcordanceTable table) {
|
||||
return calculateOGC(table.getTable());
|
||||
}
|
||||
|
||||
private static double calculateNRS(int[][] concordanceCounts) {
|
||||
long confirmedVariant = 0;
|
||||
long unconfirmedVariant = 0;
|
||||
|
|
|
|||
|
|
@ -272,7 +272,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length);
|
||||
GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length);
|
||||
GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length);
|
||||
GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS and NRD",2);
|
||||
GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS, NRD, and OGC",2);
|
||||
GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length);
|
||||
if ( moltenize ) {
|
||||
concordanceCompProportions.addColumn("Sample","%s");
|
||||
|
|
@ -293,6 +293,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
concordanceCompProportions.addColumn("Proportion","%.3f");
|
||||
concordanceCounts.addColumn("Count","%d");
|
||||
concordanceEvalProportions.addColumn("Proportion","%.3f");
|
||||
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
|
||||
|
||||
for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) {
|
||||
ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue();
|
||||
|
|
@ -378,9 +379,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
||||
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue());
|
||||
}
|
||||
for ( Map.Entry<String,Double> ogcEntry : metrics.getPerSampleOGC().entrySet() ) {
|
||||
concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue());
|
||||
}
|
||||
concordanceSummary.set("ALL_NRS_NRD","Sample","ALL");
|
||||
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS());
|
||||
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Discrepancy",metrics.getOverallNRD());
|
||||
concordanceSummary.set("ALL_NRS_NRD","Overall_Genotype_Concordance",metrics.getOverallOGC());
|
||||
|
||||
|
||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||
|
|
@ -411,6 +416,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
concordanceCounts.addColumn("Mismatching_Alleles","%d");
|
||||
concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f");
|
||||
concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f");
|
||||
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
|
||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||
siteConcordance.addColumn(type.toString(),"%d");
|
||||
}
|
||||
|
|
@ -463,9 +469,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
||||
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue());
|
||||
}
|
||||
for ( Map.Entry<String,Double> ogcEntry : metrics.getPerSampleOGC().entrySet() ) {
|
||||
concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue());
|
||||
}
|
||||
concordanceSummary.set("ALL","Sample","ALL");
|
||||
concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS());
|
||||
concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD());
|
||||
concordanceSummary.set("ALL","Overall_Genotype_Concordance",metrics.getOverallOGC());
|
||||
|
||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||
siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type));
|
||||
|
|
|
|||
Loading…
Reference in New Issue