Add overall genotype concordance to the genotype concordance tool. In addition, protect from non-diploid genotypes, which can cause very strange behavior.
Update MD5 sums. As expected, md5 changes are consistent with the genotype concordance field being added to each output.
This commit is contained in:
parent
98021db264
commit
6ff74deac7
|
|
@ -567,8 +567,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||||
table[5] = new int[] {12, 0, 34, 20, 10, 0};
|
table[5] = new int[] {12, 0, 34, 20, 10, 0};
|
||||||
double EXPEC_NRS = 0.8969957;
|
double EXPEC_NRS = 0.8969957;
|
||||||
double EXPEC_NRD = 0.1071429;
|
double EXPEC_NRD = 0.1071429;
|
||||||
|
double EXPEC_OGC = 0.92592592; // (100+150+50)/(100+5+1+150+7+3+50+2+6)
|
||||||
Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7);
|
Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7);
|
||||||
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7);
|
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7);
|
||||||
|
Assert.assertEquals(EXPEC_OGC,metrics.getOverallOGC(),1e-7);
|
||||||
int EXPEC_EVAL_REF = 124;
|
int EXPEC_EVAL_REF = 124;
|
||||||
int EXPEC_EVAL_HET = 169;
|
int EXPEC_EVAL_HET = 169;
|
||||||
int EXPEC_EVAL_VAR = 62;
|
int EXPEC_EVAL_VAR = 62;
|
||||||
|
|
|
||||||
|
|
@ -64,7 +64,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"),
|
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"),
|
||||||
0,
|
0,
|
||||||
Arrays.asList("0f29a0c6dc44066228c8cb204fd53ec0")
|
Arrays.asList("6fe03c63a76cb61a76e550137ebf8c5e")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("test indel concordance", spec);
|
executeTest("test indel concordance", spec);
|
||||||
|
|
@ -75,7 +75,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"),
|
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"),
|
||||||
0,
|
0,
|
||||||
Arrays.asList("fc725022d47b4b5f8a6ef87f0f1ffe89")
|
Arrays.asList("6246d81b25a9a96e379c47056177a65d")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("test non-overlapping samples", spec);
|
executeTest("test non-overlapping samples", spec);
|
||||||
|
|
@ -86,7 +86,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize",
|
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize",
|
||||||
0,
|
0,
|
||||||
Arrays.asList("3993709e38b033e89017dfbb63226e94")
|
Arrays.asList("ee1da9b0119ce7869b2d05d81cef255e")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("Test moltenized output",spec);
|
executeTest("Test moltenized output",spec);
|
||||||
|
|
@ -97,7 +97,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"),
|
baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"),
|
||||||
0,
|
0,
|
||||||
Arrays.asList("352d59c4ac0cee5eb8ddbc9404b19ce9")
|
Arrays.asList("a1c48b041b0f0b8bf9387d5db337e5a1")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("test multiple records per site",spec);
|
executeTest("test multiple records per site",spec);
|
||||||
|
|
@ -108,7 +108,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'",
|
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'",
|
||||||
0,
|
0,
|
||||||
Arrays.asList("b7b495ccfa6d50a6be3e095d3f6d3c52")
|
Arrays.asList("7f52e70482c30031bedf2fcc6bd359b2")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("Test filtering on the EVAL rod",spec);
|
executeTest("Test filtering on the EVAL rod",spec);
|
||||||
|
|
@ -119,7 +119,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'",
|
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'",
|
||||||
0,
|
0,
|
||||||
Arrays.asList("6406b16cde7960b8943edf594303afd6")
|
Arrays.asList("1402712d1ab18bafa5bac130af2f974c")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("Test filtering on the COMP rod", spec);
|
executeTest("Test filtering on the COMP rod", spec);
|
||||||
|
|
@ -130,7 +130,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
||||||
WalkerTestSpec spec = new WalkerTestSpec(
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'",
|
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'",
|
||||||
0,
|
0,
|
||||||
Arrays.asList("26ffd06215b6177acce0ea9f35d73d31")
|
Arrays.asList("6b83695122481d2dcbe3c792caf743a1")
|
||||||
);
|
);
|
||||||
|
|
||||||
executeTest("Test filtering on both rods",spec);
|
executeTest("Test filtering on both rods",spec);
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||||
import org.broadinstitute.variant.variantcontext.*;
|
import org.broadinstitute.variant.variantcontext.*;
|
||||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||||
|
|
@ -81,10 +82,23 @@ public class ConcordanceMetrics {
|
||||||
return Collections.unmodifiableMap(nrd);
|
return Collections.unmodifiableMap(nrd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Map<String,Double> getPerSampleOGC() {
|
||||||
|
Map<String,Double> ogc = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
||||||
|
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
||||||
|
ogc.put(sampleTable.getKey(),calculateOGC(sampleTable.getValue()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return Collections.unmodifiableMap(ogc);
|
||||||
|
}
|
||||||
|
|
||||||
public Double getOverallNRD() {
|
public Double getOverallNRD() {
|
||||||
return calculateNRD(overallGenotypeConcordance);
|
return calculateNRD(overallGenotypeConcordance);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Double getOverallOGC() {
|
||||||
|
return calculateOGC(overallGenotypeConcordance);
|
||||||
|
}
|
||||||
|
|
||||||
public Map<String,Double> getPerSampleNRS() {
|
public Map<String,Double> getPerSampleNRS() {
|
||||||
Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
||||||
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
||||||
|
|
@ -110,6 +124,11 @@ public class ConcordanceMetrics {
|
||||||
for ( String sample : perSampleGenotypeConcordance.keySet() ) {
|
for ( String sample : perSampleGenotypeConcordance.keySet() ) {
|
||||||
Genotype evalGenotype = eval.getGenotype(sample);
|
Genotype evalGenotype = eval.getGenotype(sample);
|
||||||
Genotype truthGenotype = truth.getGenotype(sample);
|
Genotype truthGenotype = truth.getGenotype(sample);
|
||||||
|
// ensure genotypes are either no-call ("."), missing (empty alleles), or diploid
|
||||||
|
if ( ( ! evalGenotype.isNoCall() && evalGenotype.getPloidy() != 2 && evalGenotype.getPloidy() > 0) ||
|
||||||
|
( ! truthGenotype.isNoCall() && truthGenotype.getPloidy() != 2 && truthGenotype.getPloidy() > 0) ) {
|
||||||
|
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
|
||||||
|
}
|
||||||
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||||
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||||
}
|
}
|
||||||
|
|
@ -136,10 +155,32 @@ public class ConcordanceMetrics {
|
||||||
return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total);
|
return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static double calculateOGC(int[][] concordanceCounts) {
|
||||||
|
int correct = 0;
|
||||||
|
int total = 0;
|
||||||
|
correct += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||||
|
correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()];
|
||||||
|
correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||||
|
total += correct;
|
||||||
|
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()];
|
||||||
|
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||||
|
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||||
|
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||||
|
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||||
|
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()];
|
||||||
|
// NRD is by definition incorrec/total = 1.0-correct/total
|
||||||
|
// note: if there are no observations (so the ratio is NaN), set this to 100%
|
||||||
|
return total == 0 ? 1.0 : ( (double) correct)/( (double) total);
|
||||||
|
}
|
||||||
|
|
||||||
private static double calculateNRS(GenotypeConcordanceTable table) {
|
private static double calculateNRS(GenotypeConcordanceTable table) {
|
||||||
return calculateNRS(table.getTable());
|
return calculateNRS(table.getTable());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static double calculateOGC(GenotypeConcordanceTable table) {
|
||||||
|
return calculateOGC(table.getTable());
|
||||||
|
}
|
||||||
|
|
||||||
private static double calculateNRS(int[][] concordanceCounts) {
|
private static double calculateNRS(int[][] concordanceCounts) {
|
||||||
long confirmedVariant = 0;
|
long confirmedVariant = 0;
|
||||||
long unconfirmedVariant = 0;
|
long unconfirmedVariant = 0;
|
||||||
|
|
|
||||||
|
|
@ -293,6 +293,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
||||||
concordanceCompProportions.addColumn("Proportion","%.3f");
|
concordanceCompProportions.addColumn("Proportion","%.3f");
|
||||||
concordanceCounts.addColumn("Count","%d");
|
concordanceCounts.addColumn("Count","%d");
|
||||||
concordanceEvalProportions.addColumn("Proportion","%.3f");
|
concordanceEvalProportions.addColumn("Proportion","%.3f");
|
||||||
|
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
|
||||||
|
|
||||||
for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) {
|
for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) {
|
||||||
ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue();
|
ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue();
|
||||||
|
|
@ -378,9 +379,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
||||||
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
||||||
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue());
|
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue());
|
||||||
}
|
}
|
||||||
|
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleOGC().entrySet() ) {
|
||||||
|
concordanceSummary.set(nrdEntry.getKey(),"Overall_Genotype_Concordance",nrdEntry.getValue());
|
||||||
|
}
|
||||||
concordanceSummary.set("ALL_NRS_NRD","Sample","ALL");
|
concordanceSummary.set("ALL_NRS_NRD","Sample","ALL");
|
||||||
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS());
|
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS());
|
||||||
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Discrepancy",metrics.getOverallNRD());
|
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Discrepancy",metrics.getOverallNRD());
|
||||||
|
concordanceSummary.set("ALL_NRS_NRD","Overall_Genotype_Concordance",metrics.getOverallOGC());
|
||||||
|
|
||||||
|
|
||||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||||
|
|
@ -411,6 +416,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
||||||
concordanceCounts.addColumn("Mismatching_Alleles","%d");
|
concordanceCounts.addColumn("Mismatching_Alleles","%d");
|
||||||
concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f");
|
concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f");
|
||||||
concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f");
|
concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f");
|
||||||
|
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
|
||||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||||
siteConcordance.addColumn(type.toString(),"%d");
|
siteConcordance.addColumn(type.toString(),"%d");
|
||||||
}
|
}
|
||||||
|
|
@ -463,9 +469,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
||||||
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
||||||
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue());
|
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue());
|
||||||
}
|
}
|
||||||
|
for ( Map.Entry<String,Double> ogcEntry : metrics.getPerSampleOGC().entrySet() ) {
|
||||||
|
concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue());
|
||||||
|
}
|
||||||
concordanceSummary.set("ALL","Sample","ALL");
|
concordanceSummary.set("ALL","Sample","ALL");
|
||||||
concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS());
|
concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS());
|
||||||
concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD());
|
concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD());
|
||||||
|
concordanceSummary.set("ALL","Overall_Genotype_Concordance",metrics.getOverallOGC());
|
||||||
|
|
||||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||||
siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type));
|
siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type));
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue