Add overall genotype concordance to the genotype concordance tool. In addition, protect from non-diploid genotypes, which can cause very strange behavior.
Update MD5 sums. As expected, md5 changes are consistent with the genotype concordance field being added to each output.
This commit is contained in:
parent
98021db264
commit
6ff74deac7
|
|
@ -567,8 +567,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest {
|
|||
table[5] = new int[] {12, 0, 34, 20, 10, 0};
|
||||
double EXPEC_NRS = 0.8969957;
|
||||
double EXPEC_NRD = 0.1071429;
|
||||
double EXPEC_OGC = 0.92592592; // (100+150+50)/(100+5+1+150+7+3+50+2+6)
|
||||
Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7);
|
||||
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7);
|
||||
Assert.assertEquals(EXPEC_OGC,metrics.getOverallOGC(),1e-7);
|
||||
int EXPEC_EVAL_REF = 124;
|
||||
int EXPEC_EVAL_HET = 169;
|
||||
int EXPEC_EVAL_VAR = 62;
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("NA12878.Jan2013.haplotypeCaller.subset.indels.vcf", "NA12878.Jan2013.bestPractices.subset.indels.vcf"),
|
||||
0,
|
||||
Arrays.asList("0f29a0c6dc44066228c8cb204fd53ec0")
|
||||
Arrays.asList("6fe03c63a76cb61a76e550137ebf8c5e")
|
||||
);
|
||||
|
||||
executeTest("test indel concordance", spec);
|
||||
|
|
@ -75,7 +75,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf"),
|
||||
0,
|
||||
Arrays.asList("fc725022d47b4b5f8a6ef87f0f1ffe89")
|
||||
Arrays.asList("6246d81b25a9a96e379c47056177a65d")
|
||||
);
|
||||
|
||||
executeTest("test non-overlapping samples", spec);
|
||||
|
|
@ -86,7 +86,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("GenotypeConcordanceNonOverlapTest_Eval.vcf", "GenotypeConcordanceNonOverlapTest_Comp.vcf") + " -moltenize",
|
||||
0,
|
||||
Arrays.asList("3993709e38b033e89017dfbb63226e94")
|
||||
Arrays.asList("ee1da9b0119ce7869b2d05d81cef255e")
|
||||
);
|
||||
|
||||
executeTest("Test moltenized output",spec);
|
||||
|
|
@ -97,7 +97,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"),
|
||||
0,
|
||||
Arrays.asList("352d59c4ac0cee5eb8ddbc9404b19ce9")
|
||||
Arrays.asList("a1c48b041b0f0b8bf9387d5db337e5a1")
|
||||
);
|
||||
|
||||
executeTest("test multiple records per site",spec);
|
||||
|
|
@ -108,7 +108,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfe 'GQ<30'",
|
||||
0,
|
||||
Arrays.asList("b7b495ccfa6d50a6be3e095d3f6d3c52")
|
||||
Arrays.asList("7f52e70482c30031bedf2fcc6bd359b2")
|
||||
);
|
||||
|
||||
executeTest("Test filtering on the EVAL rod",spec);
|
||||
|
|
@ -119,7 +119,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.50'",
|
||||
0,
|
||||
Arrays.asList("6406b16cde7960b8943edf594303afd6")
|
||||
Arrays.asList("1402712d1ab18bafa5bac130af2f974c")
|
||||
);
|
||||
|
||||
executeTest("Test filtering on the COMP rod", spec);
|
||||
|
|
@ -130,7 +130,7 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString("genotypeConcordanceFilterTest.vcf","genotypeConcordanceFilterTest.vcf") + " -gfc 'LX<0.52' -gfe 'DP<5' -gfe 'GQ<37'",
|
||||
0,
|
||||
Arrays.asList("26ffd06215b6177acce0ea9f35d73d31")
|
||||
Arrays.asList("6b83695122481d2dcbe3c792caf743a1")
|
||||
);
|
||||
|
||||
executeTest("Test filtering on both rods",spec);
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
|
@ -81,10 +82,23 @@ public class ConcordanceMetrics {
|
|||
return Collections.unmodifiableMap(nrd);
|
||||
}
|
||||
|
||||
public Map<String,Double> getPerSampleOGC() {
|
||||
Map<String,Double> ogc = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
||||
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
||||
ogc.put(sampleTable.getKey(),calculateOGC(sampleTable.getValue()));
|
||||
}
|
||||
|
||||
return Collections.unmodifiableMap(ogc);
|
||||
}
|
||||
|
||||
public Double getOverallNRD() {
|
||||
return calculateNRD(overallGenotypeConcordance);
|
||||
}
|
||||
|
||||
public Double getOverallOGC() {
|
||||
return calculateOGC(overallGenotypeConcordance);
|
||||
}
|
||||
|
||||
public Map<String,Double> getPerSampleNRS() {
|
||||
Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
||||
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
||||
|
|
@ -110,6 +124,11 @@ public class ConcordanceMetrics {
|
|||
for ( String sample : perSampleGenotypeConcordance.keySet() ) {
|
||||
Genotype evalGenotype = eval.getGenotype(sample);
|
||||
Genotype truthGenotype = truth.getGenotype(sample);
|
||||
// ensure genotypes are either no-call ("."), missing (empty alleles), or diploid
|
||||
if ( ( ! evalGenotype.isNoCall() && evalGenotype.getPloidy() != 2 && evalGenotype.getPloidy() > 0) ||
|
||||
( ! truthGenotype.isNoCall() && truthGenotype.getPloidy() != 2 && truthGenotype.getPloidy() > 0) ) {
|
||||
throw new UserException(String.format("Concordance Metrics is currently only implemented for DIPLOID genotypes, found eval ploidy: %d, comp ploidy: %d",evalGenotype.getPloidy(),truthGenotype.getPloidy()));
|
||||
}
|
||||
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth,truthRef);
|
||||
}
|
||||
|
|
@ -136,10 +155,32 @@ public class ConcordanceMetrics {
|
|||
return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total);
|
||||
}
|
||||
|
||||
private static double calculateOGC(int[][] concordanceCounts) {
|
||||
int correct = 0;
|
||||
int total = 0;
|
||||
correct += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||
correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()];
|
||||
correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += correct;
|
||||
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()];
|
||||
// NRD is by definition incorrec/total = 1.0-correct/total
|
||||
// note: if there are no observations (so the ratio is NaN), set this to 100%
|
||||
return total == 0 ? 1.0 : ( (double) correct)/( (double) total);
|
||||
}
|
||||
|
||||
private static double calculateNRS(GenotypeConcordanceTable table) {
|
||||
return calculateNRS(table.getTable());
|
||||
}
|
||||
|
||||
private static double calculateOGC(GenotypeConcordanceTable table) {
|
||||
return calculateOGC(table.getTable());
|
||||
}
|
||||
|
||||
private static double calculateNRS(int[][] concordanceCounts) {
|
||||
long confirmedVariant = 0;
|
||||
long unconfirmedVariant = 0;
|
||||
|
|
|
|||
|
|
@ -293,6 +293,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
concordanceCompProportions.addColumn("Proportion","%.3f");
|
||||
concordanceCounts.addColumn("Count","%d");
|
||||
concordanceEvalProportions.addColumn("Proportion","%.3f");
|
||||
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
|
||||
|
||||
for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) {
|
||||
ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue();
|
||||
|
|
@ -378,9 +379,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
||||
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference_Discrepancy",nrdEntry.getValue());
|
||||
}
|
||||
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleOGC().entrySet() ) {
|
||||
concordanceSummary.set(nrdEntry.getKey(),"Overall_Genotype_Concordance",nrdEntry.getValue());
|
||||
}
|
||||
concordanceSummary.set("ALL_NRS_NRD","Sample","ALL");
|
||||
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Sensitivity",metrics.getOverallNRS());
|
||||
concordanceSummary.set("ALL_NRS_NRD","Non-Reference_Discrepancy",metrics.getOverallNRD());
|
||||
concordanceSummary.set("ALL_NRS_NRD","Overall_Genotype_Concordance",metrics.getOverallOGC());
|
||||
|
||||
|
||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||
|
|
@ -411,6 +416,7 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
concordanceCounts.addColumn("Mismatching_Alleles","%d");
|
||||
concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f");
|
||||
concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f");
|
||||
concordanceSummary.addColumn("Overall_Genotype_Concordance","%.3f");
|
||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||
siteConcordance.addColumn(type.toString(),"%d");
|
||||
}
|
||||
|
|
@ -463,9 +469,13 @@ public class GenotypeConcordance extends RodWalker<List<Pair<VariantContext,Vari
|
|||
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
||||
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue());
|
||||
}
|
||||
for ( Map.Entry<String,Double> ogcEntry : metrics.getPerSampleOGC().entrySet() ) {
|
||||
concordanceSummary.set(ogcEntry.getKey(),"Overall_Genotype_Concordance",ogcEntry.getValue());
|
||||
}
|
||||
concordanceSummary.set("ALL","Sample","ALL");
|
||||
concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS());
|
||||
concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD());
|
||||
concordanceSummary.set("ALL","Overall_Genotype_Concordance",metrics.getOverallOGC());
|
||||
|
||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||
siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type));
|
||||
|
|
|
|||
Loading…
Reference in New Issue