From 31a5f88c4f16cb4c4696fda5ffbf0f1800c528c5 Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Thu, 10 Jan 2013 14:33:47 -0500 Subject: [PATCH] Expanded unit tests to cover the Concordance Metrics class fairly uniformly. --- .../variantutils/ConcordanceMetrics.java | 9 +- .../ConcordanceMetricsUnitTest.java | 346 +++++++++++++++++- 2 files changed, 346 insertions(+), 9 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index f71bc4a01..a28e75cdd 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -41,7 +41,7 @@ public class ConcordanceMetrics { public GenotypeConcordanceTable getGenotypeConcordance(String sample) { GenotypeConcordanceTable table = perSampleGenotypeConcordance.get(sample); if ( table == null ) - throw new ReviewedStingException("Attempted to request the concordance table for a sample on which it was not calculated"); + throw new ReviewedStingException("Attempted to request the concordance table for sample "+sample+" on which it was not calculated"); return table; } @@ -108,7 +108,8 @@ public class ConcordanceMetrics { total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()]; total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()]; // NRD is by definition incorrec/total = 1.0-correct/total - return 1.0 - ( (double) correct)/( (double) total); + // note: if there are no observations (so the ratio is NaN), set this to 100% + return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total); } private static double calculateNRS(GenotypeConcordanceTable table) { @@ -129,7 +130,9 @@ public class ConcordanceMetrics { } } - return ( (double) confirmedVariant ) / ( (double) ( confirmedVariant + unconfirmedVariant ) ); + long total = confirmedVariant + unconfirmedVariant; + // note: if there are no observations (so the ratio is NaN) set this to 0% + return total == 0l ? 0.0 : ( (double) confirmedVariant ) / ( (double) ( total ) ); } diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java index 50d71c532..f5f0e3fee 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; +import com.sun.org.apache.xpath.internal.operations.Gt; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.gatk.walkers.variantutils.ConcordanceMetrics; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -12,6 +13,7 @@ import org.broadinstitute.variant.variantcontext.Allele; import org.broadinstitute.variant.variantcontext.Genotype; import org.broadinstitute.variant.variantcontext.GenotypeBuilder; import org.broadinstitute.variant.variantcontext.GenotypeType; +import org.broadinstitute.variant.variantcontext.GenotypesContext; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContextBuilder; import org.broadinstitute.variant.vcf.VCFCodec; @@ -34,11 +36,6 @@ import net.sf.picard.reference.ReferenceSequenceFile; public class ConcordanceMetricsUnitTest extends BaseTest { - // todo -- coverage for several sites (3,4) - // todo -- coverage for calculation of table margins - // todo -- coverage for site concordance - // todo -- coverage for disjoint and mostly-disjoint sample sets - private static ReferenceSequenceFile seq; private GenomeLocParser genomeLocParser; @@ -60,6 +57,10 @@ public class ConcordanceMetricsUnitTest extends BaseTest { "##FORMAT=\n" + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t"; public static String TEST_1_HEADER = HEADER_BASE + "test1_sample1\ttest1_sample2\ttest1_sample3\n"; + public static String TEST_2_HEADER = HEADER_BASE + "test2_sample1\ttest2_sample2\n"; + public static String TEST_3_HEADER_1 = HEADER_BASE + "test3_sample1\ttest3_sample2\ttest3_sample3\ttest3_sample4\ttest3_sample5\n"; + public static String TEST_3_HEADER_2 = HEADER_BASE + "test3_sample6\ttest3_sample7\ttest3_sample8\ttest3_sample9\ttest3_sample10\n"; + public static String TEST_3_HEADER_3 = HEADER_BASE + "test3_sample3\ttest3_sample6\ttest3_sample7\ttest3_sample8\ttest3_sample9\ttest3_sample10\n"; private Pair getData1() { @@ -319,8 +320,175 @@ public class ConcordanceMetricsUnitTest extends BaseTest { Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][4],1); } + private List> getData6() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + + + // site 1 - + // sample 1: hom-ref/hom-ref + // sample 2: het/hom-ref + + Genotype sam_2_1_1_eval = GenotypeBuilder.create("test2_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_2_2_1_eval = GenotypeBuilder.create("test2_sample2", Arrays.asList(reference_A,alt_C)); + + Genotype sam_2_1_1_truth = GenotypeBuilder.create("test2_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_2_2_1_truth = GenotypeBuilder.create("test2_sample2", Arrays.asList(reference_A,reference_A)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_2_1_1_eval,sam_2_2_1_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_2_1_1_truth,sam_2_2_1_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testDataSite1 = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + reference_A = Allele.create(BaseUtils.A,true); + Allele alt_T = Allele.create(BaseUtils.T); + + // site 2 - + // sample 1: no-call/hom-ref + // sample 2: hom-var/hom-var + + Genotype sam_2_1_2_eval = GenotypeBuilder.create("test2_sample1",Arrays.asList(Allele.NO_CALL,Allele.NO_CALL)); + Genotype sam_2_2_2_eval = GenotypeBuilder.create("test2_sample2",Arrays.asList(alt_T,alt_T)); + Genotype sam_2_1_2_truth = GenotypeBuilder.create("test2_sample1",Arrays.asList(reference_A,reference_A)); + Genotype sam_2_2_2_truth = GenotypeBuilder.create("test2_sample2",Arrays.asList(alt_T,alt_T)); + + loc = genomeLocParser.createGenomeLoc("chr1", 4, 4); + eval_1_builder = new VariantContextBuilder(); + truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + eval_1_builder.alleles(Arrays.asList(reference_A,alt_T)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_T)); + eval_1_builder.genotypes(Arrays.asList(sam_2_1_2_eval,sam_2_2_2_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_2_1_2_truth,sam_2_2_2_truth)); + + Pair testDataSite2 = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + Allele alt_G = Allele.create(BaseUtils.G); + + // site 3 - + // sample 1: alleles do not match + // sample 2: het/het + Genotype sam_2_1_3_eval = GenotypeBuilder.create("test2_sample1",Arrays.asList(alt_G,alt_T)); + Genotype sam_2_2_3_eval = GenotypeBuilder.create("test2_sample2",Arrays.asList(reference_A,alt_T)); + Genotype sam_2_1_3_truth = GenotypeBuilder.create("test2_sample1",Arrays.asList(alt_T,alt_T)); + Genotype sam_2_2_3_truth = GenotypeBuilder.create("test2_sample2",Arrays.asList(reference_A,alt_T)); + + loc = genomeLocParser.createGenomeLoc("chr1",5,5); + eval_1_builder = new VariantContextBuilder(); + truth_1_builder = new VariantContextBuilder(); + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + eval_1_builder.alleles(Arrays.asList(reference_A,alt_T,alt_G)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_T)); + eval_1_builder.genotypes(Arrays.asList(sam_2_1_3_eval,sam_2_2_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_2_1_3_truth,sam_2_2_3_truth)); + + Pair testDataSite3 = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + // site 4 - + // sample 1: unavailable/het + // sample 2: unavailable/ref + Genotype sam_2_1_4_eval = GenotypeBuilder.create("test2_sample1",new ArrayList(0)); + Genotype sam_2_2_4_eval = GenotypeBuilder.create("test2_sample2",new ArrayList(0)); + Genotype sam_2_1_4_truth = GenotypeBuilder.create("test2_sample1",Arrays.asList(reference_A,alt_T)); + Genotype sam_2_2_4_truth = GenotypeBuilder.create("test2_sample2",Arrays.asList(reference_A,reference_A)); + + loc = genomeLocParser.createGenomeLoc("chr1",6,6); + eval_1_builder = new VariantContextBuilder(); + truth_1_builder = new VariantContextBuilder(); + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + eval_1_builder.alleles(Arrays.asList(reference_A,alt_T)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_T)); + eval_1_builder.genotypes(Arrays.asList(sam_2_1_4_eval,sam_2_2_4_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_2_1_4_truth,sam_2_2_4_truth)); + + Pair testDataSite4 = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + // site 5 - + // sample 1: hom-var/no-call + // sample 2: het/het + Genotype sam_2_1_5_eval = GenotypeBuilder.create("test2_sample1",Arrays.asList(alt_C,alt_C)); + Genotype sam_2_2_5_eval = GenotypeBuilder.create("test2_sample2",Arrays.asList(reference_A,alt_C)); + Genotype sam_2_1_5_truth = GenotypeBuilder.create("test2_sample1",Arrays.asList(Allele.NO_CALL,Allele.NO_CALL)); + Genotype sam_2_2_5_truth = GenotypeBuilder.create("test2_sample2",Arrays.asList(reference_A,alt_C)); + + loc = genomeLocParser.createGenomeLoc("chr1",7,7); + eval_1_builder = new VariantContextBuilder(); + truth_1_builder = new VariantContextBuilder(); + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_2_1_5_eval,sam_2_2_5_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_2_1_5_truth,sam_2_2_5_truth)); + + Pair testDataSite5 = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return Arrays.asList(testDataSite1,testDataSite2,testDataSite3,testDataSite4,testDataSite5); + } + @Test(enabled=true) - public void testNRD_testNRS() { + public void testMultiSite() { + int[][] sample1_expected = new int[GenotypeType.values().length][GenotypeType.values().length]; + int[][] sample2_expected = new int[GenotypeType.values().length][GenotypeType.values().length]; + // order: no-call,ref,het,hom-var,unavailable,mixed + sample1_expected[0] = new int[]{0,1,0,0,0,0}; + sample2_expected[0] = new int[]{0,0,0,0,0,0}; + sample1_expected[1] = new int[]{0,1,0,0,0,0}; + sample2_expected[1] = new int[]{0,0,0,0,0,0}; + sample1_expected[2] = new int[]{0,0,0,0,0,0}; + sample2_expected[2] = new int[]{0,1,2,0,0,0}; + sample1_expected[3] = new int[]{1,0,0,0,0,0}; + sample2_expected[3] = new int[]{0,0,0,1,0,0}; + sample1_expected[4] = new int[]{0,0,1,0,0,0}; + sample2_expected[4] = new int[]{0,1,0,0,0,0}; + + List> data = getData6(); + + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + + for ( Pair contextPair : data ) { + VariantContext eval = contextPair.getFirst(); + VariantContext comp = contextPair.getSecond(); + logger.warn(eval.toString()); + logger.warn(comp.toString()); + Assert.assertTrue(eval != null); + Assert.assertTrue(comp != null); + Assert.assertTrue(eval.getGenotype("test2_sample1") != null); + Assert.assertTrue(comp.getGenotype("test2_sample1") != null); + Assert.assertTrue(eval.getGenotype("test2_sample2") != null); + Assert.assertTrue(comp.getGenotype("test2_sample2") != null); + metrics.update(eval,comp); + } + + int[][] sample1_observed = metrics.getGenotypeConcordance("test2_sample1").getTable(); + int[][] sample2_observed = metrics.getGenotypeConcordance("test2_sample2").getTable(); + for ( GenotypeType eType : GenotypeType.values() ) { + for ( GenotypeType cType : GenotypeType.values() ) { + Assert.assertEquals(sample1_expected[eType.ordinal()][cType.ordinal()],sample1_observed[eType.ordinal()][cType.ordinal()]); + Assert.assertEquals(sample2_expected[eType.ordinal()][cType.ordinal()],sample2_observed[eType.ordinal()][cType.ordinal()]); + } + } + } + + @Test(enabled=true) + public void testNRD_testNRS_testMargins() { Pair data = getData3(); VariantContext eval = data.getFirst(); VariantContext truth = data.getSecond(); @@ -340,5 +508,171 @@ public class ConcordanceMetricsUnitTest extends BaseTest { double EXPEC_NRD = 0.1071429; Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7); Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7); + int EXPEC_EVAL_REF = 124; + int EXPEC_EVAL_HET = 169; + int EXPEC_EVAL_VAR = 62; + int EXPEC_COMP_REF = 127; + int EXPEC_COMP_HET = 205; + int EXPEC_COMP_VAR = 82; + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getnEvalGenotypes(GenotypeType.HOM_REF),EXPEC_EVAL_REF); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getnEvalGenotypes(GenotypeType.HET),EXPEC_EVAL_HET); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getnEvalGenotypes(GenotypeType.HOM_VAR),EXPEC_EVAL_VAR); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getnCompGenotypes(GenotypeType.HOM_REF),EXPEC_COMP_REF); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getnCompGenotypes(GenotypeType.HET),EXPEC_COMP_HET); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getnCompGenotypes(GenotypeType.HOM_VAR),EXPEC_COMP_VAR); + } + + @Test(enabled=true) + public void testRobustness() { + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1)))); + VCFHeader disjointCompHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2)))); + VCFHeader overlapCompHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3)))); + ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader); + ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader); + + // test what happens if you put in disjoint sets and start making requests + Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size()); + String msg = "No Exception Thrown"; + try { + disjointMetrics.getGenotypeConcordance("test3_sample4"); + } catch ( Exception e) { + msg = e.getMessage(); + } + Assert.assertEquals("Attempted to request the concordance table for sample test3_sample4 on which it was not calculated",msg); + + // test that the overlapping sample is in the overlapping table (basically do this without throwing an exception) + overlapMetrics.getGenotypeConcordance("test3_sample3"); + + String msg2 = "No Exception Thrown"; + try { + disjointMetrics.getGenotypeConcordance("test3_sample4"); + } catch ( Exception e) { + msg2 = e.getMessage(); + } + Assert.assertEquals("Attempted to request the concordance table for sample test3_sample4 on which it was not calculated",msg2); + + // test what happens if you try to calculate NRS and NRD on an empty table + Assert.assertEquals(disjointMetrics.getOverallNRD(), 1.0, 1e-16); + Assert.assertEquals(disjointMetrics.getOverallNRS(), 0.0, 1e-16); + } + + public List> getData7() { + + Allele ref1 = Allele.create(BaseUtils.T,true); + Allele alt1 = Allele.create(BaseUtils.C); + Allele alt2 = Allele.create(BaseUtils.G); + Allele alt3 = Allele.create(BaseUtils.A); + + GenomeLoc loc1 = genomeLocParser.createGenomeLoc("chr1",1,1); + VariantContextBuilder site1Eval = new VariantContextBuilder(); + VariantContextBuilder site1Comp = new VariantContextBuilder(); + + + // site 1: eval superset comp + site1Eval.loc(loc1.getContig(),loc1.getStart(),loc1.getStop()); + site1Comp.loc(loc1.getContig(),loc1.getStart(),loc1.getStop()); + site1Eval.alleles(Arrays.asList(ref1,alt1,alt2)); + site1Comp.alleles(Arrays.asList(ref1,alt2)); + site1Eval.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt2))); + site1Comp.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt2)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt2))); + + // site 2: eval subset comp + GenomeLoc loc2 = genomeLocParser.createGenomeLoc("chr1",2,2); + VariantContextBuilder site2Eval = new VariantContextBuilder(); + VariantContextBuilder site2Comp = new VariantContextBuilder(); + site2Eval.loc(loc2.getContig(),loc2.getStart(),loc2.getStop()); + site2Comp.loc(loc2.getContig(),loc2.getStart(),loc2.getStop()); + site2Eval.alleles(Arrays.asList(ref1,alt1)); + site2Comp.alleles(Arrays.asList(ref1,alt1,alt3)); + site2Eval.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt1))); + site2Comp.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt3)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt1))); + + // site 3: eval only + GenomeLoc loc3 = genomeLocParser.createGenomeLoc("chr1",3,3); + VariantContextBuilder site3Eval = new VariantContextBuilder(); + VariantContextBuilder site3Comp = new VariantContextBuilder(); + site3Eval.loc(loc3.getContig(),loc3.getStart(),loc3.getStop()); + site3Comp.loc(loc3.getContig(),loc3.getStart(),loc3.getStop()); + site3Eval.alleles(Arrays.asList(ref1,alt1)); + site3Comp.alleles(Arrays.asList(ref1,alt1)); + site3Eval.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt1))); + site3Comp.genotypes(GenotypeBuilder.create("test2_sample1",new ArrayList(0)),GenotypeBuilder.create("test2_sample2",new ArrayList(0))); + + // site 4: comp only - monomorphic + GenomeLoc loc4 = genomeLocParser.createGenomeLoc("chr1",4,4); + VariantContextBuilder site4Eval = new VariantContextBuilder(); + VariantContextBuilder site4Comp = new VariantContextBuilder(); + site4Eval.loc(loc4.getContig(),loc4.getStart(),loc4.getStop()); + site4Comp.loc(loc4.getContig(),loc4.getStart(),loc4.getStop()); + site4Eval.alleles(Arrays.asList(ref1,alt1)); + site4Comp.alleles(Arrays.asList(ref1,alt1)); + site4Eval.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,ref1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,ref1))); + site4Comp.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt1))); + + // site 5: overlapping + GenomeLoc loc5 = genomeLocParser.createGenomeLoc("chr1",5,5); + VariantContextBuilder site5Eval = new VariantContextBuilder(); + VariantContextBuilder site5Comp = new VariantContextBuilder(); + site5Eval.loc(loc5.getContig(),loc5.getStart(),loc5.getStop()); + site5Comp.loc(loc5.getContig(),loc5.getStart(),loc5.getStop()); + site5Eval.alleles(Arrays.asList(ref1,alt1,alt3)); + site5Comp.alleles(Arrays.asList(ref1,alt1,alt3)); + site5Eval.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(alt1,alt3))); + site5Comp.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(alt1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(alt3,alt3))); + + // site 6: some non-matching alts + GenomeLoc loc6 = genomeLocParser.createGenomeLoc("chr1",6,6); + VariantContextBuilder site6Eval = new VariantContextBuilder(); + VariantContextBuilder site6Comp = new VariantContextBuilder(); + site6Eval.loc(loc6.getContig(),loc6.getStart(),loc6.getStop()); + site6Comp.loc(loc6.getContig(),loc6.getStart(),loc6.getStop()); + site6Eval.alleles(Arrays.asList(ref1,alt1,alt2)); + site6Comp.alleles(Arrays.asList(ref1,alt1,alt3)); + site6Eval.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt2))); + site6Comp.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt3))); + + // site 7: matching with no-calls + GenomeLoc loc7 = genomeLocParser.createGenomeLoc("chr1",7,7); + VariantContextBuilder site7Eval = new VariantContextBuilder(); + VariantContextBuilder site7Comp = new VariantContextBuilder(); + site7Eval.loc(loc7.getContig(),loc7.getStart(),loc7.getStop()); + site7Comp.loc(loc7.getContig(),loc7.getStart(),loc7.getStop()); + site7Eval.alleles(Arrays.asList(ref1,alt1)); + site7Comp.alleles(Arrays.asList(ref1,alt1)); + site7Eval.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(Allele.NO_CALL,Allele.NO_CALL))); + site7Comp.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(ref1,alt1)),GenotypeBuilder.create("test2_sample2",Arrays.asList(ref1,alt1))); + + Pair site1 = new Pair(site1Eval.make(),site1Comp.make()); + Pair site2 = new Pair(site2Eval.make(),site2Comp.make()); + Pair site3 = new Pair(site3Eval.make(),site3Comp.make()); + Pair site4 = new Pair(site4Eval.make(),site4Comp.make()); + Pair site5 = new Pair(site5Eval.make(),site5Comp.make()); + Pair site6 = new Pair(site6Eval.make(),site6Comp.make()); + Pair site7 = new Pair(site7Eval.make(),site7Comp.make()); + + return Arrays.asList(site1,site2,site3,site4,site5,site6,site7); + } + + @Test(enabled = true) + public void testSites() { + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + + List> data = getData7(); + + for ( Pair varPair : data ) { + metrics.update(varPair.getFirst(),varPair.getSecond()); + } + + Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH),1); + Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH),2); + Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_ONLY),1); + Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.TRUTH_ONLY),1); + Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_SUBSET_TRUTH),1); + Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH),1); + } } \ No newline at end of file