diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index bb76006bf..8a87c9957 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -272,22 +272,7 @@ public class ConcordanceMetrics { @Requires({"evalVC != null","truthVC != null"}) private SiteConcordanceType getMatchType(VariantContext evalVC, VariantContext truthVC) { - if ( evalVC.isMonomorphicInSamples() ) - return SiteConcordanceType.TRUTH_ONLY; - if ( truthVC.isMonomorphicInSamples() ) - return SiteConcordanceType.EVAL_ONLY; - - boolean evalSusbsetTruth = VariantContextUtils.allelesAreSubset(evalVC,truthVC); - boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truthVC,evalVC); - - if ( evalSusbsetTruth && truthSubsetEval ) - return SiteConcordanceType.ALLELES_MATCH; - else if ( evalSusbsetTruth ) - return SiteConcordanceType.EVAL_SUBSET_TRUTH; - else if ( truthSubsetEval ) - return SiteConcordanceType.EVAL_SUPERSET_TRUTH; - - return SiteConcordanceType.ALLELES_DO_NOT_MATCH; + return SiteConcordanceType.getConcordanceType(evalVC,truthVC); } public int[] getSiteConcordance() { @@ -305,6 +290,27 @@ public class ConcordanceMetrics { EVAL_SUBSET_TRUTH, ALLELES_DO_NOT_MATCH, EVAL_ONLY, - TRUTH_ONLY + TRUTH_ONLY; + + public static SiteConcordanceType getConcordanceType(VariantContext eval, VariantContext truth) { + if ( eval.isMonomorphicInSamples() ) + return TRUTH_ONLY; + if ( truth.isMonomorphicInSamples() ) + return EVAL_ONLY; + + boolean evalSubsetTruth = VariantContextUtils.allelesAreSubset(eval,truth); + boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(eval,truth); + + if ( evalSubsetTruth && truthSubsetEval ) + return ALLELES_MATCH; + + if ( evalSubsetTruth ) + return EVAL_SUBSET_TRUTH; + + if ( truthSubsetEval ) + return EVAL_SUPERSET_TRUTH; + + return ALLELES_DO_NOT_MATCH; + } } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 07c9a0d77..0cd1882df 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -64,7 +64,7 @@ import java.util.*; /** * A simple walker for performing genotype concordance calculations between two callsets */ -public class GenotypeConcordance extends RodWalker,ConcordanceMetrics> { +public class GenotypeConcordance extends RodWalker>,ConcordanceMetrics> { @Input(fullName="eval",shortName="eval",doc="The variants and genotypes to evaluate",required=true) RodBinding evalBinding; @@ -81,7 +81,6 @@ public class GenotypeConcordance extends RodWalker evalSamples; List compSamples; - // todo -- integration test coverage // todo -- deal with occurrences like: // Eval: 20 4000 A C // Eval: 20 4000 A AC @@ -89,6 +88,7 @@ public class GenotypeConcordance extends RodWalker map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - Pair evalCompPair = null; + public List> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + List> evalCompPair = new ArrayList>(3); if ( tracker != null && ( tracker.getValues(evalBinding,ref.getLocus()).size() > 0 || - tracker.getValues(compBinding,ref.getLocus()).size() > 0 ) ) { + tracker.getValues(compBinding,ref.getLocus()).size() > 0 ) ) { List eval = tracker.getValues(evalBinding,ref.getLocus()); List comp = tracker.getValues(compBinding,ref.getLocus()); if ( eval.size() > 1 || comp.size() > 1 ) { - logger.warn("Eval or Comp Rod at position "+ref.getLocus().toString()+" has multiple records. Site will be skipped."); - return evalCompPair; + logger.info("Eval or Comp Rod at position " + ref.getLocus().toString() + " has multiple records. Resolving."); + evalCompPair = resolveMultipleRecords(eval,comp); + } else { + // if a rod is missing, explicitly create a variant context with 'missing' genotypes. Slow, but correct. + // note that if there is no eval rod there must be a comp rod, and also the reverse + VariantContext evalContext = eval.size() == 1 ? eval.get(0) : createEmptyContext(comp.get(0),evalSamples); + VariantContext compContext = comp.size() == 1 ? comp.get(0) : createEmptyContext(eval.get(0),compSamples); + evalContext = filterGenotypes(evalContext,ignoreFilters); + compContext = filterGenotypes(compContext,ignoreFilters); + evalCompPair.add(new Pair(evalContext,compContext)); } - // if a rod is missing, explicitly create a variant context with 'missing' genotypes. Slow, but correct. - // note that if there is no eval rod there must be a comp rod, and also the reverse - VariantContext evalContext = eval.size() == 1 ? eval.get(0) : createEmptyContext(ref,comp.get(0),evalSamples); - VariantContext compContext = comp.size() == 1 ? comp.get(0) : createEmptyContext(ref,eval.get(0),compSamples); - evalContext = filterGenotypes(evalContext,ignoreFilters); - compContext = filterGenotypes(compContext,ignoreFilters); - evalCompPair = new Pair(evalContext,compContext); } return evalCompPair; } - public ConcordanceMetrics reduce(Pair evalComp, ConcordanceMetrics metrics) { - if ( evalComp != null ) + /** + * The point of this method is to match up pairs of evals and comps by their alternate alleles. Basically multiple records could + * exist for a site such as: + * Eval: 20 4000 A C + * Eval: 20 4000 A AC + * Comp: 20 4000 A C + * So for each eval, loop through the comps. If the eval alleles (non-emptily) intersect the comp alleles, pair them up and remove + * that comp records. Continue until we're out of evals or comps. This is n^2, but should rarely actually happen. + * + * The remaining unpaired records get paird with an empty contexts. So in the example above we'd get a list of: + * 1 - (20,4000,A/C | 20,4000,A/C) + * 2 - (20,4000,A/AC | Empty ) + * @param evalList - list of eval variant contexts + * @param compList - list of comp variant contexts + * @return resolved pairs of the input lists + */ + private List> resolveMultipleRecords(List evalList, List compList) { + List> resolvedPairs = new ArrayList>(evalList.size()+compList.size()); // oversized but w/e + List pairedEval = new ArrayList(evalList.size()); + for ( VariantContext eval : evalList ) { + Set evalAlts = new HashSet(eval.getAlternateAlleles()); + VariantContext pairedComp = null; + for ( VariantContext comp : compList ) { + for ( Allele compAlt : comp.getAlternateAlleles() ) { + if ( evalAlts.contains(compAlt) ) { + // matching alt allele, pair these records + pairedComp = comp; + break; + } + } + } + if ( pairedComp != null ) { + compList.remove(pairedComp); + resolvedPairs.add(new Pair(eval,pairedComp)); + pairedEval.add(eval); + if ( compList.size() < 1 ) + break; + } + } + evalList.removeAll(pairedEval); + for ( VariantContext unpairedEval : evalList ) { + resolvedPairs.add(new Pair(unpairedEval,createEmptyContext(unpairedEval,compSamples))); + } + + for ( VariantContext unpairedComp : compList ) { + resolvedPairs.add(new Pair(createEmptyContext(unpairedComp,evalSamples),unpairedComp)); + } + + return resolvedPairs; + } + + public ConcordanceMetrics reduce(List> evalCompList, ConcordanceMetrics metrics) { + for ( Pair evalComp : evalCompList) metrics.update(evalComp.getFirst(),evalComp.getSecond()); return metrics; } @@ -233,7 +285,7 @@ public class GenotypeConcordance extends RodWalker samples) { + public VariantContext createEmptyContext(VariantContext other, List samples) { VariantContextBuilder builder = new VariantContextBuilder(); // set the alleles to be the same builder.alleles(other.getAlleles()); diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java index d4d8d6f8c..113f098e3 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java @@ -60,4 +60,15 @@ public class GenotypeConcordanceIntegrationTest extends WalkerTest { executeTest("test non-overlapping samples", spec); } + + @Test + public void testMultipleRecordsPerSite() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString("GenotypeConcordance.multipleRecordsTest1.eval.vcf","GenotypeConcordance.multipleRecordsTest1.comp.vcf"), + 0, + Arrays.asList("fdf2cac15775c613f596c27247a76570") + ); + + executeTest("test multiple records per site",spec); + } }