diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 5b1a1e236..517cb0df0 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -35,10 +35,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.variant.vcf.VCFConstants; import org.broadinstitute.variant.vcf.VCFHeaderLine; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; -import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; import org.broadinstitute.variant.variantcontext.VariantContext; import org.broadinstitute.variant.variantcontext.VariantContextUtils; @@ -52,12 +50,6 @@ import java.util.*; */ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { - public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; - public static final VCFInfoHeaderLine[] descriptions = { - VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY), - VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY), - VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) }; - private Set founderIds = new HashSet(); public Map annotate(final RefMetaDataTracker tracker, @@ -78,8 +70,8 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn } public List getKeyNames() { - return Arrays.asList(keyNames); + return Arrays.asList(ChromosomeCountConstants.keyNames); } - public List getDescriptions() { return Arrays.asList(descriptions); } + public List getDescriptions() { return Arrays.asList(ChromosomeCountConstants.descriptions); } } \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java new file mode 100644 index 000000000..f71bc4a01 --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -0,0 +1,261 @@ +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Invariant; +import com.google.java.contract.Requires; +import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.vcf.VCFHeader; + +import java.util.*; + +/** + * A class for tabulating and evaluating a callset-by-callset genotype concordance table + * */ +public class ConcordanceMetrics { + + private Map perSampleGenotypeConcordance; + private GenotypeConcordanceTable overallGenotypeConcordance; + private SiteConcordanceTable overallSiteConcordance; + + public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) { + HashSet overlappingSamples = new HashSet(evaluate.getGenotypeSamples()); + overlappingSamples.retainAll(truth.getGenotypeSamples()); + perSampleGenotypeConcordance = new HashMap(overlappingSamples.size()); + for ( String sample : overlappingSamples ) { + perSampleGenotypeConcordance.put(sample,new GenotypeConcordanceTable()); + } + overallGenotypeConcordance = new GenotypeConcordanceTable(); + overallSiteConcordance = new SiteConcordanceTable(); + } + + public GenotypeConcordanceTable getOverallGenotypeConcordance() { + return overallGenotypeConcordance; + } + + public SiteConcordanceTable getOverallSiteConcordance() { + return overallSiteConcordance; + } + + public GenotypeConcordanceTable getGenotypeConcordance(String sample) { + GenotypeConcordanceTable table = perSampleGenotypeConcordance.get(sample); + if ( table == null ) + throw new ReviewedStingException("Attempted to request the concordance table for a sample on which it was not calculated"); + return table; + } + + public Map getPerSampleGenotypeConcordance() { + return Collections.unmodifiableMap(perSampleGenotypeConcordance); + } + + public Map getPerSampleNRD() { + Map nrd = new HashMap(perSampleGenotypeConcordance.size()); + for ( Map.Entry sampleTable : perSampleGenotypeConcordance.entrySet() ) { + nrd.put(sampleTable.getKey(),calculateNRD(sampleTable.getValue())); + } + + return Collections.unmodifiableMap(nrd); + } + + public Double getOverallNRD() { + return calculateNRD(overallGenotypeConcordance); + } + + public Map getPerSampleNRS() { + Map nrs = new HashMap(perSampleGenotypeConcordance.size()); + for ( Map.Entry sampleTable : perSampleGenotypeConcordance.entrySet() ) { + nrs.put(sampleTable.getKey(),calculateNRS(sampleTable.getValue())); + } + + return Collections.unmodifiableMap(nrs); + } + + public Double getOverallNRS() { + return calculateNRS(overallGenotypeConcordance); + } + + @Requires({"eval != null","truth != null"}) + public void update(VariantContext eval, VariantContext truth) { + overallSiteConcordance.update(eval,truth); + Set alleleTruth = new HashSet(8); + alleleTruth.add(truth.getReference().getBaseString()); + for ( Allele a : truth.getAlternateAlleles() ) { + alleleTruth.add(a.getBaseString()); + } + for ( String sample : perSampleGenotypeConcordance.keySet() ) { + Genotype evalGenotype = eval.getGenotype(sample); + Genotype truthGenotype = truth.getGenotype(sample); + perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth); + overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth); + } + } + + private static double calculateNRD(GenotypeConcordanceTable table) { + return calculateNRD(table.getTable()); + } + + private static double calculateNRD(int[][] concordanceCounts) { + int correct = 0; + int total = 0; + correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()]; + correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += correct; + total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()]; + total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()]; + total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()]; + // NRD is by definition incorrec/total = 1.0-correct/total + return 1.0 - ( (double) correct)/( (double) total); + } + + private static double calculateNRS(GenotypeConcordanceTable table) { + return calculateNRS(table.getTable()); + } + + private static double calculateNRS(int[][] concordanceCounts) { + long confirmedVariant = 0; + long unconfirmedVariant = 0; + for ( GenotypeType truthState : Arrays.asList(GenotypeType.HET,GenotypeType.HOM_VAR) ) { + for ( GenotypeType evalState : GenotypeType.values() ) { + if ( evalState == GenotypeType.MIXED ) + continue; + if ( evalState.equals(GenotypeType.HET) || evalState.equals(GenotypeType.HOM_VAR) ) + confirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()]; + else + unconfirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()]; + } + } + + return ( (double) confirmedVariant ) / ( (double) ( confirmedVariant + unconfirmedVariant ) ); + } + + + class GenotypeConcordanceTable { + + private int[][] genotypeCounts; + private int nMismatchingAlt; + + public GenotypeConcordanceTable() { + genotypeCounts = new int[GenotypeType.values().length][GenotypeType.values().length]; + nMismatchingAlt = 0; + } + + @Requires({"eval!=null","truth != null","truthAlleles != null"}) + public void update(Genotype eval, Genotype truth, Set truthAlleles) { + // this is slow but correct + boolean matchingAlt = true; + if ( eval.isCalled() && truth.isCalled() ) { + // by default, no-calls "match" between alleles, so if + // one or both sites are no-call or unavailable, the alt alleles match + // otherwise, check explicitly: if the eval has an allele that's not ref, no-call, or present in truth + // the alt allele is mismatching - regardless of whether the genotype is correct. + for ( Allele evalAllele : eval.getAlleles() ) { + matchingAlt &= truthAlleles.contains(evalAllele.getBaseString()); + } + } + + if ( matchingAlt ) { + genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++; + } else { + nMismatchingAlt++; + } + } + + public int[][] getTable() { + return genotypeCounts; + } + + public int getnMismatchingAlt() { + return nMismatchingAlt; + } + + public int getnEvalGenotypes(GenotypeType type) { + int nGeno = 0; + for ( GenotypeType comptype : GenotypeType.values() ) + nGeno += genotypeCounts[type.ordinal()][comptype.ordinal()]; + return nGeno; + } + + public int getnCalledEvalGenotypes() { + int nGeno = 0; + for ( GenotypeType evalType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) { + nGeno += getnEvalGenotypes(evalType); + } + + return nGeno + nMismatchingAlt; + } + + public int getnCompGenotypes(GenotypeType type) { + int nGeno = 0; + for ( GenotypeType evaltype : GenotypeType.values() ) + nGeno += genotypeCounts[evaltype.ordinal()][type.ordinal()]; + return nGeno; + } + + public int getnCalledCompGenotypes() { + int nGeno = 0; + for ( GenotypeType compType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) { + nGeno += getnCompGenotypes(compType); + } + return nGeno; + } + + public int get(GenotypeType evalType, GenotypeType compType) { + return genotypeCounts[evalType.ordinal()][compType.ordinal()]; + } + } + + class SiteConcordanceTable { + + private int[] siteConcordance; + + public SiteConcordanceTable() { + siteConcordance = new int[SiteConcordanceType.values().length]; + } + + public void update(VariantContext evalVC, VariantContext truthVC) { + SiteConcordanceType matchType = getMatchType(evalVC,truthVC); + siteConcordance[matchType.ordinal()]++; + } + + @Requires({"evalVC != null","truthVC != null"}) + private SiteConcordanceType getMatchType(VariantContext evalVC, VariantContext truthVC) { + if ( evalVC.isMonomorphicInSamples() ) + return SiteConcordanceType.TRUTH_ONLY; + if ( truthVC.isMonomorphicInSamples() ) + return SiteConcordanceType.EVAL_ONLY; + + boolean evalSusbsetTruth = VariantContextUtils.allelesAreSubset(evalVC,truthVC); + boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truthVC,evalVC); + + if ( evalSusbsetTruth && truthSubsetEval ) + return SiteConcordanceType.ALLELES_MATCH; + else if ( evalSusbsetTruth ) + return SiteConcordanceType.EVAL_SUBSET_TRUTH; + else if ( truthSubsetEval ) + return SiteConcordanceType.EVAL_SUPERSET_TRUTH; + + return SiteConcordanceType.ALLELES_DO_NOT_MATCH; + } + + public int[] getSiteConcordance() { + return siteConcordance; + } + + public int get(SiteConcordanceType type) { + return getSiteConcordance()[type.ordinal()]; + } + } + + enum SiteConcordanceType { + ALLELES_MATCH, + EVAL_SUPERSET_TRUTH, + EVAL_SUBSET_TRUTH, + ALLELES_DO_NOT_MATCH, + EVAL_ONLY, + TRUTH_ONLY + } +} diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java new file mode 100644 index 000000000..de7b14ddb --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -0,0 +1,213 @@ +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.report.GATKReport; +import org.broadinstitute.sting.gatk.report.GATKReportTable; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.vcf.VCFHeader; + +import java.io.PrintStream; +import java.util.*; + +/** + * A simple walker for performing genotype concordance calculations between two callsets + */ +public class GenotypeConcordance extends RodWalker,ConcordanceMetrics> { + + @Input(fullName="eval",shortName="eval",doc="The variants and genotypes to evaluate",required=true) + RodBinding evalBinding; + + @Input(fullName="comp",shortName="comp",doc="The variants and genotypes to compare against",required=true) + RodBinding compBinding; + + @Argument(fullName="ignoreFilters",doc="Filters will be ignored",required=false) + boolean ignoreFilters = false; + + @Output + PrintStream out; + + List evalSamples; + List compSamples; + + // todo -- integration test coverage + // todo -- deal with occurrences like: + // Eval: 20 4000 A C + // Eval: 20 4000 A AC + // Comp: 20 4000 A C + // currently this results in a warning and skipping + // todo -- extend to multiple eval, multiple comp + // todo -- table with "proportion of overlapping sites" (not just eval/comp margins) + + + public ConcordanceMetrics reduceInit() { + Map headerMap = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(evalBinding,compBinding)); + VCFHeader evalHeader = headerMap.get(evalBinding.getName()); + evalSamples = evalHeader.getGenotypeSamples(); + VCFHeader compHeader = headerMap.get(compBinding.getName()); + compSamples = compHeader.getGenotypeSamples(); + return new ConcordanceMetrics(evalHeader,compHeader); + } + + + public Pair map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + Pair evalCompPair = null; + if ( tracker != null && ( + tracker.getValues(evalBinding,ref.getLocus()).size() > 0 || + tracker.getValues(compBinding,ref.getLocus()).size() > 0 ) ) { + + List eval = tracker.getValues(evalBinding,ref.getLocus()); + List comp = tracker.getValues(compBinding,ref.getLocus()); + if ( eval.size() > 1 || comp.size() > 1 ) { + logger.warn("Eval or Comp Rod at position "+ref.getLocus().toString()+" has multiple records. Site will be skipped."); + return evalCompPair; + } + // if a rod is missing, explicitly create a variant context with 'missing' genotypes. Slow, but correct. + // note that if there is no eval rod there must be a comp rod, and also the reverse + VariantContext evalContext = eval.size() == 1 ? eval.get(0) : createEmptyContext(ref,comp.get(0),evalSamples); + VariantContext compContext = comp.size() == 1 ? comp.get(0) : createEmptyContext(ref,eval.get(0),compSamples); + evalContext = filterGenotypes(evalContext,ignoreFilters); + compContext = filterGenotypes(compContext,ignoreFilters); + evalCompPair = new Pair(evalContext,compContext); + } + + return evalCompPair; + } + + public ConcordanceMetrics reduce(Pair evalComp, ConcordanceMetrics metrics) { + if ( evalComp != null ) + metrics.update(evalComp.getFirst(),evalComp.getSecond()); + return metrics; + } + + public void onTraversalDone(ConcordanceMetrics metrics) { + GATKReport report = new GATKReport(); + GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length); + GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length); + GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length); + GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS and NRD",2); + GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length); + concordanceCompProportions.addColumn("Sample","%s"); + concordanceCounts.addColumn("Sample","%s"); + concordanceEvalProportions.addColumn("Sample","%s"); + concordanceSummary.addColumn("Sample","%s"); + for ( GenotypeType evalType : GenotypeType.values() ) { + for ( GenotypeType compType : GenotypeType.values() ) { + String colKey = String.format("%s_%s", evalType.toString(), compType.toString()); + concordanceCounts.addColumn(colKey,"%d"); + if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR) + concordanceEvalProportions.addColumn(colKey,"%.3f"); + if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF ) + concordanceCompProportions.addColumn(colKey,"%.3f"); + } + } + concordanceEvalProportions.addColumn("Mismatching_Alleles","%.3f"); + concordanceCompProportions.addColumn("Mismatching_Alleles","%.3f"); + concordanceCounts.addColumn("Mismatching_Alleles","%d"); + concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f"); + concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f"); + for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { + siteConcordance.addColumn(type.toString(),"%d"); + } + + for ( Map.Entry entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) { + ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue(); + concordanceEvalProportions.set(entry.getKey(),"Sample",entry.getKey()); + concordanceCompProportions.set(entry.getKey(),"Sample",entry.getKey()); + concordanceCounts.set(entry.getKey(),"Sample",entry.getKey()); + for ( GenotypeType evalType : GenotypeType.values() ) { + for ( GenotypeType compType : GenotypeType.values() ) { + String colKey = String.format("%s_%s",evalType.toString(),compType.toString()); + int count = table.get(evalType, compType); + concordanceCounts.set(entry.getKey(),colKey,count); + if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR) + concordanceEvalProportions.set(entry.getKey(),colKey,( (double) count)/table.getnEvalGenotypes(evalType)); + if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF ) + concordanceCompProportions.set(entry.getKey(),colKey,( (double) count)/table.getnCompGenotypes(compType)); + } + } + concordanceEvalProportions.set(entry.getKey(),"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledEvalGenotypes()); + concordanceCompProportions.set(entry.getKey(),"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledCompGenotypes()); + concordanceCounts.set(entry.getKey(),"Mismatching_Alleles",table.getnMismatchingAlt()); + } + + String rowKey = "ALL"; + concordanceCompProportions.set(rowKey,"Sample",rowKey); + concordanceEvalProportions.set(rowKey,"Sample",rowKey); + concordanceCounts.set(rowKey,"Sample",rowKey); + ConcordanceMetrics.GenotypeConcordanceTable table = metrics.getOverallGenotypeConcordance(); + for ( GenotypeType evalType : GenotypeType.values() ) { + for ( GenotypeType compType : GenotypeType.values() ) { + String colKey = String.format("%s_%s",evalType.toString(),compType.toString()); + int count = table.get(evalType,compType); + concordanceCounts.set(rowKey,colKey,count); + if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR) + concordanceEvalProportions.set(rowKey,colKey,( (double) count)/table.getnEvalGenotypes(evalType)); + if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF ) + concordanceCompProportions.set(rowKey,colKey,( (double) count)/table.getnCompGenotypes(compType)); + } + } + concordanceEvalProportions.set(rowKey,"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledEvalGenotypes()); + concordanceCompProportions.set(rowKey,"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledCompGenotypes()); + concordanceCounts.set(rowKey,"Mismatching_Alleles",table.getnMismatchingAlt()); + + for ( Map.Entry nrsEntry : metrics.getPerSampleNRS().entrySet() ) { + concordanceSummary.set(nrsEntry.getKey(),"Sample",nrsEntry.getKey()); + concordanceSummary.set(nrsEntry.getKey(),"Non-Reference Sensitivity",nrsEntry.getValue()); + } + for ( Map.Entry nrdEntry : metrics.getPerSampleNRD().entrySet() ) { + concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue()); + } + concordanceSummary.set("ALL","Sample","ALL"); + concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS()); + concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD()); + + for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) { + siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type)); + } + + report.addTable(concordanceCompProportions); + report.addTable(concordanceEvalProportions); + report.addTable(concordanceCounts); + report.addTable(concordanceSummary); + report.addTable(siteConcordance); + + report.print(out); + } + + public VariantContext createEmptyContext(ReferenceContext ref, VariantContext other, List samples) { + VariantContextBuilder builder = new VariantContextBuilder(); + // set the alleles to be the same + builder.alleles(other.getAlleles()); + builder.loc(other.getChr(),other.getStart(),other.getEnd()); + // set all genotypes to empty + List genotypes = new ArrayList(samples.size()); + for ( String sample : samples ) + genotypes.add(GenotypeBuilder.create(sample, new ArrayList(0))); + builder.genotypes(genotypes); + return builder.make(); + } + + public VariantContext filterGenotypes(VariantContext context, boolean ignoreSiteFilter) { + // placeholder method for genotype-level filtering. However if the site itself is filtered, + // and such filters are not ignored, the genotype-level data should be altered to reflect this + if ( ! context.isFiltered() || ignoreSiteFilter ) { + // todo -- add genotype-level jexl filtering here + return context; + } + VariantContextBuilder builder = new VariantContextBuilder(); + builder.alleles(Arrays.asList(context.getReference())); + builder.loc(context.getChr(),context.getStart(),context.getEnd()); + List newGeno = new ArrayList(context.getNSamples()); + for ( Genotype g : context.getGenotypes().iterateInSampleNameOrder() ) { + newGeno.add(GenotypeBuilder.create(g.getSampleName(),new ArrayList())); + } + builder.genotypes(newGeno); + return builder.make(); + } +} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index cffd405b1..dd92961cf 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; @@ -423,7 +424,7 @@ public class SelectVariants extends RodWalker implements TreeR headerLines.add(new VCFInfoHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF")); headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN")); } - headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions)); + headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions)); headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY)); for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/DiagnoseTargetsIntegrationTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/LocusStatisticsUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java rename to protected/java/test/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/SampleStatisticsUnitTest.java diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java new file mode 100644 index 000000000..50d71c532 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetricsUnitTest.java @@ -0,0 +1,344 @@ +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.walkers.variantutils.ConcordanceMetrics; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.variant.utils.BaseUtils; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypeBuilder; +import org.broadinstitute.variant.variantcontext.GenotypeType; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.variantcontext.VariantContextBuilder; +import org.broadinstitute.variant.vcf.VCFCodec; +import org.broadinstitute.variant.vcf.VCFHeader; +import org.testng.annotations.Test; +import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.PositionalBufferedStream; +import org.broadinstitute.variant.vcf.*; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.StringBufferInputStream; +import java.util.ArrayList; +import java.util.Set; +import java.util.Arrays; +import java.util.List; +import net.sf.picard.reference.ReferenceSequenceFile; + +public class ConcordanceMetricsUnitTest extends BaseTest { + + // todo -- coverage for several sites (3,4) + // todo -- coverage for calculation of table margins + // todo -- coverage for site concordance + // todo -- coverage for disjoint and mostly-disjoint sample sets + + private static ReferenceSequenceFile seq; + private GenomeLocParser genomeLocParser; + + @BeforeClass + public void init() throws FileNotFoundException { + // sequence + seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); + genomeLocParser = new GenomeLocParser(seq); + } + public static String HEADER_BASE = "##fileformat=VCFv4.0\n" + + "##filedate=2010-06-21\n"+ + "##reference=NCBI36\n"+ + "##INFO=\n"+ + "##INFO=\n"+ + "##INFO=\n"+ + "##FILTER=\n"+ + "##FORMAT=\n"+ + "##FORMAT=\n"+ + "##FORMAT=\n" + + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t"; + public static String TEST_1_HEADER = HEADER_BASE + "test1_sample1\ttest1_sample2\ttest1_sample3\n"; + + + private Pair getData1() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(alt_C,alt_C)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testSimpleComparison() { + Pair data = getData1(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); + Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],1); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1],1); + } + + private Pair getData2() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + Allele alt_T = Allele.create(BaseUtils.T); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_T)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(alt_C,alt_C)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testMismatchingAllele() { + Pair data = getData2(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2); + Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],1); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1],1); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()],1); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()],0); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()],0); + } + + private Pair getData3() { + + Allele reference_ACT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C,BaseUtils.T},true); + Allele alt_AC = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C}); + Allele alt_A = Allele.create(BaseUtils.A); + Allele alt_ATT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.T,BaseUtils.T}); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_ATT)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_ACT,alt_A)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_AC)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_ACT,alt_A)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 5); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_ACT,alt_ATT,alt_A)); + truth_1_builder.alleles(Arrays.asList(reference_ACT,alt_AC,alt_A)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testComplex() { + Pair data = getData3(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[3][3],1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[1][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][2],1); + Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[3][3],1); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()],0); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()],1); + Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()],0); + } + + private Pair getData4() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + Allele alt_T = Allele.create(BaseUtils.T); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testNoCalls() { + Pair data = getData4(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],1); + } + + private Pair getData5() { + + Allele reference_A = Allele.create(BaseUtils.A,true); + Allele alt_C = Allele.create(BaseUtils.C); + Allele alt_T = Allele.create(BaseUtils.T); + + Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", new ArrayList(0)); + Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C)); + + Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A)); + Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C)); + Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", new ArrayList(0)); + + GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3); + VariantContextBuilder eval_1_builder = new VariantContextBuilder(); + VariantContextBuilder truth_1_builder = new VariantContextBuilder(); + + eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T)); + truth_1_builder.alleles(Arrays.asList(reference_A,alt_C)); + eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval)); + truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth)); + + eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop()); + + Pair testData = new Pair(eval_1_builder.make(),truth_1_builder.make()); + + return testData; + } + + @Test(enabled=true) + public void testMissing() { + Pair data = getData5(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + metrics.update(eval,truth); + Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE)); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[4][2],1); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],0); + Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][4],1); + } + + @Test(enabled=true) + public void testNRD_testNRS() { + Pair data = getData3(); + VariantContext eval = data.getFirst(); + VariantContext truth = data.getSecond(); + VCFCodec codec = new VCFCodec(); + VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); + ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader); + int[][] table = metrics.getOverallGenotypeConcordance().getTable(); + // set up the table + table[0] = new int[] {30, 12, 7, 5, 6, 0}; + table[1] = new int[] {10, 100, 5, 1, 7, 1}; + table[2] = new int[] {5, 7, 150, 3, 3, 1}; + table[3] = new int[] {3, 2, 6, 50, 1, 0}; + table[4] = new int[] {10, 6, 3, 3, 2, 0}; + table[5] = new int[] {12, 0, 34, 20, 10, 0}; + double EXPEC_NRS = 0.8969957; + double EXPEC_NRD = 0.1071429; + Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7); + Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7); + } +} \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/ContextCovariateUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/QualQuantizerUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadCovariatesUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/ReadGroupCovariateUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalDatumUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalUtilsUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationReportUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTablesUnitTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java b/protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java similarity index 100% rename from public/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java rename to protected/java/test/org/broadinstitute/sting/utils/recalibration/RecalibrationTestUtils.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCountConstants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCountConstants.java new file mode 100755 index 000000000..26f3bb8d9 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCountConstants.java @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.variant.vcf.VCFConstants; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.variant.vcf.VCFStandardHeaderLines; + + +/** + * Keys and descriptions for the common chromosome count annotations + */ +public class ChromosomeCountConstants { + + public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; + + public static final VCFInfoHeaderLine[] descriptions = { + VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY), + VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY), + VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) }; +} \ No newline at end of file diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DoCOutputType.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffElement.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffEngine.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffNode.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffObjects.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffValue.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/DiffableReader.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/Difference.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java similarity index 99% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 78b89d615..27b0e5201 100755 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference; import org.broadinstitute.sting.gatk.walkers.RodWalker; import org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.Window; -import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; +import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.vcf.*; @@ -217,7 +217,7 @@ public class CombineVariants extends RodWalker implements Tree if ( SET_KEY != null ) headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants")); if ( !ASSUME_IDENTICAL_SAMPLES ) - headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions)); + headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions)); VCFHeader vcfHeader = new VCFHeader(headerLines, samples); vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER); vcfWriter.writeHeader(vcfHeader);