Merge branch 'master' of github.com:broadinstitute/gsa-unstable
This commit is contained in:
commit
6089a4bf62
|
|
@ -35,10 +35,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFStandardHeaderLines;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
|
||||
|
||||
|
|
@ -52,12 +50,6 @@ import java.util.*;
|
|||
*/
|
||||
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
|
||||
public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
public static final VCFInfoHeaderLine[] descriptions = {
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY),
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY),
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) };
|
||||
|
||||
private Set<String> founderIds = new HashSet<String>();
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
|
|
@ -78,8 +70,8 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
|
|||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList(keyNames);
|
||||
return Arrays.asList(ChromosomeCountConstants.keyNames);
|
||||
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); }
|
||||
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(ChromosomeCountConstants.descriptions); }
|
||||
}
|
||||
|
|
@ -0,0 +1,261 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A class for tabulating and evaluating a callset-by-callset genotype concordance table
|
||||
* */
|
||||
public class ConcordanceMetrics {
|
||||
|
||||
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
|
||||
private GenotypeConcordanceTable overallGenotypeConcordance;
|
||||
private SiteConcordanceTable overallSiteConcordance;
|
||||
|
||||
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) {
|
||||
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
|
||||
overlappingSamples.retainAll(truth.getGenotypeSamples());
|
||||
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
|
||||
for ( String sample : overlappingSamples ) {
|
||||
perSampleGenotypeConcordance.put(sample,new GenotypeConcordanceTable());
|
||||
}
|
||||
overallGenotypeConcordance = new GenotypeConcordanceTable();
|
||||
overallSiteConcordance = new SiteConcordanceTable();
|
||||
}
|
||||
|
||||
public GenotypeConcordanceTable getOverallGenotypeConcordance() {
|
||||
return overallGenotypeConcordance;
|
||||
}
|
||||
|
||||
public SiteConcordanceTable getOverallSiteConcordance() {
|
||||
return overallSiteConcordance;
|
||||
}
|
||||
|
||||
public GenotypeConcordanceTable getGenotypeConcordance(String sample) {
|
||||
GenotypeConcordanceTable table = perSampleGenotypeConcordance.get(sample);
|
||||
if ( table == null )
|
||||
throw new ReviewedStingException("Attempted to request the concordance table for a sample on which it was not calculated");
|
||||
return table;
|
||||
}
|
||||
|
||||
public Map<String,GenotypeConcordanceTable> getPerSampleGenotypeConcordance() {
|
||||
return Collections.unmodifiableMap(perSampleGenotypeConcordance);
|
||||
}
|
||||
|
||||
public Map<String,Double> getPerSampleNRD() {
|
||||
Map<String,Double> nrd = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
||||
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
||||
nrd.put(sampleTable.getKey(),calculateNRD(sampleTable.getValue()));
|
||||
}
|
||||
|
||||
return Collections.unmodifiableMap(nrd);
|
||||
}
|
||||
|
||||
public Double getOverallNRD() {
|
||||
return calculateNRD(overallGenotypeConcordance);
|
||||
}
|
||||
|
||||
public Map<String,Double> getPerSampleNRS() {
|
||||
Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
|
||||
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
|
||||
nrs.put(sampleTable.getKey(),calculateNRS(sampleTable.getValue()));
|
||||
}
|
||||
|
||||
return Collections.unmodifiableMap(nrs);
|
||||
}
|
||||
|
||||
public Double getOverallNRS() {
|
||||
return calculateNRS(overallGenotypeConcordance);
|
||||
}
|
||||
|
||||
@Requires({"eval != null","truth != null"})
|
||||
public void update(VariantContext eval, VariantContext truth) {
|
||||
overallSiteConcordance.update(eval,truth);
|
||||
Set<String> alleleTruth = new HashSet<String>(8);
|
||||
alleleTruth.add(truth.getReference().getBaseString());
|
||||
for ( Allele a : truth.getAlternateAlleles() ) {
|
||||
alleleTruth.add(a.getBaseString());
|
||||
}
|
||||
for ( String sample : perSampleGenotypeConcordance.keySet() ) {
|
||||
Genotype evalGenotype = eval.getGenotype(sample);
|
||||
Genotype truthGenotype = truth.getGenotype(sample);
|
||||
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth);
|
||||
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth);
|
||||
}
|
||||
}
|
||||
|
||||
private static double calculateNRD(GenotypeConcordanceTable table) {
|
||||
return calculateNRD(table.getTable());
|
||||
}
|
||||
|
||||
private static double calculateNRD(int[][] concordanceCounts) {
|
||||
int correct = 0;
|
||||
int total = 0;
|
||||
correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()];
|
||||
correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += correct;
|
||||
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()];
|
||||
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()];
|
||||
// NRD is by definition incorrec/total = 1.0-correct/total
|
||||
return 1.0 - ( (double) correct)/( (double) total);
|
||||
}
|
||||
|
||||
private static double calculateNRS(GenotypeConcordanceTable table) {
|
||||
return calculateNRS(table.getTable());
|
||||
}
|
||||
|
||||
private static double calculateNRS(int[][] concordanceCounts) {
|
||||
long confirmedVariant = 0;
|
||||
long unconfirmedVariant = 0;
|
||||
for ( GenotypeType truthState : Arrays.asList(GenotypeType.HET,GenotypeType.HOM_VAR) ) {
|
||||
for ( GenotypeType evalState : GenotypeType.values() ) {
|
||||
if ( evalState == GenotypeType.MIXED )
|
||||
continue;
|
||||
if ( evalState.equals(GenotypeType.HET) || evalState.equals(GenotypeType.HOM_VAR) )
|
||||
confirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()];
|
||||
else
|
||||
unconfirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()];
|
||||
}
|
||||
}
|
||||
|
||||
return ( (double) confirmedVariant ) / ( (double) ( confirmedVariant + unconfirmedVariant ) );
|
||||
}
|
||||
|
||||
|
||||
class GenotypeConcordanceTable {
|
||||
|
||||
private int[][] genotypeCounts;
|
||||
private int nMismatchingAlt;
|
||||
|
||||
public GenotypeConcordanceTable() {
|
||||
genotypeCounts = new int[GenotypeType.values().length][GenotypeType.values().length];
|
||||
nMismatchingAlt = 0;
|
||||
}
|
||||
|
||||
@Requires({"eval!=null","truth != null","truthAlleles != null"})
|
||||
public void update(Genotype eval, Genotype truth, Set<String> truthAlleles) {
|
||||
// this is slow but correct
|
||||
boolean matchingAlt = true;
|
||||
if ( eval.isCalled() && truth.isCalled() ) {
|
||||
// by default, no-calls "match" between alleles, so if
|
||||
// one or both sites are no-call or unavailable, the alt alleles match
|
||||
// otherwise, check explicitly: if the eval has an allele that's not ref, no-call, or present in truth
|
||||
// the alt allele is mismatching - regardless of whether the genotype is correct.
|
||||
for ( Allele evalAllele : eval.getAlleles() ) {
|
||||
matchingAlt &= truthAlleles.contains(evalAllele.getBaseString());
|
||||
}
|
||||
}
|
||||
|
||||
if ( matchingAlt ) {
|
||||
genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++;
|
||||
} else {
|
||||
nMismatchingAlt++;
|
||||
}
|
||||
}
|
||||
|
||||
public int[][] getTable() {
|
||||
return genotypeCounts;
|
||||
}
|
||||
|
||||
public int getnMismatchingAlt() {
|
||||
return nMismatchingAlt;
|
||||
}
|
||||
|
||||
public int getnEvalGenotypes(GenotypeType type) {
|
||||
int nGeno = 0;
|
||||
for ( GenotypeType comptype : GenotypeType.values() )
|
||||
nGeno += genotypeCounts[type.ordinal()][comptype.ordinal()];
|
||||
return nGeno;
|
||||
}
|
||||
|
||||
public int getnCalledEvalGenotypes() {
|
||||
int nGeno = 0;
|
||||
for ( GenotypeType evalType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) {
|
||||
nGeno += getnEvalGenotypes(evalType);
|
||||
}
|
||||
|
||||
return nGeno + nMismatchingAlt;
|
||||
}
|
||||
|
||||
public int getnCompGenotypes(GenotypeType type) {
|
||||
int nGeno = 0;
|
||||
for ( GenotypeType evaltype : GenotypeType.values() )
|
||||
nGeno += genotypeCounts[evaltype.ordinal()][type.ordinal()];
|
||||
return nGeno;
|
||||
}
|
||||
|
||||
public int getnCalledCompGenotypes() {
|
||||
int nGeno = 0;
|
||||
for ( GenotypeType compType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) {
|
||||
nGeno += getnCompGenotypes(compType);
|
||||
}
|
||||
return nGeno;
|
||||
}
|
||||
|
||||
public int get(GenotypeType evalType, GenotypeType compType) {
|
||||
return genotypeCounts[evalType.ordinal()][compType.ordinal()];
|
||||
}
|
||||
}
|
||||
|
||||
class SiteConcordanceTable {
|
||||
|
||||
private int[] siteConcordance;
|
||||
|
||||
public SiteConcordanceTable() {
|
||||
siteConcordance = new int[SiteConcordanceType.values().length];
|
||||
}
|
||||
|
||||
public void update(VariantContext evalVC, VariantContext truthVC) {
|
||||
SiteConcordanceType matchType = getMatchType(evalVC,truthVC);
|
||||
siteConcordance[matchType.ordinal()]++;
|
||||
}
|
||||
|
||||
@Requires({"evalVC != null","truthVC != null"})
|
||||
private SiteConcordanceType getMatchType(VariantContext evalVC, VariantContext truthVC) {
|
||||
if ( evalVC.isMonomorphicInSamples() )
|
||||
return SiteConcordanceType.TRUTH_ONLY;
|
||||
if ( truthVC.isMonomorphicInSamples() )
|
||||
return SiteConcordanceType.EVAL_ONLY;
|
||||
|
||||
boolean evalSusbsetTruth = VariantContextUtils.allelesAreSubset(evalVC,truthVC);
|
||||
boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truthVC,evalVC);
|
||||
|
||||
if ( evalSusbsetTruth && truthSubsetEval )
|
||||
return SiteConcordanceType.ALLELES_MATCH;
|
||||
else if ( evalSusbsetTruth )
|
||||
return SiteConcordanceType.EVAL_SUBSET_TRUTH;
|
||||
else if ( truthSubsetEval )
|
||||
return SiteConcordanceType.EVAL_SUPERSET_TRUTH;
|
||||
|
||||
return SiteConcordanceType.ALLELES_DO_NOT_MATCH;
|
||||
}
|
||||
|
||||
public int[] getSiteConcordance() {
|
||||
return siteConcordance;
|
||||
}
|
||||
|
||||
public int get(SiteConcordanceType type) {
|
||||
return getSiteConcordance()[type.ordinal()];
|
||||
}
|
||||
}
|
||||
|
||||
enum SiteConcordanceType {
|
||||
ALLELES_MATCH,
|
||||
EVAL_SUPERSET_TRUTH,
|
||||
EVAL_SUBSET_TRUTH,
|
||||
ALLELES_DO_NOT_MATCH,
|
||||
EVAL_ONLY,
|
||||
TRUTH_ONLY
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,213 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReport;
|
||||
import org.broadinstitute.sting.gatk.report.GATKReportTable;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A simple walker for performing genotype concordance calculations between two callsets
|
||||
*/
|
||||
public class GenotypeConcordance extends RodWalker<Pair<VariantContext,VariantContext>,ConcordanceMetrics> {
|
||||
|
||||
@Input(fullName="eval",shortName="eval",doc="The variants and genotypes to evaluate",required=true)
|
||||
RodBinding<VariantContext> evalBinding;
|
||||
|
||||
@Input(fullName="comp",shortName="comp",doc="The variants and genotypes to compare against",required=true)
|
||||
RodBinding<VariantContext> compBinding;
|
||||
|
||||
@Argument(fullName="ignoreFilters",doc="Filters will be ignored",required=false)
|
||||
boolean ignoreFilters = false;
|
||||
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
List<String> evalSamples;
|
||||
List<String> compSamples;
|
||||
|
||||
// todo -- integration test coverage
|
||||
// todo -- deal with occurrences like:
|
||||
// Eval: 20 4000 A C
|
||||
// Eval: 20 4000 A AC
|
||||
// Comp: 20 4000 A C
|
||||
// currently this results in a warning and skipping
|
||||
// todo -- extend to multiple eval, multiple comp
|
||||
// todo -- table with "proportion of overlapping sites" (not just eval/comp margins)
|
||||
|
||||
|
||||
public ConcordanceMetrics reduceInit() {
|
||||
Map<String,VCFHeader> headerMap = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(evalBinding,compBinding));
|
||||
VCFHeader evalHeader = headerMap.get(evalBinding.getName());
|
||||
evalSamples = evalHeader.getGenotypeSamples();
|
||||
VCFHeader compHeader = headerMap.get(compBinding.getName());
|
||||
compSamples = compHeader.getGenotypeSamples();
|
||||
return new ConcordanceMetrics(evalHeader,compHeader);
|
||||
}
|
||||
|
||||
|
||||
public Pair<VariantContext,VariantContext> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
Pair<VariantContext,VariantContext> evalCompPair = null;
|
||||
if ( tracker != null && (
|
||||
tracker.getValues(evalBinding,ref.getLocus()).size() > 0 ||
|
||||
tracker.getValues(compBinding,ref.getLocus()).size() > 0 ) ) {
|
||||
|
||||
List<VariantContext> eval = tracker.getValues(evalBinding,ref.getLocus());
|
||||
List<VariantContext> comp = tracker.getValues(compBinding,ref.getLocus());
|
||||
if ( eval.size() > 1 || comp.size() > 1 ) {
|
||||
logger.warn("Eval or Comp Rod at position "+ref.getLocus().toString()+" has multiple records. Site will be skipped.");
|
||||
return evalCompPair;
|
||||
}
|
||||
// if a rod is missing, explicitly create a variant context with 'missing' genotypes. Slow, but correct.
|
||||
// note that if there is no eval rod there must be a comp rod, and also the reverse
|
||||
VariantContext evalContext = eval.size() == 1 ? eval.get(0) : createEmptyContext(ref,comp.get(0),evalSamples);
|
||||
VariantContext compContext = comp.size() == 1 ? comp.get(0) : createEmptyContext(ref,eval.get(0),compSamples);
|
||||
evalContext = filterGenotypes(evalContext,ignoreFilters);
|
||||
compContext = filterGenotypes(compContext,ignoreFilters);
|
||||
evalCompPair = new Pair<VariantContext, VariantContext>(evalContext,compContext);
|
||||
}
|
||||
|
||||
return evalCompPair;
|
||||
}
|
||||
|
||||
public ConcordanceMetrics reduce(Pair<VariantContext,VariantContext> evalComp, ConcordanceMetrics metrics) {
|
||||
if ( evalComp != null )
|
||||
metrics.update(evalComp.getFirst(),evalComp.getSecond());
|
||||
return metrics;
|
||||
}
|
||||
|
||||
public void onTraversalDone(ConcordanceMetrics metrics) {
|
||||
GATKReport report = new GATKReport();
|
||||
GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length);
|
||||
GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length);
|
||||
GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length);
|
||||
GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS and NRD",2);
|
||||
GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length);
|
||||
concordanceCompProportions.addColumn("Sample","%s");
|
||||
concordanceCounts.addColumn("Sample","%s");
|
||||
concordanceEvalProportions.addColumn("Sample","%s");
|
||||
concordanceSummary.addColumn("Sample","%s");
|
||||
for ( GenotypeType evalType : GenotypeType.values() ) {
|
||||
for ( GenotypeType compType : GenotypeType.values() ) {
|
||||
String colKey = String.format("%s_%s", evalType.toString(), compType.toString());
|
||||
concordanceCounts.addColumn(colKey,"%d");
|
||||
if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR)
|
||||
concordanceEvalProportions.addColumn(colKey,"%.3f");
|
||||
if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF )
|
||||
concordanceCompProportions.addColumn(colKey,"%.3f");
|
||||
}
|
||||
}
|
||||
concordanceEvalProportions.addColumn("Mismatching_Alleles","%.3f");
|
||||
concordanceCompProportions.addColumn("Mismatching_Alleles","%.3f");
|
||||
concordanceCounts.addColumn("Mismatching_Alleles","%d");
|
||||
concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f");
|
||||
concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f");
|
||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||
siteConcordance.addColumn(type.toString(),"%d");
|
||||
}
|
||||
|
||||
for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) {
|
||||
ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue();
|
||||
concordanceEvalProportions.set(entry.getKey(),"Sample",entry.getKey());
|
||||
concordanceCompProportions.set(entry.getKey(),"Sample",entry.getKey());
|
||||
concordanceCounts.set(entry.getKey(),"Sample",entry.getKey());
|
||||
for ( GenotypeType evalType : GenotypeType.values() ) {
|
||||
for ( GenotypeType compType : GenotypeType.values() ) {
|
||||
String colKey = String.format("%s_%s",evalType.toString(),compType.toString());
|
||||
int count = table.get(evalType, compType);
|
||||
concordanceCounts.set(entry.getKey(),colKey,count);
|
||||
if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR)
|
||||
concordanceEvalProportions.set(entry.getKey(),colKey,( (double) count)/table.getnEvalGenotypes(evalType));
|
||||
if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF )
|
||||
concordanceCompProportions.set(entry.getKey(),colKey,( (double) count)/table.getnCompGenotypes(compType));
|
||||
}
|
||||
}
|
||||
concordanceEvalProportions.set(entry.getKey(),"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledEvalGenotypes());
|
||||
concordanceCompProportions.set(entry.getKey(),"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledCompGenotypes());
|
||||
concordanceCounts.set(entry.getKey(),"Mismatching_Alleles",table.getnMismatchingAlt());
|
||||
}
|
||||
|
||||
String rowKey = "ALL";
|
||||
concordanceCompProportions.set(rowKey,"Sample",rowKey);
|
||||
concordanceEvalProportions.set(rowKey,"Sample",rowKey);
|
||||
concordanceCounts.set(rowKey,"Sample",rowKey);
|
||||
ConcordanceMetrics.GenotypeConcordanceTable table = metrics.getOverallGenotypeConcordance();
|
||||
for ( GenotypeType evalType : GenotypeType.values() ) {
|
||||
for ( GenotypeType compType : GenotypeType.values() ) {
|
||||
String colKey = String.format("%s_%s",evalType.toString(),compType.toString());
|
||||
int count = table.get(evalType,compType);
|
||||
concordanceCounts.set(rowKey,colKey,count);
|
||||
if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR)
|
||||
concordanceEvalProportions.set(rowKey,colKey,( (double) count)/table.getnEvalGenotypes(evalType));
|
||||
if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF )
|
||||
concordanceCompProportions.set(rowKey,colKey,( (double) count)/table.getnCompGenotypes(compType));
|
||||
}
|
||||
}
|
||||
concordanceEvalProportions.set(rowKey,"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledEvalGenotypes());
|
||||
concordanceCompProportions.set(rowKey,"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledCompGenotypes());
|
||||
concordanceCounts.set(rowKey,"Mismatching_Alleles",table.getnMismatchingAlt());
|
||||
|
||||
for ( Map.Entry<String,Double> nrsEntry : metrics.getPerSampleNRS().entrySet() ) {
|
||||
concordanceSummary.set(nrsEntry.getKey(),"Sample",nrsEntry.getKey());
|
||||
concordanceSummary.set(nrsEntry.getKey(),"Non-Reference Sensitivity",nrsEntry.getValue());
|
||||
}
|
||||
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
|
||||
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue());
|
||||
}
|
||||
concordanceSummary.set("ALL","Sample","ALL");
|
||||
concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS());
|
||||
concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD());
|
||||
|
||||
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
|
||||
siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type));
|
||||
}
|
||||
|
||||
report.addTable(concordanceCompProportions);
|
||||
report.addTable(concordanceEvalProportions);
|
||||
report.addTable(concordanceCounts);
|
||||
report.addTable(concordanceSummary);
|
||||
report.addTable(siteConcordance);
|
||||
|
||||
report.print(out);
|
||||
}
|
||||
|
||||
public VariantContext createEmptyContext(ReferenceContext ref, VariantContext other, List<String> samples) {
|
||||
VariantContextBuilder builder = new VariantContextBuilder();
|
||||
// set the alleles to be the same
|
||||
builder.alleles(other.getAlleles());
|
||||
builder.loc(other.getChr(),other.getStart(),other.getEnd());
|
||||
// set all genotypes to empty
|
||||
List<Genotype> genotypes = new ArrayList<Genotype>(samples.size());
|
||||
for ( String sample : samples )
|
||||
genotypes.add(GenotypeBuilder.create(sample, new ArrayList<Allele>(0)));
|
||||
builder.genotypes(genotypes);
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
public VariantContext filterGenotypes(VariantContext context, boolean ignoreSiteFilter) {
|
||||
// placeholder method for genotype-level filtering. However if the site itself is filtered,
|
||||
// and such filters are not ignored, the genotype-level data should be altered to reflect this
|
||||
if ( ! context.isFiltered() || ignoreSiteFilter ) {
|
||||
// todo -- add genotype-level jexl filtering here
|
||||
return context;
|
||||
}
|
||||
VariantContextBuilder builder = new VariantContextBuilder();
|
||||
builder.alleles(Arrays.asList(context.getReference()));
|
||||
builder.loc(context.getChr(),context.getStart(),context.getEnd());
|
||||
List<Genotype> newGeno = new ArrayList<Genotype>(context.getNSamples());
|
||||
for ( Genotype g : context.getGenotypes().iterateInSampleNameOrder() ) {
|
||||
newGeno.add(GenotypeBuilder.create(g.getSampleName(),new ArrayList<Allele>()));
|
||||
}
|
||||
builder.genotypes(newGeno);
|
||||
return builder.make();
|
||||
}
|
||||
}
|
||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
|
||||
|
|
@ -423,7 +424,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
headerLines.add(new VCFInfoHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
|
||||
headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
|
||||
}
|
||||
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
|
||||
headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
|
||||
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
|
||||
|
||||
for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,344 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.walkers.variantutils.ConcordanceMetrics;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.variant.utils.BaseUtils;
|
||||
import org.broadinstitute.variant.variantcontext.*;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypeType;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.variant.vcf.VCFCodec;
|
||||
import org.broadinstitute.variant.vcf.VCFHeader;
|
||||
import org.testng.annotations.Test;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.StringBufferInputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
|
||||
public class ConcordanceMetricsUnitTest extends BaseTest {
|
||||
|
||||
// todo -- coverage for several sites (3,4)
|
||||
// todo -- coverage for calculation of table margins
|
||||
// todo -- coverage for site concordance
|
||||
// todo -- coverage for disjoint and mostly-disjoint sample sets
|
||||
|
||||
private static ReferenceSequenceFile seq;
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
@BeforeClass
|
||||
public void init() throws FileNotFoundException {
|
||||
// sequence
|
||||
seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference));
|
||||
genomeLocParser = new GenomeLocParser(seq);
|
||||
}
|
||||
public static String HEADER_BASE = "##fileformat=VCFv4.0\n" +
|
||||
"##filedate=2010-06-21\n"+
|
||||
"##reference=NCBI36\n"+
|
||||
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
|
||||
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
|
||||
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
|
||||
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
|
||||
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n"+
|
||||
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
|
||||
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n" +
|
||||
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t";
|
||||
public static String TEST_1_HEADER = HEADER_BASE + "test1_sample1\ttest1_sample2\ttest1_sample3\n";
|
||||
|
||||
|
||||
private Pair<VariantContext,VariantContext> getData1() {
|
||||
|
||||
Allele reference_A = Allele.create(BaseUtils.A,true);
|
||||
Allele alt_C = Allele.create(BaseUtils.C);
|
||||
|
||||
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
|
||||
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C));
|
||||
|
||||
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(alt_C,alt_C));
|
||||
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3);
|
||||
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
|
||||
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
|
||||
|
||||
eval_1_builder.alleles(Arrays.asList(reference_A,alt_C));
|
||||
truth_1_builder.alleles(Arrays.asList(reference_A,alt_C));
|
||||
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
|
||||
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
|
||||
|
||||
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
|
||||
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
|
||||
|
||||
return testData;
|
||||
}
|
||||
|
||||
@Test(enabled=true)
|
||||
public void testSimpleComparison() {
|
||||
Pair<VariantContext,VariantContext> data = getData1();
|
||||
VariantContext eval = data.getFirst();
|
||||
VariantContext truth = data.getSecond();
|
||||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],1);
|
||||
Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1],1);
|
||||
}
|
||||
|
||||
private Pair<VariantContext,VariantContext> getData2() {
|
||||
|
||||
Allele reference_A = Allele.create(BaseUtils.A,true);
|
||||
Allele alt_C = Allele.create(BaseUtils.C);
|
||||
Allele alt_T = Allele.create(BaseUtils.T);
|
||||
|
||||
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_T));
|
||||
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C));
|
||||
|
||||
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
|
||||
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(alt_C,alt_C));
|
||||
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3);
|
||||
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
|
||||
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
|
||||
|
||||
eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T));
|
||||
truth_1_builder.alleles(Arrays.asList(reference_A,alt_C));
|
||||
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
|
||||
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
|
||||
|
||||
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
|
||||
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
|
||||
|
||||
return testData;
|
||||
}
|
||||
|
||||
@Test(enabled=true)
|
||||
public void testMismatchingAllele() {
|
||||
Pair<VariantContext,VariantContext> data = getData2();
|
||||
VariantContext eval = data.getFirst();
|
||||
VariantContext truth = data.getSecond();
|
||||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
|
||||
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],1);
|
||||
Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1],1);
|
||||
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()],1);
|
||||
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()],0);
|
||||
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()],0);
|
||||
}
|
||||
|
||||
private Pair<VariantContext,VariantContext> getData3() {
|
||||
|
||||
Allele reference_ACT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C,BaseUtils.T},true);
|
||||
Allele alt_AC = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C});
|
||||
Allele alt_A = Allele.create(BaseUtils.A);
|
||||
Allele alt_ATT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.T,BaseUtils.T});
|
||||
|
||||
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_ATT));
|
||||
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A));
|
||||
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_ACT,alt_A));
|
||||
|
||||
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_AC));
|
||||
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A));
|
||||
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_ACT,alt_A));
|
||||
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 5);
|
||||
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
|
||||
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
|
||||
|
||||
eval_1_builder.alleles(Arrays.asList(reference_ACT,alt_ATT,alt_A));
|
||||
truth_1_builder.alleles(Arrays.asList(reference_ACT,alt_AC,alt_A));
|
||||
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
|
||||
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
|
||||
|
||||
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
|
||||
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
|
||||
|
||||
return testData;
|
||||
}
|
||||
|
||||
@Test(enabled=true)
|
||||
public void testComplex() {
|
||||
Pair<VariantContext,VariantContext> data = getData3();
|
||||
VariantContext eval = data.getFirst();
|
||||
VariantContext truth = data.getSecond();
|
||||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[3][3],1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[1][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][2],1);
|
||||
Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[3][3],1);
|
||||
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()],0);
|
||||
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()],1);
|
||||
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()],0);
|
||||
}
|
||||
|
||||
private Pair<VariantContext,VariantContext> getData4() {
|
||||
|
||||
Allele reference_A = Allele.create(BaseUtils.A,true);
|
||||
Allele alt_C = Allele.create(BaseUtils.C);
|
||||
Allele alt_T = Allele.create(BaseUtils.T);
|
||||
|
||||
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL));
|
||||
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C));
|
||||
|
||||
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
|
||||
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL));
|
||||
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3);
|
||||
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
|
||||
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
|
||||
|
||||
eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T));
|
||||
truth_1_builder.alleles(Arrays.asList(reference_A,alt_C));
|
||||
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
|
||||
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
|
||||
|
||||
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
|
||||
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
|
||||
|
||||
return testData;
|
||||
}
|
||||
|
||||
@Test(enabled=true)
|
||||
public void testNoCalls() {
|
||||
Pair<VariantContext,VariantContext> data = getData4();
|
||||
VariantContext eval = data.getFirst();
|
||||
VariantContext truth = data.getSecond();
|
||||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],1);
|
||||
}
|
||||
|
||||
private Pair<VariantContext,VariantContext> getData5() {
|
||||
|
||||
Allele reference_A = Allele.create(BaseUtils.A,true);
|
||||
Allele alt_C = Allele.create(BaseUtils.C);
|
||||
Allele alt_T = Allele.create(BaseUtils.T);
|
||||
|
||||
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", new ArrayList<Allele>(0));
|
||||
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C));
|
||||
|
||||
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
|
||||
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
|
||||
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", new ArrayList<Allele>(0));
|
||||
|
||||
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3);
|
||||
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
|
||||
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
|
||||
|
||||
eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T));
|
||||
truth_1_builder.alleles(Arrays.asList(reference_A,alt_C));
|
||||
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
|
||||
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
|
||||
|
||||
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
|
||||
|
||||
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
|
||||
|
||||
return testData;
|
||||
}
|
||||
|
||||
@Test(enabled=true)
|
||||
public void testMissing() {
|
||||
Pair<VariantContext,VariantContext> data = getData5();
|
||||
VariantContext eval = data.getFirst();
|
||||
VariantContext truth = data.getSecond();
|
||||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
metrics.update(eval,truth);
|
||||
Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[4][2],1);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],0);
|
||||
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][4],1);
|
||||
}
|
||||
|
||||
@Test(enabled=true)
|
||||
public void testNRD_testNRS() {
|
||||
Pair<VariantContext,VariantContext> data = getData3();
|
||||
VariantContext eval = data.getFirst();
|
||||
VariantContext truth = data.getSecond();
|
||||
VCFCodec codec = new VCFCodec();
|
||||
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
|
||||
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
|
||||
int[][] table = metrics.getOverallGenotypeConcordance().getTable();
|
||||
// set up the table
|
||||
table[0] = new int[] {30, 12, 7, 5, 6, 0};
|
||||
table[1] = new int[] {10, 100, 5, 1, 7, 1};
|
||||
table[2] = new int[] {5, 7, 150, 3, 3, 1};
|
||||
table[3] = new int[] {3, 2, 6, 50, 1, 0};
|
||||
table[4] = new int[] {10, 6, 3, 3, 2, 0};
|
||||
table[5] = new int[] {12, 0, 34, 20, 10, 0};
|
||||
double EXPEC_NRS = 0.8969957;
|
||||
double EXPEC_NRD = 0.1071429;
|
||||
Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7);
|
||||
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2010.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.variant.vcf.VCFConstants;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.variant.vcf.VCFStandardHeaderLines;
|
||||
|
||||
|
||||
/**
|
||||
* Keys and descriptions for the common chromosome count annotations
|
||||
*/
|
||||
public class ChromosomeCountConstants {
|
||||
|
||||
public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
|
||||
public static final VCFInfoHeaderLine[] descriptions = {
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY),
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY),
|
||||
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) };
|
||||
}
|
||||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
|
|||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
|
||||
import org.broadinstitute.sting.gatk.walkers.Window;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.variant.vcf.*;
|
||||
|
|
@ -217,7 +217,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
|
|||
if ( SET_KEY != null )
|
||||
headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants"));
|
||||
if ( !ASSUME_IDENTICAL_SAMPLES )
|
||||
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
|
||||
headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
|
||||
VCFHeader vcfHeader = new VCFHeader(headerLines, samples);
|
||||
vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
Loading…
Reference in New Issue