Merge branch 'master' of github.com:broadinstitute/gsa-unstable

This commit is contained in:
Menachem Fromer 2013-01-09 13:26:40 -05:00
commit 6089a4bf62
29 changed files with 868 additions and 13 deletions

View File

@ -35,10 +35,8 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.variant.vcf.VCFHeaderLine;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.variant.vcf.VCFStandardHeaderLines;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextUtils;
@ -52,12 +50,6 @@ import java.util.*;
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
public static final VCFInfoHeaderLine[] descriptions = {
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY),
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY),
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) };
private Set<String> founderIds = new HashSet<String>();
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
@ -78,8 +70,8 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
}
public List<String> getKeyNames() {
return Arrays.asList(keyNames);
return Arrays.asList(ChromosomeCountConstants.keyNames);
}
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(descriptions); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(ChromosomeCountConstants.descriptions); }
}

View File

@ -0,0 +1,261 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader;
import java.util.*;
/**
* A class for tabulating and evaluating a callset-by-callset genotype concordance table
* */
public class ConcordanceMetrics {
private Map<String,GenotypeConcordanceTable> perSampleGenotypeConcordance;
private GenotypeConcordanceTable overallGenotypeConcordance;
private SiteConcordanceTable overallSiteConcordance;
public ConcordanceMetrics(VCFHeader evaluate, VCFHeader truth) {
HashSet<String> overlappingSamples = new HashSet<String>(evaluate.getGenotypeSamples());
overlappingSamples.retainAll(truth.getGenotypeSamples());
perSampleGenotypeConcordance = new HashMap<String, GenotypeConcordanceTable>(overlappingSamples.size());
for ( String sample : overlappingSamples ) {
perSampleGenotypeConcordance.put(sample,new GenotypeConcordanceTable());
}
overallGenotypeConcordance = new GenotypeConcordanceTable();
overallSiteConcordance = new SiteConcordanceTable();
}
public GenotypeConcordanceTable getOverallGenotypeConcordance() {
return overallGenotypeConcordance;
}
public SiteConcordanceTable getOverallSiteConcordance() {
return overallSiteConcordance;
}
public GenotypeConcordanceTable getGenotypeConcordance(String sample) {
GenotypeConcordanceTable table = perSampleGenotypeConcordance.get(sample);
if ( table == null )
throw new ReviewedStingException("Attempted to request the concordance table for a sample on which it was not calculated");
return table;
}
public Map<String,GenotypeConcordanceTable> getPerSampleGenotypeConcordance() {
return Collections.unmodifiableMap(perSampleGenotypeConcordance);
}
public Map<String,Double> getPerSampleNRD() {
Map<String,Double> nrd = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
nrd.put(sampleTable.getKey(),calculateNRD(sampleTable.getValue()));
}
return Collections.unmodifiableMap(nrd);
}
public Double getOverallNRD() {
return calculateNRD(overallGenotypeConcordance);
}
public Map<String,Double> getPerSampleNRS() {
Map<String,Double> nrs = new HashMap<String,Double>(perSampleGenotypeConcordance.size());
for ( Map.Entry<String,GenotypeConcordanceTable> sampleTable : perSampleGenotypeConcordance.entrySet() ) {
nrs.put(sampleTable.getKey(),calculateNRS(sampleTable.getValue()));
}
return Collections.unmodifiableMap(nrs);
}
public Double getOverallNRS() {
return calculateNRS(overallGenotypeConcordance);
}
@Requires({"eval != null","truth != null"})
public void update(VariantContext eval, VariantContext truth) {
overallSiteConcordance.update(eval,truth);
Set<String> alleleTruth = new HashSet<String>(8);
alleleTruth.add(truth.getReference().getBaseString());
for ( Allele a : truth.getAlternateAlleles() ) {
alleleTruth.add(a.getBaseString());
}
for ( String sample : perSampleGenotypeConcordance.keySet() ) {
Genotype evalGenotype = eval.getGenotype(sample);
Genotype truthGenotype = truth.getGenotype(sample);
perSampleGenotypeConcordance.get(sample).update(evalGenotype,truthGenotype,alleleTruth);
overallGenotypeConcordance.update(evalGenotype,truthGenotype,alleleTruth);
}
}
private static double calculateNRD(GenotypeConcordanceTable table) {
return calculateNRD(table.getTable());
}
private static double calculateNRD(int[][] concordanceCounts) {
int correct = 0;
int total = 0;
correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()];
correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += correct;
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()];
total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()];
total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()];
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()];
total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()];
// NRD is by definition incorrec/total = 1.0-correct/total
return 1.0 - ( (double) correct)/( (double) total);
}
private static double calculateNRS(GenotypeConcordanceTable table) {
return calculateNRS(table.getTable());
}
private static double calculateNRS(int[][] concordanceCounts) {
long confirmedVariant = 0;
long unconfirmedVariant = 0;
for ( GenotypeType truthState : Arrays.asList(GenotypeType.HET,GenotypeType.HOM_VAR) ) {
for ( GenotypeType evalState : GenotypeType.values() ) {
if ( evalState == GenotypeType.MIXED )
continue;
if ( evalState.equals(GenotypeType.HET) || evalState.equals(GenotypeType.HOM_VAR) )
confirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()];
else
unconfirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()];
}
}
return ( (double) confirmedVariant ) / ( (double) ( confirmedVariant + unconfirmedVariant ) );
}
class GenotypeConcordanceTable {
private int[][] genotypeCounts;
private int nMismatchingAlt;
public GenotypeConcordanceTable() {
genotypeCounts = new int[GenotypeType.values().length][GenotypeType.values().length];
nMismatchingAlt = 0;
}
@Requires({"eval!=null","truth != null","truthAlleles != null"})
public void update(Genotype eval, Genotype truth, Set<String> truthAlleles) {
// this is slow but correct
boolean matchingAlt = true;
if ( eval.isCalled() && truth.isCalled() ) {
// by default, no-calls "match" between alleles, so if
// one or both sites are no-call or unavailable, the alt alleles match
// otherwise, check explicitly: if the eval has an allele that's not ref, no-call, or present in truth
// the alt allele is mismatching - regardless of whether the genotype is correct.
for ( Allele evalAllele : eval.getAlleles() ) {
matchingAlt &= truthAlleles.contains(evalAllele.getBaseString());
}
}
if ( matchingAlt ) {
genotypeCounts[eval.getType().ordinal()][truth.getType().ordinal()]++;
} else {
nMismatchingAlt++;
}
}
public int[][] getTable() {
return genotypeCounts;
}
public int getnMismatchingAlt() {
return nMismatchingAlt;
}
public int getnEvalGenotypes(GenotypeType type) {
int nGeno = 0;
for ( GenotypeType comptype : GenotypeType.values() )
nGeno += genotypeCounts[type.ordinal()][comptype.ordinal()];
return nGeno;
}
public int getnCalledEvalGenotypes() {
int nGeno = 0;
for ( GenotypeType evalType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) {
nGeno += getnEvalGenotypes(evalType);
}
return nGeno + nMismatchingAlt;
}
public int getnCompGenotypes(GenotypeType type) {
int nGeno = 0;
for ( GenotypeType evaltype : GenotypeType.values() )
nGeno += genotypeCounts[evaltype.ordinal()][type.ordinal()];
return nGeno;
}
public int getnCalledCompGenotypes() {
int nGeno = 0;
for ( GenotypeType compType : Arrays.asList(GenotypeType.HOM_REF,GenotypeType.HOM_VAR,GenotypeType.HET) ) {
nGeno += getnCompGenotypes(compType);
}
return nGeno;
}
public int get(GenotypeType evalType, GenotypeType compType) {
return genotypeCounts[evalType.ordinal()][compType.ordinal()];
}
}
class SiteConcordanceTable {
private int[] siteConcordance;
public SiteConcordanceTable() {
siteConcordance = new int[SiteConcordanceType.values().length];
}
public void update(VariantContext evalVC, VariantContext truthVC) {
SiteConcordanceType matchType = getMatchType(evalVC,truthVC);
siteConcordance[matchType.ordinal()]++;
}
@Requires({"evalVC != null","truthVC != null"})
private SiteConcordanceType getMatchType(VariantContext evalVC, VariantContext truthVC) {
if ( evalVC.isMonomorphicInSamples() )
return SiteConcordanceType.TRUTH_ONLY;
if ( truthVC.isMonomorphicInSamples() )
return SiteConcordanceType.EVAL_ONLY;
boolean evalSusbsetTruth = VariantContextUtils.allelesAreSubset(evalVC,truthVC);
boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truthVC,evalVC);
if ( evalSusbsetTruth && truthSubsetEval )
return SiteConcordanceType.ALLELES_MATCH;
else if ( evalSusbsetTruth )
return SiteConcordanceType.EVAL_SUBSET_TRUTH;
else if ( truthSubsetEval )
return SiteConcordanceType.EVAL_SUPERSET_TRUTH;
return SiteConcordanceType.ALLELES_DO_NOT_MATCH;
}
public int[] getSiteConcordance() {
return siteConcordance;
}
public int get(SiteConcordanceType type) {
return getSiteConcordance()[type.ordinal()];
}
}
enum SiteConcordanceType {
ALLELES_MATCH,
EVAL_SUPERSET_TRUTH,
EVAL_SUBSET_TRUTH,
ALLELES_DO_NOT_MATCH,
EVAL_ONLY,
TRUTH_ONLY
}
}

View File

@ -0,0 +1,213 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.report.GATKReport;
import org.broadinstitute.sting.gatk.report.GATKReportTable;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.vcf.VCFHeader;
import java.io.PrintStream;
import java.util.*;
/**
* A simple walker for performing genotype concordance calculations between two callsets
*/
public class GenotypeConcordance extends RodWalker<Pair<VariantContext,VariantContext>,ConcordanceMetrics> {
@Input(fullName="eval",shortName="eval",doc="The variants and genotypes to evaluate",required=true)
RodBinding<VariantContext> evalBinding;
@Input(fullName="comp",shortName="comp",doc="The variants and genotypes to compare against",required=true)
RodBinding<VariantContext> compBinding;
@Argument(fullName="ignoreFilters",doc="Filters will be ignored",required=false)
boolean ignoreFilters = false;
@Output
PrintStream out;
List<String> evalSamples;
List<String> compSamples;
// todo -- integration test coverage
// todo -- deal with occurrences like:
// Eval: 20 4000 A C
// Eval: 20 4000 A AC
// Comp: 20 4000 A C
// currently this results in a warning and skipping
// todo -- extend to multiple eval, multiple comp
// todo -- table with "proportion of overlapping sites" (not just eval/comp margins)
public ConcordanceMetrics reduceInit() {
Map<String,VCFHeader> headerMap = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList(evalBinding,compBinding));
VCFHeader evalHeader = headerMap.get(evalBinding.getName());
evalSamples = evalHeader.getGenotypeSamples();
VCFHeader compHeader = headerMap.get(compBinding.getName());
compSamples = compHeader.getGenotypeSamples();
return new ConcordanceMetrics(evalHeader,compHeader);
}
public Pair<VariantContext,VariantContext> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
Pair<VariantContext,VariantContext> evalCompPair = null;
if ( tracker != null && (
tracker.getValues(evalBinding,ref.getLocus()).size() > 0 ||
tracker.getValues(compBinding,ref.getLocus()).size() > 0 ) ) {
List<VariantContext> eval = tracker.getValues(evalBinding,ref.getLocus());
List<VariantContext> comp = tracker.getValues(compBinding,ref.getLocus());
if ( eval.size() > 1 || comp.size() > 1 ) {
logger.warn("Eval or Comp Rod at position "+ref.getLocus().toString()+" has multiple records. Site will be skipped.");
return evalCompPair;
}
// if a rod is missing, explicitly create a variant context with 'missing' genotypes. Slow, but correct.
// note that if there is no eval rod there must be a comp rod, and also the reverse
VariantContext evalContext = eval.size() == 1 ? eval.get(0) : createEmptyContext(ref,comp.get(0),evalSamples);
VariantContext compContext = comp.size() == 1 ? comp.get(0) : createEmptyContext(ref,eval.get(0),compSamples);
evalContext = filterGenotypes(evalContext,ignoreFilters);
compContext = filterGenotypes(compContext,ignoreFilters);
evalCompPair = new Pair<VariantContext, VariantContext>(evalContext,compContext);
}
return evalCompPair;
}
public ConcordanceMetrics reduce(Pair<VariantContext,VariantContext> evalComp, ConcordanceMetrics metrics) {
if ( evalComp != null )
metrics.update(evalComp.getFirst(),evalComp.getSecond());
return metrics;
}
public void onTraversalDone(ConcordanceMetrics metrics) {
GATKReport report = new GATKReport();
GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length);
GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS and NRD",2);
GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length);
concordanceCompProportions.addColumn("Sample","%s");
concordanceCounts.addColumn("Sample","%s");
concordanceEvalProportions.addColumn("Sample","%s");
concordanceSummary.addColumn("Sample","%s");
for ( GenotypeType evalType : GenotypeType.values() ) {
for ( GenotypeType compType : GenotypeType.values() ) {
String colKey = String.format("%s_%s", evalType.toString(), compType.toString());
concordanceCounts.addColumn(colKey,"%d");
if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR)
concordanceEvalProportions.addColumn(colKey,"%.3f");
if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF )
concordanceCompProportions.addColumn(colKey,"%.3f");
}
}
concordanceEvalProportions.addColumn("Mismatching_Alleles","%.3f");
concordanceCompProportions.addColumn("Mismatching_Alleles","%.3f");
concordanceCounts.addColumn("Mismatching_Alleles","%d");
concordanceSummary.addColumn("Non-Reference Sensitivity","%.3f");
concordanceSummary.addColumn("Non-Reference Discrepancy","%.3f");
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
siteConcordance.addColumn(type.toString(),"%d");
}
for ( Map.Entry<String,ConcordanceMetrics.GenotypeConcordanceTable> entry : metrics.getPerSampleGenotypeConcordance().entrySet() ) {
ConcordanceMetrics.GenotypeConcordanceTable table = entry.getValue();
concordanceEvalProportions.set(entry.getKey(),"Sample",entry.getKey());
concordanceCompProportions.set(entry.getKey(),"Sample",entry.getKey());
concordanceCounts.set(entry.getKey(),"Sample",entry.getKey());
for ( GenotypeType evalType : GenotypeType.values() ) {
for ( GenotypeType compType : GenotypeType.values() ) {
String colKey = String.format("%s_%s",evalType.toString(),compType.toString());
int count = table.get(evalType, compType);
concordanceCounts.set(entry.getKey(),colKey,count);
if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR)
concordanceEvalProportions.set(entry.getKey(),colKey,( (double) count)/table.getnEvalGenotypes(evalType));
if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF )
concordanceCompProportions.set(entry.getKey(),colKey,( (double) count)/table.getnCompGenotypes(compType));
}
}
concordanceEvalProportions.set(entry.getKey(),"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledEvalGenotypes());
concordanceCompProportions.set(entry.getKey(),"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledCompGenotypes());
concordanceCounts.set(entry.getKey(),"Mismatching_Alleles",table.getnMismatchingAlt());
}
String rowKey = "ALL";
concordanceCompProportions.set(rowKey,"Sample",rowKey);
concordanceEvalProportions.set(rowKey,"Sample",rowKey);
concordanceCounts.set(rowKey,"Sample",rowKey);
ConcordanceMetrics.GenotypeConcordanceTable table = metrics.getOverallGenotypeConcordance();
for ( GenotypeType evalType : GenotypeType.values() ) {
for ( GenotypeType compType : GenotypeType.values() ) {
String colKey = String.format("%s_%s",evalType.toString(),compType.toString());
int count = table.get(evalType,compType);
concordanceCounts.set(rowKey,colKey,count);
if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR)
concordanceEvalProportions.set(rowKey,colKey,( (double) count)/table.getnEvalGenotypes(evalType));
if ( compType == GenotypeType.HET || compType == GenotypeType.HOM_VAR || compType == GenotypeType.HOM_REF )
concordanceCompProportions.set(rowKey,colKey,( (double) count)/table.getnCompGenotypes(compType));
}
}
concordanceEvalProportions.set(rowKey,"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledEvalGenotypes());
concordanceCompProportions.set(rowKey,"Mismatching_Alleles", ( (double) table.getnMismatchingAlt() )/table.getnCalledCompGenotypes());
concordanceCounts.set(rowKey,"Mismatching_Alleles",table.getnMismatchingAlt());
for ( Map.Entry<String,Double> nrsEntry : metrics.getPerSampleNRS().entrySet() ) {
concordanceSummary.set(nrsEntry.getKey(),"Sample",nrsEntry.getKey());
concordanceSummary.set(nrsEntry.getKey(),"Non-Reference Sensitivity",nrsEntry.getValue());
}
for ( Map.Entry<String,Double> nrdEntry : metrics.getPerSampleNRD().entrySet() ) {
concordanceSummary.set(nrdEntry.getKey(),"Non-Reference Discrepancy",nrdEntry.getValue());
}
concordanceSummary.set("ALL","Sample","ALL");
concordanceSummary.set("ALL","Non-Reference Sensitivity",metrics.getOverallNRS());
concordanceSummary.set("ALL","Non-Reference Discrepancy",metrics.getOverallNRD());
for (ConcordanceMetrics.SiteConcordanceType type : ConcordanceMetrics.SiteConcordanceType.values() ) {
siteConcordance.set("Comparison",type.toString(),metrics.getOverallSiteConcordance().get(type));
}
report.addTable(concordanceCompProportions);
report.addTable(concordanceEvalProportions);
report.addTable(concordanceCounts);
report.addTable(concordanceSummary);
report.addTable(siteConcordance);
report.print(out);
}
public VariantContext createEmptyContext(ReferenceContext ref, VariantContext other, List<String> samples) {
VariantContextBuilder builder = new VariantContextBuilder();
// set the alleles to be the same
builder.alleles(other.getAlleles());
builder.loc(other.getChr(),other.getStart(),other.getEnd());
// set all genotypes to empty
List<Genotype> genotypes = new ArrayList<Genotype>(samples.size());
for ( String sample : samples )
genotypes.add(GenotypeBuilder.create(sample, new ArrayList<Allele>(0)));
builder.genotypes(genotypes);
return builder.make();
}
public VariantContext filterGenotypes(VariantContext context, boolean ignoreSiteFilter) {
// placeholder method for genotype-level filtering. However if the site itself is filtered,
// and such filters are not ignored, the genotype-level data should be altered to reflect this
if ( ! context.isFiltered() || ignoreSiteFilter ) {
// todo -- add genotype-level jexl filtering here
return context;
}
VariantContextBuilder builder = new VariantContextBuilder();
builder.alleles(Arrays.asList(context.getReference()));
builder.loc(context.getChr(),context.getStart(),context.getEnd());
List<Genotype> newGeno = new ArrayList<Genotype>(context.getNSamples());
for ( Genotype g : context.getGenotypes().iterateInSampleNameOrder() ) {
newGeno.add(GenotypeBuilder.create(g.getSampleName(),new ArrayList<Allele>()));
}
builder.genotypes(newGeno);
return builder.make();
}
}

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
@ -423,7 +424,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
headerLines.add(new VCFInfoHeaderLine("AF_Orig", 1, VCFHeaderLineType.Float, "Original AF"));
headerLines.add(new VCFInfoHeaderLine("AN_Orig", 1, VCFHeaderLineType.Integer, "Original AN"));
}
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
headerLines.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
for (int i = 0; i < SELECT_EXPRESSIONS.size(); i++) {

View File

@ -0,0 +1,344 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.walkers.variantutils.ConcordanceMetrics;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.variant.utils.BaseUtils;
import org.broadinstitute.variant.variantcontext.*;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
import org.broadinstitute.variant.variantcontext.GenotypeType;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.vcf.VCFCodec;
import org.broadinstitute.variant.vcf.VCFHeader;
import org.testng.annotations.Test;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.variant.vcf.*;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.StringBufferInputStream;
import java.util.ArrayList;
import java.util.Set;
import java.util.Arrays;
import java.util.List;
import net.sf.picard.reference.ReferenceSequenceFile;
public class ConcordanceMetricsUnitTest extends BaseTest {
// todo -- coverage for several sites (3,4)
// todo -- coverage for calculation of table margins
// todo -- coverage for site concordance
// todo -- coverage for disjoint and mostly-disjoint sample sets
private static ReferenceSequenceFile seq;
private GenomeLocParser genomeLocParser;
@BeforeClass
public void init() throws FileNotFoundException {
// sequence
seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference));
genomeLocParser = new GenomeLocParser(seq);
}
public static String HEADER_BASE = "##fileformat=VCFv4.0\n" +
"##filedate=2010-06-21\n"+
"##reference=NCBI36\n"+
"##INFO=<ID=GC, Number=0, Type=Flag, Description=\"Overlap with Gencode CCDS coding sequence\">\n"+
"##INFO=<ID=DP, Number=1, Type=Integer, Description=\"Total number of reads in haplotype window\">\n"+
"##INFO=<ID=AF, Number=1, Type=Float, Description=\"Dindel estimated population allele frequency\">\n"+
"##FILTER=<ID=NoQCALL, Description=\"Variant called by Dindel but not confirmed by QCALL\">\n"+
"##FORMAT=<ID=GT, Number=1, Type=String, Description=\"Genotype\">\n"+
"##FORMAT=<ID=HQ, Number=2, Type=Integer, Description=\"Haplotype quality\">\n"+
"##FORMAT=<ID=GQ, Number=1, Type=Integer, Description=\"Genotype quality\">\n" +
"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t";
public static String TEST_1_HEADER = HEADER_BASE + "test1_sample1\ttest1_sample2\ttest1_sample3\n";
private Pair<VariantContext,VariantContext> getData1() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C));
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,reference_A));
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(alt_C,alt_C));
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3);
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
eval_1_builder.alleles(Arrays.asList(reference_A,alt_C));
truth_1_builder.alleles(Arrays.asList(reference_A,alt_C));
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
return testData;
}
@Test(enabled=true)
public void testSimpleComparison() {
Pair<VariantContext,VariantContext> data = getData1();
VariantContext eval = data.getFirst();
VariantContext truth = data.getSecond();
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
metrics.update(eval,truth);
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],1);
Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1],1);
}
private Pair<VariantContext,VariantContext> getData2() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Allele alt_T = Allele.create(BaseUtils.T);
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_T));
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C));
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(alt_C,alt_C));
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3);
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T));
truth_1_builder.alleles(Arrays.asList(reference_A,alt_C));
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
return testData;
}
@Test(enabled=true)
public void testMismatchingAllele() {
Pair<VariantContext,VariantContext> data = getData2();
VariantContext eval = data.getFirst();
VariantContext truth = data.getSecond();
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
metrics.update(eval,truth);
Assert.assertEquals(eval.getGenotype("test1_sample2").getType().ordinal(), 2);
Assert.assertEquals(truth.getGenotype("test1_sample2").getType().ordinal(),2);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],1);
Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1],1);
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()],1);
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()],0);
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()],0);
}
private Pair<VariantContext,VariantContext> getData3() {
Allele reference_ACT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C,BaseUtils.T},true);
Allele alt_AC = Allele.create(new byte[]{BaseUtils.A,BaseUtils.C});
Allele alt_A = Allele.create(BaseUtils.A);
Allele alt_ATT = Allele.create(new byte[]{BaseUtils.A,BaseUtils.T,BaseUtils.T});
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_ATT));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A));
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_ACT,alt_A));
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_ACT,alt_AC));
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(alt_A,alt_A));
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_ACT,alt_A));
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 5);
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
eval_1_builder.alleles(Arrays.asList(reference_ACT,alt_ATT,alt_A));
truth_1_builder.alleles(Arrays.asList(reference_ACT,alt_AC,alt_A));
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
return testData;
}
@Test(enabled=true)
public void testComplex() {
Pair<VariantContext,VariantContext> data = getData3();
VariantContext eval = data.getFirst();
VariantContext truth = data.getSecond();
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
metrics.update(eval,truth);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(),1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[3][3],1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[1][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][2],1);
Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[3][3],1);
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()],0);
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()],1);
Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()],0);
}
private Pair<VariantContext,VariantContext> getData4() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Allele alt_T = Allele.create(BaseUtils.T);
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL));
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C));
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL));
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3);
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T));
truth_1_builder.alleles(Arrays.asList(reference_A,alt_C));
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
return testData;
}
@Test(enabled=true)
public void testNoCalls() {
Pair<VariantContext,VariantContext> data = getData4();
VariantContext eval = data.getFirst();
VariantContext truth = data.getSecond();
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
metrics.update(eval,truth);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],1);
}
private Pair<VariantContext,VariantContext> getData5() {
Allele reference_A = Allele.create(BaseUtils.A,true);
Allele alt_C = Allele.create(BaseUtils.C);
Allele alt_T = Allele.create(BaseUtils.T);
Genotype sam_1_1_eval = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_eval = GenotypeBuilder.create("test1_sample2", new ArrayList<Allele>(0));
Genotype sam_1_3_eval = GenotypeBuilder.create("test1_sample3", Arrays.asList(reference_A,alt_C));
Genotype sam_1_1_truth = GenotypeBuilder.create("test1_sample1", Arrays.asList(reference_A,reference_A));
Genotype sam_1_2_truth = GenotypeBuilder.create("test1_sample2", Arrays.asList(reference_A,alt_C));
Genotype sam_1_3_truth = GenotypeBuilder.create("test1_sample3", new ArrayList<Allele>(0));
GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1", 3, 3);
VariantContextBuilder eval_1_builder = new VariantContextBuilder();
VariantContextBuilder truth_1_builder = new VariantContextBuilder();
eval_1_builder.alleles(Arrays.asList(reference_A,alt_C,alt_T));
truth_1_builder.alleles(Arrays.asList(reference_A,alt_C));
eval_1_builder.genotypes(Arrays.asList(sam_1_1_eval,sam_1_2_eval,sam_1_3_eval));
truth_1_builder.genotypes(Arrays.asList(sam_1_1_truth,sam_1_2_truth,sam_1_3_truth));
eval_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
truth_1_builder.loc(loc.getContig(),loc.getStart(),loc.getStop());
Pair<VariantContext,VariantContext> testData = new Pair<VariantContext, VariantContext>(eval_1_builder.make(),truth_1_builder.make());
return testData;
}
@Test(enabled=true)
public void testMissing() {
Pair<VariantContext,VariantContext> data = getData5();
VariantContext eval = data.getFirst();
VariantContext truth = data.getSecond();
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
metrics.update(eval,truth);
Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[4][2],1);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],0);
Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][4],1);
}
@Test(enabled=true)
public void testNRD_testNRS() {
Pair<VariantContext,VariantContext> data = getData3();
VariantContext eval = data.getFirst();
VariantContext truth = data.getSecond();
VCFCodec codec = new VCFCodec();
VCFHeader evalHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
VCFHeader compHeader = (VCFHeader)codec.readHeader(new AsciiLineReader(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER))));
ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader);
int[][] table = metrics.getOverallGenotypeConcordance().getTable();
// set up the table
table[0] = new int[] {30, 12, 7, 5, 6, 0};
table[1] = new int[] {10, 100, 5, 1, 7, 1};
table[2] = new int[] {5, 7, 150, 3, 3, 1};
table[3] = new int[] {3, 2, 6, 50, 1, 0};
table[4] = new int[] {10, 6, 3, 3, 2, 0};
table[5] = new int[] {12, 0, 34, 20, 10, 0};
double EXPEC_NRS = 0.8969957;
double EXPEC_NRD = 0.1071429;
Assert.assertEquals(EXPEC_NRS,metrics.getOverallNRS(),1e-7);
Assert.assertEquals(EXPEC_NRD,metrics.getOverallNRD(),1e-7);
}
}

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.variant.vcf.VCFConstants;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.variant.vcf.VCFStandardHeaderLines;
/**
* Keys and descriptions for the common chromosome count annotations
*/
public class ChromosomeCountConstants {
public static final String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
public static final VCFInfoHeaderLine[] descriptions = {
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY),
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY),
VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY) };
}

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.gatk.walkers.Reference;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.TreeReducible;
import org.broadinstitute.sting.gatk.walkers.Window;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts;
import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.variant.vcf.*;
@ -217,7 +217,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
if ( SET_KEY != null )
headerLines.add(new VCFInfoHeaderLine(SET_KEY, 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants"));
if ( !ASSUME_IDENTICAL_SAMPLES )
headerLines.addAll(Arrays.asList(ChromosomeCounts.descriptions));
headerLines.addAll(Arrays.asList(ChromosomeCountConstants.descriptions));
VCFHeader vcfHeader = new VCFHeader(headerLines, samples);
vcfHeader.setWriteCommandLine(!SUPPRESS_COMMAND_LINE_HEADER);
vcfWriter.writeHeader(vcfHeader);