VariantContextBuilder

-- New approach to making VariantContexts modeled on StringBuilder
-- No more modify routines -- use VariantContextBuilder
-- Renamed isPolymorphic to isPolymorphicInSamples.   Same for mono
-- getChromosomeCount -> getCalledChrCount
-- Walkers changed to use new VariantContext.  Some deprecated new VariantContext calls remain
-- VCFCodec now uses optimized cached information to create GenotypesContext.
This commit is contained in:
Mark DePristo 2011-11-18 12:39:10 -05:00
parent 7490dbb6eb
commit f54afc19b4
23 changed files with 149 additions and 192 deletions

View File

@ -47,7 +47,7 @@ import java.util.Map;
public class SampleList extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( vc.isMonomorphic() || !vc.hasGenotypes() )
if ( vc.isMonomorphicInSamples() || !vc.hasGenotypes() )
return null;
StringBuffer samples = new StringBuffer();

View File

@ -260,7 +260,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
}
}
} else /* (mask != null && validate == null ) */ {
if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )) {
if ( ! mask.isSNP() && ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphicInSamples() )) {
logger.warn("Mask Variant Context on the following warning line is not a SNP. Currently we can only mask out SNPs. This probe will not be designed.");
logger.warn(String.format("%s:%d-%d\t%s\t%s",mask.getChr(),mask.getStart(),mask.getEnd(),mask.isSimpleInsertion() ? "INS" : "DEL", Utils.join(",",mask.getAlleles())));
sequenceInvalid = true;
@ -281,7 +281,7 @@ public class ValidationAmplicons extends RodWalker<Integer,Integer> {
sequence.append('N');
indelCounter--;
rawSequence.append(Character.toUpperCase((char)ref.getBase()));
} else if ( ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphic() )){
} else if ( ! mask.isFiltered() && ( ! filterMonomorphic || ! mask.isMonomorphicInSamples() )){
logger.debug("SNP in mask found at " + ref.getLocus().toString());
if ( lowerCaseSNPs ) {

View File

@ -72,7 +72,7 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean evalIsGood = eval != null && eval.isPolymorphic();
boolean evalIsGood = eval != null && eval.isPolymorphicInSamples();
boolean compIsGood = comp != null && comp.isNotFiltered();
if (evalIsGood) nEvalVariants++; // count the number of eval events

View File

@ -103,7 +103,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
// added in a proxy check for monomorphic status here.
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
if ( vc1.isMonomorphic() ) {
if ( vc1.isMonomorphicInSamples() ) {
nRefLoci++;
} else {
switch (vc1.getType()) {

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -103,7 +102,7 @@ public class G1KPhaseITable extends VariantEvaluator {
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( eval == null || eval.isMonomorphic() ) return null;
if ( eval == null || eval.isMonomorphicInSamples() ) return null;
switch (eval.getType()) {
case SNP:

View File

@ -91,7 +91,7 @@ public class IndelLengthHistogram extends VariantEvaluator {
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( vc1.isIndel() && vc1.isPolymorphic() ) {
if ( vc1.isIndel() && vc1.isPolymorphicInSamples() ) {
if ( ! vc1.isBiallelic() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");

View File

@ -8,11 +8,9 @@ import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
import org.broadinstitute.sting.gatk.walkers.varianteval.util.TableType;
import org.broadinstitute.sting.utils.IndelUtils;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.HashMap;
/*
* Copyright (c) 2010 The Broad Institute
@ -270,7 +268,7 @@ public class IndelStatistics extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (eval != null && eval.isPolymorphic()) {
if (eval != null && eval.isPolymorphicInSamples()) {
if ( indelStats == null ) {
indelStats = new IndelStats(eval);
}

View File

@ -118,7 +118,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
int ac = -1;
if ( eval.hasGenotypes() )
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
ac = eval.getCalledChrCount(eval.getAlternateAllele(0));
else if ( eval.hasAttribute("AC") ) {
ac = eval.getAttributeAsInt("AC", -1);
}
@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
}
}
if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() && metrics != null ) {
metrics.incrValue(eval);
}
}

View File

@ -37,7 +37,7 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
}
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphicInSamples()) {
return null; //no interesting sites
}

View File

@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
}
public void updateTiTv(VariantContext vc, boolean updateStandard) {
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphicInSamples()) {
if (VariantContextUtils.isTransition(vc)) {
if (updateStandard) nTiInComp++;
else nTi++;

View File

@ -11,7 +11,6 @@ import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Collection;
import java.util.Set;
/**
* The Broad Institute
@ -118,8 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
public SiteStatus calcSiteStatus(VariantContext vc) {
if ( vc == null ) return SiteStatus.NO_CALL;
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
if ( vc.isMonomorphic() ) return SiteStatus.MONO;
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
if ( vc.isMonomorphicInSamples() ) return SiteStatus.MONO;
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphicInSamples was false and there are genotypes
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
int ac = 0;

View File

@ -232,14 +232,14 @@ public class VariantQualityScore extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
final String interesting = null;
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
if( titvStats == null ) { titvStats = new TiTvStats(); }
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
int alternateAlleleCount = 0;
for (final Allele a : eval.getAlternateAlleles()) {
alternateAlleleCount += eval.getChromosomeCount(a);
alternateAlleleCount += eval.getCalledChrCount(a);
}
alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
}

View File

@ -47,7 +47,7 @@ public class AlleleCount extends VariantStratifier {
AC = eval.getAttributeAsInt("AC", 0);
} else if ( eval.isVariant() ) {
for (Allele allele : eval.getAlternateAlleles())
AC = Math.max(AC, eval.getChromosomeCount(allele));
AC = Math.max(AC, eval.getCalledChrCount(allele));
} else
// by default, the site is considered monomorphic
AC = 0;

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -38,7 +37,6 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
/**
@ -284,7 +282,7 @@ public class VariantDataManager {
private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) {
return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() &&
((evalVC.isSNP() && trainVC.isSNP()) || ((evalVC.isIndel()||evalVC.isMixed()) && (trainVC.isIndel()||trainVC.isMixed()))) &&
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphic());
(TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples());
}
public void writeOutRecalibrationTable( final PrintStream RECAL_FILE ) {

View File

@ -488,7 +488,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (outMVFile != null)
outMVFileStream.format("MV@%s:%d. REF=%s, ALT=%s, AC=%d, momID=%s, dadID=%s, childID=%s, momG=%s, momGL=%s, dadG=%s, dadGL=%s, " +
"childG=%s childGL=%s\n",vc.getChr(), vc.getStart(),
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getChromosomeCount(vc.getAlternateAllele(0)),
vc.getReference().getDisplayString(), vc.getAlternateAllele(0).getDisplayString(), vc.getCalledChrCount(vc.getAlternateAllele(0)),
mv.getSampleMom(), mv.getSampleDad(), mv.getSampleChild(),
vc.getGenotype(mv.getSampleMom()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
vc.getGenotype(mv.getSampleDad()).toBriefString(), vc.getGenotype(mv.getSampleMom()).getLikelihoods().getAsString(),
@ -520,7 +520,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
continue;
VariantContext sub = subsetRecord(vc, samples);
if ( (sub.isPolymorphic() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
if ( (sub.isPolymorphicInSamples() || !EXCLUDE_NON_VARIANTS) && (!sub.isFiltered() || !EXCLUDE_FILTERED) ) {
for ( VariantContextUtils.JexlVCMatchExp jexl : jexls ) {
if ( !VariantContextUtils.match(sub, jexl) ) {
return 0;
@ -677,11 +677,11 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
if (KEEP_ORIGINAL_CHR_COUNTS) {
if ( sub.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) )
builder.attribute("AC_Orig",sub.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
builder.attribute("AC_Orig", sub.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
if ( sub.hasAttribute(VCFConstants.ALLELE_FREQUENCY_KEY) )
builder.attribute("AF_Orig",sub.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
builder.attribute("AF_Orig", sub.getAttribute(VCFConstants.ALLELE_FREQUENCY_KEY));
if ( sub.hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) )
builder.attribute("AN_Orig",sub.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
builder.attribute("AN_Orig", sub.getAttribute(VCFConstants.ALLELE_NUMBER_KEY));
}
Map<String, Object> attributes = new HashMap<String, Object>(builder.make().getAttributes());

View File

@ -233,17 +233,17 @@ public class VariantValidationAssessor extends RodWalker<VariantContext,Integer>
numRecords++;
// add the info fields
builder.attribute("NoCallPct", String.format("%.1f", 100.0*noCallProp));
builder.attribute("HomRefPct", String.format("%.1f", 100.0*homRefProp));
builder.attribute("HomVarPct", String.format("%.1f", 100.0*homVarProp));
builder.attribute("HetPct", String.format("%.1f", 100.0*hetProp));
builder.attribute("NoCallPct", String.format("%.1f", 100.0 * noCallProp));
builder.attribute("HomRefPct", String.format("%.1f", 100.0 * homRefProp));
builder.attribute("HomVarPct", String.format("%.1f", 100.0 * homVarProp));
builder.attribute("HetPct", String.format("%.1f", 100.0 * hetProp));
builder.attribute("HW", String.format("%.2f", hwScore));
Collection<Allele> altAlleles = vContext.getAlternateAlleles();
int altAlleleCount = altAlleles.size() == 0 ? 0 : vContext.getChromosomeCount(altAlleles.iterator().next());
int altAlleleCount = altAlleles.size() == 0 ? 0 : vContext.getCalledChrCount(altAlleles.iterator().next());
if ( !isViolation && altAlleleCount > 0 )
numTrueVariants++;
builder.attribute(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount));
builder.attribute(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount()));
builder.attribute(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getCalledChrCount()));
return builder.make();
}

View File

@ -98,7 +98,7 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
headerStrings.add(line);
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
Set<String> auxTags = new LinkedHashSet<String>();
Set<String> sampleNames = new LinkedHashSet<String>();
// iterate over all the passed in strings
for ( String str : headerStrings ) {
if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
@ -126,9 +126,9 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
}
while ( arrayIndex < strings.length )
auxTags.add(strings[arrayIndex++]);
sampleNames.add(strings[arrayIndex++]);
if ( sawFormatTag && auxTags.size() == 0 )
if ( sawFormatTag && sampleNames.size() == 0 )
throw new UserException.MalformedVCFHeader("The FORMAT field was provided but there is no genotype/sample data");
} else {
@ -152,7 +152,8 @@ public abstract class AbstractVCFCodec implements FeatureCodec, NameAwareCodec,
}
}
header = new VCFHeader(metaData, auxTags);
header = new VCFHeader(metaData, sampleNames);
header.buildVCFReaderMaps(new ArrayList<String>(sampleNames));
return header;
}

View File

@ -151,7 +151,7 @@ public class VCFCodec extends AbstractVCFCodec {
int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
GenotypesContext genotypes = GenotypesContext.create(nParts);
ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
// get the format keys
int nGTKeys = ParsingUtils.split(genotypeParts[0], genotypeKeyArray, VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR);
@ -215,7 +215,7 @@ public class VCFCodec extends AbstractVCFCodec {
}
}
return genotypes;
return GenotypesContext.create(genotypes, header.sampleNameToOffset, header.sampleNamesInOrder);
}
@Override

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.codecs.vcf;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.*;
@ -38,6 +39,10 @@ public class VCFHeader {
// were the input samples sorted originally (or are we sorting them)?
private boolean samplesWereAlreadySorted = true;
// cache for efficient conversion of VCF -> VariantContext
protected ArrayList<String> sampleNamesInOrder = null;
protected HashMap<String, Integer> sampleNameToOffset = null;
/**
* create a VCF header, given a list of meta data and auxillary tags
@ -69,6 +74,27 @@ public class VCFHeader {
samplesWereAlreadySorted = ParsingUtils.isSorted(genotypeSampleNames);
}
/**
* Tell this VCF header to use pre-calculated sample name ordering and the
* sample name -> offset map. This assumes that all VariantContext created
* using this header (i.e., read by the VCFCodec) will have genotypes
* occurring in the same order
*
*/
protected void buildVCFReaderMaps(List<String> genotypeSampleNamesInAppearenceOrder) {
sampleNamesInOrder = new ArrayList<String>(genotypeSampleNamesInAppearenceOrder.size());
sampleNameToOffset = new HashMap<String, Integer>(genotypeSampleNamesInAppearenceOrder.size());
int i = 0;
for ( final String name : genotypeSampleNamesInAppearenceOrder ) {
sampleNamesInOrder.add(name);
sampleNameToOffset.put(name, i++);
}
Collections.sort(sampleNamesInOrder);
}
/**
* Adds a header line to the header metadata.
*

View File

@ -85,6 +85,12 @@ public class GenotypesContext implements List<Genotype> {
return new GenotypesContext(nGenotypes, false);
}
public static final GenotypesContext create(final ArrayList<Genotype> genotypes,
final Map<String, Integer> sampleNameToOffset,
final List<String> sampleNamesInOrder) {
return new GenotypesContext(genotypes, sampleNameToOffset, sampleNamesInOrder, false);
}
public static final GenotypesContext create(final ArrayList<Genotype> genotypes) {
return genotypes == null ? NO_GENOTYPES : new GenotypesContext(genotypes, false);
}
@ -101,6 +107,8 @@ public class GenotypesContext implements List<Genotype> {
return toCopy == null ? NO_GENOTYPES : create(new ArrayList<Genotype>(toCopy));
}
// public static final GenotypeMap create(final Collection<Genotype> genotypes) {
// if ( genotypes == null )
// return null; // todo -- really should return an empty map

View File

@ -131,17 +131,17 @@ import java.util.*;
*
* <pre>
* vc.hasGenotypes()
* vc.isMonomorphic()
* vc.isPolymorphic()
* vc.isMonomorphicInSamples()
* vc.isPolymorphicInSamples()
* vc.getSamples().size()
*
* vc.getGenotypes()
* vc.getGenotypes().get("g1")
* vc.hasGenotype("g1")
*
* vc.getChromosomeCount()
* vc.getChromosomeCount(Aref)
* vc.getChromosomeCount(T)
* vc.getCalledChrCount()
* vc.getCalledChrCount(Aref)
* vc.getCalledChrCount(T)
* </pre>
*
* === NO_CALL alleles ===
@ -374,72 +374,17 @@ public class VariantContext implements Feature { // to enable tribble intergrati
//
// ---------------------------------------------------------------------------------------------------------
// /**
// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotype
// * genotype and alleles in genotype. This is the right way to test if a single genotype is actually
// * variant or not.
// *
// * @param genotype genotype
// * @return vc subcontext
// * @deprecated replaced by {@link #subContextFromSample(String)}
// */
// public VariantContext subContextFromGenotypes(Genotype genotype) {
// return subContextFromGenotypes(Arrays.asList(genotype));
// }
//
//
// /**
// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
// * genotypes and alleles in these genotypes. This is the right way to test if a single genotype is actually
// * variant or not.
// *
// * @param genotypes genotypes
// * @return vc subcontext
// * @deprecated replaced by {@link #subContextFromSamples(java.util.Collection)}
// */
// public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes) {
// return subContextFromGenotypes(genotypes, allelesOfGenotypes(genotypes)) ;
// }
//
// /**
// * Returns a context identical to this (i.e., filter, qual are all the same) but containing only the Genotypes
// * genotypes. Also, the resulting variant context will contain the alleles provided, not only those found in genotypes
// *
// * @param genotypes genotypes
// * @param alleles the set of allele segregating alleles at this site. Must include those in genotypes, but may be more
// * @return vc subcontext
// * @deprecated replaced by {@link #subContextFromSamples(java.util.Collection, java.util.Collection)}
// */
// @Deprecated
// public VariantContext subContextFromGenotypes(Collection<Genotype> genotypes, Collection<Allele> alleles) {
// return new VariantContext(getSource(), contig, start, stop, alleles,
// GenotypeCollection.create(genotypes),
// getNegLog10PError(),
// filtersWereApplied() ? getFilters() : null,
// getAttributes(),
// getReferenceBaseForIndel());
// }
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
loadGenotypes();
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
return new VariantContext(getSource(), getID(), contig, start, stop, alleles,
newGenotypes,
getNegLog10PError(),
filtersWereApplied() ? getFilters() : null,
getAttributes(),
getReferenceBaseForIndel());
VariantContextBuilder builder = new VariantContextBuilder(this);
return builder.genotypes(genotypes.subsetToSamples(sampleNames)).make();
}
public VariantContext subContextFromSamples(Set<String> sampleNames) {
loadGenotypes();
VariantContextBuilder builder = new VariantContextBuilder(this);
GenotypesContext newGenotypes = genotypes.subsetToSamples(sampleNames);
return new VariantContext(getSource(), getID(), contig, start, stop, allelesOfGenotypes(newGenotypes),
newGenotypes,
getNegLog10PError(),
filtersWereApplied() ? getFilters() : null,
getAttributes(),
getReferenceBaseForIndel());
return builder.genotypes(newGenotypes).alleles(allelesOfGenotypes(newGenotypes)).make();
}
public VariantContext subContextFromSample(String sampleName) {
@ -451,12 +396,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param genotypes genotypes
* @return allele set
*/
private Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
Set<Allele> alleles = new HashSet<Allele>();
private final Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
final Set<Allele> alleles = new HashSet<Allele>();
boolean addedref = false;
for ( Genotype g : genotypes ) {
for ( Allele a : g.getAlleles() ) {
for ( final Genotype g : genotypes ) {
for ( final Allele a : g.getAlleles() ) {
addedref = addedref || a.isReference();
if ( a.isCalled() )
alleles.add(a);
@ -938,7 +883,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
*
* @return chromosome count
*/
public int getChromosomeCount() {
public int getCalledChrCount() {
int n = 0;
for ( final Genotype g : getGenotypes() ) {
@ -955,7 +900,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @param a allele
* @return chromosome count
*/
public int getChromosomeCount(Allele a) {
public int getCalledChrCount(Allele a) {
int n = 0;
for ( final Genotype g : getGenotypes() ) {
@ -971,9 +916,9 @@ public class VariantContext implements Feature { // to enable tribble intergrati
*
* @return true if it's monomorphic
*/
public boolean isMonomorphic() {
public boolean isMonomorphicInSamples() {
if ( monomorphic == null )
monomorphic = ! isVariant() || (hasGenotypes() && getChromosomeCount(getReference()) == getChromosomeCount());
monomorphic = ! isVariant() || (hasGenotypes() && getCalledChrCount(getReference()) == getCalledChrCount());
return monomorphic;
}
@ -983,8 +928,8 @@ public class VariantContext implements Feature { // to enable tribble intergrati
*
* @return true if it's polymorphic
*/
public boolean isPolymorphic() {
return ! isMonomorphic();
public boolean isPolymorphicInSamples() {
return ! isMonomorphicInSamples();
}
private void calculateGenotypeCounts() {
@ -1119,19 +1064,28 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
public void validateChromosomeCounts() {
Map<String, Object> observedAttrs = calculateChromosomeCounts();
// AN
if ( hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
int reportedAN = Integer.valueOf(getAttribute(VCFConstants.ALLELE_NUMBER_KEY).toString());
int observedAN = (Integer)observedAttrs.get(VCFConstants.ALLELE_NUMBER_KEY);
int observedAN = getCalledChrCount();
if ( reportedAN != observedAN )
throw new TribbleException.InternalCodecException(String.format("the Allele Number (AN) tag is incorrect for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedAN, observedAN));
}
// AC
if ( hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
List<Integer> observedACs = (List<Integer>)observedAttrs.get(VCFConstants.ALLELE_COUNT_KEY);
ArrayList<Integer> observedACs = new ArrayList<Integer>();
// if there are alternate alleles, record the relevant tags
if ( getAlternateAlleles().size() > 0 ) {
for ( Allele allele : getAlternateAlleles() ) {
observedACs.add(getCalledChrCount(allele));
}
}
else { // otherwise, set them to 0
observedACs.add(0);
}
if ( getAttribute(VCFConstants.ALLELE_COUNT_KEY) instanceof List ) {
Collections.sort(observedACs);
List reportedACs = (List)getAttribute(VCFConstants.ALLELE_COUNT_KEY);
@ -1152,31 +1106,6 @@ public class VariantContext implements Feature { // to enable tribble intergrati
}
}
private Map<String, Object> calculateChromosomeCounts() {
Map<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VCFConstants.ALLELE_NUMBER_KEY, getChromosomeCount());
ArrayList<Double> alleleFreqs = new ArrayList<Double>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
// if there are alternate alleles, record the relevant tags
if ( getAlternateAlleles().size() > 0 ) {
for ( Allele allele : getAlternateAlleles() ) {
alleleCounts.add(getChromosomeCount(allele));
alleleFreqs.add((double)getChromosomeCount(allele) / (double)getChromosomeCount());
}
}
// otherwise, set them to 0
else {
alleleCounts.add(0);
alleleFreqs.add(0.0);
}
attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts);
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs);
return attributes;
}
// ---------------------------------------------------------------------------------------------------------
//
// validation: the normal validation routines are called automatically upon creation of the VC
@ -1399,7 +1328,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
Allele best = null;
int maxAC1 = 0;
for (Allele a:this.getAlternateAlleles()) {
int ac = this.getChromosomeCount(a);
int ac = this.getCalledChrCount(a);
if (ac >=maxAC1) {
maxAC1 = ac;
best = a;

View File

@ -63,7 +63,7 @@ public class VariantContextUtils {
*/
public static void calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
// if everyone is a no-call, remove the old attributes if requested
if ( vc.getChromosomeCount() == 0 && removeStaleValues ) {
if ( vc.getCalledChrCount() == 0 && removeStaleValues ) {
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) )
attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) )
@ -74,15 +74,15 @@ public class VariantContextUtils {
}
if ( vc.hasGenotypes() ) {
attributes.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getChromosomeCount());
attributes.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getCalledChrCount());
// if there are alternate alleles, record the relevant tags
if ( vc.getAlternateAlleles().size() > 0 ) {
ArrayList<String> alleleFreqs = new ArrayList<String>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
double totalChromosomes = (double)vc.getChromosomeCount();
double totalChromosomes = (double)vc.getCalledChrCount();
for ( Allele allele : vc.getAlternateAlleles() ) {
int altChromosomes = vc.getChromosomeCount(allele);
int altChromosomes = vc.getCalledChrCount(allele);
alleleCounts.add(altChromosomes);
String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes));
alleleFreqs.add(freq);
@ -320,7 +320,7 @@ public class VariantContextUtils {
}
public static double computeHardyWeinbergPvalue(VariantContext vc) {
if ( vc.getChromosomeCount() == 0 )
if ( vc.getCalledChrCount() == 0 )
return 0.0;
return HardyWeinbergCalculation.hwCalculate(vc.getHomRefCount(), vc.getHetCount(), vc.getHomVarCount());
}

View File

@ -12,7 +12,6 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import java.lang.reflect.Array;
import java.util.*;
@ -260,12 +259,12 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertTrue(vc.isSNP());
Assert.assertEquals(vc.getNAlleles(), 2);
Assert.assertTrue(vc.hasGenotypes());
Assert.assertFalse(vc.isMonomorphic());
Assert.assertTrue(vc.isPolymorphic());
Assert.assertFalse(vc.isMonomorphicInSamples());
Assert.assertTrue(vc.isPolymorphicInSamples());
Assert.assertEquals(vc.getGenotype("foo"), g);
Assert.assertEquals(vc.getChromosomeCount(), 1); // we only have 1 called chromosomes, we exclude the NO_CALL one isn't called
Assert.assertEquals(vc.getChromosomeCount(Aref), 0);
Assert.assertEquals(vc.getChromosomeCount(C), 1);
Assert.assertEquals(vc.getCalledChrCount(), 1); // we only have 1 called chromosomes, we exclude the NO_CALL one isn't called
Assert.assertEquals(vc.getCalledChrCount(Aref), 0);
Assert.assertEquals(vc.getCalledChrCount(C), 1);
Assert.assertFalse(vc.getGenotype("foo").isHet());
Assert.assertFalse(vc.getGenotype("foo").isHom());
Assert.assertFalse(vc.getGenotype("foo").isNoCall());
@ -332,8 +331,8 @@ public class VariantContextUnitTest extends BaseTest {
.genotypes(g1, g2, g3).make();
Assert.assertTrue(vc.hasGenotypes());
Assert.assertFalse(vc.isMonomorphic());
Assert.assertTrue(vc.isPolymorphic());
Assert.assertFalse(vc.isMonomorphicInSamples());
Assert.assertTrue(vc.isPolymorphicInSamples());
Assert.assertEquals(vc.getSampleNames().size(), 3);
Assert.assertEquals(vc.getGenotypes().size(), 3);
@ -352,9 +351,9 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertFalse(vc.hasGenotype("at"));
Assert.assertFalse(vc.hasGenotype("tt"));
Assert.assertEquals(vc.getChromosomeCount(), 6);
Assert.assertEquals(vc.getChromosomeCount(Aref), 3);
Assert.assertEquals(vc.getChromosomeCount(T), 3);
Assert.assertEquals(vc.getCalledChrCount(), 6);
Assert.assertEquals(vc.getCalledChrCount(Aref), 3);
Assert.assertEquals(vc.getCalledChrCount(T), 3);
}
@Test
@ -372,17 +371,17 @@ public class VariantContextUnitTest extends BaseTest {
.genotypes(g1, g2, g3, g4, g5, g6).make();
Assert.assertTrue(vc.hasGenotypes());
Assert.assertFalse(vc.isMonomorphic());
Assert.assertTrue(vc.isPolymorphic());
Assert.assertFalse(vc.isMonomorphicInSamples());
Assert.assertTrue(vc.isPolymorphicInSamples());
Assert.assertEquals(vc.getGenotypes().size(), 6);
Assert.assertEquals(3, vc.getGenotypes(Arrays.asList("AA", "Td", "dd")).size());
Assert.assertEquals(10, vc.getChromosomeCount());
Assert.assertEquals(3, vc.getChromosomeCount(Aref));
Assert.assertEquals(4, vc.getChromosomeCount(T));
Assert.assertEquals(3, vc.getChromosomeCount(del));
Assert.assertEquals(2, vc.getChromosomeCount(Allele.NO_CALL));
Assert.assertEquals(10, vc.getCalledChrCount());
Assert.assertEquals(3, vc.getCalledChrCount(Aref));
Assert.assertEquals(4, vc.getCalledChrCount(T));
Assert.assertEquals(3, vc.getCalledChrCount(del));
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
}
@Test
@ -398,14 +397,14 @@ public class VariantContextUnitTest extends BaseTest {
.genotypes(g1, g2, g3).make();
Assert.assertTrue(vc.hasGenotypes());
Assert.assertTrue(vc.isMonomorphic());
Assert.assertFalse(vc.isPolymorphic());
Assert.assertTrue(vc.isMonomorphicInSamples());
Assert.assertFalse(vc.isPolymorphicInSamples());
Assert.assertEquals(vc.getGenotypes().size(), 3);
Assert.assertEquals(4, vc.getChromosomeCount());
Assert.assertEquals(4, vc.getChromosomeCount(Aref));
Assert.assertEquals(0, vc.getChromosomeCount(T));
Assert.assertEquals(2, vc.getChromosomeCount(Allele.NO_CALL));
Assert.assertEquals(4, vc.getCalledChrCount());
Assert.assertEquals(4, vc.getCalledChrCount(Aref));
Assert.assertEquals(0, vc.getCalledChrCount(T));
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
}
}
@ -452,12 +451,12 @@ public class VariantContextUnitTest extends BaseTest {
VariantContext vc14 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g1.getSampleName(), g4.getSampleName())));
VariantContext vc5 = vc.subContextFromSamples(new HashSet<String>(Arrays.asList(g5.getSampleName())));
Assert.assertTrue(vc12.isPolymorphic());
Assert.assertTrue(vc23.isPolymorphic());
Assert.assertTrue(vc1.isMonomorphic());
Assert.assertTrue(vc4.isMonomorphic());
Assert.assertTrue(vc14.isMonomorphic());
Assert.assertTrue(vc5.isPolymorphic());
Assert.assertTrue(vc12.isPolymorphicInSamples());
Assert.assertTrue(vc23.isPolymorphicInSamples());
Assert.assertTrue(vc1.isMonomorphicInSamples());
Assert.assertTrue(vc4.isMonomorphicInSamples());
Assert.assertTrue(vc14.isMonomorphicInSamples());
Assert.assertTrue(vc5.isPolymorphicInSamples());
Assert.assertTrue(vc12.isSNP());
Assert.assertTrue(vc12.isVariant());
@ -484,12 +483,12 @@ public class VariantContextUnitTest extends BaseTest {
Assert.assertTrue(vc5.isVariant());
Assert.assertTrue(vc5.isBiallelic());
Assert.assertEquals(3, vc12.getChromosomeCount(Aref));
Assert.assertEquals(1, vc23.getChromosomeCount(Aref));
Assert.assertEquals(2, vc1.getChromosomeCount(Aref));
Assert.assertEquals(0, vc4.getChromosomeCount(Aref));
Assert.assertEquals(2, vc14.getChromosomeCount(Aref));
Assert.assertEquals(0, vc5.getChromosomeCount(Aref));
Assert.assertEquals(3, vc12.getCalledChrCount(Aref));
Assert.assertEquals(1, vc23.getCalledChrCount(Aref));
Assert.assertEquals(2, vc1.getCalledChrCount(Aref));
Assert.assertEquals(0, vc4.getCalledChrCount(Aref));
Assert.assertEquals(2, vc14.getCalledChrCount(Aref));
Assert.assertEquals(0, vc5.getCalledChrCount(Aref));
}
public void testGetGenotypeMethods() {
@ -827,14 +826,14 @@ public class VariantContextUnitTest extends BaseTest {
VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, T)).genotypes(gc).make();
Assert.assertEquals(vc.getNSamples(), nSamples);
if ( nSamples > 0 ) {
Assert.assertEquals(vc.isPolymorphic(), nT > 0);
Assert.assertEquals(vc.isMonomorphic(), nT == 0);
Assert.assertEquals(vc.isPolymorphicInSamples(), nT > 0);
Assert.assertEquals(vc.isMonomorphicInSamples(), nT == 0);
}
Assert.assertEquals(vc.getChromosomeCount(), nA + nT);
Assert.assertEquals(vc.getCalledChrCount(), nA + nT);
Assert.assertEquals(vc.getChromosomeCount(Allele.NO_CALL), nNoCallAlleles);
Assert.assertEquals(vc.getChromosomeCount(Aref), nA);
Assert.assertEquals(vc.getChromosomeCount(T), nT);
Assert.assertEquals(vc.getCalledChrCount(Allele.NO_CALL), nNoCallAlleles);
Assert.assertEquals(vc.getCalledChrCount(Aref), nA);
Assert.assertEquals(vc.getCalledChrCount(T), nT);
Assert.assertEquals(vc.getNoCallCount(), nNoCall);
Assert.assertEquals(vc.getHomRefCount(), nHomRef);