For Chris: pull out the chromosome counting code into VCUtils so that other tools can make use of it. Transitioned SelectVariants over to use it.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4456 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-10-08 04:37:54 +00:00
parent 3838823262
commit acd238f3f2
3 changed files with 71 additions and 31 deletions

View File

@ -46,6 +46,7 @@ public class VariantContextUtils {
* @param negLog10PError qual
* @param filters filters: use null for unfiltered and empty set for passes filters
* @param attributes attributes
* @return VariantContext object
*/
public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes, double negLog10PError, Set<String> filters, Map<String, ?> attributes) {
return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes != null ? VariantContext.genotypeCollectionToMap(new TreeMap<String, Genotype>(), genotypes) : null, negLog10PError, filters, attributes);
@ -56,6 +57,7 @@ public class VariantContextUtils {
* @param name name
* @param loc location
* @param alleles alleles
* @return VariantContext object
*/
public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles) {
return new VariantContext (name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, VariantContext.NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
@ -67,6 +69,7 @@ public class VariantContextUtils {
* @param loc location
* @param alleles alleles
* @param genotypes genotypes
* @return VariantContext object
*/
public static VariantContext toVC(String name, GenomeLoc loc, Collection<Allele> alleles, Collection<Genotype> genotypes) {
return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null);
@ -76,12 +79,60 @@ public class VariantContextUtils {
* Copy constructor
*
* @param other the VariantContext to copy
* @return VariantContext object
*/
public static VariantContext toVC(VariantContext other) {
return new VariantContext(other.getName(), other.getChr(), other.getStart(), other.getEnd(), other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.getFilters(), other.getAttributes());
}
/**
/**
* Update the attributes of the attributes map given the VariantContext to reflect the proper chromosome-based VCF tags
*
* @param vc the VariantContext
* @param attributes the attributes map to populate; must not be null; may contain old values
* @param removeStaleValues should we remove stale values from the mapping?
*/
public static void calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
// if everyone is a no-call, remove the old attributes if requested
if ( vc.getChromosomeCount() == 0 && removeStaleValues ) {
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) )
attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) )
attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY);
if ( attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY) )
attributes.remove(VCFConstants.ALLELE_NUMBER_KEY);
return;
}
attributes.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getChromosomeCount());
// if there are alternate alleles, record the relevant tags
if ( vc.getAlternateAlleles().size() > 0 ) {
ArrayList<Double> alleleFreqs = new ArrayList<Double>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
for ( Allele allele : vc.getAlternateAlleles() ) {
alleleCounts.add(vc.getChromosomeCount(allele));
alleleFreqs.add((double)vc.getChromosomeCount(allele) / (double)vc.getChromosomeCount());
}
attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts);
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs);
}
// otherwise, remove them if present and requested
else if ( removeStaleValues ) {
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) )
attributes.remove(VCFConstants.ALLELE_COUNT_KEY);
if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) )
attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY);
}
// otherwise, set them to 0
else {
attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0);
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0);
}
}
/**
* A simple but common wrapper for matching VariantContext objects using JEXL expressions
*/
public static class JexlVCMatchExp {

View File

@ -25,13 +25,13 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broad.tribble.util.variantcontext.Allele;
import org.broad.tribble.util.variantcontext.VariantContext;
import org.broad.tribble.vcf.VCFHeaderLineType;
import org.broad.tribble.vcf.VCFInfoHeaderLine;
import org.broad.tribble.vcf.VCFConstants;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
@ -51,20 +51,7 @@ public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation
return null;
Map<String, Object> map = new HashMap<String, Object>();
map.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getChromosomeCount());
if ( vc.getAlternateAlleles().size() > 0 ) {
ArrayList<Double> alleleFreqs = new ArrayList<Double>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
for ( Allele allele : vc.getAlternateAlleles() ) {
alleleCounts.add(vc.getChromosomeCount(allele));
alleleFreqs.add((double)vc.getChromosomeCount(allele) / (double)vc.getChromosomeCount());
}
map.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts);
map.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs);
}
VariantContextUtils.calculateChromosomeCounts(vc, map, false);
return map;
}

View File

@ -243,33 +243,35 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
HashMap<String, Object> attributes = new HashMap<String, Object>(sub.getAttributes());
int alleleCount = 0;
int numberOfAlleles = 0;
VariantContextUtils.calculateChromosomeCounts(sub, attributes, false);
// because we may want to select against the chromosome count attributes,
// we need to convert them to literals instead of arrays
if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) && attributes.get(VCFConstants.ALLELE_COUNT_KEY) instanceof List ) {
List<Integer> counts = (List<Integer>)attributes.get(VCFConstants.ALLELE_COUNT_KEY);
if ( counts.size() == 1 )
attributes.put(VCFConstants.ALLELE_COUNT_KEY, counts.get(0));
}
if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) && attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY) instanceof List ) {
List<Double> freqs = (List<Double>)attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY);
if ( freqs.size() == 1 )
attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, freqs.get(0));
}
int depth = 0;
for (String sample : sub.getSampleNames()) {
Genotype g = sub.getGenotype(sample);
if (g.isNotFiltered() && g.isCalled()) {
numberOfAlleles += g.getPloidy();
if (g.isHet()) { alleleCount++; }
else if (g.isHomVar()) { alleleCount += 2; }
String dp = (String) g.getAttribute("DP");
String dp = (String) g.getAttribute(VCFConstants.DEPTH_KEY);
if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) {
depth += Integer.valueOf(dp);
}
}
}
attributes.put("AC", alleleCount);
attributes.put("AN", numberOfAlleles);
if (numberOfAlleles == 0) {
attributes.put("AF", 0.0);
} else {
attributes.put("AF", ((double) alleleCount) / ((double) numberOfAlleles));
}
attributes.put("DP", depth);
attributes.put(VCFConstants.DEPTH_KEY, depth);
sub = VariantContext.modifyAttributes(sub, attributes);