From acd238f3f291112e0e11507e966667224a09622c Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 8 Oct 2010 04:37:54 +0000 Subject: [PATCH] For Chris: pull out the chromosome counting code into VCUtils so that other tools can make use of it. Transitioned SelectVariants over to use it. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4456 348d0f76-0448-11de-a6fe-93d51630548a --- .../variantcontext/VariantContextUtils.java | 53 ++++++++++++++++++- .../walkers/annotator/ChromosomeCounts.java | 17 +----- .../walkers/variantutils/SelectVariants.java | 32 +++++------ 3 files changed, 71 insertions(+), 31 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index 9c983d128..9dd745935 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -46,6 +46,7 @@ public class VariantContextUtils { * @param negLog10PError qual * @param filters filters: use null for unfiltered and empty set for passes filters * @param attributes attributes + * @return VariantContext object */ public static VariantContext toVC(String name, GenomeLoc loc, Collection alleles, Collection genotypes, double negLog10PError, Set filters, Map attributes) { return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes != null ? VariantContext.genotypeCollectionToMap(new TreeMap(), genotypes) : null, negLog10PError, filters, attributes); @@ -56,6 +57,7 @@ public class VariantContextUtils { * @param name name * @param loc location * @param alleles alleles + * @return VariantContext object */ public static VariantContext toVC(String name, GenomeLoc loc, Collection alleles) { return new VariantContext (name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, VariantContext.NO_GENOTYPES, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); @@ -67,6 +69,7 @@ public class VariantContextUtils { * @param loc location * @param alleles alleles * @param genotypes genotypes + * @return VariantContext object */ public static VariantContext toVC(String name, GenomeLoc loc, Collection alleles, Collection genotypes) { return new VariantContext(name, loc.getContig(), loc.getStart(), loc.getStop(), alleles, genotypes, InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null); @@ -76,12 +79,60 @@ public class VariantContextUtils { * Copy constructor * * @param other the VariantContext to copy + * @return VariantContext object */ public static VariantContext toVC(VariantContext other) { return new VariantContext(other.getName(), other.getChr(), other.getStart(), other.getEnd(), other.getAlleles(), other.getGenotypes(), other.getNegLog10PError(), other.getFilters(), other.getAttributes()); } - /** + /** + * Update the attributes of the attributes map given the VariantContext to reflect the proper chromosome-based VCF tags + * + * @param vc the VariantContext + * @param attributes the attributes map to populate; must not be null; may contain old values + * @param removeStaleValues should we remove stale values from the mapping? + */ + public static void calculateChromosomeCounts(VariantContext vc, Map attributes, boolean removeStaleValues) { + // if everyone is a no-call, remove the old attributes if requested + if ( vc.getChromosomeCount() == 0 && removeStaleValues ) { + if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) + attributes.remove(VCFConstants.ALLELE_COUNT_KEY); + if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) + attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY); + if ( attributes.containsKey(VCFConstants.ALLELE_NUMBER_KEY) ) + attributes.remove(VCFConstants.ALLELE_NUMBER_KEY); + return; + } + + attributes.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getChromosomeCount()); + + // if there are alternate alleles, record the relevant tags + if ( vc.getAlternateAlleles().size() > 0 ) { + ArrayList alleleFreqs = new ArrayList(); + ArrayList alleleCounts = new ArrayList(); + for ( Allele allele : vc.getAlternateAlleles() ) { + alleleCounts.add(vc.getChromosomeCount(allele)); + alleleFreqs.add((double)vc.getChromosomeCount(allele) / (double)vc.getChromosomeCount()); + } + + attributes.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts); + attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs); + } + // otherwise, remove them if present and requested + else if ( removeStaleValues ) { + if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) ) + attributes.remove(VCFConstants.ALLELE_COUNT_KEY); + if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) + attributes.remove(VCFConstants.ALLELE_FREQUENCY_KEY); + } + // otherwise, set them to 0 + else { + attributes.put(VCFConstants.ALLELE_COUNT_KEY, 0); + attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, 0.0); + } + } + + /** * A simple but common wrapper for matching VariantContext objects using JEXL expressions */ public static class JexlVCMatchExp { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 7114e7dbe..ba3909244 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -25,13 +25,13 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.broad.tribble.util.variantcontext.Allele; import org.broad.tribble.util.variantcontext.VariantContext; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*; @@ -51,20 +51,7 @@ public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation return null; Map map = new HashMap(); - map.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getChromosomeCount()); - - if ( vc.getAlternateAlleles().size() > 0 ) { - ArrayList alleleFreqs = new ArrayList(); - ArrayList alleleCounts = new ArrayList(); - for ( Allele allele : vc.getAlternateAlleles() ) { - alleleCounts.add(vc.getChromosomeCount(allele)); - alleleFreqs.add((double)vc.getChromosomeCount(allele) / (double)vc.getChromosomeCount()); - } - - map.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts); - map.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs); - } - + VariantContextUtils.calculateChromosomeCounts(vc, map, false); return map; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index 26ec24fd6..12cb31322 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -243,33 +243,35 @@ public class SelectVariants extends RodWalker { HashMap attributes = new HashMap(sub.getAttributes()); - int alleleCount = 0; - int numberOfAlleles = 0; + VariantContextUtils.calculateChromosomeCounts(sub, attributes, false); + + // because we may want to select against the chromosome count attributes, + // we need to convert them to literals instead of arrays + if ( attributes.containsKey(VCFConstants.ALLELE_COUNT_KEY) && attributes.get(VCFConstants.ALLELE_COUNT_KEY) instanceof List ) { + List counts = (List)attributes.get(VCFConstants.ALLELE_COUNT_KEY); + if ( counts.size() == 1 ) + attributes.put(VCFConstants.ALLELE_COUNT_KEY, counts.get(0)); + } + if ( attributes.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) && attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY) instanceof List ) { + List freqs = (List)attributes.get(VCFConstants.ALLELE_FREQUENCY_KEY); + if ( freqs.size() == 1 ) + attributes.put(VCFConstants.ALLELE_FREQUENCY_KEY, freqs.get(0)); + } + int depth = 0; for (String sample : sub.getSampleNames()) { Genotype g = sub.getGenotype(sample); if (g.isNotFiltered() && g.isCalled()) { - numberOfAlleles += g.getPloidy(); - if (g.isHet()) { alleleCount++; } - else if (g.isHomVar()) { alleleCount += 2; } - - String dp = (String) g.getAttribute("DP"); + String dp = (String) g.getAttribute(VCFConstants.DEPTH_KEY); if (dp != null && ! dp.equals(VCFConstants.MISSING_DEPTH_v3) && ! dp.equals(VCFConstants.MISSING_VALUE_v4) ) { depth += Integer.valueOf(dp); } } } - attributes.put("AC", alleleCount); - attributes.put("AN", numberOfAlleles); - if (numberOfAlleles == 0) { - attributes.put("AF", 0.0); - } else { - attributes.put("AF", ((double) alleleCount) / ((double) numberOfAlleles)); - } - attributes.put("DP", depth); + attributes.put(VCFConstants.DEPTH_KEY, depth); sub = VariantContext.modifyAttributes(sub, attributes);