diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index b3a8dbebd..057dba1f7 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -38,13 +39,12 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import org.broadinstitute.sting.utils.variantcontext.Allele; +import org.broadinstitute.sting.gatk.walkers.Walker; +import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils; -import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; /** @@ -59,11 +59,18 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; + private Set founderIds = new HashSet(); + public Map annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( ! vc.hasGenotypes() ) return null; - return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap(), true); + return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap(), true,founderIds); + } + + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ){ + //If families were given, get the founders ids + founderIds = ((Walker)walker).getSampleDB().getFounderIds(); } public Map annotate(Map>> stratifiedContexts, VariantContext vc) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 39b351e50..0a3d5415e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -805,11 +805,22 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return chromosome count */ public int getCalledChrCount() { - int n = 0; + return getCalledChrCount(new HashSet(0)); + } - for ( final Genotype g : getGenotypes() ) { - for ( final Allele a : g.getAlleles() ) - n += a.isNoCall() ? 0 : 1; + /** + * Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS) + * + * @param sampleIds IDs of samples to take into account. If empty then all samples are included. + * @return chromosome count + */ + public int getCalledChrCount(Set sampleIds) { + int n = 0; + GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds); + + for ( final Genotype g : genotypes) { + for ( final Allele a : g.getAlleles() ) + n += a.isNoCall() ? 0 : 1; } return n; @@ -822,10 +833,22 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @return chromosome count */ public int getCalledChrCount(Allele a) { - int n = 0; + return getCalledChrCount(a,new HashSet(0)); + } - for ( final Genotype g : getGenotypes() ) { - n += g.getAlleles(a).size(); + /** + * Returns the number of chromosomes carrying allele A in the genotypes + * + * @param a allele + * @param sampleIds - IDs of samples to take into account. If empty then all samples are included. + * @return chromosome count + */ + public int getCalledChrCount(Allele a, Set sampleIds) { + int n = 0; + GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds); + + for ( final Genotype g : genotypes ) { + n += g.getAlleles(a).size(); } return n; diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index de5deef57..e6da735fe 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -64,6 +64,21 @@ public class VariantContextUtils { * @return the attributes map provided as input, returned for programming convenience */ public static Map calculateChromosomeCounts(VariantContext vc, Map attributes, boolean removeStaleValues) { + return calculateChromosomeCounts(vc, attributes, removeStaleValues, new HashSet(0)); + } + + /** + * Update the attributes of the attributes map given the VariantContext to reflect the + * proper chromosome-based VCF tags + * + * @param vc the VariantContext + * @param attributes the attributes map to populate; must not be null; may contain old values + * @param removeStaleValues should we remove stale values from the mapping? + * @param founderIds - Set of founders Ids to take into account. AF and FC will be calculated over the founders. + * If empty or null, counts are generated for all samples as unrelated individuals + * @return the attributes map provided as input, returned for programming convenience + */ + public static Map calculateChromosomeCounts(VariantContext vc, Map attributes, boolean removeStaleValues, final Set founderIds) { final int AN = vc.getCalledChrCount(); // if everyone is a no-call, remove the old attributes if requested @@ -82,16 +97,20 @@ public class VariantContextUtils { // if there are alternate alleles, record the relevant tags if ( vc.getAlternateAlleles().size() > 0 ) { - final ArrayList alleleFreqs = new ArrayList(); - final ArrayList alleleCounts = new ArrayList(); + ArrayList alleleFreqs = new ArrayList(); + ArrayList alleleCounts = new ArrayList(); + ArrayList foundersAlleleCounts = new ArrayList(); + double totalFoundersChromosomes = (double)vc.getCalledChrCount(founderIds); + int foundersAltChromosomes; for ( Allele allele : vc.getAlternateAlleles() ) { - int altChromosomes = vc.getCalledChrCount(allele); - alleleCounts.add(altChromosomes); + foundersAltChromosomes = vc.getCalledChrCount(allele,founderIds); + alleleCounts.add(vc.getCalledChrCount(allele)); + foundersAlleleCounts.add(foundersAltChromosomes); if ( AN == 0 ) { alleleFreqs.add("0.0"); } else { // todo -- this is a performance problem - final String freq = String.format(makePrecisionFormatStringFromDenominatorValue((double)AN), ((double)altChromosomes / (double)AN)); + final String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes)); alleleFreqs.add(freq); } } @@ -116,9 +135,22 @@ public class VariantContextUtils { * @param removeStaleValues should we remove stale values from the mapping? */ public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) { - final VariantContext vc = builder.make(); - final Map attrs = calculateChromosomeCounts(vc, new HashMap(vc.getAttributes()), removeStaleValues); - builder.attributes(attrs); + VariantContext vc = builder.make(); + builder.attributes(calculateChromosomeCounts(vc, new HashMap(vc.getAttributes()), removeStaleValues, new HashSet(0))); + } + + /** + * Update the attributes of the attributes map in the VariantContextBuilder to reflect the proper + * chromosome-based VCF tags based on the current VC produced by builder.make() + * + * @param builder the VariantContextBuilder we are updating + * @param founderIds - Set of founders to take into account. AF and FC will be calculated over the founders only. + * If empty or null, counts are generated for all samples as unrelated individuals + * @param removeStaleValues should we remove stale values from the mapping? + */ + public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues, final Set founderIds) { + VariantContext vc = builder.make(); + builder.attributes(calculateChromosomeCounts(vc, new HashMap(vc.getAttributes()), removeStaleValues, founderIds)); } public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 02026b375..497110641 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -187,4 +187,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing TDT annotation", spec); } + + @Test + public void testChromosomeCountsPed() { + final String MD5 = "32df3ceb63c277df442ed55fb8684933"; + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" + + " -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1, + Arrays.asList(MD5)); + executeTest("Testing ChromosomeCounts annotation with PED file", spec); + } + }