PED support for ChromosomeCounts annotation

Signed-off-by: Eric Banks <ebanks@broadinstitute.org>
This commit is contained in:
Laurent Francioli 2012-04-25 17:52:23 +02:00 committed by Eric Banks
parent 19d5213d5a
commit 219b0a128b
4 changed files with 93 additions and 20 deletions

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -38,13 +39,12 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
@ -59,11 +59,18 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
private Set<String> founderIds = new HashSet<String>();
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.hasGenotypes() )
return null;
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true,founderIds);
}
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ){
//If families were given, get the founders ids
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
}
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {

View File

@ -805,11 +805,22 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return chromosome count
*/
public int getCalledChrCount() {
int n = 0;
return getCalledChrCount(new HashSet<String>(0));
}
for ( final Genotype g : getGenotypes() ) {
for ( final Allele a : g.getAlleles() )
n += a.isNoCall() ? 0 : 1;
/**
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
*
* @param sampleIds IDs of samples to take into account. If empty then all samples are included.
* @return chromosome count
*/
public int getCalledChrCount(Set<String> sampleIds) {
int n = 0;
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
for ( final Genotype g : genotypes) {
for ( final Allele a : g.getAlleles() )
n += a.isNoCall() ? 0 : 1;
}
return n;
@ -822,10 +833,22 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @return chromosome count
*/
public int getCalledChrCount(Allele a) {
int n = 0;
return getCalledChrCount(a,new HashSet<String>(0));
}
for ( final Genotype g : getGenotypes() ) {
n += g.getAlleles(a).size();
/**
* Returns the number of chromosomes carrying allele A in the genotypes
*
* @param a allele
* @param sampleIds - IDs of samples to take into account. If empty then all samples are included.
* @return chromosome count
*/
public int getCalledChrCount(Allele a, Set<String> sampleIds) {
int n = 0;
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
for ( final Genotype g : genotypes ) {
n += g.getAlleles(a).size();
}
return n;

View File

@ -64,6 +64,21 @@ public class VariantContextUtils {
* @return the attributes map provided as input, returned for programming convenience
*/
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
return calculateChromosomeCounts(vc, attributes, removeStaleValues, new HashSet<String>(0));
}
/**
* Update the attributes of the attributes map given the VariantContext to reflect the
* proper chromosome-based VCF tags
*
* @param vc the VariantContext
* @param attributes the attributes map to populate; must not be null; may contain old values
* @param removeStaleValues should we remove stale values from the mapping?
* @param founderIds - Set of founders Ids to take into account. AF and FC will be calculated over the founders.
* If empty or null, counts are generated for all samples as unrelated individuals
* @return the attributes map provided as input, returned for programming convenience
*/
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues, final Set<String> founderIds) {
final int AN = vc.getCalledChrCount();
// if everyone is a no-call, remove the old attributes if requested
@ -82,16 +97,20 @@ public class VariantContextUtils {
// if there are alternate alleles, record the relevant tags
if ( vc.getAlternateAlleles().size() > 0 ) {
final ArrayList<String> alleleFreqs = new ArrayList<String>();
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
ArrayList<String> alleleFreqs = new ArrayList<String>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
ArrayList<Integer> foundersAlleleCounts = new ArrayList<Integer>();
double totalFoundersChromosomes = (double)vc.getCalledChrCount(founderIds);
int foundersAltChromosomes;
for ( Allele allele : vc.getAlternateAlleles() ) {
int altChromosomes = vc.getCalledChrCount(allele);
alleleCounts.add(altChromosomes);
foundersAltChromosomes = vc.getCalledChrCount(allele,founderIds);
alleleCounts.add(vc.getCalledChrCount(allele));
foundersAlleleCounts.add(foundersAltChromosomes);
if ( AN == 0 ) {
alleleFreqs.add("0.0");
} else {
// todo -- this is a performance problem
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue((double)AN), ((double)altChromosomes / (double)AN));
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes));
alleleFreqs.add(freq);
}
}
@ -116,9 +135,22 @@ public class VariantContextUtils {
* @param removeStaleValues should we remove stale values from the mapping?
*/
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) {
final VariantContext vc = builder.make();
final Map<String, Object> attrs = calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues);
builder.attributes(attrs);
VariantContext vc = builder.make();
builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, new HashSet<String>(0)));
}
/**
* Update the attributes of the attributes map in the VariantContextBuilder to reflect the proper
* chromosome-based VCF tags based on the current VC produced by builder.make()
*
* @param builder the VariantContextBuilder we are updating
* @param founderIds - Set of founders to take into account. AF and FC will be calculated over the founders only.
* If empty or null, counts are generated for all samples as unrelated individuals
* @param removeStaleValues should we remove stale values from the mapping?
*/
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues, final Set<String> founderIds) {
VariantContext vc = builder.make();
builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, founderIds));
}
public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {

View File

@ -187,4 +187,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
executeTest("Testing TDT annotation", spec);
}
@Test
public void testChromosomeCountsPed() {
final String MD5 = "32df3ceb63c277df442ed55fb8684933";
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
Arrays.asList(MD5));
executeTest("Testing ChromosomeCounts annotation with PED file", spec);
}
}