PED support for ChromosomeCounts annotation
Signed-off-by: Eric Banks <ebanks@broadinstitute.org>
This commit is contained in:
parent
19d5213d5a
commit
219b0a128b
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -38,13 +39,12 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
|||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -59,11 +59,18 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
|
|||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||
|
||||
private Set<String> founderIds = new HashSet<String>();
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( ! vc.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
|
||||
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true,founderIds);
|
||||
}
|
||||
|
||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ){
|
||||
//If families were given, get the founders ids
|
||||
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||
|
|
|
|||
|
|
@ -805,11 +805,22 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return chromosome count
|
||||
*/
|
||||
public int getCalledChrCount() {
|
||||
int n = 0;
|
||||
return getCalledChrCount(new HashSet<String>(0));
|
||||
}
|
||||
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
for ( final Allele a : g.getAlleles() )
|
||||
n += a.isNoCall() ? 0 : 1;
|
||||
/**
|
||||
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
|
||||
*
|
||||
* @param sampleIds IDs of samples to take into account. If empty then all samples are included.
|
||||
* @return chromosome count
|
||||
*/
|
||||
public int getCalledChrCount(Set<String> sampleIds) {
|
||||
int n = 0;
|
||||
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
|
||||
|
||||
for ( final Genotype g : genotypes) {
|
||||
for ( final Allele a : g.getAlleles() )
|
||||
n += a.isNoCall() ? 0 : 1;
|
||||
}
|
||||
|
||||
return n;
|
||||
|
|
@ -822,10 +833,22 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
* @return chromosome count
|
||||
*/
|
||||
public int getCalledChrCount(Allele a) {
|
||||
int n = 0;
|
||||
return getCalledChrCount(a,new HashSet<String>(0));
|
||||
}
|
||||
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
n += g.getAlleles(a).size();
|
||||
/**
|
||||
* Returns the number of chromosomes carrying allele A in the genotypes
|
||||
*
|
||||
* @param a allele
|
||||
* @param sampleIds - IDs of samples to take into account. If empty then all samples are included.
|
||||
* @return chromosome count
|
||||
*/
|
||||
public int getCalledChrCount(Allele a, Set<String> sampleIds) {
|
||||
int n = 0;
|
||||
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
|
||||
|
||||
for ( final Genotype g : genotypes ) {
|
||||
n += g.getAlleles(a).size();
|
||||
}
|
||||
|
||||
return n;
|
||||
|
|
|
|||
|
|
@ -64,6 +64,21 @@ public class VariantContextUtils {
|
|||
* @return the attributes map provided as input, returned for programming convenience
|
||||
*/
|
||||
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
|
||||
return calculateChromosomeCounts(vc, attributes, removeStaleValues, new HashSet<String>(0));
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the attributes of the attributes map given the VariantContext to reflect the
|
||||
* proper chromosome-based VCF tags
|
||||
*
|
||||
* @param vc the VariantContext
|
||||
* @param attributes the attributes map to populate; must not be null; may contain old values
|
||||
* @param removeStaleValues should we remove stale values from the mapping?
|
||||
* @param founderIds - Set of founders Ids to take into account. AF and FC will be calculated over the founders.
|
||||
* If empty or null, counts are generated for all samples as unrelated individuals
|
||||
* @return the attributes map provided as input, returned for programming convenience
|
||||
*/
|
||||
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues, final Set<String> founderIds) {
|
||||
final int AN = vc.getCalledChrCount();
|
||||
|
||||
// if everyone is a no-call, remove the old attributes if requested
|
||||
|
|
@ -82,16 +97,20 @@ public class VariantContextUtils {
|
|||
|
||||
// if there are alternate alleles, record the relevant tags
|
||||
if ( vc.getAlternateAlleles().size() > 0 ) {
|
||||
final ArrayList<String> alleleFreqs = new ArrayList<String>();
|
||||
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
||||
ArrayList<String> alleleFreqs = new ArrayList<String>();
|
||||
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
||||
ArrayList<Integer> foundersAlleleCounts = new ArrayList<Integer>();
|
||||
double totalFoundersChromosomes = (double)vc.getCalledChrCount(founderIds);
|
||||
int foundersAltChromosomes;
|
||||
for ( Allele allele : vc.getAlternateAlleles() ) {
|
||||
int altChromosomes = vc.getCalledChrCount(allele);
|
||||
alleleCounts.add(altChromosomes);
|
||||
foundersAltChromosomes = vc.getCalledChrCount(allele,founderIds);
|
||||
alleleCounts.add(vc.getCalledChrCount(allele));
|
||||
foundersAlleleCounts.add(foundersAltChromosomes);
|
||||
if ( AN == 0 ) {
|
||||
alleleFreqs.add("0.0");
|
||||
} else {
|
||||
// todo -- this is a performance problem
|
||||
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue((double)AN), ((double)altChromosomes / (double)AN));
|
||||
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes));
|
||||
alleleFreqs.add(freq);
|
||||
}
|
||||
}
|
||||
|
|
@ -116,9 +135,22 @@ public class VariantContextUtils {
|
|||
* @param removeStaleValues should we remove stale values from the mapping?
|
||||
*/
|
||||
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) {
|
||||
final VariantContext vc = builder.make();
|
||||
final Map<String, Object> attrs = calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues);
|
||||
builder.attributes(attrs);
|
||||
VariantContext vc = builder.make();
|
||||
builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, new HashSet<String>(0)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the attributes of the attributes map in the VariantContextBuilder to reflect the proper
|
||||
* chromosome-based VCF tags based on the current VC produced by builder.make()
|
||||
*
|
||||
* @param builder the VariantContextBuilder we are updating
|
||||
* @param founderIds - Set of founders to take into account. AF and FC will be calculated over the founders only.
|
||||
* If empty or null, counts are generated for all samples as unrelated individuals
|
||||
* @param removeStaleValues should we remove stale values from the mapping?
|
||||
*/
|
||||
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues, final Set<String> founderIds) {
|
||||
VariantContext vc = builder.make();
|
||||
builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, founderIds));
|
||||
}
|
||||
|
||||
public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {
|
||||
|
|
|
|||
|
|
@ -187,4 +187,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("Testing TDT annotation", spec);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testChromosomeCountsPed() {
|
||||
final String MD5 = "32df3ceb63c277df442ed55fb8684933";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
|
||||
Arrays.asList(MD5));
|
||||
executeTest("Testing ChromosomeCounts annotation with PED file", spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue