PED support for ChromosomeCounts annotation
Signed-off-by: Eric Banks <ebanks@broadinstitute.org>
This commit is contained in:
parent
19d5213d5a
commit
219b0a128b
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -38,13 +39,12 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -59,11 +59,18 @@ public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnn
|
||||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||||
|
|
||||||
|
private Set<String> founderIds = new HashSet<String>();
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( ! vc.hasGenotypes() )
|
if ( ! vc.hasGenotypes() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true);
|
return VariantContextUtils.calculateChromosomeCounts(vc, new HashMap<String, Object>(), true,founderIds);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ){
|
||||||
|
// If families were given, get the founders' IDs
|
||||||
|
founderIds = ((Walker)walker).getSampleDB().getFounderIds();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(Map<String, Map<Allele, List<GATKSAMRecord>>> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -805,9 +805,20 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
* @return chromosome count
|
* @return chromosome count
|
||||||
*/
|
*/
|
||||||
public int getCalledChrCount() {
|
public int getCalledChrCount() {
|
||||||
int n = 0;
|
return getCalledChrCount(new HashSet<String>(0));
|
||||||
|
}
|
||||||
|
|
||||||
for ( final Genotype g : getGenotypes() ) {
|
/**
|
||||||
|
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
|
||||||
|
*
|
||||||
|
* @param sampleIds IDs of samples to take into account. If empty then all samples are included.
|
||||||
|
* @return chromosome count
|
||||||
|
*/
|
||||||
|
public int getCalledChrCount(Set<String> sampleIds) {
|
||||||
|
int n = 0;
|
||||||
|
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
|
||||||
|
|
||||||
|
for ( final Genotype g : genotypes) {
|
||||||
for ( final Allele a : g.getAlleles() )
|
for ( final Allele a : g.getAlleles() )
|
||||||
n += a.isNoCall() ? 0 : 1;
|
n += a.isNoCall() ? 0 : 1;
|
||||||
}
|
}
|
||||||
|
|
@ -822,9 +833,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
||||||
* @return chromosome count
|
* @return chromosome count
|
||||||
*/
|
*/
|
||||||
public int getCalledChrCount(Allele a) {
|
public int getCalledChrCount(Allele a) {
|
||||||
int n = 0;
|
return getCalledChrCount(a,new HashSet<String>(0));
|
||||||
|
}
|
||||||
|
|
||||||
for ( final Genotype g : getGenotypes() ) {
|
/**
|
||||||
|
* Returns the number of chromosomes carrying allele A in the genotypes
|
||||||
|
*
|
||||||
|
* @param a allele
|
||||||
|
* @param sampleIds - IDs of samples to take into account. If empty then all samples are included.
|
||||||
|
* @return chromosome count
|
||||||
|
*/
|
||||||
|
public int getCalledChrCount(Allele a, Set<String> sampleIds) {
|
||||||
|
int n = 0;
|
||||||
|
GenotypesContext genotypes = sampleIds.isEmpty() ? getGenotypes() : getGenotypes(sampleIds);
|
||||||
|
|
||||||
|
for ( final Genotype g : genotypes ) {
|
||||||
n += g.getAlleles(a).size();
|
n += g.getAlleles(a).size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,21 @@ public class VariantContextUtils {
|
||||||
* @return the attributes map provided as input, returned for programming convenience
|
* @return the attributes map provided as input, returned for programming convenience
|
||||||
*/
|
*/
|
||||||
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
|
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues) {
|
||||||
|
return calculateChromosomeCounts(vc, attributes, removeStaleValues, new HashSet<String>(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the attributes of the attributes map given the VariantContext to reflect the
|
||||||
|
* proper chromosome-based VCF tags
|
||||||
|
*
|
||||||
|
* @param vc the VariantContext
|
||||||
|
* @param attributes the attributes map to populate; must not be null; may contain old values
|
||||||
|
* @param removeStaleValues should we remove stale values from the mapping?
|
||||||
|
* @param founderIds - Set of founder IDs to take into account. AF and FC will be calculated over the founders.
|
||||||
|
* If empty, counts are generated for all samples as unrelated individuals. Must not be null: the set is passed to VariantContext.getCalledChrCount, which calls isEmpty() on it.
|
||||||
|
* @return the attributes map provided as input, returned for programming convenience
|
||||||
|
*/
|
||||||
|
public static Map<String, Object> calculateChromosomeCounts(VariantContext vc, Map<String, Object> attributes, boolean removeStaleValues, final Set<String> founderIds) {
|
||||||
final int AN = vc.getCalledChrCount();
|
final int AN = vc.getCalledChrCount();
|
||||||
|
|
||||||
// if everyone is a no-call, remove the old attributes if requested
|
// if everyone is a no-call, remove the old attributes if requested
|
||||||
|
|
@ -82,16 +97,20 @@ public class VariantContextUtils {
|
||||||
|
|
||||||
// if there are alternate alleles, record the relevant tags
|
// if there are alternate alleles, record the relevant tags
|
||||||
if ( vc.getAlternateAlleles().size() > 0 ) {
|
if ( vc.getAlternateAlleles().size() > 0 ) {
|
||||||
final ArrayList<String> alleleFreqs = new ArrayList<String>();
|
ArrayList<String> alleleFreqs = new ArrayList<String>();
|
||||||
final ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
|
||||||
|
ArrayList<Integer> foundersAlleleCounts = new ArrayList<Integer>();
|
||||||
|
double totalFoundersChromosomes = (double)vc.getCalledChrCount(founderIds);
|
||||||
|
int foundersAltChromosomes;
|
||||||
for ( Allele allele : vc.getAlternateAlleles() ) {
|
for ( Allele allele : vc.getAlternateAlleles() ) {
|
||||||
int altChromosomes = vc.getCalledChrCount(allele);
|
foundersAltChromosomes = vc.getCalledChrCount(allele,founderIds);
|
||||||
alleleCounts.add(altChromosomes);
|
alleleCounts.add(vc.getCalledChrCount(allele));
|
||||||
|
foundersAlleleCounts.add(foundersAltChromosomes);
|
||||||
if ( AN == 0 ) {
|
if ( AN == 0 ) {
|
||||||
alleleFreqs.add("0.0");
|
alleleFreqs.add("0.0");
|
||||||
} else {
|
} else {
|
||||||
// todo -- this is a performance problem
|
// todo -- this is a performance problem
|
||||||
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue((double)AN), ((double)altChromosomes / (double)AN));
|
final String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalFoundersChromosomes), ((double)foundersAltChromosomes / totalFoundersChromosomes));
|
||||||
alleleFreqs.add(freq);
|
alleleFreqs.add(freq);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -116,9 +135,22 @@ public class VariantContextUtils {
|
||||||
* @param removeStaleValues should we remove stale values from the mapping?
|
* @param removeStaleValues should we remove stale values from the mapping?
|
||||||
*/
|
*/
|
||||||
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) {
|
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) {
|
||||||
final VariantContext vc = builder.make();
|
VariantContext vc = builder.make();
|
||||||
final Map<String, Object> attrs = calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues);
|
builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, new HashSet<String>(0)));
|
||||||
builder.attributes(attrs);
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update the attributes of the attributes map in the VariantContextBuilder to reflect the proper
|
||||||
|
* chromosome-based VCF tags based on the current VC produced by builder.make()
|
||||||
|
*
|
||||||
|
* @param builder the VariantContextBuilder we are updating
|
||||||
|
* @param founderIds - Set of founders to take into account. AF and FC will be calculated over the founders only.
|
||||||
|
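* If empty, counts are generated for all samples as unrelated individuals. Must not be null: the set is forwarded unchanged to the attribute-map overload, which calls isEmpty() on it.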
|
||||||
|
* @param removeStaleValues should we remove stale values from the mapping?
|
||||||
|
*/
|
||||||
|
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues, final Set<String> founderIds) {
|
||||||
|
VariantContext vc = builder.make();
|
||||||
|
builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, founderIds));
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {
|
public static String makePrecisionFormatStringFromDenominatorValue(double maxValue) {
|
||||||
|
|
|
||||||
|
|
@ -187,4 +187,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
||||||
executeTest("Testing TDT annotation", spec);
|
executeTest("Testing TDT annotation", spec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testChromosomeCountsPed() {
|
||||||
|
final String MD5 = "32df3ceb63c277df442ed55fb8684933";
|
||||||
|
WalkerTestSpec spec = new WalkerTestSpec(
|
||||||
|
"-T VariantAnnotator -R " + b37KGReference + " -A ChromosomeCounts --variant:vcf " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf" +
|
||||||
|
" -L " + validationDataLocation + "ug.random50000.subset300bp.chr1.family.vcf -NO_HEADER -ped " + validationDataLocation + "ug.random50000.family.ped -o %s", 1,
|
||||||
|
Arrays.asList(MD5));
|
||||||
|
executeTest("Testing ChromosomeCounts annotation with PED file", spec);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue