Cleaned up annotations

- Moved AverageAltAlleleLength, MappingQualityZeroFraction and TechnologyComposition to Private
  - VariantType, TransmissionDisequilibriumTest, MVLikelihoodRatio and GCContent are no longer Experimental
  - AlleleBalanceBySample, HardyWeinberg and HomopolymerRun are Experimental and available to users with a big bold caveat message
  - Refactored getMeanAltAlleleLength() out of AverageAltAlleleLength into GATKVariantContextUtils in order to make QualByDepth independent of where AverageAltAlleleLength lives
  - Unrelated change, bundled in for convenience: made HC argument includeUnmappedreads @Hidden
  - Removed unnecessary check in AverageAltAlleleLength
This commit is contained in:
Geraldine Van der Auwera 2013-03-12 16:32:59 -04:00
parent acaa96f853
commit 61349ecefa
20 changed files with 148 additions and 353 deletions

View File

@ -58,8 +58,12 @@ import java.util.*;
/**
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele).
* Note that the base quality rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
* U-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities
*
* <p>This tool calculates the u-based z-approximation from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele).</p>
*
* <h3>Caveat</h3>
* <p>The base quality rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
*/
public class BaseQualityRankSumTest extends RankSumTest implements StandardAnnotation {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }

View File

@ -65,9 +65,15 @@ import java.util.*;
/**
* Allele count in genotypes, for each ALT allele, in the same order as listed;
* allele Frequency, for each ALT allele, in the same order as listed; total number
* of alleles in called genotypes.
* Allele counts and frequency for each ALT allele and total number of alleles in called genotypes
*
* <p>This annotation tool outputs the following:
*
* <ul>
* <li>Allele count in genotypes, for each ALT allele, in the same order as listed</li>
* <li>Allele Frequency, for each ALT allele, in the same order as listed</li>
* <li>Total number of alleles in called genotypes</li>
* </ul></p>
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {

View File

@ -57,14 +57,15 @@ import org.broadinstitute.variant.variantcontext.Allele;
import java.util.*;
/**
* Created with IntelliJ IDEA.
* User: rpoplin
* Date: 6/28/12
*/
/**
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for reads with clipped bases (reads with ref bases vs. those with the alternate allele)
* Note that the clipping rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
* U-based z-approximation from the Mann-Whitney Rank Sum Test for reads with clipped bases
*
* <p>This tool calculates the u-based z-approximation from the Mann-Whitney Rank Sum Test for reads with clipped bases (reads with ref bases vs. those with the alternate allele).</p>
*
* <h3>Caveat</h3>
* <p>The clipping rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
*
* @author rpoplin
* @since 6/28/12
*/
public class ClippingRankSumTest extends RankSumTest {

View File

@ -68,9 +68,16 @@ import java.util.Map;
/**
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
* GC content of the reference around this site
*
* <p>The GC content is the number of GC bases relative to the total number of bases (# GC bases / # all bases) around this site on the reference.</p>
*
* <h3>Caveat</h3>
* <p>The window size used to calculate the GC content around the site is set by the tool used for annotation
* (currently UnifiedGenotyper, HaplotypeCaller or VariantAnnotator). See the Technical Document for each tool
* to find out what window size they use.</p>
*/
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public class GCContent extends InfoFieldAnnotation {
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
@ -86,7 +93,7 @@ public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnota
public List<String> getKeyNames() { return Arrays.asList("GC"); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("GC", 1, VCFHeaderLineType.Integer, "GC content within 20 bp +/- the variant")); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("GC", 1, VCFHeaderLineType.Integer, "GC content around the variant (see docs for window size details)")); }
public boolean useZeroQualityReads() { return false; }

View File

@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
@ -68,11 +69,16 @@ import java.util.Map;
/**
* Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium
* Hardy-Weinberg test for disequilibrium
*
* <p>Requires at least 10 samples in order to run. Only genotypes with sufficient quality (>10) will be taken into account.</p>
* <p>This annotation calculates the Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium.</p>
*
* <h3>Caveats</h3>
* <h4>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</h4>
* <p>Right now we just ignore genotypes that are not confident, but this throws off our HW ratios.
* More analysis is needed to determine the right thing to do when the genotyper cannot decide whether a given sample is het or hom var.</p>
*/
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAnnotation {
private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10;

View File

@ -50,6 +50,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -63,13 +64,16 @@ import java.util.List;
import java.util.Map;
/**
* Largest contiguous homopolymer run of the variant allele in either direction on the reference.
* Largest contiguous homopolymer run of the variant allele
*
* <p>Computed only for bi-allelic sites.</p>
* <p>Calculates the length of the largest contiguous homopolymer run of the variant allele in either direction on the reference.</p>
*
* <h3>Note that this annotation is no longer supported, as we have found that it does not give satisfactory results. Use at your own risk!</h3>
* <h3>Caveats</h3>
* <p>This can only be computed for bi-allelic sites.</p>
* <h4>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</h4>
* <p>This needs to be computed in a more accurate manner. We currently look only at direct runs of the alternate allele adjacent to this position.</p>
*/
public class HomopolymerRun extends InfoFieldAnnotation {
public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalAnnotation {
private boolean ANNOTATE_INDELS = true;

View File

@ -65,19 +65,20 @@ import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.*;
/**
* Likelihood of the site being a mendelian violation versus the likelihood of the site transmitting according to mendelian rules.
* Likelihood of being a Mendelian Violation
*
* <p>
* Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
* versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
* diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
* the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
* </p>
* <p>Given a variant context, this tool uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
* versus the likelihood of the site transmitting according to mendelian rules. </p>
*
* <p>Note that this annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).</p>
* <p>Note that this annotation requires a valid ped file.</p>
*
* <h3>Caveat</h3>
* <p>This tool assumes that the organism is diploid. When multiple trios are present, the annotation is simply the maximum
* of the likelihood ratios, rather than the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain
* sites and many trios.</p>
*/
public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation {
private MendelianViolation mendelianViolation = null;
public static final String MVLR_KEY = "MVLR";

View File

@ -59,8 +59,12 @@ import java.util.*;
/**
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
* Note that the mapping quality rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
* U-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities
*
* <p>This tool calculates the u-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele).</p>
*
* <h3>Caveat</h3>
* <p>The mapping quality rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
*/
public class MappingQualityRankSumTest extends RankSumTest implements StandardAnnotation {

View File

@ -54,6 +54,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.variant.variantcontext.Genotype;
@ -113,7 +114,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
if ( depth == 0 )
return null;
double altAlleleLength = AverageAltAlleleLength.getMeanAltAlleleLength(vc);
double altAlleleLength = GATKVariantContextUtils.getMeanAltAlleleLength(vc);
double QD = -10.0 * vc.getLog10PError() / ((double)depth * altAlleleLength);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", QD));

View File

@ -65,8 +65,12 @@ import org.broadinstitute.variant.variantcontext.Allele;
import java.util.*;
/**
* The u-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
* Note that the read position rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
* U-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele
*
* <p>This tool calculates the u-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele. If the alternate allele is only seen near the ends of reads, this is indicative of error.</p>
*
* <h3>Caveat</h3>
* <p>The read position rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.</p>
*/
public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotation {

View File

@ -65,11 +65,13 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Annotates variants that are composed of tandem repeats
*
* <p>Note that this annotation is currently not compatible with HaplotypeCaller.</p>
* <p>This tool outputs the number of times the tandem repeat unit is repeated, for each allele (including reference).</p>
*
* <h3>Caveat</h3>
* <p>This annotation is currently not compatible with HaplotypeCaller.</p>
*/
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation {
private static final String STR_PRESENT = "STR";

View File

@ -67,10 +67,19 @@ import java.util.*;
/**
* Wittkowski transmission disequilibrium test
*
* <p>Note that this annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).</p>
* <p>Test statistic from Wittkowski transmission disequilibrium test.
* The calculation is based on the following derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT</p>
*
* <p>Note that this annotation requires a valid ped file.</p>
*
* <h3>Caveat</h3>
* <p>This annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).</p>
*
* @author rpoplin, lfran, ebanks
* @since 11/14/11
*/
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements RodRequiringAnnotation {
private Set<Sample> trios = null;
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information

View File

@ -50,7 +50,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.IndelUtils;
@ -62,8 +61,11 @@ import java.util.*;
/**
* Assigns a roughly correct category of the variant type (SNP, MNP, insertion, deletion, etc.)
*
* <p>This tool assigns a roughly correct category of the variant type (SNP, MNP, insertion, deletion, etc.).
* It also specifies whether the variant is multiallelic (>2 alleles).</p>
*/
public class VariantType extends InfoFieldAnnotation implements ExperimentalAnnotation {
public class VariantType extends InfoFieldAnnotation {
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,

View File

@ -212,6 +212,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
* the mates contig and alignment start. If this flag is provided the haplotype caller will see such reads,
* and may make use of them in assembly and calling, where possible.
*/
@Hidden
@Argument(fullName="includeUmappedReads", shortName="unmapped", doc="If provided, unmapped reads with chromosomal coordinates (i.e., those placed to their maps) will be included in the assembly and calling", required = false)
protected boolean includeUnmappedReads = false;

View File

@ -46,7 +46,13 @@ import java.util.List;
/**
* The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample
* Allele balance per sample
*
* <p>The allele balance is the fraction of ref bases over ref + alt bases.</p>
*
* <h3>Caveats</h3>
* <p>Note that this annotation will only work properly for biallelic het-called samples.</p>
* <h4>This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.</h4>
*/
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {

View File

@ -1,117 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.variantcontext.Allele;
import org.broadinstitute.variant.variantcontext.Genotype;
import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Average length of the alternate alleles at a site
 *
 * <p>Reports, under the INFO key "AAL", the mean event size of the alternate alleles, weighted by the
 * number of called chromosomes carrying each allele. The value is formatted to two decimal places.</p>
 *
 * @author chartl
 * @since 1/3/13
 */
public class AverageAltAlleleLength extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation, ExperimentalAnnotation {

    /**
     * @return the single header line describing the "AAL" INFO field (one Float value)
     */
    public List<VCFInfoHeaderLine> getDescriptions() {
        return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Average Allele Length"));
    }

    /**
     * @return the INFO key written by this annotation
     */
    public List<String> getKeyNames() { return Arrays.asList("AAL"); }

    /**
     * Computes the annotation for one variant context.
     *
     * @param vc the variant context being annotated; the other arguments are not consulted
     * @return a one-entry map from "AAL" to the formatted mean length, or null when the context
     *         has no log10 P(error) or no genotypes
     */
    public Map<String, Object> annotate(final RefMetaDataTracker tracker,
                                        final AnnotatorCompatible walker,
                                        final ReferenceContext ref,
                                        final Map<String, AlignmentContext> stratifiedContexts,
                                        final VariantContext vc,
                                        final Map<String, PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap ) {
        if ( !vc.hasLog10PError() )
            return null;

        final GenotypesContext genotypes = vc.getGenotypes();
        if ( genotypes == null || genotypes.size() == 0 )
            return null;

        final Map<String, Object> annotations = new HashMap<String, Object>();
        annotations.put(getKeyNames().get(0), String.format("%.2f", getMeanAltAlleleLength(vc)));
        return annotations;
    }

    /**
     * Mean event length of the alternate alleles, weighted by called chromosome count.
     *
     * <p>SNP and symbolic contexts always yield 1.0. Otherwise each alternate allele contributes:
     * the number of mismatching bases when it has the same length as the reference, 1 when it is
     * symbolic, and the absolute length difference from the reference in all other cases.</p>
     *
     * @param vc the variant context
     * @return the weighted mean length, or 1.0 when no alternate allele has any called chromosome
     */
    public static double getMeanAltAlleleLength(VariantContext vc) {
        final double defaultLength = 1.0;
        if ( vc.isSNP() || vc.isSymbolic() )
            return defaultLength;

        final int refLength = vc.getReference().length();
        int weightedLengthSum = 0;
        int calledAlleleTotal = 0;

        for ( final Allele alt : vc.getAlternateAlleles() ) {
            final int calledCount = vc.getCalledChrCount(alt);
            final int eventLength;
            if ( alt.length() == refLength ) {
                // SNP or MNP: the event size is the number of substituted bases
                final byte[] altBases = alt.getBases();
                final byte[] refBases = vc.getReference().getBases();
                int mismatches = 0;
                for ( int idx = 0; idx < altBases.length; idx++ ) {
                    if ( altBases[idx] != refBases[idx] )
                        mismatches++;
                }
                eventLength = mismatches;
            } else if ( alt.isSymbolic() ) {
                eventLength = 1;
            } else {
                // insertion or deletion: size of the length change
                eventLength = Math.abs(refLength - alt.length());
            }
            weightedLengthSum += eventLength * calledCount;
            calledAlleleTotal += calledCount;
        }

        // With no called alternate chromosomes the weighted mean is 0/0 (NaN in double arithmetic);
        // fall back to the default length rather than propagating NaN into the output.
        if ( calledAlleleTotal == 0 )
            return defaultLength;

        return ( (double) weightedLengthSum ) / calledAlleleTotal;
    }
}

View File

@ -1,85 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Fraction of all reads across samples that have mapping quality zero
 *
 * <p>Writes the INFO key "MQ0Fraction": the number of pileup elements with mapping quality 0, summed
 * over all samples, divided by the summed size of the per-sample alignment contexts. The value is
 * formatted to four decimal places.</p>
 */
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {

    /**
     * Computes the annotation from the per-sample alignment contexts.
     *
     * @param stratifiedContexts alignment contexts keyed by sample; the other arguments are not consulted
     * @return a one-entry map from "MQ0Fraction" to the formatted fraction, or null when there are no
     *         contexts or the total depth is zero
     */
    public Map<String, Object> annotate(final RefMetaDataTracker tracker,
                                        final AnnotatorCompatible walker,
                                        final ReferenceContext ref,
                                        final Map<String, AlignmentContext> stratifiedContexts,
                                        final VariantContext vc,
                                        final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
        if ( stratifiedContexts.size() == 0 )
            return null;

        int mq0 = 0;
        int depth = 0;
        for ( final AlignmentContext context : stratifiedContexts.values() ) {
            depth += context.size();
            final ReadBackedPileup pileup = context.getBasePileup();
            for ( final PileupElement p : pileup ) {
                if ( p.getMappingQual() == 0 )
                    mq0++;
            }
        }

        // no coverage at all: the fraction is undefined, so emit nothing
        if ( depth <= 0 )
            return null;

        final double mq0f = (double) mq0 / (double) depth;
        final Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), String.format("%1.4f", mq0f));
        return map;
    }

    /**
     * @return the INFO key written by this annotation
     */
    public List<String> getKeyNames() { return Arrays.asList("MQ0Fraction"); }

    /**
     * @return the header line for "MQ0Fraction"; declared as Float because the value is a fraction
     *         written with decimal places, not an integer count
     */
    public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Fraction of Mapping Quality Zero Reads")); }
}

View File

@ -45,11 +45,12 @@ import java.util.*;
/**
* A set of genomic annotations based on the output of the SnpEff variant effect predictor tool
* (http://snpeff.sourceforge.net/).
*
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
* <p>See <a href='http://snpeff.sourceforge.net/'>http://snpeff.sourceforge.net/</a> for more information on the SnpEff tool.</p>
*
* <p>For each variant, this tool chooses one of the effects of highest biological impact from the SnpEff
* output file (which must be provided on the command line via --snpEffFile filename.vcf),
* and adds annotations on that effect.
* and adds annotations on that effect.</p>
*
* @author David Roazen
*/

View File

@ -1,101 +0,0 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Counts of bases from Illumina, 454, and SOLiD at this site
 *
 * <p>Every pileup element of every sample is assigned to exactly one bucket, tested in the order
 * 454, SOLiD, Illumina; anything that matches none of them is counted as "other". The four totals are
 * written to the INFO keys NumIllumina, Num454, NumSOLiD and NumOther.</p>
 */
@Hidden
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
    private String nIllumina = "NumIllumina";
    private String n454 ="Num454";
    private String nSolid = "NumSOLiD";
    private String nOther = "NumOther";

    /**
     * Tallies the sequencing technology of the reads overlapping the site.
     *
     * @param stratifiedContexts alignment contexts keyed by sample; the other arguments are not consulted
     * @return a map holding the four per-technology counts as decimal strings, or null when there are
     *         no alignment contexts
     */
    public Map<String, Object> annotate(final RefMetaDataTracker tracker,
                                        final AnnotatorCompatible walker,
                                        final ReferenceContext ref,
                                        final Map<String, AlignmentContext> stratifiedContexts,
                                        final VariantContext vc,
                                        final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
        if ( stratifiedContexts.isEmpty() )
            return null;

        int illuminaCount = 0;
        int solidCount = 0;
        int fourFiveFourCount = 0;
        int otherCount = 0;

        for ( final AlignmentContext sampleContext : stratifiedContexts.values() ) {
            for ( final PileupElement element : sampleContext.getBasePileup() ) {
                // the order of these checks decides the bucket when more than one could match
                if ( ReadUtils.is454Read(element.getRead()) ) {
                    fourFiveFourCount++;
                } else if ( ReadUtils.isSOLiDRead(element.getRead()) ) {
                    solidCount++;
                } else if ( ReadUtils.isIlluminaRead(element.getRead()) ) {
                    illuminaCount++;
                } else {
                    otherCount++;
                }
            }
        }

        final Map<String, Object> counts = new HashMap<String, Object>();
        counts.put(nIllumina, String.format("%d", illuminaCount));
        counts.put(n454, String.format("%d", fourFiveFourCount));
        counts.put(nSolid, String.format("%d", solidCount));
        counts.put(nOther, String.format("%d", otherCount));
        return counts;
    }

    /**
     * @return the four INFO keys written by this annotation
     */
    public List<String> getKeyNames() {
        return Arrays.asList(nIllumina, n454, nSolid, nOther);
    }

    /**
     * @return one Integer header line per technology count
     */
    public List<VCFInfoHeaderLine> getDescriptions() {
        final VCFInfoHeaderLine illuminaLine = new VCFInfoHeaderLine(nIllumina, 1, VCFHeaderLineType.Integer, "Number of Illumina reads");
        final VCFInfoHeaderLine fourFiveFourLine = new VCFInfoHeaderLine(n454, 1, VCFHeaderLineType.Integer, "Number of 454 reads");
        final VCFInfoHeaderLine solidLine = new VCFInfoHeaderLine(nSolid, 1, VCFHeaderLineType.Integer, "Number of SOLiD reads");
        final VCFInfoHeaderLine otherLine = new VCFInfoHeaderLine(nOther, 1, VCFHeaderLineType.Integer, "Number of Other technology reads");
        return Arrays.asList(illuminaLine, fourFiveFourLine, solidLine, otherLine);
    }
}

View File

@ -51,7 +51,6 @@ public class GATKVariantContextUtils {
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_INTERSECTION = "Intersection";
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
@ -97,6 +96,46 @@ public class GATKVariantContextUtils {
MIX_TYPES
}
/**
 * Mean event length of the alternate alleles, weighted by called chromosome count.
 * Refactored out of the AverageAltAlleleLength annotation class.
 *
 * <p>SNP and symbolic contexts always yield 1.0. Otherwise each alternate allele contributes:
 * the number of mismatching bases when it has the same length as the reference, 1 when it is
 * symbolic, and the absolute length difference from the reference in all other cases.</p>
 *
 * @param vc the variant context
 * @return the average length of the alt allele (a double), or 1.0 when no alternate allele has
 *         any called chromosome
 */
public static double getMeanAltAlleleLength(VariantContext vc) {
    final double defaultLength = 1.0;
    if ( vc.isSNP() || vc.isSymbolic() )
        return defaultLength;

    final int refLength = vc.getReference().length();
    int weightedLengthSum = 0;
    int calledAlleleTotal = 0;

    for ( final Allele alt : vc.getAlternateAlleles() ) {
        final int calledCount = vc.getCalledChrCount(alt);
        final int eventLength;
        if ( alt.length() == refLength ) {
            // SNP or MNP: the event size is the number of substituted bases
            final byte[] altBases = alt.getBases();
            final byte[] refBases = vc.getReference().getBases();
            int mismatches = 0;
            for ( int idx = 0; idx < altBases.length; idx++ ) {
                if ( altBases[idx] != refBases[idx] )
                    mismatches++;
            }
            eventLength = mismatches;
        } else if ( alt.isSymbolic() ) {
            eventLength = 1;
        } else {
            // insertion or deletion: size of the length change
            eventLength = Math.abs(refLength - alt.length());
        }
        weightedLengthSum += eventLength * calledCount;
        calledAlleleTotal += calledCount;
    }

    // With no called alternate chromosomes the weighted mean is 0/0 (NaN in double arithmetic);
    // fall back to the default length rather than propagating NaN to callers.
    if ( calledAlleleTotal == 0 )
        return defaultLength;

    return ( (double) weightedLengthSum ) / calledAlleleTotal;
}
/**
* create a genome location, given a variant context
* @param genomeLocParser parser
@ -114,14 +153,14 @@ public class GATKVariantContextUtils {
}
/**
* If this is a BiAlleic SNP, is it a transition?
* If this is a BiAllelic SNP, is it a transition?
*/
public static boolean isTransition(VariantContext context) {
    // Classification is delegated to getSNPSubstitutionType; this only tests for the TRANSITION category.
    return BaseUtils.BaseSubstitutionType.TRANSITION == getSNPSubstitutionType(context);
}
/**
* If this is a BiAlleic SNP, is it a transversion?
* If this is a BiAllelic SNP, is it a transversion?
*/
public static boolean isTransversion(VariantContext context) {
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION;