getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("GC", 1, VCFHeaderLineType.Integer, "GC content around the variant (see docs for window size details)")); }
public boolean useZeroQualityReads() { return false; }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java
index b349be285..43ec537a4 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java
@@ -51,6 +51,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
@@ -68,11 +69,16 @@ import java.util.Map;
/**
- * Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium
+ * Hardy-Weinberg test for disequilibrium
*
- * Requires at least 10 samples in order to run. Only genotypes with sufficient quality (>10) will be taken into account.
+ * This annotation calculates the Phred-scaled P value of the genotype-based (using the GT field) Hardy-Weinberg test for disequilibrium.
+ *
+ * Caveats
+ * This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.
+ * Right now we just ignore genotypes that are not confident, but this throws off our HW ratios.
+ * More analysis is needed to determine the right thing to do when the genotyper cannot decide whether a given sample is het or hom var.
*/
-public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
+public class HardyWeinberg extends InfoFieldAnnotation implements ExperimentalAnnotation {
private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java
index f9663d33e..4039241ac 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java
@@ -50,6 +50,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.GenomeLoc;
@@ -63,13 +64,16 @@ import java.util.List;
import java.util.Map;
/**
- * Largest contiguous homopolymer run of the variant allele in either direction on the reference.
+ * Largest contiguous homopolymer run of the variant allele
*
- * Computed only for bi-allelic sites.
+ * Calculates the length of the largest contiguous homopolymer run of the variant allele in either direction on the reference.
*
- * Note that this annotation is no longer supported, as we have found that it does not give satisfactory results. Use at your own risk!
+ * Caveats
+ * This can only be computed for bi-allelic sites.
+ * This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.
+ * This needs to be computed in a more accurate manner. We currently look only at direct runs of the alternate allele adjacent to this position.
*/
-public class HomopolymerRun extends InfoFieldAnnotation {
+public class HomopolymerRun extends InfoFieldAnnotation implements ExperimentalAnnotation {
private boolean ANNOTATE_INDELS = true;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
index 58d720899..ad974a083 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MVLikelihoodRatio.java
@@ -65,19 +65,20 @@ import org.broadinstitute.variant.variantcontext.VariantContext;
import java.util.*;
/**
- * Likelihood of the site being a mendelian violation versus the likelihood of the site transmitting according to mendelian rules.
+ * Likelihood of being a Mendelian Violation
*
- *
- * Given a variant context, uses the genotype likelihoods to assess the likelihood of the site being a mendelian violation
- * versus the likelihood of the site transmitting according to mendelian rules. This assumes that the organism is
- * diploid. When multiple trios are present, the annotation is simply the maximum of the likelihood ratios, rather than
- * the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain sites and many trios.
- *
+ * Given a variant context, this tool uses the genotype likelihoods to assess the likelihood of the site being a Mendelian violation
+ * versus the likelihood of the site transmitting according to Mendelian rules.
*
- * Note that this annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).
+ * Note that this annotation requires a valid ped file.
+ *
+ * Caveat
+ * This tool assumes that the organism is diploid. When multiple trios are present, the annotation is simply the maximum
+ * of the likelihood ratios, rather than the strict 1-Prod(1-p_i) calculation, as this can scale poorly for uncertain
+ * sites and many trios.
*/
-public class MVLikelihoodRatio extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
+public class MVLikelihoodRatio extends InfoFieldAnnotation implements RodRequiringAnnotation {
private MendelianViolation mendelianViolation = null;
public static final String MVLR_KEY = "MVLR";
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
index 8c401eecd..b30df04a8 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
@@ -59,8 +59,12 @@ import java.util.*;
/**
- * The u-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
- * Note that the mapping quality rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
+ * U-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities
+ *
+ * This tool calculates the u-based z-approximation from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele).
+ *
+ * Caveat
+ * The mapping quality rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
*/
public class MappingQualityRankSumTest extends RankSumTest implements StandardAnnotation {
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java
index 80bbfc2e4..6f875b23c 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java
@@ -54,6 +54,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.variant.variantcontext.Genotype;
@@ -113,7 +114,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
if ( depth == 0 )
return null;
- double altAlleleLength = AverageAltAlleleLength.getMeanAltAlleleLength(vc);
+ double altAlleleLength = GATKVariantContextUtils.getMeanAltAlleleLength(vc);
double QD = -10.0 * vc.getLog10PError() / ((double)depth * altAlleleLength);
Map map = new HashMap();
map.put(getKeyNames().get(0), String.format("%.2f", QD));
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
index ae0d2a87b..182a9226f 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
@@ -65,8 +65,12 @@ import org.broadinstitute.variant.variantcontext.Allele;
import java.util.*;
/**
- * The u-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
- * Note that the read position rank sum test can not be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
+ * U-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele
+ *
+ * This tool calculates the u-based z-approximation from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele. If the alternate allele is only seen near the ends of reads, this is indicative of error.
+ *
+ * Caveat
+ * The read position rank sum test cannot be calculated for sites without a mixture of reads showing both the reference and alternate alleles.
*/
public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotation {
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java
index d976592cb..332d18341 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TandemRepeatAnnotator.java
@@ -65,11 +65,13 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
-
/**
* Annotates variants that are composed of tandem repeats
*
- * Note that this annotation is currently not compatible with HaplotypeCaller.
+ * This tool outputs the number of times the tandem repeat unit is repeated, for each allele (including reference).
+ *
+ * Caveat
+ * This annotation is currently not compatible with HaplotypeCaller.
*/
public class TandemRepeatAnnotator extends InfoFieldAnnotation implements StandardAnnotation {
private static final String STR_PRESENT = "STR";
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
index f29899f7f..f8efd7c3f 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TransmissionDisequilibriumTest.java
@@ -67,10 +67,19 @@ import java.util.*;
/**
* Wittkowski transmission disequilibrium test
*
- * Note that this annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).
+ * Test statistic from the Wittkowski transmission disequilibrium test.
+ * The calculation is based on the derivation in http://en.wikipedia.org/wiki/Transmission_disequilibrium_test#A_modified_version_of_the_TDT
+ *
+ * Note that this annotation requires a valid ped file.
+ *
+ * Caveat
+ * This annotation can only be used with VariantAnnotator (not with UnifiedGenotyper or HaplotypeCaller).
+ *
+ * @author rpoplin, lfran, ebanks
+ * @since 11/14/11
*/
-public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements ExperimentalAnnotation, RodRequiringAnnotation {
+public class TransmissionDisequilibriumTest extends InfoFieldAnnotation implements RodRequiringAnnotation {
private Set trios = null;
private final static int MIN_NUM_VALID_TRIOS = 5; // don't calculate this population-level statistic if there are less than X trios with full genotype likelihood information
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantType.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantType.java
index 89b0bcf96..555c75deb 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantType.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantType.java
@@ -50,7 +50,6 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
import org.broadinstitute.sting.utils.IndelUtils;
@@ -62,8 +61,11 @@ import java.util.*;
/**
* Assigns a roughly correct category of the variant type (SNP, MNP, insertion, deletion, etc.)
+ *
+ * This tool assigns a roughly correct category of the variant type (SNP, MNP, insertion, deletion, etc.).
+ * It also specifies whether the variant is multiallelic (>2 alleles).
*/
-public class VariantType extends InfoFieldAnnotation implements ExperimentalAnnotation {
+public class VariantType extends InfoFieldAnnotation {
public Map annotate(final RefMetaDataTracker tracker,
final AnnotatorCompatible walker,
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index 7948b93a9..3f3d7123a 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -212,6 +212,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem
* the mates contig and alignment start. If this flag is provided the haplotype caller will see such reads,
* and may make use of them in assembly and calling, where possible.
*/
+ @Hidden
@Argument(fullName="includeUmappedReads", shortName="unmapped", doc="If provided, unmapped reads with chromosomal coordinates (i.e., those placed to their maps) will be included in the assembly and calling", required = false)
protected boolean includeUnmappedReads = false;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
index fbba6722e..608257b54 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
@@ -46,7 +46,13 @@ import java.util.List;
/**
- * The allele balance (fraction of ref bases over ref + alt bases) separately for each bialleleic het-called sample
+ * Allele balance per sample
+ *
+ * The allele balance is the fraction of ref bases over ref + alt bases.
+ *
+ * Caveats
+ * Note that this annotation will only work properly for biallelic het-called samples.
+ * This is an experimental annotation. As such, it is unsupported; we do not make any guarantees that it will work properly, and you use it at your own risk.
*/
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AverageAltAlleleLength.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AverageAltAlleleLength.java
deleted file mode 100644
index 17a33bdca..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AverageAltAlleleLength.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-*
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package org.broadinstitute.sting.gatk.walkers.annotator;
-
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
-import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
-import org.broadinstitute.variant.variantcontext.Allele;
-import org.broadinstitute.variant.variantcontext.Genotype;
-import org.broadinstitute.variant.variantcontext.GenotypesContext;
-import org.broadinstitute.variant.variantcontext.VariantContext;
-import org.broadinstitute.variant.vcf.VCFHeaderLineType;
-import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Created by IntelliJ IDEA.
- * User: chartl
- * Date: 1/3/13
- * Time: 11:36 AM
- * To change this template use File | Settings | File Templates.
- */
-public class AverageAltAlleleLength extends InfoFieldAnnotation implements ActiveRegionBasedAnnotation, ExperimentalAnnotation {
-
- public List getDescriptions() {
- return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "Average Allele Length"));
- }
-
- public List getKeyNames() { return Arrays.asList("AAL"); }
-
- public Map annotate(final RefMetaDataTracker tracker,
- final AnnotatorCompatible walker,
- final ReferenceContext ref,
- final Map stratifiedContexts,
- final VariantContext vc,
- final Map perReadAlleleLikelihoodMap ) {
- if ( !vc.hasLog10PError() )
- return null;
-
- final GenotypesContext genotypes = vc.getGenotypes();
- if ( genotypes == null || genotypes.size() == 0 )
- return null;
-
- Map map = new HashMap();
-
- double length = getMeanAltAlleleLength(vc);
- map.put(getKeyNames().get(0),String.format("%.2f",length));
- return map;
- }
-
- public static double getMeanAltAlleleLength(VariantContext vc) {
- double averageLength = 1.0;
- if ( ! vc.isSNP() && ! vc.isSymbolic() ) {
- // adjust for the event length
- int averageLengthNum = 0;
- int averageLengthDenom = 0;
- int refLength = vc.getReference().length();
- for ( Allele a : vc.getAlternateAlleles() ) {
- int numAllele = vc.getCalledChrCount(a);
- int alleleSize;
- if ( a.length() == refLength ) {
- // SNP or MNP
- byte[] a_bases = a.getBases();
- byte[] ref_bases = vc.getReference().getBases();
- int n_mismatch = 0;
- for ( int idx = 0; idx < a_bases.length; idx++ ) {
- if ( a_bases[idx] != ref_bases[idx] )
- n_mismatch++;
- }
- alleleSize = n_mismatch;
- }
- else if ( a.isSymbolic() ) {
- alleleSize = 1;
- } else {
- alleleSize = Math.abs(refLength-a.length());
- }
- averageLengthNum += alleleSize*numAllele;
- averageLengthDenom += numAllele;
- }
- averageLength = ( (double) averageLengthNum )/averageLengthDenom;
- }
-
- return averageLength;
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java
deleted file mode 100644
index 65d2f0757..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-*
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package org.broadinstitute.sting.gatk.walkers.annotator;
-
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
-import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
-import org.broadinstitute.variant.vcf.VCFHeaderLineType;
-import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
-import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.variant.variantcontext.VariantContext;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Fraction of all reads across samples that have mapping quality zero
- */
-public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
-
- public Map annotate(final RefMetaDataTracker tracker,
- final AnnotatorCompatible walker,
- final ReferenceContext ref,
- final Map stratifiedContexts,
- final VariantContext vc,
- final Map stratifiedPerReadAlleleLikelihoodMap) {
- if ( stratifiedContexts.size() == 0 )
- return null;
-
- int mq0 = 0;
- int depth = 0;
- for ( Map.Entry sample : stratifiedContexts.entrySet() ) {
- AlignmentContext context = sample.getValue();
- depth += context.size();
- final ReadBackedPileup pileup = context.getBasePileup();
- for (PileupElement p : pileup ) {
- if ( p.getMappingQual() == 0 )
- mq0++;
- }
- }
- if (depth > 0) {
- double mq0f = (double)mq0 / (double )depth;
-
- Map map = new HashMap();
- map.put(getKeyNames().get(0), String.format("%1.4f", mq0f));
- return map;
- }
- else
- return null;
- }
-
- public List getKeyNames() { return Arrays.asList("MQ0Fraction"); }
-
- public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Fraction of Mapping Quality Zero Reads")); }
-}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
index 17002ba39..bc365c59c 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
@@ -45,11 +45,12 @@ import java.util.*;
/**
* A set of genomic annotations based on the output of the SnpEff variant effect predictor tool
- * (http://snpeff.sourceforge.net/).
*
- * For each variant, chooses one of the effects of highest biological impact from the SnpEff
+ * See http://snpeff.sourceforge.net/ for more information on the SnpEff tool.
+ *
+ * For each variant, this tool chooses one of the effects of highest biological impact from the SnpEff
* output file (which must be provided on the command line via --snpEffFile filename.vcf),
- * and adds annotations on that effect.
+ * and adds annotations on that effect.
*
* @author David Roazen
*/
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java
deleted file mode 100644
index dbaafb1ed..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
-* Copyright (c) 2012 The Broad Institute
-*
-* Permission is hereby granted, free of charge, to any person
-* obtaining a copy of this software and associated documentation
-* files (the "Software"), to deal in the Software without
-* restriction, including without limitation the rights to use,
-* copy, modify, merge, publish, distribute, sublicense, and/or sell
-* copies of the Software, and to permit persons to whom the
-* Software is furnished to do so, subject to the following
-* conditions:
-*
-* The above copyright notice and this permission notice shall be
-* included in all copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
-* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-*/
-
-package org.broadinstitute.sting.gatk.walkers.annotator;
-
-import org.broadinstitute.sting.commandline.Hidden;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
-import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
-import org.broadinstitute.variant.vcf.VCFHeaderLineType;
-import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
-import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.sting.utils.sam.ReadUtils;
-import org.broadinstitute.variant.variantcontext.VariantContext;
-
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Counts of bases from Illumina, 454, and SOLiD at this site
- */
-@Hidden
-public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
- private String nIllumina = "NumIllumina";
- private String n454 ="Num454";
- private String nSolid = "NumSOLiD";
- private String nOther = "NumOther";
- public Map annotate(final RefMetaDataTracker tracker,
- final AnnotatorCompatible walker,
- final ReferenceContext ref,
- final Map stratifiedContexts,
- final VariantContext vc,
- final Map stratifiedPerReadAlleleLikelihoodMap) {
- if ( stratifiedContexts.size() == 0 )
- return null;
-
- int readsIllumina = 0;
- int readsSolid = 0;
- int reads454 = 0;
- int readsOther = 0;
-
- for ( Map.Entry sample : stratifiedContexts.entrySet() ) {
- AlignmentContext context = sample.getValue();
- final ReadBackedPileup pileup = context.getBasePileup();
- for ( PileupElement p : pileup ) {
- if(ReadUtils.is454Read(p.getRead()))
- reads454++;
- else if (ReadUtils.isSOLiDRead(p.getRead()))
- readsSolid++;
- else if (ReadUtils.isIlluminaRead(p.getRead()))
- readsIllumina++;
- else
- readsOther++;
- }
- }
-
- Map map = new HashMap();
- map.put(nIllumina, String.format("%d", readsIllumina));
- map.put(n454, String.format("%d", reads454));
- map.put(nSolid, String.format("%d", readsSolid));
- map.put(nOther, String.format("%d", readsOther));
- return map;
- }
-
- public List getKeyNames() { return Arrays.asList(nIllumina,n454,nSolid,nOther); }
-
- public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(nIllumina, 1, VCFHeaderLineType.Integer, "Number of Illumina reads"),
- new VCFInfoHeaderLine(n454, 1, VCFHeaderLineType.Integer, "Number of 454 reads"),
- new VCFInfoHeaderLine(nSolid, 1, VCFHeaderLineType.Integer, "Number of SOLiD reads"),
- new VCFInfoHeaderLine(nOther, 1, VCFHeaderLineType.Integer, "Number of Other technology reads")); }
-
-}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java
index 398b32669..627bee3ea 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variant/GATKVariantContextUtils.java
@@ -51,7 +51,6 @@ public class GATKVariantContextUtils {
public final static String MERGE_FILTER_IN_ALL = "FilteredInAll";
public final static String MERGE_INTERSECTION = "Intersection";
-
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.
@@ -97,6 +96,46 @@ public class GATKVariantContextUtils {
MIX_TYPES
}
+    /**
+     * Mean event length of the alternate alleles, weighted by each allele's called chromosome count
+     * (refactored out of the AverageAltAlleleLength annotation class). An allele as long as the reference
+     * counts its mismatching bases, a symbolic allele counts 1, anything else counts |refLength - length|.
+     * @param vc the variant context
+     * @return the average length of the alt allele (a double); 1.0 for SNPs, symbolic variants, and when no alt chromosome is called
+     */
+    public static double getMeanAltAlleleLength(VariantContext vc) {
+        if ( vc.isSNP() || vc.isSymbolic() )
+            return 1.0;
+        // adjust for the event length
+        int averageLengthNum = 0;
+        int averageLengthDenom = 0;
+        final int refLength = vc.getReference().length();
+        for ( final Allele a : vc.getAlternateAlleles() ) {
+            final int numAllele = vc.getCalledChrCount(a);
+            int alleleSize;
+            if ( a.length() == refLength ) {
+                // SNP or MNP: event length is the number of bases differing from the reference
+                final byte[] a_bases = a.getBases();
+                final byte[] ref_bases = vc.getReference().getBases();
+                alleleSize = 0;
+                for ( int idx = 0; idx < a_bases.length; idx++ ) {
+                    if ( a_bases[idx] != ref_bases[idx] )
+                        alleleSize++;
+                }
+            } else if ( a.isSymbolic() ) {
+                alleleSize = 1;
+            } else {
+                alleleSize = Math.abs(refLength-a.length());
+            }
+            averageLengthNum += alleleSize*numAllele;
+            averageLengthDenom += numAllele;
+        }
+        // no called alt chromosomes: 0/0 would be NaN, so fall back to the default of 1.0
+        if ( averageLengthDenom == 0 )
+            return 1.0;
+        return ( (double) averageLengthNum )/averageLengthDenom;
+    }
+
/**
* create a genome location, given a variant context
* @param genomeLocParser parser
@@ -114,14 +153,14 @@ public class GATKVariantContextUtils {
}
/**
- * If this is a BiAlleic SNP, is it a transition?
+ * If this is a BiAllelic SNP, is it a transition?
*/
public static boolean isTransition(VariantContext context) {
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSITION;
}
/**
- * If this is a BiAlleic SNP, is it a transversion?
+ * If this is a BiAllelic SNP, is it a transversion?
*/
public static boolean isTransversion(VariantContext context) {
return getSNPSubstitutionType(context) == BaseUtils.BaseSubstitutionType.TRANSVERSION;