Merge branch 'stable'

This commit is contained in:
Mark DePristo 2011-09-23 09:04:52 -04:00
commit 27ce3c822e
33 changed files with 403 additions and 331 deletions

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
* @author Your Name
* @since Date created
*/
@Hidden
public class GATKDocsExample extends RodWalker<Integer, Integer> {
/**
* Put detailed documentation about the argument here. No need to duplicate the summary information

View File

@ -43,6 +43,9 @@ import java.util.List;
import java.util.Map;
/**
* The allele balance (fraction of ref bases over ref + alt bases) across all biallelic het-called samples
*/
public class AlleleBalance extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* The allele balance (fraction of ref bases over ref + alt bases) separately for each biallelic het-called sample
*/
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {

View File

@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.Map;
/**
* Abstract base class for all annotations that are normalized by depth
*/
public abstract class AnnotationByDepth extends InfoFieldAnnotation {

View File

@ -47,6 +47,9 @@ import java.util.List;
import java.util.Map;
/**
* Count of A, C, G, T bases across all samples
*/
public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -13,6 +13,9 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
*/
public class BaseQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }

View File

@ -44,6 +44,11 @@ import java.util.List;
import java.util.Map;
/**
* Allele count in genotypes, for each ALT allele, in the same order as listed;
* allele Frequency, for each ALT allele, in the same order as listed; total number
* of alleles in called genotypes.
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };

View File

@ -16,7 +16,12 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Total (unfiltered) depth over all samples.
*
* Affected by downsampling (-dcov) though, so the max value one can obtain for N samples with -dcov D
* is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -23,6 +23,25 @@ import java.util.List;
import java.util.Map;
/**
* The depth of coverage of each VCF allele in this sample
*
* Complementary fields that capture two important ways of thinking about the depth of the data for this sample
* at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) are the counts of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
* the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
* because the AD includes reads and bases that were filtered by the Unified Genotyper, one should not make
* assumptions about the underlying genotype based on it; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below).
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static String REF_ALLELE = "REF";

View File

@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
* being seen on only the forward or only the reverse strand) in the reads. More bias is
* indicative of false positive calls.
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
*/
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Consistency of the site with two (and only two) segregating haplotypes. Higher scores
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
*/
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Phred-scaled p-value of the genotype-based (using GT field) Hardy-Weinberg test for disequilibrium
*/
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
private static final int MIN_SAMPLES = 10;

View File

@ -16,7 +16,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Largest contiguous homopolymer run of the variant allele in either direction on the reference.
*/
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
private boolean ANNOTATE_INDELS = true;

View File

@ -17,14 +17,15 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 5/16/11
*/
// A set of annotations calculated directly from the GLs
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
/**
* Likelihood-based (using PL field) test for the inbreeding among samples.
*
* A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
* well with limited coverage per sample. See the 1000 Genomes Phase I release for
* more information.
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
private static final int MIN_SAMPLES = 10;

View File

@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: Mar 11, 2011
* Time: 11:47:33 AM
* To change this template use File | Settings | File Templates.
* Rough category of indel type (insertion, deletion, multi-allelic, other)
*/
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
* Triplet annotation: fraction of MAPQ == 0, MAPQ < 10, and count of all mapped reads
*/
public class LowMQ extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -14,6 +14,9 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
*/
public class MappingQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Total count across all samples of mapping quality zero reads
*/
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,85 +1,81 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Count for each sample of mapping quality zero reads.
 *
 * For indel sites that carry an extended event pileup the count is taken over that pileup;
 * otherwise it is taken over the base pileup.
 */
public class MappingQualityZeroBySample extends GenotypeAnnotation {

    /**
     * Counts the pileup elements of one sample whose mapping quality is zero.
     *
     * @param tracker not read by this annotation
     * @param walker  not read by this annotation
     * @param ref     not read by this annotation
     * @param context alignment context supplying the pileup to scan
     * @param vc      variant being annotated; decides whether the extended event pileup is preferred
     * @param g       genotype of the sample being annotated
     * @return a single-entry map from the annotation key to the count formatted as a decimal string,
     *         or null when the genotype is null or not called, or when the context offers no pileup
     */
    public Map<String, Object> annotate(RefMetaDataTracker tracker,
                                        AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
        if ( g == null || !g.isCalled() )
            return null;

        // pick the pileup first: extended events for indels when present, plain bases otherwise
        final ReadBackedPileup reads;
        if ( vc.isIndel() && context.hasExtendedEventPileup() )
            reads = context.getExtendedEventPileup();
        else if ( context.hasBasePileup() )
            reads = context.getBasePileup();
        else
            return null;

        int zeroQualityReads = 0;
        if ( reads != null ) {
            for ( final PileupElement element : reads ) {
                if ( element.getMappingQual() == 0 )
                    zeroQualityReads++;
            }
        }

        final Map<String, Object> annotations = new HashMap<String, Object>();
        annotations.put(getKeyNames().get(0), String.format("%d", zeroQualityReads));
        return annotations;
    }

    /**
     * @return the single key this annotation writes
     */
    public List<String> getKeyNames() {
        final String key = VCFConstants.MAPPING_QUALITY_ZERO_KEY;
        return Arrays.asList(key);
    }

    /**
     * @return the FORMAT header line (one Integer value) describing the key written by this annotation
     */
    public List<VCFFormatHeaderLine> getDescriptions() {
        final VCFFormatHeaderLine headerLine = new VCFFormatHeaderLine(
                getKeyNames().get(0),
                1,
                VCFHeaderLineType.Integer,
                "Number of Mapping Quality Zero Reads per sample");
        return Arrays.asList(headerLine);
    }
}
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Count for each sample of mapping quality zero reads.
 *
 * For indel sites that carry an extended event pileup the count is taken over that pileup;
 * otherwise it is taken over the base pileup.
 */
public class MappingQualityZeroBySample extends GenotypeAnnotation {

    /**
     * Counts the pileup elements of one sample whose mapping quality is zero.
     *
     * @param tracker not read by this annotation
     * @param walker  not read by this annotation
     * @param ref     not read by this annotation
     * @param context alignment context supplying the pileup to scan
     * @param vc      variant being annotated; decides whether the extended event pileup is preferred
     * @param g       genotype of the sample being annotated
     * @return a single-entry map from the annotation key to the count formatted as a decimal string,
     *         or null when the genotype is null or not called, or when the context offers no pileup
     */
    public Map<String, Object> annotate(RefMetaDataTracker tracker,
                                        AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
        if ( g == null || !g.isCalled() )
            return null;

        // pick the pileup first: extended events for indels when present, plain bases otherwise
        final ReadBackedPileup reads;
        if ( vc.isIndel() && context.hasExtendedEventPileup() )
            reads = context.getExtendedEventPileup();
        else if ( context.hasBasePileup() )
            reads = context.getBasePileup();
        else
            return null;

        int zeroQualityReads = 0;
        if ( reads != null ) {
            for ( final PileupElement element : reads ) {
                if ( element.getMappingQual() == 0 )
                    zeroQualityReads++;
            }
        }

        final Map<String, Object> annotations = new HashMap<String, Object>();
        annotations.put(getKeyNames().get(0), String.format("%d", zeroQualityReads));
        return annotations;
    }

    /**
     * @return the single key this annotation writes
     */
    public List<String> getKeyNames() {
        final String key = VCFConstants.MAPPING_QUALITY_ZERO_KEY;
        return Arrays.asList(key);
    }

    /**
     * @return the FORMAT header line (one Integer value) describing the key written by this annotation
     */
    public List<VCFFormatHeaderLine> getDescriptions() {
        final VCFFormatHeaderLine headerLine = new VCFFormatHeaderLine(
                getKeyNames().get(0),
                1,
                VCFHeaderLineType.Integer,
                "Number of Mapping Quality Zero Reads per sample");
        return Arrays.asList(headerLine);
    }
}

View File

@ -17,8 +17,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Fraction of all reads across samples that have mapping quality zero
*/
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -17,11 +17,8 @@ import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 5/16/11
* The number of N bases, counting only SOLiD data
*/
public class NBaseCount extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if( stratifiedContexts.size() == 0 )

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -15,7 +16,11 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
*
* Low scores are indicative of false positive calls and artifacts.
*/
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,6 +21,9 @@ import java.util.List;
import java.util.Map;
/**
* Root Mean Square of the mapping quality of the reads across all samples.
*/
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,7 +21,9 @@ import java.util.List;
import java.util.Map;
/**
* Abstract root for all RankSum based annotations
*/
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
static final boolean DEBUG = false;

View File

@ -1,209 +1,207 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Per-sample total read depth (DP, including MQ0 reads) and the fraction of non-MQ0 reads
 * supporting each alternate allele (FA), computed for SNP and indel sites.
 */
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
    // constant, for speed: no need to create a key string for deletion allele every time
    private static final String DEL = "DEL";

    /**
     * Dispatches to the SNP or indel implementation depending on the variant type.
     *
     * @param tracker           not read by this annotation
     * @param walker            not read by this annotation
     * @param ref               not read by this annotation
     * @param stratifiedContext alignment context of the sample being annotated
     * @param vc                variant being annotated
     * @param g                 genotype of the sample being annotated
     * @return map holding DP and, when at least one non-MQ0 read exists, FA; null when the genotype
     *         is null or not called, the variant is neither a SNP nor an indel, or the required
     *         pileup is unavailable
     */
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref,
                                        AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
        if ( g == null || !g.isCalled() )
            return null;

        if ( vc.isSNP() )
            return annotateSNP(stratifiedContext, vc);
        if ( vc.isIndel() )
            return annotateIndel(stratifiedContext, vc);

        return null;
    }

    /**
     * Computes DP and FA from the base pileup, keying each alternate allele by its first base.
     */
    private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
        if ( ! stratifiedContext.hasBasePileup() ) return null;

        final Map<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
        for ( Allele allele : vc.getAlternateAlleles() )
            alleleCounts.put(allele.getBases()[0], 0);

        final ReadBackedPileup pileup = stratifiedContext.getBasePileup();
        final int totalDepth = pileup.size();

        final Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

        if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!!

        int mq0 = 0; // number of reads that are actually mq0; excluded from the FA denominator
        for ( PileupElement p : pileup ) {
            if ( p.getMappingQual() == 0 ) {
                mq0++;
                continue;
            }
            // non-mq0 read: count it only when its base is one of the alt alleles
            final Integer seen = alleleCounts.get(p.getBase());
            if ( seen != null )
                alleleCounts.put(p.getBase(), seen + 1);
        }

        if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do

        // one entry per alternate allele, in the order the alleles are listed
        final String[] fracs = new String[vc.getAlternateAlleles().size()];
        for ( int i = 0; i < fracs.length; i++ )
            fracs[i] = formatFraction(alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]), totalDepth - mq0);

        map.put(getKeyNames().get(1), fracs);
        return map;
    }

    /**
     * Computes DP and FA from the extended event pileup. Insertions are matched on their event
     * bases; a deletion is counted only when its length equals the reference allele length.
     */
    private Map<String,Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
        if ( ! stratifiedContext.hasExtendedEventPileup() )
            return null;

        final ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
        if ( pileup == null )
            return null;
        final int totalDepth = pileup.size();

        final Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

        if ( totalDepth == 0 ) return map;

        int mq0 = 0; // number of reads that are actually mq0; excluded from the FA denominator

        final Map<String, Integer> alleleCounts = new HashMap<String, Integer>();
        final Allele refAllele = vc.getReference();

        for ( Allele allele : vc.getAlternateAlleles() ) {
            if ( allele.isNoCall() )
                continue; // not tracked; reported below with a fraction of zero
            alleleCounts.put(getAlleleRepresentation(allele), 0);
        }

        for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
            if ( e.getMappingQual() == 0 ) {
                mq0++;
                continue;
            }

            final String key;
            if ( e.isInsertion() )
                key = e.getEventBases();
            else if ( e.isDeletion() && e.getEventLength() == refAllele.length() )
                key = DEL; // this is indeed the deletion allele recorded in VC
            else
                continue; // no event, or a deletion of a different length

            final Integer seen = alleleCounts.get(key);
            if ( seen != null )
                alleleCounts.put(key, seen + 1);
        }

        if ( mq0 == totalDepth ) return map;

        // Size by the number of alternate alleles, not by alleleCounts.size(): skipped no-call
        // alleles make the map smaller than the allele list, which previously overran the array
        // or unboxed a null count.
        final String[] fracs = new String[vc.getAlternateAlleles().size()];
        for ( int i = 0; i < fracs.length; i++ )
            fracs[i] = formatFraction(alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))), totalDepth - mq0);

        map.put(getKeyNames().get(1), fracs);
        return map;
    }

    /**
     * Formats count / usableDepth with three decimals; a null count (allele not tracked) counts as zero.
     */
    private static String formatFraction(Integer count, int usableDepth) {
        final int supporting = ( count == null ) ? 0 : count;
        // NOTE(review): String.format uses the default locale, so the decimal separator may vary - confirm
        return String.format("%.3f", ((float)supporting)/usableDepth);
    }

    /**
     * Maps an allele to its counting key: the shared DEL key for a null allele, its bases otherwise.
     */
    private String getAlleleRepresentation(Allele allele) {
        if ( allele.isNull() ) { // deletion wrt the ref
            return DEL;
        } else { // insertion, pass actual bases
            return allele.getBaseString();
        }
    }

    /**
     * @return the two keys written by this annotation: depth first, allelic fractions second
     */
    public List<String> getKeyNames() { return Arrays.asList("DP","FA"); }

    /**
     * @return the FORMAT header lines for the depth and allelic-fraction keys, in key order
     */
    public List<VCFFormatHeaderLine> getDescriptions() {
        return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0),
                        1,
                        VCFHeaderLineType.Integer,
                        "Total read depth per sample, including MQ0"),
                new VCFFormatHeaderLine(getKeyNames().get(1),
                        VCFHeaderLineCount.UNBOUNDED,
                        VCFHeaderLineType.Float,
                        "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
    }
}
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Unsupported
*/
@Hidden
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
private static String REF_ALLELE = "REF";
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref,
AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;
if ( vc.isSNP() )
return annotateSNP(stratifiedContext, vc);
if ( vc.isIndel() )
return annotateIndel(stratifiedContext, vc);
return null;
}
private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {
if ( ! stratifiedContext.hasBasePileup() ) return null;
HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
for ( Allele allele : vc.getAlternateAlleles() )
alleleCounts.put(allele.getBases()[0], 0);
ReadBackedPileup pileup = stratifiedContext.getBasePileup();
int totalDepth = pileup.size();
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), totalDepth); // put total depth in right away
if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!!
int mq0 = 0; // number of "ref" reads that are acually mq0
for ( PileupElement p : pileup ) {
if ( p.getMappingQual() == 0 ) {
mq0++;
continue;
}
if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt
alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
}
if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do
// we need to add counts in the correct order
String[] fracs = new String[alleleCounts.size()];
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0));
}
map.put(getKeyNames().get(1), fracs);
return map;
}
/**
 * Builds the per-sample depth and alternate-allele-fraction annotations for an indel site
 * from the extended-event pileup.
 *
 * The returned map always carries the total pileup depth under {@code getKeyNames().get(0)}.
 * When at least one pileup element has non-zero mapping quality it also carries, under
 * {@code getKeyNames().get(1)}, one formatted fraction per alternate allele, in the order the
 * alleles appear in {@code vc}. The denominator of each fraction is the depth excluding
 * mapping-quality-zero elements.
 *
 * @param stratifiedContext alignment context for the sample; must provide an extended event pileup
 * @param vc                variant context supplying the reference and alternate alleles
 * @return the annotation map, or {@code null} if no extended event pileup is available
 */
private Map<String,Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
    if ( ! stratifiedContext.hasExtendedEventPileup() ) {
        return null;
    }

    ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
    if ( pileup == null )
        return null;
    int totalDepth = pileup.size();

    Map<String, Object> map = new HashMap<String, Object>();
    map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

    if ( totalDepth == 0 ) return map;

    int mq0 = 0; // number of pileup elements with mapping quality zero

    // One counter per alternate allele, keyed by its string representation.
    Map<String, Integer> alleleCounts = new HashMap<String, Integer>();
    Allele refAllele = vc.getReference();

    for ( Allele allele : vc.getAlternateAlleles() ) {
        if ( allele.isNoCall() ) {
            continue; // no-call alleles are never tallied; they are reported with a zero fraction below
        }
        alleleCounts.put(getAlleleRepresentation(allele), 0);
    }

    for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
        if ( e.getMappingQual() == 0 ) {
            mq0++;
            continue;
        }

        final String eventKey;
        if ( e.isInsertion() ) {
            eventKey = e.getEventBases();
        } else if ( e.isDeletion() && e.getEventLength() == refAllele.length() ) {
            // the deletion length matches the reference allele, so this is the deletion recorded in the VC
            eventKey = DEL;
        } else {
            continue; // neither an insertion nor a deletion of the recorded length
        }

        if ( alleleCounts.containsKey(eventKey) ) {
            alleleCounts.put(eventKey, alleleCounts.get(eventKey) + 1);
        }
    }

    if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do

    // Size the output by the number of alternate alleles (not by the number of tallied keys) so
    // that every allele gets a slot even when some alleles were skipped above.
    final int numAltAlleles = vc.getAlternateAlleles().size();
    final int usableDepth = totalDepth - mq0;
    String[] fracs = new String[numAltAlleles];
    for ( int i = 0; i < numAltAlleles; i++ ) {
        final Integer tallied = alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i)));
        // An allele without a counter (e.g. a skipped no-call) has no supporting reads.
        final int supporting = ( tallied == null ) ? 0 : tallied;
        fracs[i] = String.format("%.3f", ((float)supporting)/usableDepth);
    }

    map.put(getKeyNames().get(1), fracs);

    return map;
}
/**
 * Maps an allele to the string key used when tallying indel events.
 *
 * @param allele the allele to represent
 * @return {@code DEL} when the allele is null (a deletion with respect to the reference),
 *         otherwise the allele's base string (the inserted bases)
 */
private String getAlleleRepresentation(Allele allele) {
    // A null allele marks a deletion; any other allele is identified by its actual bases.
    return allele.isNull() ? DEL : allele.getBaseString();
}
// public String getIndelBases()
/**
 * Names of the annotation keys produced by this class, in a fixed order:
 * index 0 is the depth key and index 1 is the allele-fraction key.
 *
 * @return a fixed-size list containing "DP" followed by "FA"
 */
public List<String> getKeyNames() {
    final String[] keys = { "DP", "FA" };
    return Arrays.asList(keys);
}
/**
 * Header lines describing the two keys returned by {@link #getKeyNames()}.
 *
 * @return a two-element list: an Integer line with a count of 1 for the first key, followed by
 *         a Float line with an unbounded count for the second key
 */
public List<VCFFormatHeaderLine> getDescriptions() {
    final List<String> keys = getKeyNames();

    final VCFFormatHeaderLine depthLine = new VCFFormatHeaderLine(
            keys.get(0),
            1,
            VCFHeaderLineType.Integer,
            "Total read depth per sample, including MQ0");

    final VCFFormatHeaderLine fractionLine = new VCFFormatHeaderLine(
            keys.get(1),
            VCFHeaderLineCount.UNBOUNDED,
            VCFHeaderLineType.Float,
            "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample");

    return Arrays.asList(depthLine, fractionLine);
}
}

View File

@ -19,11 +19,8 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 3/30/11
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads, this is indicative of error.
*/
public class ReadPosRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }

View File

@ -15,8 +15,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* SB annotation value by depth of alt-containing samples
*/
public class SBByDepth extends AnnotationByDepth {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -41,7 +41,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* List all of the samples in the info field
*/
public class SampleList extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
* Fraction of reads containing spanning deletions at this site.
*/
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -19,12 +20,9 @@ import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 6/29/11
* Time: 3:14 PM
* To change this template use File | Settings | File Templates.
* Counts of bases from SLX, 454, and SOLiD at this site
*/
@Hidden
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
private String nSLX = "NumSLX";
private String n454 ="Num454";

View File

@ -42,6 +42,7 @@ import java.util.*;
*
* <p>Body test</p>
*/
@Hidden
public class DocumentationTest extends RodWalker<Integer, Integer> {
// the docs for the arguments are in the collection
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

View File

@ -66,7 +66,8 @@ public class GenericDocumentationHandler extends DocumentedGATKFeatureHandler {
public boolean includeInDocs(ClassDoc doc) {
try {
Class type = HelpUtils.getClassForDoc(doc);
return JVMUtils.isConcrete(type);
boolean hidden = ! getDoclet().showHiddenFeatures() && type.isAnnotationPresent(Hidden.class);
return ! hidden && JVMUtils.isConcrete(type);
} catch ( ClassNotFoundException e ) {
return false;
}