diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java
index fb094087d..9d441918f 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java
@@ -62,7 +62,7 @@ public class BaseCounts implements InfoFieldAnnotation {
             }
         }
         Map map = new HashMap();
-        map.put("BaseCounts", counts);
+        map.put(getKeyNames().get(0), counts);
         return map;
     }
 
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java
new file mode 100755
index 000000000..367bce7c6
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GLstats.java
@@ -0,0 +1,83 @@
+package org.broadinstitute.sting.gatk.walkers.annotator;
+
+import org.broad.tribble.util.variantcontext.Genotype;
+import org.broad.tribble.util.variantcontext.VariantContext;
+import org.broad.tribble.vcf.VCFHeaderLineType;
+import org.broad.tribble.vcf.VCFInfoHeaderLine;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
+import org.broadinstitute.sting.utils.MathUtils;
+
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Arrays;
+
+/**
+ * A set of annotations calculated directly from the genotype likelihoods (GLs).
+ *
+ * Emits the InbreedingCoeff INFO field: F = 1 - (observed hets / expected hets), with both
+ * terms derived from sums of the normalized per-sample likelihoods.
+ *
+ * User: rpoplin
+ * Date: 5/16/11
+ */
+public class GLstats implements InfoFieldAnnotation {
+
+    // sites with fewer genotypes than this are left unannotated
+    private static final int MIN_SAMPLES = 10;
+
+    /**
+     * Computes the inbreeding coefficient for a biallelic site from its genotype likelihoods.
+     *
+     * @param tracker            not read by this annotation
+     * @param ref                not read by this annotation
+     * @param stratifiedContexts not read by this annotation
+     * @param vc                 variant whose genotypes supply the likelihoods
+     * @return a one-entry map from the InbreedingCoeff key to F formatted with "%.4f", or null when
+     *         the site has fewer than MIN_SAMPLES genotypes, is not biallelic, or F is undefined
+     */
+    public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
+
+        final Map<String, Genotype> genotypes = vc.getGenotypes();
+        if ( genotypes == null || genotypes.size() < MIN_SAMPLES || !vc.isBiallelic() )
+            return null;
+
+        double refCount = 0.0;
+        double hetCount = 0.0;
+        double homCount = 0.0;
+        int N = 0; // number of samples that have likelihoods
+        for ( final Genotype g : genotypes.values() ) {
+            // skip samples without likelihoods so that N really is what its comment says
+            if ( g.isNoCall() || !g.hasLikelihoods() )
+                continue;
+
+            N++;
+            final double[] normalizedLikelihoods = MathUtils.normalizeFromLog10( g.getLikelihoods().getAsVector() );
+            refCount += normalizedLikelihoods[0];
+            hetCount += normalizedLikelihoods[1];
+            homCount += normalizedLikelihoods[2];
+        }
+
+        final double p = ( 2.0 * refCount + hetCount ) / ( 2.0 * (refCount + hetCount + homCount) ); // expected reference allele frequency
+        final double q = 1.0 - p; // expected alternative allele frequency
+        final double F = 1.0 - ( hetCount / ( 2.0 * p * q * (double)N ) ); // inbreeding coefficient
+
+        // F is NaN when no sample had likelihoods (0/0) and NaN or infinite when p*q is zero;
+        // emit nothing rather than a non-numeric value
+        if ( Double.isNaN(F) || Double.isInfinite(F) )
+            return null;
+
+        final Map<String, Object> map = new HashMap<String, Object>();
+        map.put(getKeyNames().get(0), String.format("%.4f", F));
+        return map;
+    }
+
+    /** @return the INFO key written by this annotation */
+    public List<String> getKeyNames() { return Arrays.asList("InbreedingCoeff"); }
+
+    /** @return the VCF header line describing InbreedingCoeff */
+    public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("InbreedingCoeff", 1, VCFHeaderLineType.Float, "Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation")); }
+}
\ No newline at end of file
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
new file mode 100755
index 000000000..760f68b9e
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
@@ -0,0 +1,77 @@
+package org.broadinstitute.sting.gatk.walkers.annotator;
+
+import org.broad.tribble.util.variantcontext.VariantContext;
+import org.broad.tribble.vcf.VCFHeaderLineType;
+import org.broad.tribble.vcf.VCFInfoHeaderLine;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Annotates a site with the fraction of N bases among the pileup bases that come from SOLiD reads.
+ *
+ * User: rpoplin
+ * Date: 5/16/11
+ */
+public class NBaseCount implements InfoFieldAnnotation {
+
+    /**
+     * Counts N bases and regular bases contributed by SOLiD reads over every context's base pileup.
+     *
+     * @param tracker            not read by this annotation
+     * @param ref                not read by this annotation
+     * @param stratifiedContexts alignment contexts whose base pileups are scanned
+     * @param vc                 not read by this annotation
+     * @return a one-entry map from the PercentNBaseSolid key to N / (N + regular + 1) formatted
+     *         with "%.4f", or null when stratifiedContexts is empty
+     */
+    public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
+        if( stratifiedContexts.size() == 0 )
+            return null;
+
+        int countNBaseSolid = 0;
+        int countRegularBaseSolid = 0;
+
+        for( final AlignmentContext context : stratifiedContexts.values() ) {
+            for( final PileupElement p : context.getBasePileup() ) {
+                if( !isSolidRead(p) )
+                    continue;
+
+                if( BaseUtils.isNBase( p.getBase() ) ) {
+                    countNBaseSolid++;
+                } else if( BaseUtils.isRegularBase( p.getBase() ) ) {
+                    countRegularBaseSolid++;
+                }
+            }
+        }
+
+        // the +1 keeps the denominator non-zero when no SOLiD bases were counted
+        final Map<String, Object> map = new HashMap<String, Object>();
+        map.put(getKeyNames().get(0), String.format("%.4f", (double)countNBaseSolid / (double)(countNBaseSolid + countRegularBaseSolid + 1)));
+        return map;
+    }
+
+    // true when the element's read has a read group whose platform name contains "SOLID" (any case);
+    // reads with no read group or no platform are treated as non-SOLiD instead of throwing
+    private static boolean isSolidRead(final PileupElement p) {
+        if( p.getRead().getReadGroup() == null )
+            return false;
+
+        final String platform = p.getRead().getReadGroup().getPlatform();
+        return platform != null && platform.toUpperCase().contains("SOLID");
+    }
+
+    /** @return the INFO key written by this annotation */
+    public List<String> getKeyNames() { return Arrays.asList("PercentNBaseSolid"); }
+
+    /** @return the VCF header line for PercentNBaseSolid; one Float value, matching what annotate() emits */
+    public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("PercentNBaseSolid", 1, VCFHeaderLineType.Float, "Percentage of N bases in the pileup (counting only SOLiD reads)")); }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/java/src/org/broadinstitute/sting/utils/BaseUtils.java
index 3f2448ea5..491e4e25e 100644
--- a/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@@ -238,7 +238,7 @@ public class BaseUtils {
     }
 
     static public boolean isNBase(byte base) {
-        return base == 'N';
+        return base == 'N' || base == 'n';
     }
 
     /**