diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java new file mode 100644 index 000000000..bf3b0c0e8 --- /dev/null +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/AlleleBalanceHistogramWalker.java @@ -0,0 +1,88 @@ +package org.broadinstitute.sting.oneoffprojects.walkers; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.RodVCF; +import org.broadinstitute.sting.gatk.walkers.DataSource; +import org.broadinstitute.sting.gatk.walkers.RMD; +import org.broadinstitute.sting.gatk.walkers.Requires; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; +import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; +import org.broadinstitute.sting.utils.pileup.PileupElement; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +/** + * Created by IntelliJ IDEA. + * User: chartl + * Date: Jan 26, 2010 + * Time: 3:25:11 PM + * To change this template use File | Settings | File Templates. + */ +@Requires(value= DataSource.REFERENCE,referenceMetaData = {@RMD(name="variants",type= RodVCF.class)}) +public class AlleleBalanceHistogramWalker extends RodWalker, Map>> { + + + public Map> reduceInit() { + return new HashMap>(); + } + + public Map> reduce(Map alleleBalances, Map> aggregateBalances ) { + if ( alleleBalances != null ) { + for ( String name : alleleBalances.keySet() ) { + if ( alleleBalances.get(name) != null ) { + aggregateBalances.get(name).add(alleleBalances.get(name)); + } + } + } + + return aggregateBalances; + } + + public Map map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + RodVCF vcfRod = (RodVCF) tracker.lookup("variants",null); + if ( vcfRod == null ) { + return null; + } + VCFRecord record = vcfRod.getRecord(); + + return getAlleleBalanceBySample(record,ref,context); + } + + private HashMap getAlleleBalanceBySample(VCFRecord vcf, ReferenceContext ref, AlignmentContext context) { + Map sampleContext = StratifiedAlignmentContext.splitContextBySample(context.getBasePileup(),null,null); + HashMap balances = new HashMap(); + for ( String sample : vcf.getSampleNames() ) { + balances.put(sample, getAlleleBalance(ref,sampleContext.get(sample),vcf.getGenotype(sample))); + } + + return balances; + } + + private Double getAlleleBalance(ReferenceContext ref, StratifiedAlignmentContext context, VCFGenotypeRecord vcf) { + int refBases = 0; + int altBases = 0; + for ( PileupElement e : context.getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup() ) { + if ( BaseUtils.basesAreEqual( e.getBase(), (byte) ref.getBase() ) ) { + refBases++; + } else if ( BaseUtils.basesAreEqual(e.getBase(), (byte) vcf.toVariation(ref.getBase()).getAlternativeBaseForSNP() ) ) { + altBases++; + } + } + + if ( refBases > 0 || altBases > 0) { + return ( ( double ) altBases ) / ( ( double ) altBases + ( double ) refBases ); + } else { + return null; + } + } + + +}