diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AlleleBalance.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AlleleBalance.java index b6974a6e0..e04a0c3e1 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AlleleBalance.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AlleleBalance.java @@ -25,19 +25,21 @@ package org.broadinstitute.gatk.tools.walkers.annotator; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypesContext; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeaderLineType; +import htsjdk.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.MathUtils; -import htsjdk.variant.vcf.VCFHeaderLineType; -import htsjdk.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; -import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.GenotypesContext; -import htsjdk.variant.variantcontext.VariantContext; import java.util.Arrays; import java.util.HashMap; @@ -55,16 +57,14 @@ import java.util.Map; */ public class AlleleBalance extends InfoFieldAnnotation { - - char[] BASES = {'A','C','G','T'}; public Map annotate(final RefMetaDataTracker tracker, final 
AnnotatorCompatible walker, final ReferenceContext ref, final Map stratifiedContexts, final VariantContext vc, final Map stratifiedPerReadAlleleLikelihoodMap) { - if ( stratifiedContexts.size() == 0 ) - return null; + //if ( stratifiedContexts.size() == 0 ) + // return null; if ( !vc.isBiallelic() ) return null; @@ -78,55 +78,45 @@ public class AlleleBalance extends InfoFieldAnnotation { double weightHet = 0.0; double overallNonDiploid = 0.0; for ( Genotype genotype : genotypes ) { - // we care only about het calls - AlignmentContext context = stratifiedContexts.get(genotype.getSampleName()); - if ( context == null ) - continue; - - final ReadBackedPileup pileup = context.getBasePileup(); if ( vc.isSNP() ) { - final String bases = new String(pileup.getBases()); - if ( bases.length() == 0 ) - return null; - double pTrue = 1.0 - Math.pow(10.0,genotype.getLog10PError()); + final int[] counts = getCounts(genotype, stratifiedContexts, vc); + // If AD was not calculated, we can't continue + if(counts == null) + continue; + + final int n_allele = counts.length; + int count_sum = 0; + for(int i=0; i bestOtherCount ) - bestOtherCount = count; + if( counts[i] > bestOtherCount ) + bestOtherCount = counts[i]; } - final int otherCount = bases.length() - alleleCount; - ratioHom += pTrue*( (double) alleleCount)/(alleleCount+bestOtherCount); + final int otherCount = count_sum - alleleCount; + ratioHom += pTrue*( (double) alleleCount)/((double) (alleleCount+bestOtherCount)); weightHom += pTrue; - overallNonDiploid += ((double ) otherCount)/(bases.length()*genotypes.size()); + overallNonDiploid += ((double ) otherCount)/((double) count_sum*genotypes.size()); } // Allele Balance for indels was not being computed correctly (since there was no allele matching). Instead of // prolonging the life of imperfect code, I've decided to delete it. 
If someone else wants to try again from @@ -136,7 +126,7 @@ public class AlleleBalance extends InfoFieldAnnotation { // make sure we had a het genotype - Map<String, Object> map = new HashMap<String, Object>(); + Map<String, Object> map = new HashMap<>(); if ( weightHet > 0.0 ) { map.put("ABHet",ratioHet/weightHet); } @@ -151,10 +141,62 @@ public class AlleleBalance extends InfoFieldAnnotation { return map; } + /** + * Provide a centralized method of getting the number of reads per allele, + * depending on the input given. Will use the following (in order of preference): + * - genotype.getAD() + * - reads from an AlignmentContext + * - reads from a PerReadAlleleLikelihoodMap (Not yet implemented) + * + * + * @param genotype The genotype of interest + * @param stratifiedContexts A mapping of sample name to AlignmentContext + * @param vc The variant context being annotated + * @return The per-allele read counts, or null if they could not be determined + */ + private int[] getCounts(final Genotype genotype, + final Map<String, AlignmentContext> stratifiedContexts, + final VariantContext vc){ + + // Can't do anything without a genotype here + if(genotype == null) + return null; + + int[] retVal = genotype.getAD(); + AlignmentContext context; + + if ( retVal == null && stratifiedContexts != null && + (context = stratifiedContexts.get(genotype.getSampleName())) != null){ + // If we get to this point, the getAD() function returned no information + // about AlleleDepth by Sample - perhaps it wasn't annotated? + // In that case, let's try to build it up using the algorithm that + // was here in v 3.1-1 and earlier + // Also, b/c of the assignment check in the if statement above, + // we know we have a valid AlignmentContext for this sample!
+ + final ReadBackedPileup pileup = context.getBasePileup(); + final String bases = new String(pileup.getBases()); + List<Allele> alleles = vc.getAlleles(); + final int n_allele = alleles.size(); + retVal = new int[n_allele]; + + // Calculate the depth for each allele, under the assumption that + // the allele is a single base + int i=0; + for(Allele a : alleles){ + retVal[i] = MathUtils.countOccurrences(a.toString().charAt(0), bases); + i++; + } + + } + + return retVal; + + } public List<String> getKeyNames() { return Arrays.asList("ABHet","ABHom","OND"); } public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("ABHet", 1, VCFHeaderLineType.Float, "Allele Balance for hets (ref/(ref+alt))"), new VCFInfoHeaderLine("ABHom", 1, VCFHeaderLineType.Float, "Allele Balance for homs (A/(A+O))"), new VCFInfoHeaderLine("OND", 1, VCFHeaderLineType.Float, "Overall non-diploid ratio (alleles/(alleles+non-alleles))")); } -} +} \ No newline at end of file diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AlleleBalanceBySample.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AlleleBalanceBySample.java index abf1cfc30..c29c231a8 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AlleleBalanceBySample.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/tools/walkers/annotator/AlleleBalanceBySample.java @@ -25,24 +25,31 @@ package org.broadinstitute.gatk.tools.walkers.annotator; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.Genotype; +import htsjdk.variant.variantcontext.GenotypeBuilder; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFFormatHeaderLine; +import htsjdk.variant.vcf.VCFHeaderLineType; import org.broadinstitute.gatk.engine.contexts.AlignmentContext; import org.broadinstitute.gatk.engine.contexts.ReferenceContext; import
org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation; +import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele; import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap; -import org.broadinstitute.gatk.utils.MathUtils; -import htsjdk.variant.vcf.VCFFormatHeaderLine; -import htsjdk.variant.vcf.VCFHeaderLineType; -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.Genotype; -import htsjdk.variant.variantcontext.GenotypeBuilder; -import htsjdk.variant.variantcontext.VariantContext; +import org.broadinstitute.gatk.utils.pileup.PileupElement; +import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import java.util.Arrays; -import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; /** @@ -64,50 +71,97 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim final Genotype g, final GenotypeBuilder gb, final PerReadAlleleLikelihoodMap alleleLikelihoodMap){ - if ( stratifiedContext == null ) + + + // We need a heterozygous genotype and either a context or alleleLikelihoodMap + if ( g == null || !g.isCalled() || !g.isHet() || ( stratifiedContext == null && alleleLikelihoodMap == null) ) + return; + + // Test for existence of allele, and manually check isSNP() + // and isBiallelic() while ignoring the allele + boolean biallelicSNP = vc.isSNP() && vc.isBiallelic(); + + if(vc.hasAllele(GVCF_NONREF)){ + // If we have the GVCF allele, then the SNP is biallelic + // iff there are 3 alleles and both the reference and first alt + // allele are length 1. 
+ biallelicSNP = vc.getAlleles().size() == 3 && + vc.getReference().length() == 1 && + vc.getAlternateAllele(0).length() == 1; + } + + if ( !biallelicSNP ) + return; + + Double ratio; + if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty()) + ratio = annotateWithLikelihoods(alleleLikelihoodMap, vc); + else if ( stratifiedContext != null ) + ratio = annotateWithPileup(stratifiedContext, vc); + else return; - Double ratio = annotateSNP(stratifiedContext, vc, g); if (ratio == null) return; - gb.attribute(getKeyNames().get(0), Double.valueOf(String.format("%.2f", ratio.doubleValue()))); + gb.attribute(getKeyNames().get(0), Double.valueOf(String.format("%.2f", ratio))); } - private Double annotateSNP(AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { - double ratio = -1; + private static final Allele GVCF_NONREF = Allele.create("<NON_REF>", false); - if ( !vc.isSNP() ) - return null; + private Double annotateWithPileup(final AlignmentContext stratifiedContext, final VariantContext vc) { - if ( !vc.isBiallelic() ) - return null; + final HashMap<Byte, Integer> alleleCounts = new HashMap<>(); + for ( final Allele allele : vc.getAlleles() ) + alleleCounts.put(allele.getBases()[0], 0); - if ( g == null || !g.isCalled() ) - return null; + final ReadBackedPileup pileup = stratifiedContext.getBasePileup(); + for ( final PileupElement p : pileup ) { + if ( alleleCounts.containsKey(p.getBase()) ) + alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1); + } - if (!g.isHet()) - return null; - - Collection<Allele> altAlleles = vc.getAlternateAlleles(); - if ( altAlleles.size() == 0 ) - return null; - - final String bases = new String(stratifiedContext.getBasePileup().getBases()); - if ( bases.length() == 0 ) - return null; - char refChr = vc.getReference().toString().charAt(0); - char altChr = vc.getAlternateAllele(0).toString().charAt(0); - - int refCount = MathUtils.countOccurrences(refChr, bases); - int altCount = MathUtils.countOccurrences(altChr, bases); + // we need to
add counts in the correct order + final int[] counts = new int[alleleCounts.size()]; + counts[0] = alleleCounts.get(vc.getReference().getBases()[0]); + for (int i = 0; i < vc.getAlternateAlleles().size(); i++) + counts[i+1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]); // sanity check - if ( refCount + altCount == 0 ) + if(counts[0] + counts[1] == 0) return null; - ratio = ((double)refCount / (double)(refCount + altCount)); - return ratio; + return ((double) counts[0] / (double)(counts[0] + counts[1])); + } + + private Double annotateWithLikelihoods(final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap, final VariantContext vc) { + final Set<Allele> alleles = new HashSet<>(vc.getAlleles()); + + // make sure that there's a meaningful relationship between the alleles in the perReadAlleleLikelihoodMap and our VariantContext + if ( ! perReadAlleleLikelihoodMap.getAllelesSet().containsAll(alleles) ) + throw new IllegalStateException("VC alleles " + alleles + " not a strict subset of per read allele map alleles " + perReadAlleleLikelihoodMap.getAllelesSet()); + + final HashMap<Allele, Integer> alleleCounts = new HashMap<>(); + for ( final Allele allele : vc.getAlleles() ) { alleleCounts.put(allele, 0); } + + for ( final Map.Entry<GATKSAMRecord, Map<Allele, Double>> el : perReadAlleleLikelihoodMap.getLikelihoodReadMap().entrySet()) { + final MostLikelyAllele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue(), alleles); + if (!
a.isInformative() ) continue; // read is non-informative + final int prevCount = alleleCounts.get(a.getMostLikelyAllele()); + alleleCounts.put(a.getMostLikelyAllele(), prevCount + 1); + } + + final int[] counts = new int[alleleCounts.size()]; + counts[0] = alleleCounts.get(vc.getReference()); + for (int i = 0; i < vc.getAlternateAlleles().size(); i++) + counts[i+1] = alleleCounts.get( vc.getAlternateAllele(i) ); + + // sanity check + if(counts[0] + counts[1] == 0) + return null; + + return ((double) counts[0] / (double)(counts[0] + counts[1])); + } public List<String> getKeyNames() { return Arrays.asList("AB"); }