From 605984353fe799c19d4a02ef152e5f4878a14260 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Tue, 8 May 2012 09:33:38 -0400 Subject: [PATCH] Pool Caller improvements: a) New non-standard private annotation Heteroplasmy which measures mean heteroplasmy (pool AF) across called samples, meant for easier mtDNA calling. Pure homoplasmic variants (pool AF = 1 or 0) would have heteroplasmy=1. b) Don't output pool genotypes by default for large pool sizes because it makes file sizes explode and they're unreadable. c) Refactored classes ExactACCounts and ExactACSet and moved to superclass AlleleFrequencyCalculationModel because both Pool and Exact AF calculation models will use it. d) Initial refactorings and skeleton for linearized multi-allelic exact model (not done yet). e) Unit test for Pool AF calculation model. --- .../AlleleFrequencyCalculationModel.java | 78 +++++++++++++++++++ .../genotyper/ExactAFCalculationModel.java | 77 ------------------ 2 files changed, 78 insertions(+), 77 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java index be4ceae53..088e41a76 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/AlleleFrequencyCalculationModel.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.io.PrintStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; @@ -102,6 +103,83 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable { return genotypeLikelihoods; } + // ------------------------------------------------------------------------------------- + // + // Multi-allelic implementation. + // + // ------------------------------------------------------------------------------------- + + protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first + + // a wrapper around the int array so that we can make it hashable + protected static final class ExactACcounts { + + protected final int[] counts; + private int hashcode = -1; + + public ExactACcounts(final int[] counts) { + this.counts = counts; + } + + public int[] getCounts() { + return counts; + } + + @Override + public boolean equals(Object obj) { + return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts); + } + + @Override + public int hashCode() { + if ( hashcode == -1 ) + hashcode = Arrays.hashCode(counts); + return hashcode; + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append(counts[0]); + for ( int i = 1; i < counts.length; i++ ) { + sb.append("/"); + sb.append(counts[i]); + } + return sb.toString(); + } + } + + // This class represents a column in the Exact AC calculation matrix + protected static final class ExactACset { + + // the counts of the various alternate alleles which this column represents + final ExactACcounts ACcounts; + + // the column of the matrix + final double[] log10Likelihoods; + + int sum = -1; + + public ExactACset(final int size, final ExactACcounts ACcounts) { + this.ACcounts = ACcounts; + log10Likelihoods = new double[size]; + Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY); + } + + // sum of all the non-reference alleles + public int getACsum() { + if ( sum == -1 ) { + sum = 0; + for ( int count : ACcounts.getCounts() ) + sum += count; + } + return sum; + } + + public boolean equals(Object obj) { + return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); + } + } /** * Must be overridden by concrete subclasses * @param vc variant context with alleles and genotype likelihoods diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index 608a29e38..e91691c2c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -101,83 +101,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { return orderedBestAlleles; } - // ------------------------------------------------------------------------------------- - // - // Multi-allelic implementation. - // - // ------------------------------------------------------------------------------------- - - private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first - - // a wrapper around the int array so that we can make it hashable - private static final class ExactACcounts { - - private final int[] counts; - private int hashcode = -1; - - public ExactACcounts(final int[] counts) { - this.counts = counts; - } - - public int[] getCounts() { - return counts; - } - - @Override - public boolean equals(Object obj) { - return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts)obj).counts); - } - - @Override - public int hashCode() { - if ( hashcode == -1 ) - hashcode = Arrays.hashCode(counts); - return hashcode; - } - - @Override - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append(counts[0]); - for ( int i = 1; i < counts.length; i++ ) { - sb.append("/"); - sb.append(counts[i]); - } - return sb.toString(); - } - } - - // This class represents a column in the Exact AC calculation matrix - private static final class ExactACset { - - // the counts of the various alternate alleles which this column represents - final ExactACcounts ACcounts; - - // the column of the matrix - final double[] log10Likelihoods; - - int sum = -1; - - public ExactACset(final int size, final ExactACcounts ACcounts) { - this.ACcounts = ACcounts; - log10Likelihoods = new double[size]; - Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY); - } - - // sum of all the non-reference alleles - public int getACsum() { - if ( sum == -1 ) { - sum = 0; - for ( int count : ACcounts.getCounts() ) - sum += count; - } - return sum; - } - - public boolean equals(Object obj) { - return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts); - } - } public static void linearExactMultiAllelic(final GenotypesContext GLs, final int numAlternateAlleles,