Pool Caller improvements: a) New non-standard private annotation Heteroplasmy, which measures mean heteroplasmy (pool AF) across called samples, meant for easier mtDNA calling. Pure homoplasmic variants (pool AF = 1 or 0) would have heteroplasmy=1. b) Don't output pool genotypes by default for large pool sizes, because doing so makes file sizes explode and the genotypes are unreadable. c) Refactored classes ExactACcounts and ExactACset and moved them to superclass AlleleFrequencyCalculationModel, because both the Pool and Exact AF calculation models will use them. d) Initial refactorings and skeleton for the linearized multi-allelic exact model (not done yet). e) Unit test for the Pool AF calculation model.

This commit is contained in:
Guillermo del Angel 2012-05-08 09:33:38 -04:00
parent 6fac8f2c70
commit 605984353f
2 changed files with 78 additions and 77 deletions

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@ -102,6 +103,83 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
return genotypeLikelihoods;
}
// -------------------------------------------------------------------------------------
//
// Multi-allelic implementation.
//
// -------------------------------------------------------------------------------------
// Index of the hom-ref (AA) entry; visible to subclasses of this model.
protected static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
/**
 * A wrapper around an int array of allele counts so that it can be used as a hashable key.
 *
 * Equality and hashing are defined by the array contents (via {@link Arrays#equals(int[], int[])}
 * and {@link Arrays#hashCode(int[])}). The array is stored by reference, not copied, and the
 * hash code is cached after its first computation, so the array must not be modified once
 * {@link #hashCode()} has been called.
 */
protected static final class ExactACcounts {

    protected final int[] counts;

    // lazily computed hash of counts; -1 means "not computed yet"
    private int hashcode = -1;

    /**
     * @param counts the allele counts to wrap; kept by reference
     */
    public ExactACcounts(final int[] counts) {
        this.counts = counts;
    }

    /**
     * @return the wrapped counts array (the same instance passed to the constructor)
     */
    public int[] getCounts() {
        return counts;
    }

    @Override
    public boolean equals(Object obj) {
        return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts) obj).counts);
    }

    @Override
    public int hashCode() {
        if ( hashcode == -1 )
            hashcode = Arrays.hashCode(counts);
        return hashcode;
    }

    /**
     * @return the counts joined by "/", e.g. "1/0/2"; the empty string when there are no counts
     */
    @Override
    public String toString() {
        // StringBuilder: the buffer never leaves this method, so no synchronization is needed
        final StringBuilder sb = new StringBuilder();
        for ( int i = 0; i < counts.length; i++ ) {
            // separator goes between elements only; starting at 0 also makes an empty array safe
            if ( i > 0 )
                sb.append("/");
            sb.append(counts[i]);
        }
        return sb.toString();
    }
}
/**
 * This class represents a column in the Exact AC calculation matrix.
 *
 * Two sets are equal when their allele counts are equal; the likelihood column itself does
 * not take part in equality or hashing.
 */
protected static final class ExactACset {

    // the counts of the various alternate alleles which this column represents
    final ExactACcounts ACcounts;

    // the column of the matrix; every entry starts out as Double.NEGATIVE_INFINITY
    final double[] log10Likelihoods;

    // cached result of getACsum(); -1 means "not computed yet"
    int sum = -1;

    /**
     * @param size     number of entries in the likelihood column
     * @param ACcounts the allele counts which this column represents
     */
    public ExactACset(final int size, final ExactACcounts ACcounts) {
        this.ACcounts = ACcounts;
        log10Likelihoods = new double[size];
        Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
    }

    /**
     * @return the sum of all the non-reference allele counts (computed once, then cached)
     */
    public int getACsum() {
        if ( sum == -1 ) {
            sum = 0;
            for ( int count : ACcounts.getCounts() )
                sum += count;
        }
        return sum;
    }

    @Override
    public boolean equals(Object obj) {
        return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
    }

    // equals() compares ACcounts only, so the hash must be derived from ACcounts only:
    // equal objects are required to have equal hash codes
    @Override
    public int hashCode() {
        return ACcounts.hashCode();
    }
}
/**
* Must be overridden by concrete subclasses
* @param vc variant context with alleles and genotype likelihoods

View File

@ -101,83 +101,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
return orderedBestAlleles;
}
// -------------------------------------------------------------------------------------
//
// Multi-allelic implementation.
//
// -------------------------------------------------------------------------------------
// Index of the hom-ref (AA) entry; private to this class.
private static final int HOM_REF_INDEX = 0; // AA likelihoods are always first
/**
 * A wrapper around an int array of allele counts so that it can be used as a hashable key.
 *
 * Equality and hashing are defined by the array contents (via {@link Arrays#equals(int[], int[])}
 * and {@link Arrays#hashCode(int[])}). The array is stored by reference, not copied, and the
 * hash code is cached after its first computation, so the array must not be modified once
 * {@link #hashCode()} has been called.
 */
private static final class ExactACcounts {

    private final int[] counts;

    // lazily computed hash of counts; -1 means "not computed yet"
    private int hashcode = -1;

    /**
     * @param counts the allele counts to wrap; kept by reference
     */
    public ExactACcounts(final int[] counts) {
        this.counts = counts;
    }

    /**
     * @return the wrapped counts array (the same instance passed to the constructor)
     */
    public int[] getCounts() {
        return counts;
    }

    @Override
    public boolean equals(Object obj) {
        return (obj instanceof ExactACcounts) && Arrays.equals(counts, ((ExactACcounts)obj).counts);
    }

    @Override
    public int hashCode() {
        if ( hashcode == -1 )
            hashcode = Arrays.hashCode(counts);
        return hashcode;
    }

    /**
     * @return the counts joined by "/", e.g. "1/0/2"; the empty string when there are no counts
     */
    @Override
    public String toString() {
        // StringBuilder: the buffer never leaves this method, so no synchronization is needed
        final StringBuilder sb = new StringBuilder();
        for ( int i = 0; i < counts.length; i++ ) {
            // separator goes between elements only; starting at 0 also makes an empty array safe
            if ( i > 0 )
                sb.append("/");
            sb.append(counts[i]);
        }
        return sb.toString();
    }
}
/**
 * This class represents a column in the Exact AC calculation matrix.
 *
 * Two sets are equal when their allele counts are equal; the likelihood column itself does
 * not take part in equality or hashing.
 */
private static final class ExactACset {

    // the counts of the various alternate alleles which this column represents
    final ExactACcounts ACcounts;

    // the column of the matrix; every entry starts out as Double.NEGATIVE_INFINITY
    final double[] log10Likelihoods;

    // cached result of getACsum(); -1 means "not computed yet"
    int sum = -1;

    /**
     * @param size     number of entries in the likelihood column
     * @param ACcounts the allele counts which this column represents
     */
    public ExactACset(final int size, final ExactACcounts ACcounts) {
        this.ACcounts = ACcounts;
        log10Likelihoods = new double[size];
        Arrays.fill(log10Likelihoods, Double.NEGATIVE_INFINITY);
    }

    /**
     * @return the sum of all the non-reference allele counts (computed once, then cached)
     */
    public int getACsum() {
        if ( sum == -1 ) {
            sum = 0;
            for ( int count : ACcounts.getCounts() )
                sum += count;
        }
        return sum;
    }

    @Override
    public boolean equals(Object obj) {
        return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
    }

    // equals() compares ACcounts only, so the hash must be derived from ACcounts only:
    // equal objects are required to have equal hash codes
    @Override
    public int hashCode() {
        return ACcounts.hashCode();
    }
}
public static void linearExactMultiAllelic(final GenotypesContext GLs,
final int numAlternateAlleles,