Genotyper now is using bytes not chars. Passes all tests.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3406 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-05-20 21:02:44 +00:00
parent 7389077b3b
commit a10fca0d5c
27 changed files with 161 additions and 158 deletions

View File

@ -139,7 +139,7 @@ public class ReferenceContext {
* @param n number of requested bases including and starting from the current locus
* @return
*/
public byte[] getBasesAtLocusAsByte(int n) {
public byte[] getBasesAtLocus(int n) {
int start = (int)(locus.getStart()-window.getStart());
int stop = ( n==(-1) ? bases.length : start+n );
@ -155,7 +155,7 @@ public class ReferenceContext {
}
@Deprecated
public char[] getBasesAtLocus(int n) {
return new String(getBasesAtLocusAsByte(n)).toCharArray();
public char[] getBasesAtLocusAsChar(int n) {
return new String(getBasesAtLocus(n)).toCharArray();
}
}

View File

@ -122,14 +122,9 @@ public class Allele implements Comparable<Allele> {
public Allele(byte base, boolean isRef) {
this( base1ToBases(base), isRef);
this( new byte[]{ base }, isRef);
}
private static byte[] base1ToBases(byte base) {
byte[] bases = { base };
return bases;
}
/**
* @param bases bases representing an allele
* @return true if the bases represent the null allele

View File

@ -172,8 +172,8 @@ public class DepthOfCoverageStats {
for ( String s : countsBySample.keySet() ) {
int total = 0;
int[] counts = countsBySample.get(s);
for ( char base : BaseUtils.EXTENDED_BASES ) {
if ( includeDeletions || ! ( base == 'D') ) { // note basesAreEqual assigns TRUE to (N,D) as both have simple index -1
for ( byte base : BaseUtils.EXTENDED_BASES ) {
if ( includeDeletions || ! ( base == BaseUtils.D) ) { // note basesAreEqual assigns TRUE to (N,D) as both have simple index -1
total += counts[BaseUtils.extendedBaseToBaseIndex(base)];
}
}

View File

@ -757,10 +757,10 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<CoverageAggregator.Ag
}
StringBuilder s = new StringBuilder();
int nbases = 0;
for ( char b : BaseUtils.EXTENDED_BASES ) {
for ( byte b : BaseUtils.EXTENDED_BASES ) {
nbases++;
if ( includeDeletions || b != 'D' ) {
s.append(b);
if ( includeDeletions || b != BaseUtils.D ) {
s.append((char)b);
s.append(":");
s.append(counts[BaseUtils.extendedBaseToBaseIndex(b)]);
if ( nbases < 6 ) {

View File

@ -49,11 +49,11 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
// the GenotypeLikelihoods map
private HashMap<String, GenotypeLikelihoods> GLs = new HashMap<String, GenotypeLikelihoods>();
private HashMap<Character, AlleleFrequencyMatrix> AFMatrixMap = new HashMap<Character, AlleleFrequencyMatrix>();
private HashMap<Byte, AlleleFrequencyMatrix> AFMatrixMap = new HashMap<Byte, AlleleFrequencyMatrix>();
private enum GenotypeType { REF, HET, HOM }
protected void initialize(char ref,
protected void initialize(byte ref,
Map<String, StratifiedAlignmentContext> contexts,
StratifiedAlignmentContext.StratifiedContextType contextType) {
// initialize the GenotypeLikelihoods
@ -61,7 +61,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
AFMatrixMap.clear();
// for each alternate allele, create a new matrix
for ( char alt : BaseUtils.BASES ) {
for ( byte alt : BaseUtils.BASES ) {
if ( alt != ref )
AFMatrixMap.put(alt, new AlleleFrequencyMatrix(contexts.size()));
}
@ -83,7 +83,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
// for each alternate allele, fill the matrix
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
for ( char alt : BaseUtils.BASES ) {
for ( byte alt : BaseUtils.BASES ) {
if ( alt != ref ) {
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(ref, alt);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);
@ -93,7 +93,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
}
}
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
protected void calculatelog10PofDgivenAFforAllF(byte ref, byte alt, int numFrequencies, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
int baseIndex = BaseUtils.simpleBaseToBaseIndex(alt);
@ -123,13 +123,13 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
}
}
protected Map<String, Genotype> makeGenotypeCalls(char ref, char alt, int frequency, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
protected Map<String, Genotype> makeGenotypeCalls(byte ref, byte alt, int frequency, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
HashMap<String, Genotype> calls = new HashMap<String, Genotype>();
// set up some variables we'll need in the loop
AlleleFrequencyMatrix matrix = AFMatrixMap.get(alt);
Allele refAllele = new Allele(Character.toString(ref), true);
Allele altAllele = new Allele(Character.toString(alt), false);
Allele refAllele = new Allele(ref, true);
Allele altAllele = new Allele(alt, false);
DiploidGenotype refGenotype = DiploidGenotype.createHomGenotype(ref);
DiploidGenotype hetGenotype = DiploidGenotype.createDiploidGenotype(ref, alt);
DiploidGenotype homGenotype = DiploidGenotype.createHomGenotype(alt);

View File

@ -76,15 +76,15 @@ public class EmpiricalSubstitutionProbabilities extends FourBaseProbabilities {
/**
* A matrix of value i x j -> log10(p) where
*
* i - char of the miscalled base (i.e., A)
* j - char of the presumed true base (i.e., C)
* i - byte of the miscalled base (i.e., A)
* j - byte of the presumed true base (i.e., C)
* log10p - empirical probability p that A is actually C
*
* The table is available for each technology
*/
private final static EnumMap<SequencerPlatform, double[][]> log10pTrueGivenMiscall = new EnumMap<SequencerPlatform, double[][]>(SequencerPlatform.class);
private static void addMisCall(final SequencerPlatform pl, char miscalledBase, char trueBase, double p) {
private static void addMisCall(final SequencerPlatform pl, byte miscalledBase, byte trueBase, double p) {
if ( ! log10pTrueGivenMiscall.containsKey(pl) )
log10pTrueGivenMiscall.put(pl, new double[4][4]);
@ -94,7 +94,7 @@ public class EmpiricalSubstitutionProbabilities extends FourBaseProbabilities {
misCallProbs[i][j] = log10(p);
}
private static double getProbMiscallIsBase(SequencerPlatform pl, char miscalledBase, char trueBase) {
private static double getProbMiscallIsBase(SequencerPlatform pl, byte miscalledBase, byte trueBase) {
int i = BaseUtils.simpleBaseToBaseIndex(miscalledBase);
int j = BaseUtils.simpleBaseToBaseIndex(trueBase);
@ -107,84 +107,84 @@ public class EmpiricalSubstitutionProbabilities extends FourBaseProbabilities {
private static void addSolexa() {
SequencerPlatform pl = SequencerPlatform.SOLEXA;
addMisCall(pl, 'A', 'C', 57.7/100.0);
addMisCall(pl, 'A', 'G', 17.1/100.0);
addMisCall(pl, 'A', 'T', 25.2/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.C, 57.7/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.G, 17.1/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.T, 25.2/100.0);
addMisCall(pl, 'C', 'A', 34.9/100.0);
addMisCall(pl, 'C', 'G', 11.3/100.0);
addMisCall(pl, 'C', 'T', 53.9/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.A, 34.9/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.G, 11.3/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.T, 53.9/100.0);
addMisCall(pl, 'G', 'A', 31.9/100.0);
addMisCall(pl, 'G', 'C', 5.1/100.0);
addMisCall(pl, 'G', 'T', 63.0/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.A, 31.9/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.C, 5.1/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.T, 63.0/100.0);
addMisCall(pl, 'T', 'A', 45.8/100.0);
addMisCall(pl, 'T', 'C', 22.1/100.0);
addMisCall(pl, 'T', 'G', 32.0/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.A, 45.8/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.C, 22.1/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.G, 32.0/100.0);
}
private static void addSOLiD() {
SequencerPlatform pl = SequencerPlatform.SOLID;
addMisCall(pl, 'A', 'C', 18.7/100.0);
addMisCall(pl, 'A', 'G', 42.5/100.0);
addMisCall(pl, 'A', 'T', 38.7/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.C, 18.7/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.G, 42.5/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.T, 38.7/100.0);
addMisCall(pl, 'C', 'A', 27.0/100.0);
addMisCall(pl, 'C', 'G', 18.9/100.0);
addMisCall(pl, 'C', 'T', 54.1/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.A, 27.0/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.G, 18.9/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.T, 54.1/100.0);
addMisCall(pl, 'G', 'A', 61.0/100.0);
addMisCall(pl, 'G', 'C', 15.7/100.0);
addMisCall(pl, 'G', 'T', 23.2/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.A, 61.0/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.C, 15.7/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.T, 23.2/100.0);
addMisCall(pl, 'T', 'A', 40.5/100.0);
addMisCall(pl, 'T', 'C', 34.3/100.0);
addMisCall(pl, 'T', 'G', 25.2/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.A, 40.5/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.C, 34.3/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.G, 25.2/100.0);
}
private static void add454() {
SequencerPlatform pl = SequencerPlatform.ROCHE454;
addMisCall(pl, 'A', 'C', 23.2/100.0);
addMisCall(pl, 'A', 'G', 42.6/100.0);
addMisCall(pl, 'A', 'T', 34.3/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.C, 23.2/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.G, 42.6/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.T, 34.3/100.0);
addMisCall(pl, 'C', 'A', 19.7/100.0);
addMisCall(pl, 'C', 'G', 8.4/100.0);
addMisCall(pl, 'C', 'T', 71.9/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.A, 19.7/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.G, 8.4/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.T, 71.9/100.0);
addMisCall(pl, 'G', 'A', 71.5/100.0);
addMisCall(pl, 'G', 'C', 6.6/100.0);
addMisCall(pl, 'G', 'T', 21.9/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.A, 71.5/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.C, 6.6/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.T, 21.9/100.0);
addMisCall(pl, 'T', 'A', 43.8/100.0);
addMisCall(pl, 'T', 'C', 37.8/100.0);
addMisCall(pl, 'T', 'G', 18.5/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.A, 43.8/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.C, 37.8/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.G, 18.5/100.0);
}
private static void addCG() {
SequencerPlatform pl = SequencerPlatform.CG;
addMisCall(pl, 'A', 'C', 28.2/100.0);
addMisCall(pl, 'A', 'G', 28.7/100.0);
addMisCall(pl, 'A', 'T', 43.1/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.C, 28.2/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.G, 28.7/100.0);
addMisCall(pl, BaseUtils.A, BaseUtils.T, 43.1/100.0);
addMisCall(pl, 'C', 'A', 29.8/100.0);
addMisCall(pl, 'C', 'G', 18.6/100.0);
addMisCall(pl, 'C', 'T', 51.6/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.A, 29.8/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.G, 18.6/100.0);
addMisCall(pl, BaseUtils.C, BaseUtils.T, 51.6/100.0);
addMisCall(pl, 'G', 'A', 32.5/100.0);
addMisCall(pl, 'G', 'C', 21.4/100.0);
addMisCall(pl, 'G', 'T', 46.1/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.A, 32.5/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.C, 21.4/100.0);
addMisCall(pl, BaseUtils.G, BaseUtils.T, 46.1/100.0);
addMisCall(pl, 'T', 'A', 42.6/100.0);
addMisCall(pl, 'T', 'C', 34.1/100.0);
addMisCall(pl, 'T', 'G', 23.3/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.A, 42.6/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.C, 34.1/100.0);
addMisCall(pl, BaseUtils.T, BaseUtils.G, 23.3/100.0);
}
private static void addUnknown() {
SequencerPlatform pl = SequencerPlatform.UNKNOWN;
for ( char b1 : BaseUtils.BASES ) {
for ( char b2 : BaseUtils.BASES ) {
for ( byte b1 : BaseUtils.BASES ) {
for ( byte b2 : BaseUtils.BASES ) {
if ( b1 != b2 )
addMisCall(pl, b1, b2, 1.0/3.0);
}
@ -245,7 +245,7 @@ public class EmpiricalSubstitutionProbabilities extends FourBaseProbabilities {
//
// -----------------------------------------------------------------------------------------------------------------
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
protected double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset) {
boolean fwdStrand = ! read.getReadNegativeStrandFlag();
SequencerPlatform pl = getReadSequencerPlatform(read);

View File

@ -93,7 +93,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
* @param base base
* @return log10 likelihood as a double
*/
public double getLog10Likelihood(char base) {
public double getLog10Likelihood(byte base) {
return getLog10Likelihood(BaseUtils.simpleBaseToBaseIndex(base));
}
@ -117,7 +117,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
* @param base base
* @return likelihoods as a double
*/
public double getLikelihood(char base) {
public double getLikelihood(byte base) {
return getLikelihood(BaseUtils.simpleBaseToBaseIndex(base));
}
@ -144,20 +144,20 @@ public abstract class FourBaseProbabilities implements Cloneable {
* @param offset offset on read
* @return 1 if the base was considered good enough to add to the likelihoods (not Q0 or 'N', for example)
*/
public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) {
public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
FourBaseProbabilities fbl = computeLog10Likelihoods(observedBase, qualityScore, read, offset);
if ( fbl == null )
return 0;
for ( char base : BaseUtils.BASES ) {
for ( byte base : BaseUtils.BASES ) {
double likelihood = fbl.getLikelihood(base);
log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)] += likelihood;
}
if ( verbose ) {
for ( char base : BaseUtils.BASES ) { System.out.printf("%s\t", base); }
for ( byte base : BaseUtils.BASES ) { System.out.printf("%s\t", (char)base); }
System.out.println();
for ( char base : BaseUtils.BASES ) { System.out.printf("%.2f\t", log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)]); }
for ( byte base : BaseUtils.BASES ) { System.out.printf("%.2f\t", log10Likelihoods[BaseUtils.simpleBaseToBaseIndex(base)]); }
System.out.println();
}
@ -174,7 +174,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
* @param offset offset on read
* @return likelihoods for this observation or null if the base was not considered good enough to add to the likelihoods (Q0 or 'N', for example)
*/
public FourBaseProbabilities computeLog10Likelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) {
public FourBaseProbabilities computeLog10Likelihoods(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
if ( badBase(observedBase) ) {
return null;
}
@ -185,7 +185,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
FourBaseProbabilities fbl = (FourBaseProbabilities)this.clone();
fbl.log10Likelihoods = zeros.clone();
for ( char base : BaseUtils.BASES ) {
for ( byte base : BaseUtils.BASES ) {
double likelihood = log10PofObservingBaseGivenChromosome(observedBase, base, qualityScore, read, offset);
if ( verbose ) {
@ -224,7 +224,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
* @param observedBase observed base
* @return true if the base is a bad base
*/
private boolean badBase(char observedBase) {
private boolean badBase(byte observedBase) {
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
}
@ -236,7 +236,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
public String toString() {
double sum = 0;
StringBuilder s = new StringBuilder();
for ( char base : BaseUtils.BASES ) {
for ( byte base : BaseUtils.BASES ) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex(base);
s.append(String.format("%s %.10f ", base, log10Likelihoods[baseIndex]));
sum += Math.pow(10, log10Likelihoods[baseIndex]);
@ -265,7 +265,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
public boolean validate(boolean throwException) {
try {
for ( char base : BaseUtils.BASES ) {
for ( byte base : BaseUtils.BASES ) {
int i = BaseUtils.simpleBaseToBaseIndex(base);
if ( ! MathUtils.wellFormedDouble(log10Likelihoods[i]) || ! MathUtils.isNegativeOrZero(log10Likelihoods[i]) ) {
@ -302,7 +302,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
* @return log10 likelihood
*/
protected double log10PofObservingBaseGivenChromosome(char observedBase, char chromBase, byte qual, SAMRecord read, int offset) {
protected double log10PofObservingBaseGivenChromosome(byte observedBase, byte chromBase, byte qual, SAMRecord read, int offset) {
if (qual == 0) { // zero quals are wrong
throw new RuntimeException(String.format("Unexpected Q0 base discovered in log10PofObservingBaseGivenChromosome: %c %s %d at %d in %s",
observedBase, chromBase, qual, offset, read));
@ -333,7 +333,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
* @param offset offset on read
* @return log10 likelihood
*/
protected abstract double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset);
protected abstract double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset);
//
// Constant static data
@ -341,7 +341,7 @@ public abstract class FourBaseProbabilities implements Cloneable {
private final static double[] zeros = new double[BaseUtils.BASES.length];
static {
for ( char base : BaseUtils.BASES ) {
for ( byte base : BaseUtils.BASES ) {
zeros[BaseUtils.simpleBaseToBaseIndex(base)] = 0.0;
}
}

View File

@ -74,7 +74,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
* @return call
*/
public abstract VariantCallContext callLocus(RefMetaDataTracker tracker,
char ref,
byte ref,
GenomeLoc loc,
Map<String, StratifiedAlignmentContext> stratifiedContexts,
DiploidGenotypePriors priors);
@ -88,7 +88,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
* @return call
*/
public abstract VariantCallContext callExtendedLocus(RefMetaDataTracker tracker,
char[] ref,
byte[] ref,
GenomeLoc loc,
Map<String, StratifiedAlignmentContext> stratifiedContexts);

View File

@ -284,7 +284,7 @@ public class GenotypeLikelihoods implements Cloneable {
if ( p.isDeletion() )
continue;
char base = (char)p.getBase();
byte base = p.getBase();
if ( ! ignoreBadBases || ! badBase(base) ) {
byte qual = capBaseQualsAtMappingQual ? (byte)Math.min((int)p.getQual(), p.getMappingQual()) : p.getQual();
n += add(base, qual, p.getRead(), p.getOffset());
@ -294,7 +294,7 @@ public class GenotypeLikelihoods implements Cloneable {
return n;
}
public int add(char observedBase, byte qualityScore, SAMRecord read, int offset) {
public int add(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
// Handle caching if requested. Just look up the cached result if its available, or compute and store it
GenotypeLikelihoods gl;
@ -332,11 +332,11 @@ public class GenotypeLikelihoods implements Cloneable {
static GenotypeLikelihoods[][][][][][] CACHE = new GenotypeLikelihoods[BaseMismatchModel.values().length][EmpiricalSubstitutionProbabilities.SequencerPlatform.values().length][BaseUtils.BASES.length][QualityUtils.MAX_QUAL_SCORE+1][MAX_PLOIDY][2];
protected boolean inCache( char observedBase, byte qualityScore, int ploidy, SAMRecord read) {
protected boolean inCache( byte observedBase, byte qualityScore, int ploidy, SAMRecord read) {
return getCache(CACHE, observedBase, qualityScore, ploidy, read) != null;
}
protected GenotypeLikelihoods getCachedGenotypeLikelihoods( char observedBase, byte qualityScore, int ploidy, SAMRecord read) {
protected GenotypeLikelihoods getCachedGenotypeLikelihoods( byte observedBase, byte qualityScore, int ploidy, SAMRecord read) {
GenotypeLikelihoods gl = getCache(CACHE, observedBase, qualityScore, ploidy, read);
if ( gl == null )
throw new RuntimeException(String.format("BUG: trying to fetch an unset cached genotype likelihood at base=%c, qual=%d, ploidy=%d, read=%s",
@ -344,14 +344,14 @@ public class GenotypeLikelihoods implements Cloneable {
return gl;
}
protected GenotypeLikelihoods calculateCachedGenotypeLikelihoods(char observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset) {
protected GenotypeLikelihoods calculateCachedGenotypeLikelihoods(byte observedBase, byte qualityScore, int ploidy, SAMRecord read, int offset) {
GenotypeLikelihoods gl = calculateGenotypeLikelihoods(observedBase, qualityScore, read, offset);
setCache(CACHE, observedBase, qualityScore, ploidy, read, gl);
return gl;
}
protected void setCache( GenotypeLikelihoods[][][][][][] cache,
char observedBase, byte qualityScore, int ploidy,
byte observedBase, byte qualityScore, int ploidy,
SAMRecord read, GenotypeLikelihoods val ) {
int m = FourBaseProbabilitiesFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal();
int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read);
@ -364,7 +364,7 @@ public class GenotypeLikelihoods implements Cloneable {
}
protected GenotypeLikelihoods getCache( GenotypeLikelihoods[][][][][][] cache,
char observedBase, byte qualityScore, int ploidy, SAMRecord read) {
byte observedBase, byte qualityScore, int ploidy, SAMRecord read) {
int m = FourBaseProbabilitiesFactory.getBaseMismatchModel(fourBaseLikelihoods).ordinal();
int a = fourBaseLikelihoods.getReadSequencerPlatformIndex(read);
int i = BaseUtils.simpleBaseToBaseIndex(observedBase);
@ -374,7 +374,7 @@ public class GenotypeLikelihoods implements Cloneable {
return cache[m][a][i][j][k][x];
}
protected GenotypeLikelihoods calculateGenotypeLikelihoods(char observedBase, byte qualityScore, SAMRecord read, int offset) {
protected GenotypeLikelihoods calculateGenotypeLikelihoods(byte observedBase, byte qualityScore, SAMRecord read, int offset) {
FourBaseProbabilities fbl = fourBaseLikelihoods.computeLog10Likelihoods(observedBase, qualityScore, read, offset);
if ( fbl == null )
return null;
@ -436,7 +436,7 @@ public class GenotypeLikelihoods implements Cloneable {
* @param observedBase observed base
* @return true if the base is a bad base
*/
protected boolean badBase(char observedBase) {
protected boolean badBase(byte observedBase) {
return BaseUtils.simpleBaseToBaseIndex(observedBase) == -1;
}

View File

@ -35,7 +35,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
protected double[] PofFs = new double[BaseUtils.BASES.length];
// the alternate allele with the largest sum of quality scores
protected Character bestAlternateAllele = null;
protected Byte bestAlternateAllele = null;
// are we at a 'trigger' track site?
protected boolean atTriggerTrack = false;
@ -48,11 +48,11 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
filter.add("LowQual");
}
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, byte[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> stratifiedContexts) {
return null;
}
public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
public VariantCallContext callLocus(RefMetaDataTracker tracker, byte ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
int numSamples = getNSamples(contexts);
int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero)
@ -74,7 +74,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return vcc;
}
// otherwise, choose any alternate allele (it doesn't really matter)
bestAlternateAllele = (ref != 'A' ? 'A' : 'C');
bestAlternateAllele = (byte)(ref != 'A' ? 'A' : 'C');
}
// calculate likelihoods if there are non-ref bases
@ -105,7 +105,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return contexts.size();
}
protected void initializeBestAlternateAllele(char ref, Map<String, StratifiedAlignmentContext> contexts) {
protected void initializeBestAlternateAllele(byte ref, Map<String, StratifiedAlignmentContext> contexts) {
int[] qualCounts = new int[4];
for ( String sample : contexts.keySet() ) {
@ -118,7 +118,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( p.isDeletion() )
continue;
int index = BaseUtils.simpleBaseToBaseIndex((char)p.getBase());
int index = BaseUtils.simpleBaseToBaseIndex(p.getBase());
if ( index >= 0 )
qualCounts[index] += p.getQual();
}
@ -127,7 +127,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// set the non-ref base with maximum quality score sum
int maxCount = 0;
bestAlternateAllele = null;
for ( char altAllele : BaseUtils.BASES ) {
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele == ref )
continue;
int index = BaseUtils.simpleBaseToBaseIndex(altAllele);
@ -138,7 +138,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
}
}
protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
protected void initialize(byte ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
// by default, no initialization is done
return;
}
@ -201,10 +201,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
vcc.confidentlyCalled = QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING;
}
protected void calculateAlleleFrequencyPosteriors(char ref, int frequencyEstimationPoints, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
protected void calculateAlleleFrequencyPosteriors(byte ref, int frequencyEstimationPoints, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
// initialization
for ( char altAllele : BaseUtils.BASES ) {
for ( byte altAllele : BaseUtils.BASES ) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
alleleFrequencyPosteriors[baseIndex] = new double[frequencyEstimationPoints];
log10PofDgivenAFi[baseIndex] = new double[frequencyEstimationPoints];
@ -226,7 +226,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
* @param contexts stratified alignment contexts
* @param contextType which stratification to use
*/
protected void calculatelog10PofDgivenAFforAllF(char ref, char alt, int numFrequencies, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
protected void calculatelog10PofDgivenAFforAllF(byte ref, byte alt, int numFrequencies, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex(alt);
// for each minor allele frequency, calculate log10PofDgivenAFi
@ -245,7 +245,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
*
* @return value of PofDgivenAF for allele frequency f
*/
protected double calculateLog10PofDgivenAFforF(char ref, char alt, double f, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
protected double calculateLog10PofDgivenAFforF(byte ref, byte alt, double f, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
return 0.0;
}
@ -265,7 +265,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
* @param ref the ref base
* @param frequencyEstimationPoints number of allele frequencies
*/
protected void calculatePofFs(char ref, int frequencyEstimationPoints) {
protected void calculatePofFs(byte ref, int frequencyEstimationPoints) {
// only calculate for the most likely alternate allele
int baseIndex = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele);
@ -286,14 +286,14 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
* @param loc the GenomeLoc
* @param frequencyEstimationPoints number of allele frequencies
*/
protected void printAlleleFrequencyData(char ref, GenomeLoc loc, int frequencyEstimationPoints) {
protected void printAlleleFrequencyData(byte ref, GenomeLoc loc, int frequencyEstimationPoints) {
for (int i = 0; i < frequencyEstimationPoints; i++) {
StringBuilder AFline = new StringBuilder("AFINFO\t");
AFline.append(loc).append("\t");
AFline.append(i + "/" + (frequencyEstimationPoints-1) + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/ (frequencyEstimationPoints-1)));
AFline.append(String.format("%.8f", log10AlleleFrequencyPriors[i]) + "\t");
for ( char altAllele : BaseUtils.BASES ) {
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele != ref ) {
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
double PofDgivenAF = log10PofDgivenAFi[baseIndex][i];
@ -307,9 +307,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
verboseWriter.println(AFline);
}
for ( char altAllele : BaseUtils.BASES ) {
for ( byte altAllele : BaseUtils.BASES ) {
if ( altAllele != ref ) {
char base = Character.toLowerCase(altAllele);
char base = Character.toLowerCase((char)altAllele);
int baseIndex = BaseUtils.simpleBaseToBaseIndex(altAllele);
if ( MathUtils.compareDoubles(PofFs[baseIndex], 0.0) != 0 ) {
double phredScaledConfidence = QualityUtils.phredScaleErrorRate(alleleFrequencyPosteriors[baseIndex][0]);
@ -329,12 +329,12 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
verboseWriter.println();
}
protected Map<String, Genotype> makeGenotypeCalls(char ref, char alt, int frequency, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
protected Map<String, Genotype> makeGenotypeCalls(byte ref, byte alt, int frequency, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc) {
// by default, we return no genotypes
return new HashMap<String, Genotype>();
}
protected VariantCallContext createCalls(RefMetaDataTracker tracker, char ref, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
protected VariantCallContext createCalls(RefMetaDataTracker tracker, byte ref, Map<String, StratifiedAlignmentContext> contexts, GenomeLoc loc, int frequencyEstimationPoints) {
// only need to look at the most likely alternate allele
int indexOfMax = BaseUtils.simpleBaseToBaseIndex(bestAlternateAllele);
@ -365,9 +365,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( beagleWriter != null ) {
beagleWriter.print(loc);
beagleWriter.print(' ');
beagleWriter.print(ref);
beagleWriter.print((char)ref);
beagleWriter.print(' ');
beagleWriter.print(bestAlternateAllele);
beagleWriter.print((char)((byte)bestAlternateAllele));
}
// populate the sample-specific data (output it to beagle also if requested)
@ -379,9 +379,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// next, the variant context data (alleles, attributes, etc.)
ArrayList<Allele> alleles = new ArrayList<Allele>();
alleles.add(new Allele(Character.toString(ref), true));
alleles.add(new Allele(ref, true));
if ( bestAFguess != 0 )
alleles.add(new Allele(bestAlternateAllele.toString(), false));
alleles.add(new Allele(bestAlternateAllele, false));
// *** note that calculating strand bias involves overwriting data structures, so we do that last
HashMap<String, Object> attributes = new HashMap<String, Object>();

View File

@ -29,7 +29,7 @@ public class OneStateErrorProbabilities extends FourBaseProbabilities {
* @param offset offset on read
* @return log10 likelihood
*/
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
protected double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset) {
return 0; // equivalent to e model
}
}

View File

@ -27,12 +27,12 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
private String bestEvent = null;
public VariantCallContext callLocus(RefMetaDataTracker tracker, char ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
public VariantCallContext callLocus(RefMetaDataTracker tracker, byte ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts, DiploidGenotypePriors priors) {
// cachedContext = contexts;
return null;
}
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, char[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts) {
public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, byte[] ref, GenomeLoc loc, Map<String, StratifiedAlignmentContext> contexts) {
totalIndels = 0;
totalCoverage = 0;
@ -91,7 +91,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
return vcc;
}
protected void initializeAlleles(char [] ref, Map<String, StratifiedAlignmentContext> contexts) {
protected void initializeAlleles(byte[] ref, Map<String, StratifiedAlignmentContext> contexts) {
for ( String sample : contexts.keySet() ) {
@ -102,7 +102,7 @@ public class SimpleIndelCalculationModel extends GenotypeCalculationModel {
// calculate the sum of quality scores for each base
ReadBackedExtendedEventPileup pileup = context.getExtendedEventPileup();
List<Pair<String,Integer>> all_events = pileup.getEventStringsWithCounts(Utils.charSeq2byteSeq(ref));
List<Pair<String,Integer>> all_events = pileup.getEventStringsWithCounts(ref);
for ( Pair<String,Integer> p : all_events ) {
if ( p.second > bestIndelCount ) {
bestIndelCount = p.second;

View File

@ -32,7 +32,7 @@ public class ThreeStateErrorProbabilities extends FourBaseProbabilities {
* @param offset offset on read
* @return log10 likelihood
*/
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
protected double log10PofTrueBaseGivenMiscall(byte observedBase, byte chromBase, SAMRecord read, int offset) {
return -log103; // equivalent to e / 3 model
}
}

View File

@ -114,10 +114,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
// initialize the writers
if ( verboseWriter != null ) {
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
for ( char altAllele : BaseUtils.BASES ) {
char base = Character.toUpperCase(altAllele);
header.append("POfDGivenAFFor" + base + "\t");
header.append("PosteriorAFFor" + base + "\t");
for ( byte altAllele : BaseUtils.BASES ) {
//char base = Character.toUpperCase(altAllele);
header.append("POfDGivenAFFor" + (char)altAllele + "\t");
header.append("PosteriorAFFor" + (char)altAllele + "\t");
}
verboseWriter.println(header);
}

View File

@ -159,7 +159,7 @@ public class UnifiedGenotyperEngine {
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
}
char ref = Character.toUpperCase(refContext.getBaseAsChar());
byte ref = refContext.getBase();
if ( !BaseUtils.isRegularBase(ref) )
return null;
@ -228,7 +228,7 @@ public class UnifiedGenotyperEngine {
}
}
if ( call != null ) call.setRefAllele(Character.toString(ref));
if ( call != null ) call.setRefAllele(ref);
return call;
}

View File

@ -33,7 +33,7 @@ public class VariantCallContext {
this.confidentlyCalled = confidentlyCalledP;
}
public void setRefAllele(String refAllele) {
this.refAllele = refAllele;
public void setRefAllele(byte refAllele) {
this.refAllele = new String(new byte[]{refAllele});
}
}

View File

@ -24,7 +24,7 @@ public class AlignedReadsHistoWalker extends ReadWalker<Integer, Integer> {
}
// Do we actually want to operate on the context?
public boolean filter(char[] ref, SAMRecord read) {
public boolean filter(byte[] ref, SAMRecord read) {
// we only want aligned reads
return !read.getReadUnmappedFlag();
}

View File

@ -464,9 +464,9 @@ class BaseTransitionTable implements Comparable {
public void print( PrintStream out ) {
StringBuilder s = new StringBuilder();
for ( char observedBase : BaseUtils.BASES ) {
for ( char refBase : BaseUtils.BASES ) {
s.append(String.format("%s\t%s",observedBase,refBase));
for ( byte observedBase : BaseUtils.BASES ) {
for ( byte refBase : BaseUtils.BASES ) {
s.append(String.format("%s\t%s",(char)observedBase,(char)refBase));
for ( Comparable c : conditions ) {
s.append(String.format("\t%s",c.toString()));
}

View File

@ -77,8 +77,8 @@ public class ProportionOfNonrefBasesSupportingSNP implements InfoFieldAnnotation
int[] counts = p.getBaseCounts();
int nonrefCounts = 0;
int snpCounts = counts[BaseUtils.simpleBaseToBaseIndex(snp)];
for ( char c : BaseUtils.BASES ) {
if ( ! BaseUtils.basesAreEqual((byte) c, (byte) ref) ) {
for ( byte c : BaseUtils.BASES ) {
if ( ! BaseUtils.basesAreEqual(c, (byte) ref) ) {
nonrefCounts += counts[BaseUtils.simpleBaseToBaseIndex(c)];
}
}

View File

@ -154,7 +154,7 @@ public class CalculateBaseLikelihoodsWalker extends LocusWalker<Integer, Pair<Lo
if (!ReadsToDiscard.contains(read.getReadName()) && BaseUtils.simpleBaseToBaseIndex(base) != -1) {
//consider base in likelihood calculations if it looks good and has high mapping score
G.add(base, qual, read, offset);
G.add((byte)base, qual, read, offset);
//if (DEBUG){
if (base == 'A'){numAs++;}
else if (base == 'C'){numCs++;}

View File

@ -369,7 +369,7 @@ public class CallHLAWalker extends LocusWalker<Integer, Pair<Long, Long>>{
}
}else{
//consider base in likelihood calculations if it looks good and has high mapping score
G.add(base, qual, read, offset);
G.add((byte)base, qual, read, offset);
readname = read.getReadName();
if (!AllReadNames.contains(readname)){AllReadNames.add(readname); AllReads.add(read);}
if (base == 'A'){numAs++; depth++;}

View File

@ -134,7 +134,7 @@ public class LocusMismatchWalker extends LocusWalker<String,Integer> implements
if ( nMismatches < maxNumMismatches && nMismatches >= minMismatches && usableDepth >= minDepth ) {
String baseCountString = "";
for ( char b : BaseUtils.BASES ) {
for ( byte b : BaseUtils.BASES ) {
baseCountString += baseCounts[BaseUtils.simpleBaseToBaseIndex(b)] + " ";
}
return String.format("%s %c %10s %5.2f %d %d %d %s",

View File

@ -8,11 +8,19 @@ import java.util.Random;
* BaseUtils contains some basic utilities for manipulating nucleotides.
*/
public class BaseUtils {
public final static byte A = (byte)'A';
public final static byte C = (byte)'C';
public final static byte G = (byte)'G';
public final static byte T = (byte)'T';
public final static byte N = (byte)'N';
public final static byte D = (byte)'D';
//
// todo -- we need a generalized base abstraction using the Base enum.
//
public final static char[] BASES = { 'A', 'C', 'G', 'T' };
public final static char[] EXTENDED_BASES = { 'A', 'C', 'G', 'T', 'N', 'D' };
public final static byte[] BASES = { 'A', 'C', 'G', 'T' };
public final static byte[] EXTENDED_BASES = { 'A', 'C', 'G', 'T', 'N', 'D' };
public enum Base {
A ( 'A', 0 ),
@ -175,8 +183,7 @@ public class BaseUtils {
}
}
@Deprecated
static public int extendedBaseToBaseIndex(char base) {
static public int extendedBaseToBaseIndex(byte base) {
switch (base) {
case 'd':
case 'D': return DELETION_INDEX;
@ -197,7 +204,7 @@ public class BaseUtils {
}
static public boolean isRegularBase(byte base) {
return isRegularBase((char)base);
return simpleBaseToBaseIndex(base) != -1;
}
@Deprecated

View File

@ -49,6 +49,7 @@ public enum DiploidGenotype {
public byte base1, base2;
@Deprecated
private DiploidGenotype(char base1, char base2) {
this((byte)base1, (byte)base2);
}
@ -87,7 +88,7 @@ public enum DiploidGenotype {
* @param hom the character to turn into a hom genotype, i.e. if it is A, then returned will be AA
* @return the diploid genotype
*/
public static DiploidGenotype createHomGenotype(char hom) {
public static DiploidGenotype createHomGenotype(byte hom) {
int index = BaseUtils.simpleBaseToBaseIndex(hom);
if ( index == -1 )
throw new IllegalArgumentException(hom + " is not a valid base character");
@ -100,7 +101,7 @@ public enum DiploidGenotype {
* @param base2 base2
* @return the diploid genotype
*/
public static DiploidGenotype createDiploidGenotype(char base1, char base2) {
public static DiploidGenotype createDiploidGenotype(byte base1, byte base2) {
int index1 = BaseUtils.simpleBaseToBaseIndex(base1);
if ( index1 == -1 )
throw new IllegalArgumentException(base1 + " is not a valid base character");

View File

@ -101,7 +101,7 @@ public class GeliTextWriter implements GeliGenotypeWriter {
double nextVrsBest = lks[9] - lks[8];
double nextVrsRef = 0;
if (ref != 'X')
nextVrsRef = lks[9] - posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
nextVrsRef = lks[9] - posteriors[DiploidGenotype.createHomGenotype((byte)ref).ordinal()];
int readCount = 0;
double maxMappingQual = 0;

View File

@ -16,7 +16,7 @@ import java.util.Arrays;
* To change this template use File | Settings | File Templates.
*/
public class PileupElement {
public static final byte DELETION_BASE = 'D';
public static final byte DELETION_BASE = BaseUtils.D;
public static final byte DELETION_QUAL = 0;
protected SAMRecord read;

View File

@ -82,7 +82,7 @@ public class DiploidGenotypeUnitTest extends BaseTest {
@Test
public void testCreateGenotype() {
char ref = 'A';
byte ref = 'A';
DiploidGenotype g = DiploidGenotype.createHomGenotype(ref);
Assert.assertTrue("AA".equals(g.toString()));