Naughty use of Strings in HaplotypeScore literally doubled the runtime of the Unified Genotyper. Moved over to bytes, and Strings are no longer allowed in the Haplotype util class. New round of profiling on tap for tomorrow.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4528 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-10-20 03:32:21 +00:00
parent f9541b78d3
commit a205900eff
2 changed files with 14 additions and 32 deletions

View File

@ -44,7 +44,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;
private final static String REGEXP_WILDCARD = ".";
private final static char REGEXP_WILDCARD = '.';
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, StratifiedAlignmentContext> stratifiedContexts, VariantContext vc) {
if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
@ -82,7 +82,6 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
}
}
private List<Haplotype> computeHaplotypes(ReadBackedPileup pileup, int contextSize) {
// Compute all possible haplotypes consistent with current pileup
ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
@ -168,7 +167,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
byte[] haplotypeBases = new byte[contextSize];
for(int i=0; i < contextSize; i++) {
haplotypeBases[i] = REGEXP_WILDCARD.getBytes()[0];
haplotypeBases[i] = (byte)REGEXP_WILDCARD;
}
double[] baseQualities = new double[contextSize];
@ -185,30 +184,26 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
baseQualities[i] = (double)read.getBaseQualities()[baseOffset];
}
return new Haplotype(haplotypeBases, baseQualities);
}
private Haplotype getConsensusHaplotype(Haplotype haplotypeA, Haplotype haplotypeB) {
String a = haplotypeA.toString();
String b = haplotypeB.toString();
byte[] a = haplotypeA.getBasesAsBytes();
byte[] b = haplotypeB.getBasesAsBytes();
if (a.length() != b.length())
if (a.length != b.length)
throw new ReviewedStingException("Haplotypes a and b must be of same length");
char chA, chB;
char wc = REGEXP_WILDCARD.charAt(0);
byte chA, chB;
byte wc = (byte)REGEXP_WILDCARD;
final int length = a.length;
byte[] consensusChars = new byte[length];
double[] consensusQuals = new double[length];
char[] consensusChars = new char[a.length()];
double[] consensusQuals = new double[a.length()];
for (int i=0; i < a.length(); i++) {
chA = a.charAt(i);
chB = b.charAt(i);
for (int i=0; i < length; i++) {
chA = a[i];
chB = b[i];
if ((chA != chB) && (chA != wc) && (chB != wc))
return null;
@ -228,12 +223,9 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
consensusChars[i] = chA;
consensusQuals[i] = haplotypeA.getQuals()[i]+haplotypeB.getQuals()[i];
}
}
return new Haplotype(new String(consensusChars), consensusQuals);
return new Haplotype(consensusChars, consensusQuals);
}
// calculate the haplotype scores by walking over all reads and comparing them to the haplotypes
private double scoreReadsAgainstHaplotypes(List<Haplotype> haplotypes, ReadBackedPileup pileup, int contextSize) {

View File

@ -33,7 +33,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
public class Haplotype {
protected byte[] bases = null;
@ -59,11 +58,6 @@ public class Haplotype {
this.quals = quals;
}
public Haplotype(String bases, double[] quals) {
this.bases = bases.getBytes();
this.quals = quals;
}
public Haplotype(byte[] bases) {
this(bases, 0);
}
@ -78,9 +72,6 @@ public class Haplotype {
this.isReference = isRef;
}
public String toString() { return new String(this.bases); }
public double getQualitySum() {
double s = 0;
for (int k=0; k < bases.length; k++) {
@ -104,7 +95,6 @@ public class Haplotype {
return isReference;
}
public static List<Haplotype> makeHaplotypeListFromVariantContextAlleles(VariantContext vc, ReferenceContext ref, final int haplotypeSize) {