From a205900effd37cfc6931dffbdea55fccb2e9af61 Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 20 Oct 2010 03:32:21 +0000 Subject: [PATCH] Naughty use of Strings in HaplotypeScore literally doubled the runtime of Unified Genotyper. Moved over to bytes and no longer allow Strings in the Haplotype util class. New round of profiling on tap for tomorrow. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4528 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/annotator/HaplotypeScore.java | 36 ++++++++----------- .../sting/utils/genotype/Haplotype.java | 10 ------ 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index bfa1c4db5..d27fec8fb 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -44,7 +44,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils; public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { private final static boolean DEBUG = false; private final static int MIN_CONTEXT_WING_SIZE = 10; - private final static String REGEXP_WILDCARD = "."; + private final static char REGEXP_WILDCARD = '.'; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { if ( !vc.isBiallelic() || !vc.isSNP() || stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here @@ -82,7 +82,6 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { } } - private List computeHaplotypes(ReadBackedPileup pileup, int contextSize) { // Compute all possible haplotypes consistent with current pileup ArrayList haplotypeList = new ArrayList(); @@ -168,7 +167,7 @@ public class HaplotypeScore implements InfoFieldAnnotation, 
StandardAnnotation { byte[] haplotypeBases = new byte[contextSize]; for(int i=0; i < contextSize; i++) { - haplotypeBases[i] = REGEXP_WILDCARD.getBytes()[0]; + haplotypeBases[i] = (byte)REGEXP_WILDCARD; } double[] baseQualities = new double[contextSize]; @@ -185,30 +184,26 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { baseQualities[i] = (double)read.getBaseQualities()[baseOffset]; } - return new Haplotype(haplotypeBases, baseQualities); - } - - private Haplotype getConsensusHaplotype(Haplotype haplotypeA, Haplotype haplotypeB) { - String a = haplotypeA.toString(); - String b = haplotypeB.toString(); + byte[] a = haplotypeA.getBasesAsBytes(); + byte[] b = haplotypeB.getBasesAsBytes(); - if (a.length() != b.length()) + if (a.length != b.length) throw new ReviewedStingException("Haplotypes a and b must be of same length"); - char chA, chB; - char wc = REGEXP_WILDCARD.charAt(0); + byte chA, chB; + byte wc = (byte)REGEXP_WILDCARD; + final int length = a.length; + byte[] consensusChars = new byte[length]; + double[] consensusQuals = new double[length]; - char[] consensusChars = new char[a.length()]; - double[] consensusQuals = new double[a.length()]; - - for (int i=0; i < a.length(); i++) { - chA = a.charAt(i); - chB = b.charAt(i); + for (int i=0; i < length; i++) { + chA = a[i]; + chB = b[i]; if ((chA != chB) && (chA != wc) && (chB != wc)) return null; @@ -228,12 +223,9 @@ public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { consensusChars[i] = chA; consensusQuals[i] = haplotypeA.getQuals()[i]+haplotypeB.getQuals()[i]; } - - } - - return new Haplotype(new String(consensusChars), consensusQuals); + return new Haplotype(consensusChars, consensusQuals); } // calculate the haplotype scores by walking over all reads and comparing them to the haplotypes private double scoreReadsAgainstHaplotypes(List haplotypes, ReadBackedPileup pileup, int contextSize) { diff --git 
a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java index 53a5b9bf5..c01596ae7 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Haplotype.java @@ -33,7 +33,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Set; public class Haplotype { protected byte[] bases = null; @@ -59,11 +58,6 @@ public class Haplotype { this.quals = quals; } - public Haplotype(String bases, double[] quals) { - this.bases = bases.getBytes(); - this.quals = quals; - } - public Haplotype(byte[] bases) { this(bases, 0); } @@ -78,9 +72,6 @@ public class Haplotype { this.isReference = isRef; } - - public String toString() { return new String(this.bases); } - public double getQualitySum() { double s = 0; for (int k=0; k < bases.length; k++) { @@ -104,7 +95,6 @@ public class Haplotype { return isReference; } - public static List makeHaplotypeListFromVariantContextAlleles(VariantContext vc, ReferenceContext ref, final int haplotypeSize) {