From 445735a4a53f22a5ddd214b95f8e0f4eed8bd593 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Wed, 16 Jan 2013 11:10:13 -0500 Subject: [PATCH] There was no reason to be sharing the Haplotype infrastructure between the HaplotypeCaller and the HaplotypeScore annotation since they were really looking for different things. Separated them out, adding efficiencies for the HaplotypeScore version. --- .../walkers/annotator/HaplotypeScore.java | 57 +++++++++++++++---- .../SimpleDeBruijnAssembler.java | 2 +- .../broadinstitute/sting/utils/Haplotype.java | 49 ++-------------- 3 files changed, 52 insertions(+), 56 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java index fe4075117..af6304297 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java @@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.variant.utils.BaseUtils; -import org.broadinstitute.sting.utils.Haplotype; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.variant.vcf.VCFHeaderLineType; @@ -217,14 +216,14 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot final Haplotype haplotype1 = consensusHaplotypeQueue.poll(); List hlist = new ArrayList(); - hlist.add(new Haplotype(haplotype1.getBases(), 60)); + hlist.add(new Haplotype(haplotype1.getBases(), (byte)60)); for (int k = 1; k < haplotypesToCompute; k++) { Haplotype haplotype2 = consensusHaplotypeQueue.poll(); if (haplotype2 == null) { haplotype2 = haplotype1; } // Sometimes only the reference haplotype can be found - hlist.add(new Haplotype(haplotype2.getBases(), 20)); + hlist.add(new Haplotype(haplotype2.getBases(), (byte)20)); } return hlist; } else @@ -236,8 +235,8 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot final byte[] haplotypeBases = new byte[contextSize]; Arrays.fill(haplotypeBases, (byte) REGEXP_WILDCARD); - final double[] baseQualities = new double[contextSize]; - Arrays.fill(baseQualities, 0.0); + final byte[] baseQualities = new byte[contextSize]; + Arrays.fill(baseQualities, (byte)0); byte[] readBases = read.getReadBases(); readBases = AlignmentUtils.readToAlignmentByteArray(read.getCigar(), readBases); // Adjust the read bases based on the Cigar string @@ -267,7 +266,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot readQuals[baseOffset] = (byte) 0; } // quals less than 5 are used as codes and don't have actual probabilistic meaning behind them haplotypeBases[i] = readBases[baseOffset]; - baseQualities[i] = (double) readQuals[baseOffset]; + baseQualities[i] = readQuals[baseOffset]; } return new Haplotype(haplotypeBases, baseQualities); @@ -286,10 +285,10 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot final int length = a.length; final byte[] consensusChars = new byte[length]; - final double[] consensusQuals = new double[length]; + final byte[] consensusQuals = new byte[length]; - final double[] qualsA = haplotypeA.getQuals(); - final double[] qualsB = haplotypeB.getQuals(); + final byte[] qualsA = haplotypeA.getQuals(); + final byte[] qualsB = haplotypeB.getQuals(); for (int i = 0; i < length; i++) { chA = a[i]; @@ -300,7 +299,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot if ((chA == wc) && (chB == wc)) { consensusChars[i] = wc; - consensusQuals[i] = 0.0; + consensusQuals[i] = 0; } else if ((chA == wc)) { consensusChars[i] = chB; consensusQuals[i] = qualsB[i]; @@ -309,7 +308,7 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot consensusQuals[i] = qualsA[i]; } else { consensusChars[i] = chA; - consensusQuals[i] = qualsA[i] + qualsB[i]; + consensusQuals[i] = (byte)((int)qualsA[i] + (int)qualsB[i]); } } @@ -433,7 +432,6 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot } - public List getKeyNames() { return Arrays.asList("HaplotypeScore"); } @@ -441,4 +439,39 @@ public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnot public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine("HaplotypeScore", 1, VCFHeaderLineType.Float, "Consistency of the site with at most two segregating haplotypes")); } + + private static class Haplotype { + private final byte[] bases; + private final byte[] quals; + private int qualitySum = -1; + + public Haplotype( final byte[] bases, final byte[] quals ) { + this.bases = bases; + this.quals = quals; + } + + public Haplotype( final byte[] bases, final byte qual ) { + this.bases = bases; + quals = new byte[bases.length]; + Arrays.fill(quals, qual); + } + + public double getQualitySum() { + if ( qualitySum == -1 ) { + qualitySum = 0; + for ( final byte qual : quals ) { + qualitySum += (int)qual; + } + } + return qualitySum; + } + + public byte[] getQuals() { + return quals.clone(); + } + + public byte[] getBases() { + return bases.clone(); + } + } } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java index e1a94eee7..e16994fa4 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java @@ -338,7 +338,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine { for( final DefaultDirectedGraph graph : graphs ) { for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) { - final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() ); + final Haplotype h = new Haplotype( path.getBases( graph ) ); if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java index 8c40b9972..66aed1173 100644 --- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java +++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java @@ -37,8 +37,8 @@ import org.broadinstitute.variant.variantcontext.VariantContext; import java.io.Serializable; import java.util.*; -public class Haplotype extends Allele { - protected final double[] quals; +public class Haplotype extends Allele { + private GenomeLoc genomeLocation = null; private HashMap eventMap = null; private Cigar cigar; @@ -51,49 +51,23 @@ public class Haplotype extends Allele { * Main constructor * * @param bases bases - * @param quals quals * @param isRef is reference allele? */ - public Haplotype( final byte[] bases, final double[] quals, final boolean isRef ) { - super(bases.clone(), isRef); - this.quals = quals.clone(); - } - - /** - * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual - * - * @param bases bases - * @param qual qual - */ - public Haplotype( final byte[] bases, final int qual, final boolean isRef ) { - super(bases.clone(), isRef); - quals = new double[bases.length]; - Arrays.fill(quals, (double)qual); - } - - public Haplotype( final byte[] bases, final int qual ) { - this(bases, qual, false); - } - public Haplotype( final byte[] bases, final boolean isRef ) { - this(bases, 0, isRef); - } - - public Haplotype( final byte[] bases, final double[] quals ) { - this(bases, quals, false); + super(bases.clone(), isRef); } public Haplotype( final byte[] bases ) { - this(bases, 0, false); + this(bases, false); } protected Haplotype( final byte[] bases, final Event artificialEvent ) { - this(bases, 0, false); + this(bases, false); this.artificialEvent = artificialEvent; } public Haplotype( final byte[] bases, final GenomeLoc loc ) { - this(bases, 0, false); + this(bases, false); this.genomeLocation = loc; } @@ -110,22 +84,11 @@ public class Haplotype extends Allele { this.eventMap = eventMap; } - public double getQualitySum() { - double s = 0; - for (int k=0; k < quals.length; k++) { - s += quals[k]; - } - return s; - } - @Override public String toString() { return getDisplayString(); } - public double[] getQuals() { - return quals.clone(); - } public byte[] getBases() { return super.getBases().clone(); }