From a3aef8fa53c0c66b20023e4f448a6c3c18911eaa Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 22 Nov 2011 17:19:30 -0500 Subject: [PATCH] Final performance optimization for GenotypesContext --- .../varianteval/util/VariantEvalUtils.java | 2 +- .../variantcontext/GenotypesContext.java | 28 ++++++------------- .../utils/variantcontext/VariantContext.java | 8 ++++-- .../variantcontext/VariantContextUtils.java | 1 + 4 files changed, 16 insertions(+), 23 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java index aa246b58d..b319407d1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java @@ -267,7 +267,7 @@ public class VariantEvalUtils { * @return a new VariantContext with just the requested sample */ public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) { - return getSubsetOfVariantContext(vc, new HashSet(Arrays.asList(sampleName))); + return getSubsetOfVariantContext(vc, Collections.singleton(sampleName)); } /** diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java index 248fdad9d..845c65c9c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java @@ -646,28 +646,18 @@ public class GenotypesContext implements List { */ @Requires("samples != null") @Ensures("result != null") - public GenotypesContext subsetToSamples( final Collection samples ) { - return subsetToSamples(new HashSet(samples)); - } - - /** - * {@link #subsetToSamples(java.util.Collection)} - * @param 
samples - * @return - */ - @Requires("samples != null") - @Ensures("result != null") public GenotypesContext subsetToSamples( final Set samples ) { - if ( samples.size() == size() ) + final int nSamples = samples.size(); + final int nGenotypes = size(); + + if ( nSamples == nGenotypes ) return this; - else if ( samples.isEmpty() ) + else if ( nSamples == 0 ) return NO_GENOTYPES; - else { - GenotypesContext subset = create(samples.size()); - for ( final Genotype g : getGenotypes() ) { - if ( samples.contains(g.getSampleName()) ) { - subset.add(g); - } + else { // nSamples < nGenotypes + final GenotypesContext subset = create(samples.size()); + for ( final String sample : samples ) { + subset.add(get(sample)); } return subset; } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 331ca97d3..247e412dd 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -726,19 +726,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ public GenotypesContext getGenotypes(String sampleName) { - return getGenotypes(Arrays.asList(sampleName)); + return getGenotypes(Collections.singleton(sampleName)); } /** * Returns a map from sampleName -> Genotype for each sampleName in sampleNames. Returns a map * for consistency with the multi-get function. 
* + * For testing convenience only + * * @param sampleNames a unique list of sample names * @return * @throws IllegalArgumentException if sampleName isn't bound to a genotype */ - public GenotypesContext getGenotypes(Collection sampleNames) { - return getGenotypes().subsetToSamples(sampleNames); + protected GenotypesContext getGenotypes(Collection sampleNames) { + return getGenotypes().subsetToSamples(new HashSet(sampleNames)); } public GenotypesContext getGenotypes(Set sampleNames) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java index 0d3f7fae7..91a018c4e 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java @@ -86,6 +86,7 @@ public class VariantContextUtils { for ( Allele allele : vc.getAlternateAlleles() ) { int altChromosomes = vc.getCalledChrCount(allele); alleleCounts.add(altChromosomes); + // todo -- this is a performance problem String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes)); alleleFreqs.add(freq); }