Final performance optimization for GenotypesContext

This commit is contained in:
Mark DePristo 2011-11-22 17:19:30 -05:00
parent 990c02e4de
commit a3aef8fa53
4 changed files with 16 additions and 23 deletions

View File

@ -267,7 +267,7 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested sample
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, String sampleName) {
return getSubsetOfVariantContext(vc, new HashSet<String>(Arrays.asList(sampleName)));
return getSubsetOfVariantContext(vc, Collections.singleton(sampleName));
}
/**

View File

@ -646,28 +646,18 @@ public class GenotypesContext implements List<Genotype> {
*/
@Requires("samples != null")
@Ensures("result != null")
public GenotypesContext subsetToSamples( final Collection<String> samples ) {
return subsetToSamples(new HashSet<String>(samples));
}
/**
* {@link #subsetToSamples(java.util.Collection)}
* @param samples
* @return
*/
@Requires("samples != null")
@Ensures("result != null")
public GenotypesContext subsetToSamples( final Set<String> samples ) {
if ( samples.size() == size() )
final int nSamples = samples.size();
final int nGenotypes = size();
if ( nSamples == nGenotypes )
return this;
else if ( samples.isEmpty() )
else if ( nSamples == 0 )
return NO_GENOTYPES;
else {
GenotypesContext subset = create(samples.size());
for ( final Genotype g : getGenotypes() ) {
if ( samples.contains(g.getSampleName()) ) {
subset.add(g);
}
else { // nSamples is neither 0 nor equal to nGenotypes
final GenotypesContext subset = create(samples.size());
for ( final String sample : samples ) {
subset.add(get(sample));
}
return subset;
}

View File

@ -726,19 +726,21 @@ public class VariantContext implements Feature { // to enable tribble intergrati
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/
public GenotypesContext getGenotypes(String sampleName) {
return getGenotypes(Arrays.asList(sampleName));
return getGenotypes(Collections.singleton(sampleName));
}
/**
* Returns a GenotypesContext holding the Genotype for each sampleName in sampleNames
* (the same return type as the other getGenotypes overloads).
*
* For testing convenience only
*
* @param sampleNames a unique list of sample names
* @return the genotypes context subset to the samples named in sampleNames
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/
public GenotypesContext getGenotypes(Collection<String> sampleNames) {
return getGenotypes().subsetToSamples(sampleNames);
protected GenotypesContext getGenotypes(Collection<String> sampleNames) {
return getGenotypes().subsetToSamples(new HashSet<String>(sampleNames));
}
public GenotypesContext getGenotypes(Set<String> sampleNames) {

View File

@ -86,6 +86,7 @@ public class VariantContextUtils {
for ( Allele allele : vc.getAlternateAlleles() ) {
int altChromosomes = vc.getCalledChrCount(allele);
alleleCounts.add(altChromosomes);
// todo -- this is a performance problem: the format string is rebuilt and String.format is called for every alternate allele
String freq = String.format(makePrecisionFormatStringFromDenominatorValue(totalChromosomes), ((double)altChromosomes / totalChromosomes));
alleleFreqs.add(freq);
}