Multithreading support for the unified genotyper. Tests on a 10 Mbase region on pilot 1 show a 6.8x improvement when running 8-way parallel.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2430 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-12-23 00:48:06 +00:00
parent 164a94a3d0
commit e29e8e52b9
2 changed files with 25 additions and 24 deletions

View File

@ -48,17 +48,17 @@ public class EmpiricalSubstitutionProbabilities extends FourBaseProbabilities {
} }
} }
private static SAMRecord lastReadForPL = null; private static ThreadLocal<SAMRecord> lastReadForPL = new ThreadLocal<SAMRecord>();
private static SequencerPlatform plOfLastRead = null; private static ThreadLocal<SequencerPlatform> plOfLastRead = new ThreadLocal<SequencerPlatform>();
public static SequencerPlatform getReadSequencerPlatform( SAMRecord read ) { public static SequencerPlatform getReadSequencerPlatform( SAMRecord read ) {
if ( lastReadForPL != read ) { if ( lastReadForPL.get() != read ) {
lastReadForPL = read; lastReadForPL.set(read);
SAMReadGroupRecord readGroup = read.getReadGroup(); SAMReadGroupRecord readGroup = read.getReadGroup();
final String platformName = readGroup == null ? null : readGroup.getPlatform(); final String platformName = readGroup == null ? null : readGroup.getPlatform();
plOfLastRead = standardizeSequencerPlatform(platformName); plOfLastRead.set(standardizeSequencerPlatform(platformName));
} }
return plOfLastRead; return plOfLastRead.get();
} }
public int getReadSequencerPlatformIndex( SAMRecord read ) { public int getReadSequencerPlatformIndex( SAMRecord read ) {

View File

@ -57,7 +57,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
public GenotypeWriter writer = null; public GenotypeWriter writer = null;
// the model used for calculating genotypes // the model used for calculating genotypes
private GenotypeCalculationModel gcm; private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
// samples in input // samples in input
private Set<String> samples; private Set<String> samples;
@ -75,7 +75,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
* *
**/ **/
public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) { public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) {
gcm.close(); //gcm.close();
this.UAC = UAC; this.UAC = UAC;
initialize(); initialize();
} }
@ -111,20 +111,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// for ( String sample : samples ) // for ( String sample : samples )
// logger.debug("SAMPLE: " + sample); // logger.debug("SAMPLE: " + sample);
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if(writer != null) {
if(writer instanceof VCFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if(writer instanceof GLFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if(writer instanceof GeliGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format);
// *** If we were called by another walker, then we don't *** // *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. *** // *** want to do any of the other initialization steps. ***
if ( writer == null ) if ( writer == null )
@ -191,6 +177,21 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
* @param rawContext contextual information around the locus * @param rawContext contextual information around the locus
*/ */
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) { public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if(writer != null) {
if(writer instanceof VCFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if(writer instanceof GLFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if(writer instanceof GeliGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
if(gcm.get() == null)
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format));
char ref = Character.toUpperCase(refContext.getBase()); char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) ) if ( !BaseUtils.isRegularBase(ref) )
return null; return null;
@ -218,7 +219,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
return null; return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE); DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors); Pair<VariationCall, List<Genotype>> call = gcm.get().calculateGenotype(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible // annotate the call, if possible
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) { if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
@ -280,7 +281,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// Close any file writers // Close any file writers
public void onTraversalDone(Integer sum) { public void onTraversalDone(Integer sum) {
gcm.close(); //gcm.close();
logger.info("Processed " + sum + " loci that are callable for SNPs"); logger.info("Processed " + sum + " loci that are callable for SNPs");
} }