Multithreading support for the unified genotyper. Tests on a 10 Mbase region on pilot 1 show a 6.8x improvement when running 8-way parallel.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2430 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-12-23 00:48:06 +00:00
parent 164a94a3d0
commit e29e8e52b9
2 changed files with 25 additions and 24 deletions

View File

@ -48,17 +48,17 @@ public class EmpiricalSubstitutionProbabilities extends FourBaseProbabilities {
}
}
private static SAMRecord lastReadForPL = null;
private static SequencerPlatform plOfLastRead = null;
private static ThreadLocal<SAMRecord> lastReadForPL = new ThreadLocal<SAMRecord>();
private static ThreadLocal<SequencerPlatform> plOfLastRead = new ThreadLocal<SequencerPlatform>();
public static SequencerPlatform getReadSequencerPlatform( SAMRecord read ) {
if ( lastReadForPL != read ) {
lastReadForPL = read;
if ( lastReadForPL.get() != read ) {
lastReadForPL.set(read);
SAMReadGroupRecord readGroup = read.getReadGroup();
final String platformName = readGroup == null ? null : readGroup.getPlatform();
plOfLastRead = standardizeSequencerPlatform(platformName);
plOfLastRead.set(standardizeSequencerPlatform(platformName));
}
return plOfLastRead;
return plOfLastRead.get();
}
public int getReadSequencerPlatformIndex( SAMRecord read ) {

View File

@ -57,7 +57,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
public GenotypeWriter writer = null;
// the model used for calculating genotypes
private GenotypeCalculationModel gcm;
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
// samples in input
private Set<String> samples;
@ -75,7 +75,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
*
**/
public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) {
gcm.close();
//gcm.close();
this.UAC = UAC;
initialize();
}
@ -111,20 +111,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// for ( String sample : samples )
// logger.debug("SAMPLE: " + sample);
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if(writer != null) {
if(writer instanceof VCFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if(writer instanceof GLFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if(writer instanceof GeliGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
gcm = GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format);
// *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. ***
if ( writer == null )
@ -191,6 +177,21 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
* @param rawContext contextual information around the locus
*/
public Pair<VariationCall, List<Genotype>> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext rawContext) {
GenotypeWriterFactory.GENOTYPE_FORMAT format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
if(writer != null) {
if(writer instanceof VCFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
else if(writer instanceof GLFGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
else if(writer instanceof GeliGenotypeWriter)
format = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
if(gcm.get() == null)
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format));
char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) )
return null;
@ -218,7 +219,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
return null;
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, UAC.heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
Pair<VariationCall, List<Genotype>> call = gcm.calculateGenotype(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
Pair<VariationCall, List<Genotype>> call = gcm.get().calculateGenotype(tracker, ref, rawContext.getLocation(), stratifiedContexts, priors);
// annotate the call, if possible
if ( call != null && call.first != null && call.first instanceof ArbitraryFieldsBacked ) {
@ -280,7 +281,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// Close any file writers
public void onTraversalDone(Integer sum) {
gcm.close();
//gcm.close();
logger.info("Processed " + sum + " loci that are callable for SNPs");
}