Threading support for beagle output files.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2569 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2010-01-13 02:42:16 +00:00
parent 0513690416
commit 02e23e2d9c
4 changed files with 29 additions and 59 deletions

View File

@ -35,8 +35,8 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected double CONFIDENCE_THRESHOLD; protected double CONFIDENCE_THRESHOLD;
protected double MINIMUM_ALLELE_FREQUENCY; protected double MINIMUM_ALLELE_FREQUENCY;
protected boolean REPORT_SLOD; protected boolean REPORT_SLOD;
protected PrintWriter verboseWriter; protected PrintStream verboseWriter;
protected PrintWriter beagleWriter; protected PrintStream beagleWriter;
/** /**
* Create a new GenotypeCalculationModel object * Create a new GenotypeCalculationModel object
@ -58,8 +58,8 @@ public abstract class GenotypeCalculationModel implements Cloneable {
Logger logger, Logger logger,
UnifiedArgumentCollection UAC, UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
PrintWriter verboseWriter, PrintStream verboseWriter,
PrintWriter beagleWriter) { PrintStream beagleWriter) {
this.samples = new TreeSet<String>(samples); this.samples = new TreeSet<String>(samples);
this.logger = logger; this.logger = logger;
baseModel = UAC.baseModel; baseModel = UAC.baseModel;
@ -73,22 +73,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY; MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
REPORT_SLOD = ! UAC.NO_SLOD; REPORT_SLOD = ! UAC.NO_SLOD;
this.verboseWriter = verboseWriter; this.verboseWriter = verboseWriter;
if ( verboseWriter != null )
initializeVerboseWriter(verboseWriter);
this.beagleWriter = beagleWriter; this.beagleWriter = beagleWriter;
if ( beagleWriter != null )
initializeBeagleWriter(beagleWriter);
}
protected void initializeVerboseWriter(PrintWriter writer) { }
protected void initializeBeagleWriter(PrintWriter writer) {
writer.print("marker alleleA alleleB");
for ( String sample : samples ) {
writer.print(' ');
writer.print(sample);
}
writer.println();
} }
/** /**

View File

@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import java.util.Set; import java.util.Set;
import java.io.PrintWriter; import java.io.PrintStream;
public class GenotypeCalculationModelFactory { public class GenotypeCalculationModelFactory {
@ -60,8 +60,8 @@ public class GenotypeCalculationModelFactory {
Logger logger, Logger logger,
UnifiedArgumentCollection UAC, UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
PrintWriter verboseWriter, PrintStream verboseWriter,
PrintWriter beagleWriter) { PrintStream beagleWriter) {
GenotypeCalculationModel gcm; GenotypeCalculationModel gcm;
switch ( UAC.genotypeModel ) { switch ( UAC.genotypeModel ) {
case EM_POINT_ESTIMATE: case EM_POINT_ESTIMATE:

View File

@ -107,16 +107,6 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
} }
} }
protected void initializeVerboseWriter(PrintWriter verboseWriter) {
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
for ( char altAllele : BaseUtils.BASES ) {
char base = Character.toLowerCase(altAllele);
header.append("POfDGivenAFFor" + base + "\t");
header.append("PosteriorAFFor" + base + "\t");
}
verboseWriter.println(header);
}
protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) { protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
// by default, no initialization is done // by default, no initialization is done
return; return;

View File

@ -39,8 +39,7 @@ import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
import org.broadinstitute.sting.utils.genotype.vcf.*; import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.util.*; import java.util.*;
import java.io.PrintWriter; import java.io.PrintStream;
import java.io.FileNotFoundException;
/** /**
@ -57,15 +56,10 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
public GenotypeWriter writer = null; public GenotypeWriter writer = null;
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false) @Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
public String VERBOSE = null; public PrintStream verboseWriter = null;
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false) @Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
public String BEAGLE = null; public PrintStream beagleWriter = null;
// the verbose and beagle writers
private PrintWriter verboseWriter = null;
private PrintWriter beagleWriter = null;
// the model used for calculating genotypes // the model used for calculating genotypes
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>(); private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
@ -109,7 +103,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff"); sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
throw new IllegalArgumentException(sb.toString()); throw new IllegalArgumentException(sb.toString());
} }
if ( BEAGLE != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) { if ( beagleWriter != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model."); throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
} }
@ -118,11 +112,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name // no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name
if ( UAC.ASSUME_SINGLE_SAMPLE != null ) if ( UAC.ASSUME_SINGLE_SAMPLE != null )
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads"); throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
// TODO -- it would be nice to be able to handle verbose and beagle even with multiple threads
// no VERBOSE because we'd need to deal with parallelizing the writing
if ( VERBOSE != null || BEAGLE != null )
throw new IllegalArgumentException("For technical reasons, the VERBOSE and BEAGLE arguments cannot be used with multiple threads");
} }
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names // get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
@ -149,13 +138,24 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
} }
// initialize the writers // initialize the writers
try { if ( verboseWriter != null ) {
if ( VERBOSE != null ) if(UAC.genotypeModel != GenotypeCalculationModel.Model.EM_POINT_ESTIMATE) {
verboseWriter = new PrintWriter(VERBOSE); StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
if ( BEAGLE != null ) for ( char altAllele : BaseUtils.BASES ) {
beagleWriter = new PrintWriter(BEAGLE); char base = Character.toLowerCase(altAllele);
} catch (FileNotFoundException e) { header.append("POfDGivenAFFor" + base + "\t");
throw new StingException("UnifiedGenotyper [verbose/beagle]: could not open file for writing"); header.append("PosteriorAFFor" + base + "\t");
}
verboseWriter.println(header);
}
}
if ( beagleWriter != null ) {
beagleWriter.print("marker alleleA alleleB");
for ( String sample : samples ) {
beagleWriter.print(' ');
beagleWriter.print(sample);
}
beagleWriter.println();
} }
// *** If we were called by another walker, then we don't *** // *** If we were called by another walker, then we don't ***
@ -328,11 +328,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
// Close any file writers // Close any file writers
public void onTraversalDone(Integer sum) { public void onTraversalDone(Integer sum) {
if ( verboseWriter != null )
verboseWriter.close();
if ( beagleWriter != null )
beagleWriter.close();
logger.info("Processed " + sum + " loci that are callable for SNPs"); logger.info("Processed " + sum + " loci that are callable for SNPs");
} }
} }