Threading support for beagle output files.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2569 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0513690416
commit
02e23e2d9c
|
|
@ -35,8 +35,8 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
protected double CONFIDENCE_THRESHOLD;
|
protected double CONFIDENCE_THRESHOLD;
|
||||||
protected double MINIMUM_ALLELE_FREQUENCY;
|
protected double MINIMUM_ALLELE_FREQUENCY;
|
||||||
protected boolean REPORT_SLOD;
|
protected boolean REPORT_SLOD;
|
||||||
protected PrintWriter verboseWriter;
|
protected PrintStream verboseWriter;
|
||||||
protected PrintWriter beagleWriter;
|
protected PrintStream beagleWriter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new GenotypeCalculationModel object
|
* Create a new GenotypeCalculationModel object
|
||||||
|
|
@ -58,8 +58,8 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
Logger logger,
|
Logger logger,
|
||||||
UnifiedArgumentCollection UAC,
|
UnifiedArgumentCollection UAC,
|
||||||
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
||||||
PrintWriter verboseWriter,
|
PrintStream verboseWriter,
|
||||||
PrintWriter beagleWriter) {
|
PrintStream beagleWriter) {
|
||||||
this.samples = new TreeSet<String>(samples);
|
this.samples = new TreeSet<String>(samples);
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
baseModel = UAC.baseModel;
|
baseModel = UAC.baseModel;
|
||||||
|
|
@ -73,22 +73,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
|
MINIMUM_ALLELE_FREQUENCY = UAC.MINIMUM_ALLELE_FREQUENCY;
|
||||||
REPORT_SLOD = ! UAC.NO_SLOD;
|
REPORT_SLOD = ! UAC.NO_SLOD;
|
||||||
this.verboseWriter = verboseWriter;
|
this.verboseWriter = verboseWriter;
|
||||||
if ( verboseWriter != null )
|
|
||||||
initializeVerboseWriter(verboseWriter);
|
|
||||||
this.beagleWriter = beagleWriter;
|
this.beagleWriter = beagleWriter;
|
||||||
if ( beagleWriter != null )
|
|
||||||
initializeBeagleWriter(beagleWriter);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void initializeVerboseWriter(PrintWriter writer) { }
|
|
||||||
|
|
||||||
protected void initializeBeagleWriter(PrintWriter writer) {
|
|
||||||
writer.print("marker alleleA alleleB");
|
|
||||||
for ( String sample : samples ) {
|
|
||||||
writer.print(' ');
|
|
||||||
writer.print(sample);
|
|
||||||
}
|
|
||||||
writer.println();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,7 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
|
||||||
public class GenotypeCalculationModelFactory {
|
public class GenotypeCalculationModelFactory {
|
||||||
|
|
@ -60,8 +60,8 @@ public class GenotypeCalculationModelFactory {
|
||||||
Logger logger,
|
Logger logger,
|
||||||
UnifiedArgumentCollection UAC,
|
UnifiedArgumentCollection UAC,
|
||||||
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
||||||
PrintWriter verboseWriter,
|
PrintStream verboseWriter,
|
||||||
PrintWriter beagleWriter) {
|
PrintStream beagleWriter) {
|
||||||
GenotypeCalculationModel gcm;
|
GenotypeCalculationModel gcm;
|
||||||
switch ( UAC.genotypeModel ) {
|
switch ( UAC.genotypeModel ) {
|
||||||
case EM_POINT_ESTIMATE:
|
case EM_POINT_ESTIMATE:
|
||||||
|
|
|
||||||
|
|
@ -107,16 +107,6 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void initializeVerboseWriter(PrintWriter verboseWriter) {
|
|
||||||
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
|
|
||||||
for ( char altAllele : BaseUtils.BASES ) {
|
|
||||||
char base = Character.toLowerCase(altAllele);
|
|
||||||
header.append("POfDGivenAFFor" + base + "\t");
|
|
||||||
header.append("PosteriorAFFor" + base + "\t");
|
|
||||||
}
|
|
||||||
verboseWriter.println(header);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
|
protected void initialize(char ref, Map<String, StratifiedAlignmentContext> contexts, StratifiedAlignmentContext.StratifiedContextType contextType) {
|
||||||
// by default, no initialization is done
|
// by default, no initialization is done
|
||||||
return;
|
return;
|
||||||
|
|
|
||||||
|
|
@ -39,8 +39,7 @@ import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter;
|
||||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.io.PrintWriter;
|
import java.io.PrintStream;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -57,15 +56,10 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
public GenotypeWriter writer = null;
|
public GenotypeWriter writer = null;
|
||||||
|
|
||||||
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
||||||
public String VERBOSE = null;
|
public PrintStream verboseWriter = null;
|
||||||
|
|
||||||
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
|
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
|
||||||
public String BEAGLE = null;
|
public PrintStream beagleWriter = null;
|
||||||
|
|
||||||
|
|
||||||
// the verbose and beagle writers
|
|
||||||
private PrintWriter verboseWriter = null;
|
|
||||||
private PrintWriter beagleWriter = null;
|
|
||||||
|
|
||||||
// the model used for calculating genotypes
|
// the model used for calculating genotypes
|
||||||
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
||||||
|
|
@ -109,7 +103,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
|
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
|
||||||
throw new IllegalArgumentException(sb.toString());
|
throw new IllegalArgumentException(sb.toString());
|
||||||
}
|
}
|
||||||
if ( BEAGLE != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
|
if ( beagleWriter != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
|
||||||
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
|
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -118,11 +112,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
// no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name
|
// no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name
|
||||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||||
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
|
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
|
||||||
|
|
||||||
// TODO -- it would be nice to be able to handle verbose and beagle even with multiple threads
|
|
||||||
// no VERBOSE because we'd need to deal with parallelizing the writing
|
|
||||||
if ( VERBOSE != null || BEAGLE != null )
|
|
||||||
throw new IllegalArgumentException("For technical reasons, the VERBOSE and BEAGLE arguments cannot be used with multiple threads");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
|
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
|
||||||
|
|
@ -149,13 +138,24 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize the writers
|
// initialize the writers
|
||||||
try {
|
if ( verboseWriter != null ) {
|
||||||
if ( VERBOSE != null )
|
if(UAC.genotypeModel != GenotypeCalculationModel.Model.EM_POINT_ESTIMATE) {
|
||||||
verboseWriter = new PrintWriter(VERBOSE);
|
StringBuilder header = new StringBuilder("AFINFO\tLOC\tMAF\tF\tNullAFpriors\t");
|
||||||
if ( BEAGLE != null )
|
for ( char altAllele : BaseUtils.BASES ) {
|
||||||
beagleWriter = new PrintWriter(BEAGLE);
|
char base = Character.toLowerCase(altAllele);
|
||||||
} catch (FileNotFoundException e) {
|
header.append("POfDGivenAFFor" + base + "\t");
|
||||||
throw new StingException("UnifiedGenotyper [verbose/beagle]: could not open file for writing");
|
header.append("PosteriorAFFor" + base + "\t");
|
||||||
|
}
|
||||||
|
verboseWriter.println(header);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ( beagleWriter != null ) {
|
||||||
|
beagleWriter.print("marker alleleA alleleB");
|
||||||
|
for ( String sample : samples ) {
|
||||||
|
beagleWriter.print(' ');
|
||||||
|
beagleWriter.print(sample);
|
||||||
|
}
|
||||||
|
beagleWriter.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
// *** If we were called by another walker, then we don't ***
|
// *** If we were called by another walker, then we don't ***
|
||||||
|
|
@ -328,11 +328,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
|
|
||||||
// Close any file writers
|
// Close any file writers
|
||||||
public void onTraversalDone(Integer sum) {
|
public void onTraversalDone(Integer sum) {
|
||||||
if ( verboseWriter != null )
|
|
||||||
verboseWriter.close();
|
|
||||||
if ( beagleWriter != null )
|
|
||||||
beagleWriter.close();
|
|
||||||
|
|
||||||
logger.info("Processed " + sum + " loci that are callable for SNPs");
|
logger.info("Processed " + sum + " loci that are callable for SNPs");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue