Added -beagle option to emit likelihoods file for use with the BEAGLE imputation engine; still experimental.

(Also converted getPileup -> getBasePileup)


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2549 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-08 18:41:04 +00:00
parent 9cbae53ee1
commit fcce77c245
7 changed files with 85 additions and 21 deletions

View File

@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
for ( String sample : contexts.keySet() ) {
StratifiedAlignmentContext context = contexts.get(sample);
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
// create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
@ -109,7 +109,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {
@ -128,6 +128,24 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
calls.add(call);
}
// output to beagle file if requested
if ( beagleWriter != null ) {
for ( String sample : samples ) {
GenotypeLikelihoods gl = GLs.get(sample);
if ( gl == null ) {
beagleWriter.print(" 0.0 0.0 0.0");
continue;
}
double[] likelihoods = gl.getLikelihoods();
beagleWriter.print(' ');
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.REF.ordinal()])));
beagleWriter.print(' ');
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HET.ordinal()])));
beagleWriter.print(' ');
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HOM.ordinal()])));
}
}
return calls;
}

View File

@ -105,7 +105,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
call.setGenotype(bestGenotype);
if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
((ReadBacked)call).setPileup(pileup);
}
if ( call instanceof SampleBacked ) {

View File

@ -36,6 +36,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected double MINIMUM_ALLELE_FREQUENCY;
protected boolean REPORT_SLOD;
protected PrintWriter verboseWriter;
protected PrintWriter beagleWriter;
/**
* Create a new GenotypeCalculationModel object
@ -51,12 +52,14 @@ public abstract class GenotypeCalculationModel implements Cloneable {
* @param UAC unified arg collection
* @param outputFormat output format
* @param verboseWriter verbose writer
* @param beagleWriter beagle writer
*/
protected void initialize(Set<String> samples,
Logger logger,
UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
PrintWriter verboseWriter) {
PrintWriter verboseWriter,
PrintWriter beagleWriter) {
this.samples = new TreeSet<String>(samples);
this.logger = logger;
baseModel = UAC.baseModel;
@ -72,9 +75,21 @@ public abstract class GenotypeCalculationModel implements Cloneable {
this.verboseWriter = verboseWriter;
if ( verboseWriter != null )
initializeVerboseWriter(verboseWriter);
this.beagleWriter = beagleWriter;
if ( beagleWriter != null )
initializeBeagleWriter(beagleWriter);
}
protected void initializeVerboseWriter(PrintWriter writer) { };
protected void initializeVerboseWriter(PrintWriter writer) { }
protected void initializeBeagleWriter(PrintWriter writer) {
writer.print("marker alleleA alleleB");
for ( String sample : samples ) {
writer.print(' ');
writer.print(sample);
}
writer.println();
}
/**
* Must be overridden by concrete subclasses

View File

@ -51,13 +51,17 @@ public class GenotypeCalculationModelFactory {
* @param logger logger
* @param UAC the unified argument collection
* @param outputFormat the output format
* @param verboseWriter verbose writer
* @param beagleWriter beagle writer
*
* @return model
*/
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
Logger logger,
UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
PrintWriter verboseWriter) {
PrintWriter verboseWriter,
PrintWriter beagleWriter) {
GenotypeCalculationModel gcm;
switch ( UAC.genotypeModel ) {
case EM_POINT_ESTIMATE:
@ -72,7 +76,7 @@ public class GenotypeCalculationModelFactory {
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
}
gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter);
gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter, beagleWriter);
return gcm;
}
}

View File

@ -81,7 +81,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
// calculate the sum of quality scores for each base
ReadBackedPileup pileup = context.getPileup();
ReadBackedPileup pileup = context.getBasePileup();
for ( PileupElement p : pileup ) {
// ignore deletions
if ( p.isDeletion() )
@ -341,9 +341,23 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
return new Pair<VariationCall, List<Genotype>>(null, null);
// populate the sample-specific data
// output to beagle file if requested
if ( beagleWriter != null ) {
beagleWriter.print(loc);
beagleWriter.print(' ');
beagleWriter.print(ref);
beagleWriter.print(' ');
beagleWriter.print(bestAlternateAllele);
}
// populate the sample-specific data (output it to beagle also if requested)
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc);
// close beagle record (if requested)
if ( beagleWriter != null ) {
beagleWriter.println();
}
// next, the general locus data
// *** note that calculating strand bias involves overwriting data structures, so we do that last
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP);

View File

@ -113,7 +113,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) {
// create the pileup
AlignmentContext myContext = sampleContext.getContext(contextType);
ReadBackedPileup pileup = myContext.getPileup();
ReadBackedPileup pileup = myContext.getBasePileup();
// create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
@ -137,7 +137,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
for ( String sample : contexts.keySet() ) {
StratifiedAlignmentContext context = contexts.get(sample);
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
// create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);

View File

@ -59,9 +59,13 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
public String VERBOSE = null;
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
public String BEAGLE = null;
// the verbose writer
// the verbose and beagle writers
private PrintWriter verboseWriter = null;
private PrintWriter beagleWriter = null;
// the model used for calculating genotypes
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
@ -105,15 +109,20 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
throw new IllegalArgumentException(sb.toString());
}
if ( BEAGLE != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
}
// some arguments can't be handled (at least for now) while we are multi-threaded
if ( getToolkit().getArguments().numberOfThreads > 1 ) {
// no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
// TODO -- it would be nice to be able to handle verbose and beagle even with multiple threads
// no VERBOSE because we'd need to deal with parallelizing the writing
if ( VERBOSE != null )
throw new IllegalArgumentException("For technical reasons, the VERBOSE argument cannot be used with multiple threads");
if ( VERBOSE != null || BEAGLE != null )
throw new IllegalArgumentException("For technical reasons, the VERBOSE and BEAGLE arguments cannot be used with multiple threads");
}
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
@ -139,14 +148,16 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
}
// initialize the verbose writer
if ( VERBOSE != null ) {
try {
// initialize the writers
try {
if ( VERBOSE != null )
verboseWriter = new PrintWriter(VERBOSE);
} catch (FileNotFoundException e) {
throw new StingException("Could not open file " + VERBOSE + " for writing");
}
if ( BEAGLE != null )
beagleWriter = new PrintWriter(BEAGLE);
} catch (FileNotFoundException e) {
throw new StingException("UnifiedGenotyper [verbose/beagle]: could not open file for writing");
}
// *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. ***
if ( writer == null )
@ -220,7 +231,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
}
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter));
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
}
char ref = Character.toUpperCase(refContext.getBase());
@ -319,6 +330,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
public void onTraversalDone(Integer sum) {
if ( verboseWriter != null )
verboseWriter.close();
if ( beagleWriter != null )
beagleWriter.close();
logger.info("Processed " + sum + " loci that are callable for SNPs");
}