Added -beagle option to emit likelihoods file for use with the BEAGLE imputation engine; still experimental.

(Also converted getPileup -> getBasePileup)


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2549 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-01-08 18:41:04 +00:00
parent 9cbae53ee1
commit fcce77c245
7 changed files with 85 additions and 21 deletions

View File

@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
for ( String sample : contexts.keySet() ) { for ( String sample : contexts.keySet() ) {
StratifiedAlignmentContext context = contexts.get(sample); StratifiedAlignmentContext context = contexts.get(sample);
ReadBackedPileup pileup = context.getContext(contextType).getPileup(); ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
// create the GenotypeLikelihoods object // create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
@ -109,7 +109,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
if ( call instanceof ReadBacked ) { if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
((ReadBacked)call).setPileup(pileup); ((ReadBacked)call).setPileup(pileup);
} }
if ( call instanceof SampleBacked ) { if ( call instanceof SampleBacked ) {
@ -128,6 +128,24 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
calls.add(call); calls.add(call);
} }
// output to beagle file if requested
if ( beagleWriter != null ) {
for ( String sample : samples ) {
GenotypeLikelihoods gl = GLs.get(sample);
if ( gl == null ) {
beagleWriter.print(" 0.0 0.0 0.0");
continue;
}
double[] likelihoods = gl.getLikelihoods();
beagleWriter.print(' ');
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.REF.ordinal()])));
beagleWriter.print(' ');
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HET.ordinal()])));
beagleWriter.print(' ');
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HOM.ordinal()])));
}
}
return calls; return calls;
} }

View File

@ -105,7 +105,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
call.setGenotype(bestGenotype); call.setGenotype(bestGenotype);
if ( call instanceof ReadBacked ) { if ( call instanceof ReadBacked ) {
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
((ReadBacked)call).setPileup(pileup); ((ReadBacked)call).setPileup(pileup);
} }
if ( call instanceof SampleBacked ) { if ( call instanceof SampleBacked ) {

View File

@ -36,6 +36,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
protected double MINIMUM_ALLELE_FREQUENCY; protected double MINIMUM_ALLELE_FREQUENCY;
protected boolean REPORT_SLOD; protected boolean REPORT_SLOD;
protected PrintWriter verboseWriter; protected PrintWriter verboseWriter;
protected PrintWriter beagleWriter;
/** /**
* Create a new GenotypeCalculationModel object * Create a new GenotypeCalculationModel object
@ -51,12 +52,14 @@ public abstract class GenotypeCalculationModel implements Cloneable {
* @param UAC unified arg collection * @param UAC unified arg collection
* @param outputFormat output format * @param outputFormat output format
* @param verboseWriter verbose writer * @param verboseWriter verbose writer
* @param beagleWriter beagle writer
*/ */
protected void initialize(Set<String> samples, protected void initialize(Set<String> samples,
Logger logger, Logger logger,
UnifiedArgumentCollection UAC, UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
PrintWriter verboseWriter) { PrintWriter verboseWriter,
PrintWriter beagleWriter) {
this.samples = new TreeSet<String>(samples); this.samples = new TreeSet<String>(samples);
this.logger = logger; this.logger = logger;
baseModel = UAC.baseModel; baseModel = UAC.baseModel;
@ -72,9 +75,21 @@ public abstract class GenotypeCalculationModel implements Cloneable {
this.verboseWriter = verboseWriter; this.verboseWriter = verboseWriter;
if ( verboseWriter != null ) if ( verboseWriter != null )
initializeVerboseWriter(verboseWriter); initializeVerboseWriter(verboseWriter);
this.beagleWriter = beagleWriter;
if ( beagleWriter != null )
initializeBeagleWriter(beagleWriter);
} }
protected void initializeVerboseWriter(PrintWriter writer) { }; protected void initializeVerboseWriter(PrintWriter writer) { }
protected void initializeBeagleWriter(PrintWriter writer) {
writer.print("marker alleleA alleleB");
for ( String sample : samples ) {
writer.print(' ');
writer.print(sample);
}
writer.println();
}
/** /**
* Must be overridden by concrete subclasses * Must be overridden by concrete subclasses

View File

@ -51,13 +51,17 @@ public class GenotypeCalculationModelFactory {
* @param logger logger * @param logger logger
* @param UAC the unified argument collection * @param UAC the unified argument collection
* @param outputFormat the output format * @param outputFormat the output format
* @param verboseWriter verbose writer
* @param beagleWriter beagle writer
*
* @return model * @return model
*/ */
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples, public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
Logger logger, Logger logger,
UnifiedArgumentCollection UAC, UnifiedArgumentCollection UAC,
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
PrintWriter verboseWriter) { PrintWriter verboseWriter,
PrintWriter beagleWriter) {
GenotypeCalculationModel gcm; GenotypeCalculationModel gcm;
switch ( UAC.genotypeModel ) { switch ( UAC.genotypeModel ) {
case EM_POINT_ESTIMATE: case EM_POINT_ESTIMATE:
@ -72,7 +76,7 @@ public class GenotypeCalculationModelFactory {
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel); default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
} }
gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter); gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter, beagleWriter);
return gcm; return gcm;
} }
} }

View File

@ -81,7 +81,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
// calculate the sum of quality scores for each base // calculate the sum of quality scores for each base
ReadBackedPileup pileup = context.getPileup(); ReadBackedPileup pileup = context.getBasePileup();
for ( PileupElement p : pileup ) { for ( PileupElement p : pileup ) {
// ignore deletions // ignore deletions
if ( p.isDeletion() ) if ( p.isDeletion() )
@ -341,9 +341,23 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
return new Pair<VariationCall, List<Genotype>>(null, null); return new Pair<VariationCall, List<Genotype>>(null, null);
// populate the sample-specific data // output to beagle file if requested
if ( beagleWriter != null ) {
beagleWriter.print(loc);
beagleWriter.print(' ');
beagleWriter.print(ref);
beagleWriter.print(' ');
beagleWriter.print(bestAlternateAllele);
}
// populate the sample-specific data (output it to beagle also if requested)
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc); List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc);
// close beagle record (if requested)
if ( beagleWriter != null ) {
beagleWriter.println();
}
// next, the general locus data // next, the general locus data
// *** note that calculating strand bias involves overwriting data structures, so we do that last // *** note that calculating strand bias involves overwriting data structures, so we do that last
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP); VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP);

View File

@ -113,7 +113,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) { private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) {
// create the pileup // create the pileup
AlignmentContext myContext = sampleContext.getContext(contextType); AlignmentContext myContext = sampleContext.getContext(contextType);
ReadBackedPileup pileup = myContext.getPileup(); ReadBackedPileup pileup = myContext.getBasePileup();
// create the GenotypeLikelihoods object // create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
@ -137,7 +137,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
for ( String sample : contexts.keySet() ) { for ( String sample : contexts.keySet() ) {
StratifiedAlignmentContext context = contexts.get(sample); StratifiedAlignmentContext context = contexts.get(sample);
ReadBackedPileup pileup = context.getContext(contextType).getPileup(); ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
// create the GenotypeLikelihoods object // create the GenotypeLikelihoods object
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform); GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);

View File

@ -59,9 +59,13 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false) @Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
public String VERBOSE = null; public String VERBOSE = null;
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
public String BEAGLE = null;
// the verbose writer
// the verbose and beagle writers
private PrintWriter verboseWriter = null; private PrintWriter verboseWriter = null;
private PrintWriter beagleWriter = null;
// the model used for calculating genotypes // the model used for calculating genotypes
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>(); private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
@ -105,15 +109,20 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff"); sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
throw new IllegalArgumentException(sb.toString()); throw new IllegalArgumentException(sb.toString());
} }
if ( BEAGLE != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
}
// some arguments can't be handled (at least for now) while we are multi-threaded // some arguments can't be handled (at least for now) while we are multi-threaded
if ( getToolkit().getArguments().numberOfThreads > 1 ) { if ( getToolkit().getArguments().numberOfThreads > 1 ) {
// no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name // no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name
if ( UAC.ASSUME_SINGLE_SAMPLE != null ) if ( UAC.ASSUME_SINGLE_SAMPLE != null )
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads"); throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
// TODO -- it would be nice to be able to handle verbose and beagle even with multiple threads
// no VERBOSE because we'd need to deal with parallelizing the writing // no VERBOSE because we'd need to deal with parallelizing the writing
if ( VERBOSE != null ) if ( VERBOSE != null || BEAGLE != null )
throw new IllegalArgumentException("For technical reasons, the VERBOSE argument cannot be used with multiple threads"); throw new IllegalArgumentException("For technical reasons, the VERBOSE and BEAGLE arguments cannot be used with multiple threads");
} }
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names // get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
@ -139,14 +148,16 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT); ((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
} }
// initialize the verbose writer // initialize the writers
if ( VERBOSE != null ) { try {
try { if ( VERBOSE != null )
verboseWriter = new PrintWriter(VERBOSE); verboseWriter = new PrintWriter(VERBOSE);
} catch (FileNotFoundException e) { if ( BEAGLE != null )
throw new StingException("Could not open file " + VERBOSE + " for writing"); beagleWriter = new PrintWriter(BEAGLE);
} } catch (FileNotFoundException e) {
throw new StingException("UnifiedGenotyper [verbose/beagle]: could not open file for writing");
} }
// *** If we were called by another walker, then we don't *** // *** If we were called by another walker, then we don't ***
// *** want to do any of the other initialization steps. *** // *** want to do any of the other initialization steps. ***
if ( writer == null ) if ( writer == null )
@ -220,7 +231,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
else else
throw new StingException("Unsupported genotype format: " + writer.getClass().getName()); throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
} }
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter)); gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
} }
char ref = Character.toUpperCase(refContext.getBase()); char ref = Character.toUpperCase(refContext.getBase());
@ -319,6 +330,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
public void onTraversalDone(Integer sum) { public void onTraversalDone(Integer sum) {
if ( verboseWriter != null ) if ( verboseWriter != null )
verboseWriter.close(); verboseWriter.close();
if ( beagleWriter != null )
beagleWriter.close();
logger.info("Processed " + sum + " loci that are callable for SNPs"); logger.info("Processed " + sum + " loci that are callable for SNPs");
} }