From fcce77c24542cecdce3469628f050683673d8cc9 Mon Sep 17 00:00:00 2001 From: ebanks Date: Fri, 8 Jan 2010 18:41:04 +0000 Subject: [PATCH] Added -beagle option to emit likelihoods file for use with the BEAGLE imputation engine; still experimental. (Also converted getPileup -> getBasePileup) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2549 348d0f76-0448-11de-a6fe-93d51630548a --- .../DiploidGenotypeCalculationModel.java | 22 +++++++++++-- .../genotyper/EMGenotypeCalculationModel.java | 2 +- .../genotyper/GenotypeCalculationModel.java | 19 +++++++++-- .../GenotypeCalculationModelFactory.java | 8 +++-- ...JointEstimateGenotypeCalculationModel.java | 18 ++++++++-- ...PointEstimateGenotypeCalculationModel.java | 4 +-- .../walkers/genotyper/UnifiedGenotyper.java | 33 +++++++++++++------ 7 files changed, 85 insertions(+), 21 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 6cce8c4d0..e344489dd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul for ( String sample : contexts.keySet() ) { StratifiedAlignmentContext context = contexts.get(sample); - ReadBackedPileup pileup = context.getContext(contextType).getPileup(); + ReadBackedPileup pileup = context.getContext(contextType).getBasePileup(); // create the GenotypeLikelihoods object GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); @@ -109,7 +109,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); + ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(); ((ReadBacked)call).setPileup(pileup); } if ( call instanceof SampleBacked ) { @@ -128,6 +128,24 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul calls.add(call); } + // output to beagle file if requested + if ( beagleWriter != null ) { + for ( String sample : samples ) { + GenotypeLikelihoods gl = GLs.get(sample); + if ( gl == null ) { + beagleWriter.print(" 0.0 0.0 0.0"); + continue; + } + double[] likelihoods = gl.getLikelihoods(); + beagleWriter.print(' '); + beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.REF.ordinal()]))); + beagleWriter.print(' '); + beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HET.ordinal()]))); + beagleWriter.print(' '); + beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HOM.ordinal()]))); + } + } + return calls; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 15f6f640d..d2ad127b2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -105,7 +105,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode call.setGenotype(bestGenotype); if ( call instanceof ReadBacked ) { - ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup(); + ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup(); ((ReadBacked)call).setPileup(pileup); } if ( call instanceof SampleBacked ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index 77944decb..c8173b03b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -36,6 +36,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected double MINIMUM_ALLELE_FREQUENCY; protected boolean REPORT_SLOD; protected PrintWriter verboseWriter; + protected PrintWriter beagleWriter; /** * Create a new GenotypeCalculationModel object @@ -51,12 +52,14 @@ public abstract class GenotypeCalculationModel implements Cloneable { * @param UAC unified arg collection * @param outputFormat output format * @param verboseWriter verbose writer + * @param beagleWriter beagle writer */ protected void initialize(Set samples, Logger logger, UnifiedArgumentCollection UAC, GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, - PrintWriter verboseWriter) { + PrintWriter verboseWriter, + PrintWriter beagleWriter) { this.samples = new TreeSet(samples); this.logger = logger; baseModel = UAC.baseModel; @@ -72,9 +75,21 @@ public abstract class GenotypeCalculationModel implements Cloneable { this.verboseWriter = verboseWriter; if ( verboseWriter != null ) initializeVerboseWriter(verboseWriter); + this.beagleWriter = beagleWriter; + if ( beagleWriter != null ) + initializeBeagleWriter(beagleWriter); } - protected void initializeVerboseWriter(PrintWriter writer) { }; + protected void initializeVerboseWriter(PrintWriter writer) { } + + protected void initializeBeagleWriter(PrintWriter writer) { + writer.print("marker alleleA alleleB"); + for ( String sample : samples ) { + writer.print(' '); + writer.print(sample); + } + writer.println(); + } /** * Must be overridden by concrete subclasses diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java index ca934ea0c..33e6c21e3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModelFactory.java @@ -51,13 +51,17 @@ public class GenotypeCalculationModelFactory { * @param logger logger * @param UAC the unified argument collection * @param outputFormat the output format + * @param verboseWriter verbose writer + * @param beagleWriter beagle writer + * * @return model */ public static GenotypeCalculationModel makeGenotypeCalculation(Set samples, Logger logger, UnifiedArgumentCollection UAC, GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat, - PrintWriter verboseWriter) { + PrintWriter verboseWriter, + PrintWriter beagleWriter) { GenotypeCalculationModel gcm; switch ( UAC.genotypeModel ) { case EM_POINT_ESTIMATE: @@ -72,7 +76,7 @@ public class GenotypeCalculationModelFactory { default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel); } - gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter); + gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter, beagleWriter); return gcm; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index c7818c1d5..a7a5297d5 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -81,7 +81,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE); // calculate the sum of quality scores for each base - ReadBackedPileup pileup = context.getPileup(); + ReadBackedPileup pileup = context.getBasePileup(); for ( PileupElement p : pileup ) { // ignore deletions if ( p.isDeletion() ) @@ -341,9 +341,23 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) ) return new Pair>(null, null); - // populate the sample-specific data + // output to beagle file if requested + if ( beagleWriter != null ) { + beagleWriter.print(loc); + beagleWriter.print(' '); + beagleWriter.print(ref); + beagleWriter.print(' '); + beagleWriter.print(bestAlternateAllele); + } + + // populate the sample-specific data (output it to beagle also if requested) List calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc); + // close beagle record (if requested) + if ( beagleWriter != null ) { + beagleWriter.println(); + } + // next, the general locus data // *** note that calculating strand bias involves overwriting data structures, so we do that last VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index fc19e16a5..b403ce733 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -113,7 +113,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation private Pair getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) { // create the pileup AlignmentContext myContext = sampleContext.getContext(contextType); - ReadBackedPileup pileup = myContext.getPileup(); + ReadBackedPileup pileup = myContext.getBasePileup(); // create the GenotypeLikelihoods object GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform); @@ -137,7 +137,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation for ( String sample : contexts.keySet() ) { StratifiedAlignmentContext context = contexts.get(sample); - ReadBackedPileup pileup = context.getContext(contextType).getPileup(); + ReadBackedPileup pileup = context.getContext(contextType).getBasePileup(); // create the GenotypeLikelihoods object GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index b50751504..8d2da6302 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -59,9 +59,13 @@ public class UnifiedGenotyper extends LocusWalker gcm = new ThreadLocal(); @@ -105,15 +109,20 @@ public class UnifiedGenotyper extends LocusWalker 1 ) { // no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name if ( UAC.ASSUME_SINGLE_SAMPLE != null ) throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads"); + + // TODO -- it would be nice to be able to handle verbose and beagle even with multiple threads // no VERBOSE because we'd need to deal with parallelizing the writing - if ( VERBOSE != null ) - throw new IllegalArgumentException("For technical reasons, the VERBOSE argument cannot be used with multiple threads"); + if ( VERBOSE != null || BEAGLE != null ) + throw new IllegalArgumentException("For technical reasons, the VERBOSE and BEAGLE arguments cannot be used with multiple threads"); } // get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names @@ -139,14 +148,16 @@ public class UnifiedGenotyper extends LocusWalker