Added -beagle option to emit likelihoods file for use with the BEAGLE imputation engine; still experimental.
(Also converted getPileup -> getBasePileup)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2549 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9cbae53ee1
commit
fcce77c245
|
|
@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
for ( String sample : contexts.keySet() ) {
|
||||
StratifiedAlignmentContext context = contexts.get(sample);
|
||||
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||
ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
|
|
@ -109,7 +109,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
|
|
@ -128,6 +128,24 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
|||
calls.add(call);
|
||||
}
|
||||
|
||||
// output to beagle file if requested
|
||||
if ( beagleWriter != null ) {
|
||||
for ( String sample : samples ) {
|
||||
GenotypeLikelihoods gl = GLs.get(sample);
|
||||
if ( gl == null ) {
|
||||
beagleWriter.print(" 0.0 0.0 0.0");
|
||||
continue;
|
||||
}
|
||||
double[] likelihoods = gl.getLikelihoods();
|
||||
beagleWriter.print(' ');
|
||||
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.REF.ordinal()])));
|
||||
beagleWriter.print(' ');
|
||||
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HET.ordinal()])));
|
||||
beagleWriter.print(' ');
|
||||
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HOM.ordinal()])));
|
||||
}
|
||||
}
|
||||
|
||||
return calls;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
call.setGenotype(bestGenotype);
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
|
||||
((ReadBacked)call).setPileup(pileup);
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
protected double MINIMUM_ALLELE_FREQUENCY;
|
||||
protected boolean REPORT_SLOD;
|
||||
protected PrintWriter verboseWriter;
|
||||
protected PrintWriter beagleWriter;
|
||||
|
||||
/**
|
||||
* Create a new GenotypeCalculationModel object
|
||||
|
|
@ -51,12 +52,14 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
* @param UAC unified arg collection
|
||||
* @param outputFormat output format
|
||||
* @param verboseWriter verbose writer
|
||||
* @param beagleWriter beagle writer
|
||||
*/
|
||||
protected void initialize(Set<String> samples,
|
||||
Logger logger,
|
||||
UnifiedArgumentCollection UAC,
|
||||
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
||||
PrintWriter verboseWriter) {
|
||||
PrintWriter verboseWriter,
|
||||
PrintWriter beagleWriter) {
|
||||
this.samples = new TreeSet<String>(samples);
|
||||
this.logger = logger;
|
||||
baseModel = UAC.baseModel;
|
||||
|
|
@ -72,9 +75,21 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
this.verboseWriter = verboseWriter;
|
||||
if ( verboseWriter != null )
|
||||
initializeVerboseWriter(verboseWriter);
|
||||
this.beagleWriter = beagleWriter;
|
||||
if ( beagleWriter != null )
|
||||
initializeBeagleWriter(beagleWriter);
|
||||
}
|
||||
|
||||
protected void initializeVerboseWriter(PrintWriter writer) { };
|
||||
protected void initializeVerboseWriter(PrintWriter writer) { }
|
||||
|
||||
protected void initializeBeagleWriter(PrintWriter writer) {
|
||||
writer.print("marker alleleA alleleB");
|
||||
for ( String sample : samples ) {
|
||||
writer.print(' ');
|
||||
writer.print(sample);
|
||||
}
|
||||
writer.println();
|
||||
}
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
|
|
|
|||
|
|
@ -51,13 +51,17 @@ public class GenotypeCalculationModelFactory {
|
|||
* @param logger logger
|
||||
* @param UAC the unified argument collection
|
||||
* @param outputFormat the output format
|
||||
* @param verboseWriter verbose writer
|
||||
* @param beagleWriter beagle writer
|
||||
*
|
||||
* @return model
|
||||
*/
|
||||
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
|
||||
Logger logger,
|
||||
UnifiedArgumentCollection UAC,
|
||||
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
||||
PrintWriter verboseWriter) {
|
||||
PrintWriter verboseWriter,
|
||||
PrintWriter beagleWriter) {
|
||||
GenotypeCalculationModel gcm;
|
||||
switch ( UAC.genotypeModel ) {
|
||||
case EM_POINT_ESTIMATE:
|
||||
|
|
@ -72,7 +76,7 @@ public class GenotypeCalculationModelFactory {
|
|||
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
|
||||
}
|
||||
|
||||
gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter);
|
||||
gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter, beagleWriter);
|
||||
return gcm;
|
||||
}
|
||||
}
|
||||
|
|
@ -81,7 +81,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||
|
||||
// calculate the sum of quality scores for each base
|
||||
ReadBackedPileup pileup = context.getPileup();
|
||||
ReadBackedPileup pileup = context.getBasePileup();
|
||||
for ( PileupElement p : pileup ) {
|
||||
// ignore deletions
|
||||
if ( p.isDeletion() )
|
||||
|
|
@ -341,9 +341,23 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
|||
if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
|
||||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||
|
||||
// populate the sample-specific data
|
||||
// output to beagle file if requested
|
||||
if ( beagleWriter != null ) {
|
||||
beagleWriter.print(loc);
|
||||
beagleWriter.print(' ');
|
||||
beagleWriter.print(ref);
|
||||
beagleWriter.print(' ');
|
||||
beagleWriter.print(bestAlternateAllele);
|
||||
}
|
||||
|
||||
// populate the sample-specific data (output it to beagle also if requested)
|
||||
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc);
|
||||
|
||||
// close beagle record (if requested)
|
||||
if ( beagleWriter != null ) {
|
||||
beagleWriter.println();
|
||||
}
|
||||
|
||||
// next, the general locus data
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP);
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) {
|
||||
// create the pileup
|
||||
AlignmentContext myContext = sampleContext.getContext(contextType);
|
||||
ReadBackedPileup pileup = myContext.getPileup();
|
||||
ReadBackedPileup pileup = myContext.getBasePileup();
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||
|
|
@ -137,7 +137,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
|
||||
for ( String sample : contexts.keySet() ) {
|
||||
StratifiedAlignmentContext context = contexts.get(sample);
|
||||
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
||||
ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
|
||||
|
||||
// create the GenotypeLikelihoods object
|
||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);
|
||||
|
|
|
|||
|
|
@ -59,9 +59,13 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
||||
public String VERBOSE = null;
|
||||
|
||||
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
|
||||
public String BEAGLE = null;
|
||||
|
||||
// the verbose writer
|
||||
|
||||
// the verbose and beagle writers
|
||||
private PrintWriter verboseWriter = null;
|
||||
private PrintWriter beagleWriter = null;
|
||||
|
||||
// the model used for calculating genotypes
|
||||
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
||||
|
|
@ -105,15 +109,20 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
|
||||
throw new IllegalArgumentException(sb.toString());
|
||||
}
|
||||
if ( BEAGLE != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
|
||||
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
|
||||
}
|
||||
|
||||
// some arguments can't be handled (at least for now) while we are multi-threaded
|
||||
if ( getToolkit().getArguments().numberOfThreads > 1 ) {
|
||||
// no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name
|
||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
|
||||
|
||||
// TODO -- it would be nice to be able to handle verbose and beagle even with multiple threads
|
||||
// no VERBOSE because we'd need to deal with parallelizing the writing
|
||||
if ( VERBOSE != null )
|
||||
throw new IllegalArgumentException("For technical reasons, the VERBOSE argument cannot be used with multiple threads");
|
||||
if ( VERBOSE != null || BEAGLE != null )
|
||||
throw new IllegalArgumentException("For technical reasons, the VERBOSE and BEAGLE arguments cannot be used with multiple threads");
|
||||
}
|
||||
|
||||
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
|
||||
|
|
@ -139,14 +148,16 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
|
||||
}
|
||||
|
||||
// initialize the verbose writer
|
||||
if ( VERBOSE != null ) {
|
||||
try {
|
||||
// initialize the writers
|
||||
try {
|
||||
if ( VERBOSE != null )
|
||||
verboseWriter = new PrintWriter(VERBOSE);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Could not open file " + VERBOSE + " for writing");
|
||||
}
|
||||
if ( BEAGLE != null )
|
||||
beagleWriter = new PrintWriter(BEAGLE);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("UnifiedGenotyper [verbose/beagle]: could not open file for writing");
|
||||
}
|
||||
|
||||
// *** If we were called by another walker, then we don't ***
|
||||
// *** want to do any of the other initialization steps. ***
|
||||
if ( writer == null )
|
||||
|
|
@ -220,7 +231,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
else
|
||||
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
|
||||
}
|
||||
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter));
|
||||
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
|
||||
}
|
||||
|
||||
char ref = Character.toUpperCase(refContext.getBase());
|
||||
|
|
@ -319,6 +330,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
|||
public void onTraversalDone(Integer sum) {
|
||||
if ( verboseWriter != null )
|
||||
verboseWriter.close();
|
||||
if ( beagleWriter != null )
|
||||
beagleWriter.close();
|
||||
|
||||
logger.info("Processed " + sum + " loci that are callable for SNPs");
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue