Added -beagle option to emit likelihoods file for use with the BEAGLE imputation engine; still experimental.
(Also converted getPileup -> getBasePileup) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2549 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
9cbae53ee1
commit
fcce77c245
|
|
@ -33,7 +33,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
|
|
||||||
for ( String sample : contexts.keySet() ) {
|
for ( String sample : contexts.keySet() ) {
|
||||||
StratifiedAlignmentContext context = contexts.get(sample);
|
StratifiedAlignmentContext context = contexts.get(sample);
|
||||||
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
|
|
@ -109,7 +109,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
|
|
||||||
|
|
||||||
if ( call instanceof ReadBacked ) {
|
if ( call instanceof ReadBacked ) {
|
||||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
|
||||||
((ReadBacked)call).setPileup(pileup);
|
((ReadBacked)call).setPileup(pileup);
|
||||||
}
|
}
|
||||||
if ( call instanceof SampleBacked ) {
|
if ( call instanceof SampleBacked ) {
|
||||||
|
|
@ -128,6 +128,24 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
|
||||||
calls.add(call);
|
calls.add(call);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// output to beagle file if requested
|
||||||
|
if ( beagleWriter != null ) {
|
||||||
|
for ( String sample : samples ) {
|
||||||
|
GenotypeLikelihoods gl = GLs.get(sample);
|
||||||
|
if ( gl == null ) {
|
||||||
|
beagleWriter.print(" 0.0 0.0 0.0");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
double[] likelihoods = gl.getLikelihoods();
|
||||||
|
beagleWriter.print(' ');
|
||||||
|
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.REF.ordinal()])));
|
||||||
|
beagleWriter.print(' ');
|
||||||
|
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HET.ordinal()])));
|
||||||
|
beagleWriter.print(' ');
|
||||||
|
beagleWriter.print(String.format("%.6f", Math.pow(10, likelihoods[GenotypeType.HOM.ordinal()])));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return calls;
|
return calls;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -105,7 +105,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
||||||
call.setGenotype(bestGenotype);
|
call.setGenotype(bestGenotype);
|
||||||
|
|
||||||
if ( call instanceof ReadBacked ) {
|
if ( call instanceof ReadBacked ) {
|
||||||
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getPileup();
|
ReadBackedPileup pileup = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup();
|
||||||
((ReadBacked)call).setPileup(pileup);
|
((ReadBacked)call).setPileup(pileup);
|
||||||
}
|
}
|
||||||
if ( call instanceof SampleBacked ) {
|
if ( call instanceof SampleBacked ) {
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
protected double MINIMUM_ALLELE_FREQUENCY;
|
protected double MINIMUM_ALLELE_FREQUENCY;
|
||||||
protected boolean REPORT_SLOD;
|
protected boolean REPORT_SLOD;
|
||||||
protected PrintWriter verboseWriter;
|
protected PrintWriter verboseWriter;
|
||||||
|
protected PrintWriter beagleWriter;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new GenotypeCalculationModel object
|
* Create a new GenotypeCalculationModel object
|
||||||
|
|
@ -51,12 +52,14 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
* @param UAC unified arg collection
|
* @param UAC unified arg collection
|
||||||
* @param outputFormat output format
|
* @param outputFormat output format
|
||||||
* @param verboseWriter verbose writer
|
* @param verboseWriter verbose writer
|
||||||
|
* @param beagleWriter beagle writer
|
||||||
*/
|
*/
|
||||||
protected void initialize(Set<String> samples,
|
protected void initialize(Set<String> samples,
|
||||||
Logger logger,
|
Logger logger,
|
||||||
UnifiedArgumentCollection UAC,
|
UnifiedArgumentCollection UAC,
|
||||||
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
||||||
PrintWriter verboseWriter) {
|
PrintWriter verboseWriter,
|
||||||
|
PrintWriter beagleWriter) {
|
||||||
this.samples = new TreeSet<String>(samples);
|
this.samples = new TreeSet<String>(samples);
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
baseModel = UAC.baseModel;
|
baseModel = UAC.baseModel;
|
||||||
|
|
@ -72,9 +75,21 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
||||||
this.verboseWriter = verboseWriter;
|
this.verboseWriter = verboseWriter;
|
||||||
if ( verboseWriter != null )
|
if ( verboseWriter != null )
|
||||||
initializeVerboseWriter(verboseWriter);
|
initializeVerboseWriter(verboseWriter);
|
||||||
|
this.beagleWriter = beagleWriter;
|
||||||
|
if ( beagleWriter != null )
|
||||||
|
initializeBeagleWriter(beagleWriter);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void initializeVerboseWriter(PrintWriter writer) { };
|
protected void initializeVerboseWriter(PrintWriter writer) { }
|
||||||
|
|
||||||
|
protected void initializeBeagleWriter(PrintWriter writer) {
|
||||||
|
writer.print("marker alleleA alleleB");
|
||||||
|
for ( String sample : samples ) {
|
||||||
|
writer.print(' ');
|
||||||
|
writer.print(sample);
|
||||||
|
}
|
||||||
|
writer.println();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Must be overridden by concrete subclasses
|
* Must be overridden by concrete subclasses
|
||||||
|
|
|
||||||
|
|
@ -51,13 +51,17 @@ public class GenotypeCalculationModelFactory {
|
||||||
* @param logger logger
|
* @param logger logger
|
||||||
* @param UAC the unified argument collection
|
* @param UAC the unified argument collection
|
||||||
* @param outputFormat the output format
|
* @param outputFormat the output format
|
||||||
|
* @param verboseWriter verbose writer
|
||||||
|
* @param beagleWriter beagle writer
|
||||||
|
*
|
||||||
* @return model
|
* @return model
|
||||||
*/
|
*/
|
||||||
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
|
public static GenotypeCalculationModel makeGenotypeCalculation(Set<String> samples,
|
||||||
Logger logger,
|
Logger logger,
|
||||||
UnifiedArgumentCollection UAC,
|
UnifiedArgumentCollection UAC,
|
||||||
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
GenotypeWriterFactory.GENOTYPE_FORMAT outputFormat,
|
||||||
PrintWriter verboseWriter) {
|
PrintWriter verboseWriter,
|
||||||
|
PrintWriter beagleWriter) {
|
||||||
GenotypeCalculationModel gcm;
|
GenotypeCalculationModel gcm;
|
||||||
switch ( UAC.genotypeModel ) {
|
switch ( UAC.genotypeModel ) {
|
||||||
case EM_POINT_ESTIMATE:
|
case EM_POINT_ESTIMATE:
|
||||||
|
|
@ -72,7 +76,7 @@ public class GenotypeCalculationModelFactory {
|
||||||
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
|
default: throw new RuntimeException("Unexpected GenotypeCalculationModel " + UAC.genotypeModel);
|
||||||
}
|
}
|
||||||
|
|
||||||
gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter);
|
gcm.initialize(samples, logger, UAC, outputFormat, verboseWriter, beagleWriter);
|
||||||
return gcm;
|
return gcm;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -81,7 +81,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
||||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
AlignmentContext context = contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE);
|
||||||
|
|
||||||
// calculate the sum of quality scores for each base
|
// calculate the sum of quality scores for each base
|
||||||
ReadBackedPileup pileup = context.getPileup();
|
ReadBackedPileup pileup = context.getBasePileup();
|
||||||
for ( PileupElement p : pileup ) {
|
for ( PileupElement p : pileup ) {
|
||||||
// ignore deletions
|
// ignore deletions
|
||||||
if ( p.isDeletion() )
|
if ( p.isDeletion() )
|
||||||
|
|
@ -341,9 +341,23 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
|
||||||
if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
|
if ( !ALL_BASE_MODE && ((!GENOTYPE_MODE && bestAFguess == 0) || phredScaledConfidence < CONFIDENCE_THRESHOLD) )
|
||||||
return new Pair<VariationCall, List<Genotype>>(null, null);
|
return new Pair<VariationCall, List<Genotype>>(null, null);
|
||||||
|
|
||||||
// populate the sample-specific data
|
// output to beagle file if requested
|
||||||
|
if ( beagleWriter != null ) {
|
||||||
|
beagleWriter.print(loc);
|
||||||
|
beagleWriter.print(' ');
|
||||||
|
beagleWriter.print(ref);
|
||||||
|
beagleWriter.print(' ');
|
||||||
|
beagleWriter.print(bestAlternateAllele);
|
||||||
|
}
|
||||||
|
|
||||||
|
// populate the sample-specific data (output it to beagle also if requested)
|
||||||
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc);
|
List<Genotype> calls = makeGenotypeCalls(ref, bestAlternateAllele, bestAFguess, contexts, loc);
|
||||||
|
|
||||||
|
// close beagle record (if requested)
|
||||||
|
if ( beagleWriter != null ) {
|
||||||
|
beagleWriter.println();
|
||||||
|
}
|
||||||
|
|
||||||
// next, the general locus data
|
// next, the general locus data
|
||||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||||
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP);
|
VariationCall locusdata = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT, ref, loc, bestAFguess == 0 ? VARIANT_TYPE.REFERENCE : VARIANT_TYPE.SNP);
|
||||||
|
|
|
||||||
|
|
@ -113,7 +113,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
||||||
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) {
|
private Pair<ReadBackedPileup, GenotypeLikelihoods> getSingleSampleLikelihoods(StratifiedAlignmentContext sampleContext, DiploidGenotypePriors priors, StratifiedAlignmentContext.StratifiedContextType contextType) {
|
||||||
// create the pileup
|
// create the pileup
|
||||||
AlignmentContext myContext = sampleContext.getContext(contextType);
|
AlignmentContext myContext = sampleContext.getContext(contextType);
|
||||||
ReadBackedPileup pileup = myContext.getPileup();
|
ReadBackedPileup pileup = myContext.getBasePileup();
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
|
|
@ -137,7 +137,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
||||||
|
|
||||||
for ( String sample : contexts.keySet() ) {
|
for ( String sample : contexts.keySet() ) {
|
||||||
StratifiedAlignmentContext context = contexts.get(sample);
|
StratifiedAlignmentContext context = contexts.get(sample);
|
||||||
ReadBackedPileup pileup = context.getContext(contextType).getPileup();
|
ReadBackedPileup pileup = context.getContext(contextType).getBasePileup();
|
||||||
|
|
||||||
// create the GenotypeLikelihoods object
|
// create the GenotypeLikelihoods object
|
||||||
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);
|
GenotypeLikelihoods GL = new GenotypeLikelihoods(baseModel, AFPriors, defaultPlatform);
|
||||||
|
|
|
||||||
|
|
@ -59,9 +59,13 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
|
||||||
public String VERBOSE = null;
|
public String VERBOSE = null;
|
||||||
|
|
||||||
|
@Argument(fullName = "beagle_file", shortName = "beagle", doc = "File to print BEAGLE-specific data for use with imputation", required = false)
|
||||||
|
public String BEAGLE = null;
|
||||||
|
|
||||||
// the verbose writer
|
|
||||||
|
// the verbose and beagle writers
|
||||||
private PrintWriter verboseWriter = null;
|
private PrintWriter verboseWriter = null;
|
||||||
|
private PrintWriter beagleWriter = null;
|
||||||
|
|
||||||
// the model used for calculating genotypes
|
// the model used for calculating genotypes
|
||||||
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
private ThreadLocal<GenotypeCalculationModel> gcm = new ThreadLocal<GenotypeCalculationModel>();
|
||||||
|
|
@ -105,15 +109,20 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
|
sb.append("\n***\tUse Q" + (10.0 * UAC.LOD_THRESHOLD) + " as an approximate equivalent to your LOD " + UAC.LOD_THRESHOLD + " cutoff");
|
||||||
throw new IllegalArgumentException(sb.toString());
|
throw new IllegalArgumentException(sb.toString());
|
||||||
}
|
}
|
||||||
|
if ( BEAGLE != null && UAC.genotypeModel == GenotypeCalculationModel.Model.EM_POINT_ESTIMATE ) {
|
||||||
|
throw new IllegalArgumentException("BEAGLE output is not currently supported in the EM_POINT_ESTIMATE calculation model.");
|
||||||
|
}
|
||||||
|
|
||||||
// some arguments can't be handled (at least for now) while we are multi-threaded
|
// some arguments can't be handled (at least for now) while we are multi-threaded
|
||||||
if ( getToolkit().getArguments().numberOfThreads > 1 ) {
|
if ( getToolkit().getArguments().numberOfThreads > 1 ) {
|
||||||
// no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name
|
// no ASSUME_SINGLE_SAMPLE because the IO system doesn't know how to get the sample name
|
||||||
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
if ( UAC.ASSUME_SINGLE_SAMPLE != null )
|
||||||
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
|
throw new IllegalArgumentException("For technical reasons, the ASSUME_SINGLE_SAMPLE argument cannot be used with multiple threads");
|
||||||
|
|
||||||
|
// TODO -- it would be nice to be able to handle verbose and beagle even with multiple threads
|
||||||
// no VERBOSE because we'd need to deal with parallelizing the writing
|
// no VERBOSE because we'd need to deal with parallelizing the writing
|
||||||
if ( VERBOSE != null )
|
if ( VERBOSE != null || BEAGLE != null )
|
||||||
throw new IllegalArgumentException("For technical reasons, the VERBOSE argument cannot be used with multiple threads");
|
throw new IllegalArgumentException("For technical reasons, the VERBOSE and BEAGLE arguments cannot be used with multiple threads");
|
||||||
}
|
}
|
||||||
|
|
||||||
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
|
// get all of the unique sample names - unless we're in POOLED mode, in which case we ignore the sample names
|
||||||
|
|
@ -139,14 +148,16 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
|
((VCFGenotypeWriter)writer).setValidationStringency(VCFGenotypeWriterAdapter.VALIDATION_STRINGENCY.SILENT);
|
||||||
}
|
}
|
||||||
|
|
||||||
// initialize the verbose writer
|
// initialize the writers
|
||||||
if ( VERBOSE != null ) {
|
try {
|
||||||
try {
|
if ( VERBOSE != null )
|
||||||
verboseWriter = new PrintWriter(VERBOSE);
|
verboseWriter = new PrintWriter(VERBOSE);
|
||||||
} catch (FileNotFoundException e) {
|
if ( BEAGLE != null )
|
||||||
throw new StingException("Could not open file " + VERBOSE + " for writing");
|
beagleWriter = new PrintWriter(BEAGLE);
|
||||||
}
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new StingException("UnifiedGenotyper [verbose/beagle]: could not open file for writing");
|
||||||
}
|
}
|
||||||
|
|
||||||
// *** If we were called by another walker, then we don't ***
|
// *** If we were called by another walker, then we don't ***
|
||||||
// *** want to do any of the other initialization steps. ***
|
// *** want to do any of the other initialization steps. ***
|
||||||
if ( writer == null )
|
if ( writer == null )
|
||||||
|
|
@ -220,7 +231,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
else
|
else
|
||||||
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
|
throw new StingException("Unsupported genotype format: " + writer.getClass().getName());
|
||||||
}
|
}
|
||||||
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter));
|
gcm.set(GenotypeCalculationModelFactory.makeGenotypeCalculation(samples, logger, UAC, format, verboseWriter, beagleWriter));
|
||||||
}
|
}
|
||||||
|
|
||||||
char ref = Character.toUpperCase(refContext.getBase());
|
char ref = Character.toUpperCase(refContext.getBase());
|
||||||
|
|
@ -319,6 +330,8 @@ public class UnifiedGenotyper extends LocusWalker<Pair<VariationCall, List<Genot
|
||||||
public void onTraversalDone(Integer sum) {
|
public void onTraversalDone(Integer sum) {
|
||||||
if ( verboseWriter != null )
|
if ( verboseWriter != null )
|
||||||
verboseWriter.close();
|
verboseWriter.close();
|
||||||
|
if ( beagleWriter != null )
|
||||||
|
beagleWriter.close();
|
||||||
|
|
||||||
logger.info("Processed " + sum + " loci that are callable for SNPs");
|
logger.info("Processed " + sum + " loci that are callable for SNPs");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue