Improvements to support GLF generation -- now correctly handles GLF

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1269 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2009-07-16 21:10:39 +00:00
parent 107f42a01e
commit 31f3f466ca
1 changed files with 19 additions and 12 deletions

View File

@ -18,6 +18,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import java.io.File;
import java.io.FileNotFoundException;
@ -29,10 +30,12 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
// Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GELI;
// Control what goes into the variants file and what format that file should have
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold on which variants should be filtered", required = false)public Double LOD_THRESHOLD = Double.MIN_VALUE;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false)
public boolean GENOTYPE = false;
// Control periodic reporting features
@Argument(fullName = "metrics_interval", shortName = "metint", doc = "Number of loci to process between metrics reports", required = false) public Integer METRICS_INTERVAL = 50000;
@ -105,8 +108,8 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
/** Initialize the walker with some sensible defaults */
public void initialize() {
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
mGenotypeWriter = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GLF, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(), VARIANTS_FILE);
} else {
try {
@ -235,12 +238,12 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
}
if (this.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior);
if (!IGNORE_SECONDARY_BASES && pileup.getBases().length() < 750) {
G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
}
G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior);
if (!IGNORE_SECONDARY_BASES && pileup.getBases().length() < 750) {
G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
}
return G;
}
@ -318,17 +321,21 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
* @return an empty string
*/
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum) {
if (alleleFreq != null && alleleFreq.lodVsRef >= LOD_THRESHOLD) {
//System.out.printf("AlleleFreqEstimate %s %f %f %f%n", alleleFreq,alleleFreq.lodVsNextBest, alleleFreq.lodVsRef, LOD_THRESHOLD );
if (alleleFreq != null && (GENOTYPE ? alleleFreq.lodVsNextBest : alleleFreq.lodVsRef) >= LOD_THRESHOLD) {
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GELI) {
variantsOut.println(alleleFreq.asGeliString());
variantsOut.println(alleleFreq.asGeliString());
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR) {
variantsOut.println(alleleFreq.asTabularString());
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
SAMSequenceRecord rec = GenomeLocParser.getContigInfo(alleleFreq.location.getContig());
double[] likelihoods = new double[10];
for (int x = 0; x < alleleFreq.posteriors.length; x++) {
alleleFreq.posteriors[x] *= 10;
likelihoods[x] = GLFRecord.LIKELIHOOD_SCALE_FACTOR * alleleFreq.genotypeLikelihoods.likelihoods[x];
}
LikelihoodObject obj = new LikelihoodObject(alleleFreq.posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
LikelihoodObject obj = new LikelihoodObject(likelihoods, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
this.mGenotypeWriter.addGenotypeCall(rec,(int)alleleFreq.location.getStart(),0.0f,alleleFreq.ref,alleleFreq.depth,obj);
@ -339,7 +346,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
}
/** Close the variant file. */
public void onTraversalDone() {
public void onTraversalDone(String sum) {
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
mGenotypeWriter.close();
} else {