From 3a33401822be9503dfed8304cbfc04a78d5d2f55 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 2 Nov 2009 22:43:08 +0000 Subject: [PATCH] 2nd stage of the genotyper output refactoring is complete. Now, all output is generalized and all of the intelligence lies where it is supposed to. Next stage is syncing up old and new models and making sure we're outputting exactly what we should. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1960 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/EMGenotypeCalculationModel.java | 39 ++++--- .../genotyper/GenotypeCalculationModel.java | 2 +- ...JointEstimateGenotypeCalculationModel.java | 54 +++++---- ...PointEstimateGenotypeCalculationModel.java | 32 +++--- .../walkers/genotyper/UnifiedGenotyper.java | 8 +- ...seTransitionTableCalculatorJavaWalker.java | 4 +- .../gatk/walkers/CoverageEvalWalker.java | 2 +- .../gatk/walkers/DeNovoSNPWalker.java | 4 +- .../utils/ArtificialPoolContext.java | 2 +- .../utils/genotype/AlleleFrequencyBacked.java | 24 ++++ .../utils/genotype/ConfidenceBacked.java | 24 ++++ .../utils/genotype/GenotypeLocusData.java | 28 +++++ .../utils/genotype/GenotypeMetaData.java | 61 ---------- .../sting/utils/genotype/GenotypeWriter.java | 2 +- .../utils/genotype/GenotypeWriterFactory.java | 26 +++++ .../sting/utils/genotype/SLODBacked.java | 24 ++++ .../sting/utils/genotype/SampleBacked.java | 2 +- .../utils/genotype/geli/GeliAdapter.java | 2 +- .../utils/genotype/geli/GeliTextWriter.java | 2 +- .../sting/utils/genotype/glf/GLFWriter.java | 2 +- .../genotype/tabular/TabularLFWriter.java | 2 +- .../genotype/vcf/VCFGenotypeLocusData.java | 108 ++++++++++++++++++ .../vcf/VCFGenotypeWriterAdapter.java | 20 ++-- 23 files changed, 334 insertions(+), 140 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java create mode 100755 java/src/org/broadinstitute/sting/utils/genotype/ConfidenceBacked.java create mode 100755 java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java delete mode 100755 java/src/org/broadinstitute/sting/utils/genotype/GenotypeMetaData.java create mode 100755 java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java create mode 100755 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index f7851db6b..3086665cb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -17,7 +17,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode protected EMGenotypeCalculationModel() {} - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand HashMap contexts = splitContextBySample(context); @@ -27,24 +27,35 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode // run the EM calculation EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL); double lod = overall.getPofD() - overall.getPofNull(); + logger.debug(String.format("LOD=%f", lod)); // return a null call if we don't pass the lod cutoff if ( !ALL_BASE_MODE && lod < LOD_THRESHOLD ) - return new Pair, GenotypeMetaData>(null, null); - - // calculate strand score - EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD); - EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE); - double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); - double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); - logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); - double strandScore = Math.max(forwardLod - lod, reverseLod - lod); - - logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore)); + return new Pair, GenotypeLocusData>(null, null); // generate the calls - GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF()); - return new Pair, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata); + GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation()); + if ( locusdata != null ) { + if ( locusdata instanceof ConfidenceBacked ) { + ((ConfidenceBacked)locusdata).setConfidence(lod); + } + if ( locusdata instanceof SLODBacked ) { + // calculate strand score + EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD); + EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE); + double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); + double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); + logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + double strandScore = Math.max(forwardLod - lod, reverseLod - lod); + logger.debug(String.format("SLOD=%f", lod, strandScore)); + + ((SLODBacked)locusdata).setSLOD(strandScore); + } + if ( locusdata instanceof AlleleFrequencyBacked ) { + ((AlleleFrequencyBacked)locusdata).setAlleleFrequency(overall.getMAF()); + } + } + return new Pair, GenotypeLocusData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), locusdata); } protected List genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap contexts) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index b4ceff900..ca9452383 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -96,7 +96,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { * * @return calls */ - public abstract Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, + public abstract Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index e8b7556ef..c3ad1e3f7 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -37,7 +37,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo private enum GenotypeType { REF, HET, HOM } - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand HashMap contexts = splitContextBySample(context); @@ -49,24 +49,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo // run joint estimation for the full GL contexts initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.OVERALL); calculateAlleleFrequencyPosteriors(ref, context.getLocation()); - return createCalls(ref, contexts); - - //double lod = overall.getPofD() - overall.getPofNull(); - //logger.debug("lod=" + lod); - - // calculate strand score - //EMOutput forward = calculate(ref, contexts, priors, StratifiedContext.FORWARD); - //EMOutput reverse = calculate(ref, contexts, priors, StratifiedContext.REVERSE); - //double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); - //double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); - //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); - //double strandScore = Math.max(forwardLod - lod, reverseLod - lod); - - //logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore)); - - // generate the calls - //GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF()); - //return new Pair, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata); + return createCalls(ref, contexts, context.getLocation()); } private void initializeAlleleFrequencies(int numSamples) { @@ -292,7 +275,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo verboseWriter.println(); } - private Pair, GenotypeMetaData> createCalls(char ref, HashMap contexts) { + private Pair, GenotypeLocusData> createCalls(char ref, HashMap contexts, GenomeLoc loc) { // first, find the alt allele with maximum confidence int indexOfMax = 0; char baseOfMax = ref; @@ -312,14 +295,35 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo // return a null call if we don't pass the confidence cutoff if ( !ALL_BASE_MODE && phredScaledConfidence < CONFIDENCE_THRESHOLD ) - return new Pair, GenotypeMetaData>(null, null); + return new Pair, GenotypeLocusData>(null, null); double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1); ArrayList calls = new ArrayList(); - // TODO -- generate strand score - double strandScore = 0.0; - GenotypeMetaData metadata = new GenotypeMetaData(phredScaledConfidence, strandScore, bestAFguess); + GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc); + if ( locusdata != null ) { + if ( locusdata instanceof ConfidenceBacked ) { + ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); + } + if ( locusdata instanceof SLODBacked ) { + // TODO -- generate strand score + double strandScore = 0.0; + + // calculate strand score + //EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD); + //EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE); + //double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); + //double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); + //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + //double strandScore = Math.max(forwardLod - lod, reverseLod - lod); + //logger.debug(String.format("SLOD=%f", lod, strandScore)); + + ((SLODBacked)locusdata).setSLOD(strandScore); + } + if ( locusdata instanceof AlleleFrequencyBacked ) { + ((AlleleFrequencyBacked)locusdata).setAlleleFrequency(bestAFguess); + } + } for ( String sample : GLs.keySet() ) { @@ -343,6 +347,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo calls.add(call); } - return new Pair, GenotypeMetaData>(calls, metadata); + return new Pair, GenotypeLocusData>(calls, locusdata); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 53dce6322..b8f90aa8b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -24,7 +24,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // overload this method so we can special-case the single sample - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // we don't actually want to run EM for single samples if ( samples.size() == 1 ) { @@ -45,18 +45,17 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // get the genotype likelihoods Pair discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL); - // are we above the lod threshold for emitting calls (and not in all-bases-mode)? - if ( !ALL_BASE_MODE ) { - double[] posteriors = discoveryGL.second.getPosteriors(); - Integer sortedPosteriors[] = Utils.SortPermutation(posteriors); - double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]]; - double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]]; - double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()]; - double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype)); + // calculate the lod threshold + double[] posteriors = discoveryGL.second.getPosteriors(); + Integer sortedPosteriors[] = Utils.SortPermutation(posteriors); + double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]]; + double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]]; + double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()]; + double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype)); - // return a null call - if ( lodConfidence < LOD_THRESHOLD ) - return new Pair, GenotypeMetaData>(null, null); + // are we above the lod threshold for emitting calls (and not in all-bases mode)? + if ( !ALL_BASE_MODE && lodConfidence < LOD_THRESHOLD ) { + return new Pair, GenotypeLocusData>(null, null); } // we can now create the genotype call object @@ -75,7 +74,14 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation ((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors()); } - return new Pair, GenotypeMetaData>(Arrays.asList(call), null); + GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation()); + if ( locusdata != null ) { + if ( locusdata instanceof ConfidenceBacked ) { + ((ConfidenceBacked)locusdata).setConfidence(lodConfidence); + } + } + + return new Pair, GenotypeLocusData>(Arrays.asList(call), locusdata); } return super.calculateGenotype(tracker, ref, context, priors); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 4e4e5e85b..47003fc12 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -43,7 +43,7 @@ import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.GenotypeMetaData; +import org.broadinstitute.sting.utils.genotype.GenotypeLocusData; import java.io.File; import java.util.HashSet; @@ -52,7 +52,7 @@ import java.util.ArrayList; @ReadFilters({ZeroMappingQualityReadFilter.class}) -public class UnifiedGenotyper extends LocusWalker, GenotypeMetaData>, Integer> { +public class UnifiedGenotyper extends LocusWalker, GenotypeLocusData>, Integer> { @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @@ -149,7 +149,7 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeM * @param refContext the reference base * @param context contextual information around the locus */ - public Pair, GenotypeMetaData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) { + public Pair, GenotypeLocusData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) { char ref = Character.toUpperCase(refContext.getBase()); if ( !BaseUtils.isRegularBase(ref) ) return null; @@ -211,7 +211,7 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeM public Integer reduceInit() { return 0; } - public Integer reduce(Pair, GenotypeMetaData> value, Integer sum) { + public Integer reduce(Pair, GenotypeLocusData> value, Integer sum) { // can't call the locus because of no coverage if ( value == null ) return sum; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java index f3fd3e456..cd57b34eb 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -11,8 +11,6 @@ import org.broadinstitute.sting.utils.genotype.*; import java.util.*; import java.io.PrintStream; -import java.io.PrintWriter; -import java.io.OutputStream; import java.io.FileNotFoundException; import net.sf.samtools.SAMRecord; @@ -374,7 +372,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker, GenotypeMetaData> calls = ug.map(tracker,ref,context); + Pair, GenotypeLocusData> calls = ug.map(tracker,ref,context); if (calls == null || calls.first == null) return false; return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase()); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index 007ca16cf..f9166b9f8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -80,7 +80,7 @@ public class CoverageEvalWalker extends LocusWalker, String> { List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets); AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets); - Pair, GenotypeMetaData> calls = UG.map(tracker, ref, subContext); + Pair, GenotypeLocusData> calls = UG.map(tracker, ref, subContext); if (calls != null && calls.first != null) { Genotype call = calls.first.get(0); String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java index d5933be62..ff97a1c2b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java @@ -70,10 +70,10 @@ public class DeNovoSNPWalker extends RefWalker{ } AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets); - Pair, GenotypeMetaData> parent1 = UG.map(tracker, ref, parent1_subContext); + Pair, GenotypeLocusData> parent1 = UG.map(tracker, ref, parent1_subContext); AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets); - Pair, GenotypeMetaData> parent2 = UG.map(tracker, ref, parent2_subContext); + Pair, GenotypeLocusData> parent2 = UG.map(tracker, ref, parent2_subContext); if ( parent1 != null && parent1.first != null && parent2 != null && parent2.first != null ) { Genotype parent1call = parent1.first.get(0); diff --git a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java index 2c5ce7733..62993bbe9 100755 --- a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java +++ b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java @@ -257,7 +257,7 @@ public class ArtificialPoolContext { public Genotype getGenotype(int group) { AlignmentContext alicon = this.getAlignmentContext(); Pair[],List[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets()); - Pair, GenotypeMetaData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(), + Pair, GenotypeLocusData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(), new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group])); return (result.first == null ? null : result.first.get(0)); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java new file mode 100755 index 000000000..03bd54eba --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author ebanks + * Interface AlleleFrequencyBacked + * + * this interface indicates that the genotype is + * backed up by allele frequency information. + */ +public interface AlleleFrequencyBacked { + + /** + * + * @return returns the allele frequency for this genotype + */ + public double getAlleleFrequency(); + + /** + * + * @param frequency the allele frequency for this genotype + */ + public void setAlleleFrequency(double frequency); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceBacked.java new file mode 100755 index 000000000..2ccca2fb8 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceBacked.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author ebanks + * Interface ConfidenceBacked + * + * this interface indicates that the genotype is + * backed up by sample information. + */ +public interface ConfidenceBacked { + + /** + * + * @return returns the confidence for this genotype + */ + public double getConfidence(); + + /** + * + * @param confidence the confidence for this genotype + */ + public void setConfidence(double confidence); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java new file mode 100755 index 000000000..45c68dbd6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.GenomeLoc; + + +/** + * @author ebanks + *

+ * Class GenotypeLocusData + *

+ * represents the locus specific data associated with a genotype object. + */ +public interface GenotypeLocusData { + + /** + * get the reference base. + * @return a character, representing the reference base + */ + public char getReference(); + + /** + * get the genotype's location + * + * @return a GenomeLoc representing the location + */ + public GenomeLoc getLocation(); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeMetaData.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeMetaData.java deleted file mode 100755 index a61c0c2da..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeMetaData.java +++ /dev/null @@ -1,61 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - - -/** - * @author ebanks - *

- * Class GenotypeMetaData - *

- * represents the meta data for a genotype object. - */ -public class GenotypeMetaData { - - // the discovery lod score - private double mLOD; - - // the strand score lod - private double mSLOD; - - // the allele frequency - private double mAlleleFrequency; - - /** - * create a basic genotype meta data pbject, given the following fields - * - * @param discoveryLOD the discovery lod - * @param strandLOD the strand score lod - * @param alleleFrequency the allele frequency - */ - public GenotypeMetaData(double discoveryLOD, double strandLOD, double alleleFrequency) { - mLOD = discoveryLOD; - mSLOD = strandLOD; - mAlleleFrequency = alleleFrequency; - } - - /** - * get the discovery lod - * - * @return the discovery lod - */ - public double getLOD() { - return mLOD; - } - - /** - * get the strand lod - * - * @return the strand lod - */ - public double getSLOD() { - return mSLOD; - } - - /** - * get the allele frequency - * - * @return the allele frequency - */ - public double getAlleleFrequency() { - return mAlleleFrequency; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index b8b380b34..796654d93 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -56,7 +56,7 @@ public interface GenotypeWriter { * add a multi-sample call if we support it * @param genotypes the list of genotypes, that are backed by sample information */ - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata); + public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata); /** * @return true if we support multisample, false otherwise diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index c46df69cc..b1ad6e400 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -30,6 +30,9 @@ public class GenotypeWriterFactory { * @param format the format * @param header the sam file header * @param destination the destination file + * @param source the source + * @param referenceName the ref name + * @param sampleNames the sample names * @return the genotype writer object */ public static GenotypeWriter create(GENOTYPE_FORMAT format, @@ -73,6 +76,8 @@ public class GenotypeWriterFactory { /** * create a genotype call * @param format the format + * @param ref the reference base + * @param loc the location * @return an unpopulated genotype call object */ public static Genotype createSupportedCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) { @@ -88,4 +93,25 @@ public class GenotypeWriterFactory { throw new StingException("Genotype format " + format.toString() + " is not implemented"); } } + + /** + * create a genotype locus data object + * @param format the format + * @param ref the reference base + * @param loc the location + * @return an unpopulated genotype locus data object + */ + public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) { + switch (format) { + case VCF: + return new VCFGenotypeLocusData(ref, loc); + case GELI: + case GELI_BINARY: + return null; + case GLF: + return null; + default: + throw new StingException("Genotype format " + format.toString() + " is not implemented"); + } + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java new file mode 100755 index 000000000..c0ca132e9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author ebanks + * Interface SLODBacked + * + * this interface indicates that the genotype is + * backed up by strand lod information. + */ +public interface SLODBacked { + + /** + * + * @return returns the strand lod for this genotype + */ + public double getSLOD(); + + /** + * + * @param slod the strand lod for this genotype + */ + public void setSLOD(double slod); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java index f7ab1bba5..086f9b2e7 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.genotype; /** * @author aaron - * Interface PosteriorsBacked + * Interface SampleBacked * * this interface indicates that the genotype is * backed up by sample information. diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 9f44ee1da..23b0c2002 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -147,7 +147,7 @@ public class GeliAdapter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall( List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall( List genotypes, GenotypeLocusData metadata) { throw new UnsupportedOperationException("Geli binary doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index b3d52a136..6f8faec1b 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -113,7 +113,7 @@ public class GeliTextWriter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { throw new UnsupportedOperationException("Geli text doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 6b625d4f8..a3543ad97 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -287,7 +287,7 @@ public class GLFWriter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { throw new UnsupportedOperationException("GLF writer doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java index d35f1e965..f563d1de5 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java @@ -104,7 +104,7 @@ public class TabularLFWriter implements GenotypeWriter { * @param genotypes the list of genotypes, that are backed by sample information */ @Override - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java new file mode 100755 index 000000000..f3df77b9e --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java @@ -0,0 +1,108 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.genotype.*; + +/** + * @author ebanks + *

+ * Class VCFGenotypeLocusData + *

+ * represents the meta data for a genotype object. + */ +public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, AlleleFrequencyBacked { + + // the discovery lod score + private double mConfidence = 0.0; + + // the strand score lod + private double mSLOD = 0.0; + + // the allele frequency + private double mAlleleFrequency = 0.0; + + // the location + private GenomeLoc mLoc; + + // the ref base + private char mRefBase; + + /** + * create a basic genotype meta data pbject, given the following fields + * + * @param ref the reference base + * @param loc the locus + */ + public VCFGenotypeLocusData(char ref, GenomeLoc loc) { + mRefBase = ref; + mLoc = loc; + } + + /** + * get the reference base. + * @return a character, representing the reference base + */ + public char getReference() { + return mRefBase; + } + + /** + * get the genotype's location + * + * @return a GenomeLoc representing the location + */ + public GenomeLoc getLocation() { + return mLoc; + } + + /** + * get the confidence + * + * @return the confidence + */ + public double getConfidence() { + return mConfidence; + } + + /** + * + * @param confidence the confidence for this genotype + */ + public void setConfidence(double confidence) { + mConfidence = confidence; + } + + /** + * get the strand lod + * + * @return the strand lod + */ + public double getSLOD() { + return mSLOD; + } + + /** + * + * @param slod the strand lod for this genotype + */ + public void setSLOD(double slod) { + mSLOD = slod; + } + + /** + * get the allele frequency + * + * @return the allele frequency + */ + public double getAlleleFrequency() { + return mAlleleFrequency; + } + + /** + * + * @param frequency the allele frequency for this genotype + */ + public void setAlleleFrequency(double frequency) { + mAlleleFrequency = frequency; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index ac2213402..aef18d6a2 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -95,7 +95,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * @param call the locus to add */ public void addGenotypeCall(Genotype call) { - throw new UnsupportedOperationException("VCF calls require metadata; use the addMultiSampleCall method instead"); + throw new UnsupportedOperationException("VCF calls require locusdata; use the addMultiSampleCall method instead"); } /** @@ -118,10 +118,12 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall(List genotypes, GenotypeLocusData locusdata) { if (!mInitialized) lazyInitialize(mFile, mStream); + if ( locusdata != null && !(locusdata instanceof VCFGenotypeLocusData) ) + throw new IllegalArgumentException("Only VCFGenotypeLocusData objects should be passed in to the VCF writers"); VCFParameters params = new VCFParameters(); params.addFormatItem("GT"); @@ -154,9 +156,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { throw new IllegalArgumentException("Genotype array passed to VCFGenotypeWriterAdapter contained Genotypes not in the VCF header"); } - Map infoFields = getInfoFields(metadata, params); + Map infoFields = getInfoFields((VCFGenotypeLocusData)locusdata, params); - double qual = (metadata == null) ? 0 : metadata.getLOD(); + double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence(); // maintain 0-99 based Q-scores qual = Math.min(qual, 99); qual = Math.max(qual, 0); @@ -178,16 +180,16 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { /** * get the information fields of the VCF record, given the meta data and parameters * - * @param metadata the metadata associated with this multi sample call + * @param locusdata the metadata associated with this multi sample call * @param params the parameters * * @return a mapping of info field to value */ - private Map getInfoFields(GenotypeMetaData metadata, VCFParameters params) { + private Map getInfoFields(VCFGenotypeLocusData locusdata, VCFParameters params) { Map infoFields = new HashMap(); - if (metadata != null) { - infoFields.put("SB", String.format("%.2f", metadata.getSLOD())); - infoFields.put("AF", String.format("%.2f", metadata.getAlleleFrequency())); + if ( locusdata != null ) { + infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); + infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency())); } infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size())); return infoFields;