diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index f7851db6b..3086665cb 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -17,7 +17,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode protected EMGenotypeCalculationModel() {} - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand HashMap contexts = splitContextBySample(context); @@ -27,24 +27,35 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode // run the EM calculation EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL); double lod = overall.getPofD() - overall.getPofNull(); + logger.debug(String.format("LOD=%f", lod)); // return a null call if we don't pass the lod cutoff if ( !ALL_BASE_MODE && lod < LOD_THRESHOLD ) - return new Pair, GenotypeMetaData>(null, null); - - // calculate strand score - EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD); - EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE); - double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); - double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); - logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); - double strandScore = Math.max(forwardLod - lod, reverseLod - lod); - - logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore)); + return new Pair, GenotypeLocusData>(null, null); // generate the calls - GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF()); - return new Pair, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata); + GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation()); + if ( locusdata != null ) { + if ( locusdata instanceof ConfidenceBacked ) { + ((ConfidenceBacked)locusdata).setConfidence(lod); + } + if ( locusdata instanceof SLODBacked ) { + // calculate strand score + EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD); + EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE); + double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); + double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); + logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + double strandScore = Math.max(forwardLod - lod, reverseLod - lod); + logger.debug(String.format("SLOD=%f", lod, strandScore)); + + ((SLODBacked)locusdata).setSLOD(strandScore); + } + if ( locusdata instanceof AlleleFrequencyBacked ) { + ((AlleleFrequencyBacked)locusdata).setAlleleFrequency(overall.getMAF()); + } + } + return new Pair, GenotypeLocusData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), locusdata); } protected List genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap contexts) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index b4ceff900..ca9452383 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -96,7 +96,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { * * @return calls */ - public abstract Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, + public abstract Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index e8b7556ef..c3ad1e3f7 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -37,7 +37,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo private enum GenotypeType { REF, HET, HOM } - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand HashMap contexts = splitContextBySample(context); @@ -49,24 +49,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo // run joint estimation for the full GL contexts initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.OVERALL); calculateAlleleFrequencyPosteriors(ref, context.getLocation()); - return createCalls(ref, contexts); - - //double lod = overall.getPofD() - overall.getPofNull(); - //logger.debug("lod=" + lod); - - // calculate strand score - //EMOutput forward = calculate(ref, contexts, priors, StratifiedContext.FORWARD); - //EMOutput reverse = calculate(ref, contexts, priors, StratifiedContext.REVERSE); - //double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); - //double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); - //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); - //double strandScore = Math.max(forwardLod - lod, reverseLod - lod); - - //logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore)); - - // generate the calls - //GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF()); - //return new Pair, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata); + return createCalls(ref, contexts, context.getLocation()); } private void initializeAlleleFrequencies(int numSamples) { @@ -292,7 +275,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo verboseWriter.println(); } - private Pair, GenotypeMetaData> createCalls(char ref, HashMap contexts) { + private Pair, GenotypeLocusData> createCalls(char ref, HashMap contexts, GenomeLoc loc) { // first, find the alt allele with maximum confidence int indexOfMax = 0; char baseOfMax = ref; @@ -312,14 +295,35 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo // return a null call if we don't pass the confidence cutoff if ( !ALL_BASE_MODE && phredScaledConfidence < CONFIDENCE_THRESHOLD ) - return new Pair, GenotypeMetaData>(null, null); + return new Pair, GenotypeLocusData>(null, null); double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1); ArrayList calls = new ArrayList(); - // TODO -- generate strand score - double strandScore = 0.0; - GenotypeMetaData metadata = new GenotypeMetaData(phredScaledConfidence, strandScore, bestAFguess); + GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc); + if ( locusdata != null ) { + if ( locusdata instanceof ConfidenceBacked ) { + ((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence); + } + if ( locusdata instanceof SLODBacked ) { + // TODO -- generate strand score + double strandScore = 0.0; + + // calculate strand score + //EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD); + //EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE); + //double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull(); + //double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull(); + //logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod); + //double strandScore = Math.max(forwardLod - lod, reverseLod - lod); + //logger.debug(String.format("SLOD=%f", lod, strandScore)); + + ((SLODBacked)locusdata).setSLOD(strandScore); + } + if ( locusdata instanceof AlleleFrequencyBacked ) { + ((AlleleFrequencyBacked)locusdata).setAlleleFrequency(bestAFguess); + } + } for ( String sample : GLs.keySet() ) { @@ -343,6 +347,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo calls.add(call); } - return new Pair, GenotypeMetaData>(calls, metadata); + return new Pair, GenotypeLocusData>(calls, locusdata); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index 53dce6322..b8f90aa8b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -24,7 +24,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // overload this method so we can special-case the single sample - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // we don't actually want to run EM for single samples if ( samples.size() == 1 ) { @@ -45,18 +45,17 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // get the genotype likelihoods Pair discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL); - // are we above the lod threshold for emitting calls (and not in all-bases-mode)? - if ( !ALL_BASE_MODE ) { - double[] posteriors = discoveryGL.second.getPosteriors(); - Integer sortedPosteriors[] = Utils.SortPermutation(posteriors); - double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]]; - double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]]; - double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()]; - double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype)); + // calculate the lod threshold + double[] posteriors = discoveryGL.second.getPosteriors(); + Integer sortedPosteriors[] = Utils.SortPermutation(posteriors); + double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]]; + double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]]; + double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()]; + double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype)); - // return a null call - if ( lodConfidence < LOD_THRESHOLD ) - return new Pair, GenotypeMetaData>(null, null); + // are we above the lod threshold for emitting calls (and not in all-bases mode)? + if ( !ALL_BASE_MODE && lodConfidence < LOD_THRESHOLD ) { + return new Pair, GenotypeLocusData>(null, null); } // we can now create the genotype call object @@ -75,7 +74,14 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation ((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors()); } - return new Pair, GenotypeMetaData>(Arrays.asList(call), null); + GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation()); + if ( locusdata != null ) { + if ( locusdata instanceof ConfidenceBacked ) { + ((ConfidenceBacked)locusdata).setConfidence(lodConfidence); + } + } + + return new Pair, GenotypeLocusData>(Arrays.asList(call), locusdata); } return super.calculateGenotype(tracker, ref, context, priors); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 4e4e5e85b..47003fc12 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -43,7 +43,7 @@ import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.broadinstitute.sting.utils.genotype.Genotype; -import org.broadinstitute.sting.utils.genotype.GenotypeMetaData; +import org.broadinstitute.sting.utils.genotype.GenotypeLocusData; import java.io.File; import java.util.HashSet; @@ -52,7 +52,7 @@ import java.util.ArrayList; @ReadFilters({ZeroMappingQualityReadFilter.class}) -public class UnifiedGenotyper extends LocusWalker, GenotypeMetaData>, Integer> { +public class UnifiedGenotyper extends LocusWalker, GenotypeLocusData>, Integer> { @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @@ -149,7 +149,7 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeM * @param refContext the reference base * @param context contextual information around the locus */ - public Pair, GenotypeMetaData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) { + public Pair, GenotypeLocusData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) { char ref = Character.toUpperCase(refContext.getBase()); if ( !BaseUtils.isRegularBase(ref) ) return null; @@ -211,7 +211,7 @@ public class UnifiedGenotyper extends LocusWalker, GenotypeM public Integer reduceInit() { return 0; } - public Integer reduce(Pair, GenotypeMetaData> value, Integer sum) { + public Integer reduce(Pair, GenotypeLocusData> value, Integer sum) { // can't call the locus because of no coverage if ( value == null ) return sum; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java index f3fd3e456..cd57b34eb 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -11,8 +11,6 @@ import org.broadinstitute.sting.utils.genotype.*; import java.util.*; import java.io.PrintStream; -import java.io.PrintWriter; -import java.io.OutputStream; import java.io.FileNotFoundException; import net.sf.samtools.SAMRecord; @@ -374,7 +372,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker, GenotypeMetaData> calls = ug.map(tracker,ref,context); + Pair, GenotypeLocusData> calls = ug.map(tracker,ref,context); if (calls == null || calls.first == null) return false; return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase()); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index 007ca16cf..f9166b9f8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -80,7 +80,7 @@ public class CoverageEvalWalker extends LocusWalker, String> { List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets); AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets); - Pair, GenotypeMetaData> calls = UG.map(tracker, ref, subContext); + Pair, GenotypeLocusData> calls = UG.map(tracker, ref, subContext); if (calls != null && calls.first != null) { Genotype call = calls.first.get(0); String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java index d5933be62..ff97a1c2b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java @@ -70,10 +70,10 @@ public class DeNovoSNPWalker extends RefWalker{ } AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets); - Pair, GenotypeMetaData> parent1 = UG.map(tracker, ref, parent1_subContext); + Pair, GenotypeLocusData> parent1 = UG.map(tracker, ref, parent1_subContext); AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets); - Pair, GenotypeMetaData> parent2 = UG.map(tracker, ref, parent2_subContext); + Pair, GenotypeLocusData> parent2 = UG.map(tracker, ref, parent2_subContext); if ( parent1 != null && parent1.first != null && parent2 != null && parent2.first != null ) { Genotype parent1call = parent1.first.get(0); diff --git a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java index 2c5ce7733..62993bbe9 100755 --- a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java +++ b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java @@ -257,7 +257,7 @@ public class ArtificialPoolContext { public Genotype getGenotype(int group) { AlignmentContext alicon = this.getAlignmentContext(); Pair[],List[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets()); - Pair, GenotypeMetaData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(), + Pair, GenotypeLocusData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(), new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group])); return (result.first == null ? null : result.first.get(0)); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java new file mode 100755 index 000000000..03bd54eba --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/AlleleFrequencyBacked.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author ebanks + * Interface AlleleFrequencyBacked + * + * this interface indicates that the genotype is + * backed up by allele frequency information. + */ +public interface AlleleFrequencyBacked { + + /** + * + * @return returns the allele frequency for this genotype + */ + public double getAlleleFrequency(); + + /** + * + * @param frequency the allele frequency for this genotype + */ + public void setAlleleFrequency(double frequency); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceBacked.java new file mode 100755 index 000000000..2ccca2fb8 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/ConfidenceBacked.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author ebanks + * Interface ConfidenceBacked + * + * this interface indicates that the genotype is + * backed up by sample information. + */ +public interface ConfidenceBacked { + + /** + * + * @return returns the confidence for this genotype + */ + public double getConfidence(); + + /** + * + * @param confidence the confidence for this genotype + */ + public void setConfidence(double confidence); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java new file mode 100755 index 000000000..45c68dbd6 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeLocusData.java @@ -0,0 +1,28 @@ +package org.broadinstitute.sting.utils.genotype; + +import org.broadinstitute.sting.utils.GenomeLoc; + + +/** + * @author ebanks + *

+ * Class GenotypeLocusData + *

+ * represents the locus specific data associated with a genotype object. + */ +public interface GenotypeLocusData { + + /** + * get the reference base. + * @return a character, representing the reference base + */ + public char getReference(); + + /** + * get the genotype's location + * + * @return a GenomeLoc representing the location + */ + public GenomeLoc getLocation(); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeMetaData.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeMetaData.java deleted file mode 100755 index a61c0c2da..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeMetaData.java +++ /dev/null @@ -1,61 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - - -/** - * @author ebanks - *

- * Class GenotypeMetaData - *

- * represents the meta data for a genotype object. - */ -public class GenotypeMetaData { - - // the discovery lod score - private double mLOD; - - // the strand score lod - private double mSLOD; - - // the allele frequency - private double mAlleleFrequency; - - /** - * create a basic genotype meta data pbject, given the following fields - * - * @param discoveryLOD the discovery lod - * @param strandLOD the strand score lod - * @param alleleFrequency the allele frequency - */ - public GenotypeMetaData(double discoveryLOD, double strandLOD, double alleleFrequency) { - mLOD = discoveryLOD; - mSLOD = strandLOD; - mAlleleFrequency = alleleFrequency; - } - - /** - * get the discovery lod - * - * @return the discovery lod - */ - public double getLOD() { - return mLOD; - } - - /** - * get the strand lod - * - * @return the strand lod - */ - public double getSLOD() { - return mSLOD; - } - - /** - * get the allele frequency - * - * @return the allele frequency - */ - public double getAlleleFrequency() { - return mAlleleFrequency; - } -} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index b8b380b34..796654d93 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -56,7 +56,7 @@ public interface GenotypeWriter { * add a multi-sample call if we support it * @param genotypes the list of genotypes, that are backed by sample information */ - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata); + public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata); /** * @return true if we support multisample, false otherwise diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index c46df69cc..b1ad6e400 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -30,6 +30,9 @@ public class GenotypeWriterFactory { * @param format the format * @param header the sam file header * @param destination the destination file + * @param source the source + * @param referenceName the ref name + * @param sampleNames the sample names * @return the genotype writer object */ public static GenotypeWriter create(GENOTYPE_FORMAT format, @@ -73,6 +76,8 @@ public class GenotypeWriterFactory { /** * create a genotype call * @param format the format + * @param ref the reference base + * @param loc the location * @return an unpopulated genotype call object */ public static Genotype createSupportedCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) { @@ -88,4 +93,25 @@ public class GenotypeWriterFactory { throw new StingException("Genotype format " + format.toString() + " is not implemented"); } } + + /** + * create a genotype locus data object + * @param format the format + * @param ref the reference base + * @param loc the location + * @return an unpopulated genotype locus data object + */ + public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) { + switch (format) { + case VCF: + return new VCFGenotypeLocusData(ref, loc); + case GELI: + case GELI_BINARY: + return null; + case GLF: + return null; + default: + throw new StingException("Genotype format " + format.toString() + " is not implemented"); + } + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java new file mode 100755 index 000000000..c0ca132e9 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/SLODBacked.java @@ -0,0 +1,24 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author ebanks + * Interface SLODBacked + * + * this interface indicates that the genotype is + * backed up by strand lod information. + */ +public interface SLODBacked { + + /** + * + * @return returns the strand lod for this genotype + */ + public double getSLOD(); + + /** + * + * @param slod the strand lod for this genotype + */ + public void setSLOD(double slod); + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java index f7ab1bba5..086f9b2e7 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.genotype; /** * @author aaron - * Interface PosteriorsBacked + * Interface SampleBacked * * this interface indicates that the genotype is * backed up by sample information. diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 9f44ee1da..23b0c2002 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -147,7 +147,7 @@ public class GeliAdapter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall( List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall( List genotypes, GenotypeLocusData metadata) { throw new UnsupportedOperationException("Geli binary doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index b3d52a136..6f8faec1b 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -113,7 +113,7 @@ public class GeliTextWriter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { throw new UnsupportedOperationException("Geli text doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 6b625d4f8..a3543ad97 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -287,7 +287,7 @@ public class GLFWriter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { throw new UnsupportedOperationException("GLF writer doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java index d35f1e965..f563d1de5 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java @@ -104,7 +104,7 @@ public class TabularLFWriter implements GenotypeWriter { * @param genotypes the list of genotypes, that are backed by sample information */ @Override - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall(List genotypes, GenotypeLocusData metadata) { throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java new file mode 100755 index 000000000..f3df77b9e --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeLocusData.java @@ -0,0 +1,108 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.genotype.*; + +/** + * @author ebanks + *

+ * Class VCFGenotypeLocusData + *

+ * represents the meta data for a genotype object. + */ +public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, AlleleFrequencyBacked { + + // the discovery lod score + private double mConfidence = 0.0; + + // the strand score lod + private double mSLOD = 0.0; + + // the allele frequency + private double mAlleleFrequency = 0.0; + + // the location + private GenomeLoc mLoc; + + // the ref base + private char mRefBase; + + /** + * create a basic genotype meta data pbject, given the following fields + * + * @param ref the reference base + * @param loc the locus + */ + public VCFGenotypeLocusData(char ref, GenomeLoc loc) { + mRefBase = ref; + mLoc = loc; + } + + /** + * get the reference base. + * @return a character, representing the reference base + */ + public char getReference() { + return mRefBase; + } + + /** + * get the genotype's location + * + * @return a GenomeLoc representing the location + */ + public GenomeLoc getLocation() { + return mLoc; + } + + /** + * get the confidence + * + * @return the confidence + */ + public double getConfidence() { + return mConfidence; + } + + /** + * + * @param confidence the confidence for this genotype + */ + public void setConfidence(double confidence) { + mConfidence = confidence; + } + + /** + * get the strand lod + * + * @return the strand lod + */ + public double getSLOD() { + return mSLOD; + } + + /** + * + * @param slod the strand lod for this genotype + */ + public void setSLOD(double slod) { + mSLOD = slod; + } + + /** + * get the allele frequency + * + * @return the allele frequency + */ + public double getAlleleFrequency() { + return mAlleleFrequency; + } + + /** + * + * @param frequency the allele frequency for this genotype + */ + public void setAlleleFrequency(double frequency) { + mAlleleFrequency = frequency; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index ac2213402..aef18d6a2 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -95,7 +95,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * @param call the locus to add */ public void addGenotypeCall(Genotype call) { - throw new UnsupportedOperationException("VCF calls require metadata; use the addMultiSampleCall method instead"); + throw new UnsupportedOperationException("VCF calls require locusdata; use the addMultiSampleCall method instead"); } /** @@ -118,10 +118,12 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * * @param genotypes the list of genotypes */ - public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { + public void addMultiSampleCall(List genotypes, GenotypeLocusData locusdata) { if (!mInitialized) lazyInitialize(mFile, mStream); + if ( locusdata != null && !(locusdata instanceof VCFGenotypeLocusData) ) + throw new IllegalArgumentException("Only VCFGenotypeLocusData objects should be passed in to the VCF writers"); VCFParameters params = new VCFParameters(); params.addFormatItem("GT"); @@ -154,9 +156,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { throw new IllegalArgumentException("Genotype array passed to VCFGenotypeWriterAdapter contained Genotypes not in the VCF header"); } - Map infoFields = getInfoFields(metadata, params); + Map infoFields = getInfoFields((VCFGenotypeLocusData)locusdata, params); - double qual = (metadata == null) ? 0 : metadata.getLOD(); + double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence(); // maintain 0-99 based Q-scores qual = Math.min(qual, 99); qual = Math.max(qual, 0); @@ -178,16 +180,16 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { /** * get the information fields of the VCF record, given the meta data and parameters * - * @param metadata the metadata associated with this multi sample call + * @param locusdata the metadata associated with this multi sample call * @param params the parameters * * @return a mapping of info field to value */ - private Map getInfoFields(GenotypeMetaData metadata, VCFParameters params) { + private Map getInfoFields(VCFGenotypeLocusData locusdata, VCFParameters params) { Map infoFields = new HashMap(); - if (metadata != null) { - infoFields.put("SB", String.format("%.2f", metadata.getSLOD())); - infoFields.put("AF", String.format("%.2f", metadata.getAlleleFrequency())); + if ( locusdata != null ) { + infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); + infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency())); } infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size())); return infoFields;