From 3091443dc7b9c2b3e87740959a2b40e94c29709f Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 29 Oct 2009 03:46:41 +0000 Subject: [PATCH] Sweeping changes to the genotype output system, as per several discussions with Matt & Aaron. Some things still need to be changed, but it will entail some more design decisions first (which means I get to bug M&A again tomorrow!). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1930 348d0f76-0448-11de-a6fe-93d51630548a --- .../genotyper/EMGenotypeCalculationModel.java | 36 ++- .../genotyper/GenotypeCalculationModel.java | 20 +- .../gatk/walkers/genotyper/GenotypeCall.java | 302 ------------------ ...JointEstimateGenotypeCalculationModel.java | 38 ++- ...PointEstimateGenotypeCalculationModel.java | 27 +- .../genotyper/UnifiedArgumentCollection.java | 4 + .../walkers/genotyper/UnifiedGenotyper.java | 34 +- ...seTransitionTableCalculatorJavaWalker.java | 6 +- .../gatk/walkers/CoverageEvalWalker.java | 5 +- .../gatk/walkers/DeNovoSNPWalker.java | 12 +- .../utils/ArtificialPoolContext.java | 2 +- .../sting/utils/genotype/BasicGenotype.java | 10 + .../sting/utils/genotype/Genotype.java | 12 + .../utils/genotype/GenotypeWriterFactory.java | 27 +- .../utils/genotype/LikelihoodsBacked.java | 11 +- .../utils/genotype/PosteriorsBacked.java | 19 +- .../sting/utils/genotype/ReadBacked.java | 6 + .../sting/utils/genotype/SampleBacked.java | 15 +- .../utils/genotype/geli/GeliAdapter.java | 47 +-- .../utils/genotype/geli/GeliGenotypeCall.java | 240 ++++++++++++++ .../utils/genotype/geli/GeliTextWriter.java | 64 ++-- .../utils/genotype/glf/GLFGenotypeCall.java | 208 ++++++++++++ .../sting/utils/genotype/glf/GLFWriter.java | 41 +-- .../utils/genotype/vcf/VCFGenotypeCall.java | 255 +++++++++++++++ .../vcf/VCFGenotypeWriterAdapter.java | 69 ++-- .../walkers/genotyper/GenotypeCallTest.java | 121 ------- .../UnifiedGenotyperIntegrationTest.java | 4 +- .../utils/genotype/glf/GLFWriterTest.java | 14 +- 28 files changed, 991 insertions(+), 658 deletions(-) delete mode 100644 java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java create mode 100755 java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java create mode 100755 java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java create mode 100755 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java delete mode 100644 java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index ba0d657c0..34b414f40 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -18,7 +18,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode protected EMGenotypeCalculationModel() {} - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand HashMap contexts = splitContextBySample(context); @@ -42,26 +42,42 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode // generate the calls GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF()); - return new Pair, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata); + return new Pair, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata); } - protected List genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap contexts) { + protected List genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap contexts) { HashMap GLs = results.getGenotypeLikelihoods(); - ArrayList calls = new ArrayList(); + ArrayList calls = new ArrayList(); int variantCalls = 0; for ( String sample : GLs.keySet() ) { - // get the pileup - AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL); - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); - pileup.setIncludeDeletionsInPileupString(true); + // create the call - GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, GLs.get(sample), pileup); + Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT); + call.setReference(ref); + + // get the context + AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL); + call.setLocation(context.getLocation()); + + if ( call instanceof ReadBacked ) { + ((ReadBacked)call).setReads(context.getReads()); + } + if ( call instanceof SampleBacked ) { + ((SampleBacked)call).setSampleName(sample); + } + if ( call instanceof LikelihoodsBacked ) { + ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods()); + } + if ( call instanceof PosteriorsBacked ) { + ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors()); + } + calls.add(call); // increment the variant count if it's non-ref - if ( call.isVariant() ) + if ( call.isVariant(ref) ) variantCalls++; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java index c2e2af0db..b4ceff900 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCalculationModel.java @@ -2,9 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.utils.genotype.GenotypeMetaData; -import org.broadinstitute.sting.utils.Pair; +import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Pair; import org.broadinstitute.sting.utils.StingException; import org.apache.log4j.Logger; @@ -30,6 +30,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { protected Logger logger; protected double heterozygosity; protected EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform; + protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT; protected boolean ALL_BASE_MODE; protected boolean GENOTYPE_MODE; protected boolean POOLED_INPUT; @@ -62,6 +63,7 @@ public abstract class GenotypeCalculationModel implements Cloneable { baseModel = UAC.baseModel; heterozygosity = UAC.heterozygosity; defaultPlatform = UAC.defaultPlatform; + OUTPUT_FORMAT = UAC.VAR_FORMAT; ALL_BASE_MODE = UAC.ALL_BASES; GENOTYPE_MODE = UAC.GENOTYPE; POOLED_INPUT = UAC.POOLED; @@ -80,12 +82,6 @@ public abstract class GenotypeCalculationModel implements Cloneable { } } - public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) { - // just close and re-initialize - close(); - initialize(this.samples, this.logger, UAC); - } - public void close() { if ( verboseWriter != null ) verboseWriter.close(); @@ -100,10 +96,10 @@ public abstract class GenotypeCalculationModel implements Cloneable { * * @return calls */ - public abstract Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, - char ref, - AlignmentContext context, - DiploidGenotypePriors priors); + public abstract Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, + char ref, + AlignmentContext context, + DiploidGenotypePriors priors); /** diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java deleted file mode 100644 index 8cc7e2773..000000000 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java +++ /dev/null @@ -1,302 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.*; - -import java.util.Arrays; -import java.util.List; - - -/** - * @author aaron - *

- * Class GenotypeCall - *

- * The implementation of the genotype interface, which contains - * extra information for the various genotype outputs - */ -public class GenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked { - private final char mRefBase; - private final GenotypeLikelihoods mGenotypeLikelihoods; - - // the next two values are filled in on-demand. Default to -1 since they can never be negitive - private final GenomeLoc mLocation; - private final ReadBackedPileup mPileup; - - // if this is null, we were constructed with the intention that we'd represent the best genotype - private DiploidGenotype mGenotype = null; - - // the reference genotype and the next best genotype, lazy loaded - private DiploidGenotype mRefGenotype = null; - private DiploidGenotype mNextGenotype = null; - - // the sample name, used to propulate the SampleBacked interface - private String mSampleName; - - /** - * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object - * - * @param sampleName the sample name - * @param location the location we're working with - * @param refBase the ref base - * @param gtlh the genotype likelihoods object - * @param pileup the pile-up of reads at the specified locus - */ - public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) { - mSampleName = sampleName; - mRefBase = Character.toUpperCase(refBase); - mGenotypeLikelihoods = gtlh; - mLocation = location; - mPileup = pileup; - } - - /** - * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object - * - * @param sampleName the sample name - * @param location the location we're working with - * @param refBase the ref base - * @param gtlh the genotype likelihoods object - * @param pileup the pile-up of reads at the specified locus - */ - GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) { - mSampleName = sampleName; - mRefBase = Character.toUpperCase(refBase); - mGenotypeLikelihoods = gtlh; - mLocation = location; - mGenotype = genotype; - mPileup = pileup; - } - - @Override - public boolean equals(Object other) { - lazyEval(); - - if (other == null) - return false; - if (other instanceof GenotypeCall) { - GenotypeCall otherCall = (GenotypeCall) other; - - if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) { - return false; - } - if (!this.mGenotype.equals(otherCall.mGenotype)) - return false; - return (this.mRefBase == otherCall.mRefBase) && - this.mPileup.equals(mPileup); - } - return false; - } - - public String toString() { - lazyEval(); - return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s", - getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(), - getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods())); - } - - private void lazyEval() { - // us - if (mGenotype == null) { - Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors()); - mGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; - } - - // our comparison - if (mRefGenotype == null) { - mRefGenotype = DiploidGenotype.valueOf(Utils.dupString(this.getReference(), 2)); - } - if (mNextGenotype == null) { - Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors()); - mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; - } - } - - - /** - * get the confidence we have - * - * @return get the one minus error value - */ - public double getNegLog10PError() { - return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest())); - } - - /** get the best genotype */ - private DiploidGenotype getBestGenotype() { - lazyEval(); - return mGenotype; - } - - /** get the alternate genotype */ - private DiploidGenotype getNextBest() { - lazyEval(); - return mNextGenotype; - } - - /** get the alternate genotype */ - private DiploidGenotype getRefGenotype() { - lazyEval(); - return mRefGenotype; - } - - /** - * get the bases that represent this - * - * @return the bases, as a string - */ - public String getBases() { - return getBestGenotype().toString(); - } - - /** - * get the ploidy - * - * @return the ploidy value - */ - public int getPloidy() { - return 2; - } - - /** - * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) - * - * @return true if we're homozygous, false otherwise - */ - public boolean isHom() { - return getBestGenotype().isHom(); - } - - /** - * Returns true if observed alleles differ (regardless of whether they are ref or alt) - * - * @return true if we're het, false otherwise - */ - public boolean isHet() { - return !isHom(); - } - - /** - * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base - * is located right after the specified location - * - * @return position on the genome wrapped in GenomeLoc object - */ - public GenomeLoc getLocation() { - return this.mLocation; - } - - /** - * returns true if the genotype is a point genotype, false if it's a indel / deletion - * - * @return true is a SNP - */ - public boolean isPointGenotype() { - return true; - } - - /** - * given the reference, are we a variant? (non-ref) - * - * @param ref the reference base or bases - * - * @return true if we're a variant - */ - public boolean isVariant(char ref) { - return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString()); - } - - /** - * are we a variant? (non-ref) - * - * @return true if we're a variant - */ - public boolean isVariant() { - return isVariant(mRefBase); - } - - /** - * return this genotype as a variant - * - * @return - */ - public Variation toVariation() { - double bestRef = Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getRefGenotype())); - return new BasicVariation(this.getBases(), String.valueOf(this.getReference()), 0, this.mLocation, bestRef); - } - - /** - * return the likelihoods as a double array, in lexographic order - * - * @return the likelihoods - */ - public double[] getProbabilities() { - return this.mGenotypeLikelihoods.getPosteriors(); - } - - /** - * get the SAM records that back this genotype call - * - * @return a list of SAM records - */ - public List getReads() { - return this.mPileup.getReads(); - } - - /** - * get the count of reads - * - * @return the number of reads we're backed by - */ - public int getReadCount() { - return this.mPileup.getReads().size(); - } - - /** - * get the reference character - * - * @return - */ - public char getReference() { - return this.mRefBase; - } - - /** - * get the likelihood information for this call - * - * @return - */ - public double[] getLikelihoods() { - return this.mGenotypeLikelihoods.getLikelihoods(); - } - - - /** - * get the posteriors information for this call - * - * @return - */ - public double[] getPosteriors() { - return this.mGenotypeLikelihoods.getPosteriors(); - } - - /** @return returns the sample name for this genotype */ - public String getSampleName() { - return this.mSampleName; - } - - /** - * get the filtering string for this genotype - * - * @return a string, representing the genotyping value - */ - public String getFilteringValue() { - return "0"; - } -} - - - \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index e78a0cedd..40d68e3f1 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -37,7 +37,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo private enum GenotypeType { REF, HET, HOM } - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // keep track of the context for each sample, overall and separated by strand HashMap contexts = splitContextBySample(context); @@ -292,7 +292,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo verboseWriter.println(); } - private Pair, GenotypeMetaData> createCalls(char ref, HashMap contexts) { + private Pair, GenotypeMetaData> createCalls(char ref, HashMap contexts) { // first, find the alt allele with maximum confidence int indexOfMax = 0; char baseOfMax = ref; @@ -308,9 +308,9 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo } } double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]); - double bestAFguess = (double)findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1); + double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1); - ArrayList calls = new ArrayList(); + ArrayList calls = new ArrayList(); // TODO -- generate strand score double strandScore = 0.0; GenotypeMetaData metadata = new GenotypeMetaData(phredScaledConfidence, strandScore, bestAFguess); @@ -319,21 +319,31 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo if ( phredScaledConfidence >= CONFIDENCE_THRESHOLD || ALL_BASE_MODE ) { for ( String sample : GLs.keySet() ) { - // get the pileup - AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL); - ReadBackedPileup pileup = new ReadBackedPileup(ref, context); - pileup.setIncludeDeletionsInPileupString(true); - // TODO -- fix GenotypeCall code so that each call doesn't need its own pileup // create the call - GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, GLs.get(sample), pileup); - calls.add(call); + Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT); + call.setReference(ref); - // TODO -- fix GenotypeCall code so that UG tells it which genotypes to use - // TODO -- all of the intelligence for making calls should be in UG + AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL); + call.setLocation(context.getLocation()); + + if ( call instanceof ReadBacked ) { + ((ReadBacked)call).setReads(context.getReads()); + } + if ( call instanceof SampleBacked ) { + ((SampleBacked)call).setSampleName(sample); + } + if ( call instanceof LikelihoodsBacked ) { + ((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods()); + } + if ( call instanceof PosteriorsBacked ) { + ((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors()); + } + + calls.add(call); } } - return new Pair, GenotypeMetaData>(calls, metadata); + return new Pair, GenotypeMetaData>(calls, metadata); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java index b7c645d52..2953febbf 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/PointEstimateGenotypeCalculationModel.java @@ -3,8 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import org.broadinstitute.sting.utils.genotype.GenotypeMetaData; +import org.broadinstitute.sting.utils.genotype.*; import java.util.*; @@ -25,7 +24,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation // overload this method so we can special-case the single sample - public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { + public Pair, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) { // we don't actually want to run EM for single samples if ( samples.size() == 1 ) { @@ -44,9 +43,27 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation return null; // create the genotype call object + // create the call + Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT); + call.setReference(ref); + call.setLocation(context.getLocation()); + Pair discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL); - GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, discoveryGL.second, discoveryGL.first); - return new Pair, GenotypeMetaData>(Arrays.asList(call), null); + + if ( call instanceof ReadBacked ) { + ((ReadBacked)call).setReads(discoveryGL.first.getReads()); + } + if ( call instanceof SampleBacked ) { + ((SampleBacked)call).setSampleName(sample); + } + if ( call instanceof LikelihoodsBacked ) { + ((LikelihoodsBacked)call).setLikelihoods(discoveryGL.second.getLikelihoods()); + } + if ( call instanceof PosteriorsBacked ) { + ((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors()); + } + + return new Pair, GenotypeMetaData>(Arrays.asList(call), null); } return super.calculateGenotype(tracker, ref, context, priors); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java index 4b00b83d9..1cea4843b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; public class UnifiedArgumentCollection { @@ -48,6 +49,9 @@ public class UnifiedArgumentCollection { // control the output + @Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false) + public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; + @Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes or just the variants?", required = false) public boolean GENOTYPE = false; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 3de0064ad..6196250e4 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -52,7 +52,7 @@ import java.util.ArrayList; @ReadFilters({ZeroMappingQualityReadFilter.class}) -public class UnifiedGenotyper extends LocusWalker, GenotypeMetaData>, Integer> { +public class UnifiedGenotyper extends LocusWalker, GenotypeMetaData>, Integer> { @ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); @@ -60,9 +60,6 @@ public class UnifiedGenotyper extends LocusWalker, Genot @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false) public File VARIANTS_FILE = null; - @Argument(fullName = "variant_output_format", shortName = "vf", doc = "File format to be used; default is VCF", required = false) - public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF; - // the model used for calculating genotypes private GenotypeCalculationModel gcm; @@ -88,9 +85,13 @@ public class UnifiedGenotyper extends LocusWalker, Genot * To be used with walkers that call the UnifiedGenotyper's map function * and consequently can't set these arguments on the command-line * + * @param UAC the UnifiedArgumentCollection + * **/ public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) { - gcm.setUnifiedArgumentCollection(UAC); + gcm.close(); + this.UAC = UAC; + initialize(); } /** @@ -130,12 +131,12 @@ public class UnifiedGenotyper extends LocusWalker, Genot // create the output writer stream if ( VARIANTS_FILE != null ) - writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, + writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, "UnifiedGenotyper", this.getToolkit().getArguments().referenceFile.getName(), samples); else - writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper", + writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper", this.getToolkit().getArguments().referenceFile.getName(), samples); callsMetrics = new CallMetrics(); @@ -148,7 +149,7 @@ public class UnifiedGenotyper extends LocusWalker, Genot * @param refContext the reference base * @param context contextual information around the locus */ - public Pair, GenotypeMetaData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) { + public Pair, GenotypeMetaData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) { char ref = Character.toUpperCase(refContext.getBase()); if ( !BaseUtils.isRegularBase(ref) ) return null; @@ -210,7 +211,7 @@ public class UnifiedGenotyper extends LocusWalker, Genot public Integer reduceInit() { return 0; } - public Integer reduce(Pair, GenotypeMetaData> value, Integer sum) { + public Integer reduce(Pair, GenotypeMetaData> value, Integer sum) { if ( value == null || value.first == null ) return sum; @@ -221,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker, Genot // special-case for single-sample using PointEstimate model if ( value.second == null ) { - GenotypeCall call = value.first.get(0); + Genotype call = value.first.get(0); if ( UAC.GENOTYPE || call.isVariant(call.getReference()) ) { double confidence = (UAC.GENOTYPE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError()); if ( confidence >= UAC.LOD_THRESHOLD ) { @@ -236,13 +237,12 @@ public class UnifiedGenotyper extends LocusWalker, Genot // use multi-sample mode if we have multiple samples or the output type allows it else if ( writer.supportsMultiSample() || samples.size() > 1 ) { - // annoying hack to get around Java generics - ArrayList callList = new ArrayList(); - for ( GenotypeCall call : value.first ) - callList.add(call); - - callsMetrics.nConfidentCalls++; - writer.addMultiSampleCall(callList, value.second); + if ( UAC.CONFIDENCE_THRESHOLD <= value.second.getLOD() && UAC.LOD_THRESHOLD <= value.second.getLOD() ) { + callsMetrics.nConfidentCalls++; + writer.addMultiSampleCall(value.first, value.second); + } else { + callsMetrics.nNonConfidentCalls++; + } } // otherwise, use single sample mode diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java index 8f396c7b4..af83105dd 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/BaseTransitionTableCalculatorJavaWalker.java @@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.GenotypeMetaData; +import org.broadinstitute.sting.utils.genotype.*; import java.util.*; import java.io.PrintStream; @@ -373,10 +373,10 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker, GenotypeMetaData> calls = ug.map(tracker,ref,context); + Pair, GenotypeMetaData> calls = ug.map(tracker,ref,context); if (calls == null) return false; - return (! calls.first.get(0).isVariant()) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase()); + return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase()); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index 5a3952d33..007ca16cf 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF; import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.BaseUtils; @@ -81,9 +80,9 @@ public class CoverageEvalWalker extends LocusWalker, String> { List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets); AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets); - Pair, GenotypeMetaData> calls = UG.map(tracker, ref, subContext); + Pair, GenotypeMetaData> calls = UG.map(tracker, ref, subContext); if (calls != null && calls.first != null) { - GenotypeCall call = calls.first.get(0); + Genotype call = calls.first.get(0); String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call)); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java index 038380040..d5933be62 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java @@ -7,10 +7,8 @@ import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariationRod; import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall; import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper; -import org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.utils.genotype.GenotypeMetaData; +import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.Pair; import java.util.ArrayList; @@ -72,14 +70,14 @@ public class DeNovoSNPWalker extends RefWalker{ } AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets); - Pair, GenotypeMetaData> parent1 = UG.map(tracker, ref, parent1_subContext); + Pair, GenotypeMetaData> parent1 = UG.map(tracker, ref, parent1_subContext); AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets); - Pair, GenotypeMetaData> parent2 = UG.map(tracker, ref, parent2_subContext); + Pair, GenotypeMetaData> parent2 = UG.map(tracker, ref, parent2_subContext); if ( parent1 != null && parent1.first != null && parent2 != null && parent2.first != null ) { - GenotypeCall parent1call = parent1.first.get(0); - GenotypeCall parent2call = parent2.first.get(0); + Genotype parent1call = parent1.first.get(0); + Genotype parent2call = parent2.first.get(0); if (!parent1call.isVariant(parent1call.getReference()) && parent1call.getNegLog10PError() > 5 && diff --git a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java index 057440d60..2c5ce7733 100755 --- a/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java +++ b/java/src/org/broadinstitute/sting/playground/utils/ArtificialPoolContext.java @@ -257,7 +257,7 @@ public class ArtificialPoolContext { public Genotype getGenotype(int group) { AlignmentContext alicon = this.getAlignmentContext(); Pair[],List[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets()); - Pair, GenotypeMetaData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(), + Pair, GenotypeMetaData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(), new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group])); return (result.first == null ? null : result.first.get(0)); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java index 2082d127a..f72076dee 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/BasicGenotype.java @@ -113,6 +113,11 @@ public class BasicGenotype implements Genotype { return mLocation; } + // set the location + public void setLocation(GenomeLoc loc) { + mLocation = loc; + } + /** * returns true if the genotype is a point genotype, false if it's a indel / deletion * @@ -145,6 +150,11 @@ public class BasicGenotype implements Genotype { return mRef; } + // set the reference base + public void setReference(char refBase) { + mRef = Character.toUpperCase(refBase); + } + /** * return this genotype as a variant * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java index cc0ad15be..d7f6e8cd4 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/Genotype.java @@ -52,6 +52,12 @@ public interface Genotype { */ public GenomeLoc getLocation(); + /** + * set the location. + * @param loc the location + */ + public void setLocation(GenomeLoc loc); + /** * returns true if the genotype is a point genotype, false if it's a indel / deletion * @@ -74,6 +80,12 @@ public interface Genotype { */ public char getReference(); + /** + * set the reference base. + * @param ref the ref base + */ + public void setReference(char ref); + /** * return this genotype as a variant * diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 69dea8392..cf5668515 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -2,14 +2,12 @@ package org.broadinstitute.sting.utils.genotype; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter; -import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; -import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriterAdapter; +import org.broadinstitute.sting.utils.genotype.geli.*; +import org.broadinstitute.sting.utils.genotype.glf.*; +import org.broadinstitute.sting.utils.genotype.vcf.*; import java.io.File; import java.io.PrintStream; -import java.util.List; import java.util.Set; @@ -70,4 +68,23 @@ public class GenotypeWriterFactory { throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented"); } } + + /** + * create a genotype call + * @param format the format + * @return an unpopulated genotype call object + */ + public static Genotype createSupportedCall(GENOTYPE_FORMAT format) { + switch (format) { + case VCF: + return new VCFGenotypeCall(); + case GELI: + case GELI_BINARY: + return new GeliGenotypeCall(); + case GLF: + return new GLFGenotypeCall(); + default: + throw new StingException("Genotype format " + format.toString() + " is not implemented"); + } + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java index 81f9cd328..bdad8de20 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/LikelihoodsBacked.java @@ -10,8 +10,15 @@ package org.broadinstitute.sting.utils.genotype; public interface LikelihoodsBacked { /** - * get the likelihood information for this - * @return + * + * @return the likelihood information for this genotype */ public double[] getLikelihoods(); + + /** + * + * @param likelihoods the likelihoods for this genotype + */ + public void setLikelihoods(double[] likelihoods); + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java index 3fa9b59ff..81212c3b8 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/PosteriorsBacked.java @@ -2,17 +2,24 @@ package org.broadinstitute.sting.utils.genotype; /** * @author aaron - *

- * Interface PosteriorsBacked - *

- * A descriptions should go here. Blame aaron if it's missing. + * Interface PosteriorsBacked + * + * this interface indicates that the genotype is + * backed up by genotype posterior information. */ public interface PosteriorsBacked { /** - * get the likelihood information for this - * @return + * + * @return the likelihood information for this genotype */ public double[] getPosteriors(); + + /** + * + * @param posteriors the posteriors for this genotype + */ + public void setPosteriors(double[] posteriors); + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java index 84a3b6687..2cc95f45f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/ReadBacked.java @@ -18,6 +18,12 @@ public interface ReadBacked { */ public List getReads(); + /** + * + * @param reads the reads for this genotype + */ + public void setReads(List reads); + /** * get the count of reads * @return the number of reads we're backed by diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java index 583e20dfb..f7ab1bba5 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java @@ -2,10 +2,10 @@ package org.broadinstitute.sting.utils.genotype; /** * @author aaron - *

- * Interface SampleBacked - *

- * A descriptions should go here. Blame aaron if it's missing. + * Interface PosteriorsBacked + * + * this interface indicates that the genotype is + * backed up by sample information. */ public interface SampleBacked { @@ -16,8 +16,9 @@ public interface SampleBacked { public String getSampleName(); /** - * get the filtering string for this genotype - * @return a string, representing the genotyping value + * + * @param name the sample name for this genotype */ - public String getFilteringValue(); + public void setSampleName(String name); + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index e555a238e..9f44ee1da 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -9,7 +9,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.genotype.*; import java.io.File; -import java.util.Arrays; import java.util.List; @@ -79,7 +78,7 @@ public class GeliAdapter implements GenotypeWriter { GenotypeLikelihoods lk = likelihoods.convertToGenotypeLikelihoods(writer.getFileHeader(), contig.getSequenceIndex(), position, (byte) referenceBase); lk.setNumReads(readCount); - lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? (short)Short.MAX_VALUE : (short)Math.round(maxMappingQuality)); + lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? Short.MAX_VALUE : (short)Math.round(maxMappingQuality)); writer.addGenotypeLikelihoods(lk); } @@ -100,37 +99,27 @@ public class GeliAdapter implements GenotypeWriter { } /** - * Add a genotype, given a genotype locus + * Add a genotype, given a genotype call * - * @param locus the locus to add + * @param call the call to add */ - @Override - public void addGenotypeCall(Genotype locus) { - double posteriors[]; - int readDepth = -1; - double nextVrsBest = 0; - double nextVrsRef = 0; - if (!(locus instanceof PosteriorsBacked)) { - posteriors = new double[10]; - Arrays.fill(posteriors, Double.MIN_VALUE); - } else { - posteriors = ((PosteriorsBacked) locus).getPosteriors(); + public void addGenotypeCall(Genotype call) { + if ( !(call instanceof GeliGenotypeCall) ) + throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); + GeliGenotypeCall gCall = (GeliGenotypeCall)call; - } - char ref = locus.getReference(); - int readCount = 0; + char ref = gCall.getReference(); + List recs = gCall.getReads(); + int readCount = recs.size(); double maxMappingQual = 0; - if (locus instanceof ReadBacked) { - List recs = ((ReadBacked)locus).getReads(); - readCount = recs.size(); - for (SAMRecord rec : recs) { - if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality(); - } + for (SAMRecord rec : recs) { + if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality(); } + double[] posteriors = gCall.getPosteriors(); LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG); - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()), - (int)locus.getLocation().getStart(), + this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()), + (int)gCall.getLocation().getStart(), ref, maxMappingQual, readCount, @@ -142,13 +131,11 @@ public class GeliAdapter implements GenotypeWriter { * * @param position */ - @Override public void addNoCall(int position) { throw new UnsupportedOperationException("Geli format does not support no-calls"); } /** finish writing, closing any open files. */ - @Override public void close() { if (this.writer != null) { this.writer.close(); @@ -158,15 +145,13 @@ public class GeliAdapter implements GenotypeWriter { /** * add a multi-sample call if we support it * - * @param genotypes the list of genotypes, that are backed by sample information + * @param genotypes the list of genotypes */ - @Override public void addMultiSampleCall( List genotypes, GenotypeMetaData metadata) { throw new UnsupportedOperationException("Geli binary doesn't support multisample calls"); } /** @return true if we support multisample, false otherwise */ - @Override public boolean supportsMultiSample() { return false; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java new file mode 100755 index 000000000..52063ee9d --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliGenotypeCall.java @@ -0,0 +1,240 @@ +package org.broadinstitute.sting.utils.genotype.geli; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.*; + +import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; + + +/** + * @author ebanks + *

+ * Class GeliGenotypeCall + *

+ * The implementation of the genotype interface, specific to Geli + */ +public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked { + private char mRefBase; + private GenomeLoc mLocation; + + private List mReads; + private double[] mPosteriors; + + + // the reference genotype, the best genotype, and the next best genotype, lazy loaded + private DiploidGenotype mRefGenotype = null; + private DiploidGenotype mBestGenotype = null; + private DiploidGenotype mNextGenotype = null; + + /** + * Generate a single sample genotype object + * + */ + public GeliGenotypeCall() { + // fill in empty data + mRefBase = 'N'; + mPosteriors = new double[10]; + Arrays.fill(mPosteriors, Double.MIN_VALUE); + mReads = new ArrayList(); + } + + public void setReference(char refBase) { + mRefBase = Character.toUpperCase(refBase); + } + + public void setLocation(GenomeLoc loc) { + mLocation = loc; + } + + public void setReads(List reads) { + mReads = new ArrayList(reads); + } + + public void setPosteriors(double[] posteriors) { + mPosteriors = posteriors; + } + + + @Override + public boolean equals(Object other) { + lazyEval(); + + if (other == null) + return false; + if (other instanceof GeliGenotypeCall) { + GeliGenotypeCall otherCall = (GeliGenotypeCall) other; + + if (!this.mBestGenotype.equals(otherCall.mBestGenotype)) + return false; + return (this.mRefBase == otherCall.mRefBase); + } + return false; + } + + public String toString() { + lazyEval(); + return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f", + getLocation(), mBestGenotype, mRefGenotype, mRefBase, mReads.size(), getNegLog10PError()); + } + + private void lazyEval() { + if (mBestGenotype == null) { + Integer sorted[] = Utils.SortPermutation(mPosteriors); + mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; + mRefGenotype = DiploidGenotype.createHomGenotype(this.getReference()); + } + } + + + /** + * get the confidence we have + * + * @return get the one minus error value + */ + public double getNegLog10PError() { + return Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getNextBest().ordinal()]); + } + + // get the best genotype + private DiploidGenotype getBestGenotype() { + lazyEval(); + return mBestGenotype; + } + + // get the alternate genotype + private DiploidGenotype getNextBest() { + lazyEval(); + return mNextGenotype; + } + + // get the ref genotype + private DiploidGenotype getRefGenotype() { + lazyEval(); + return mRefGenotype; + } + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + public String getBases() { + return getBestGenotype().toString(); + } + + /** + * get the ploidy + * + * @return the ploidy value + */ + public int getPloidy() { + return 2; + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return true if we're homozygous, false otherwise + */ + public boolean isHom() { + return getBestGenotype().isHom(); + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return true if we're het, false otherwise + */ + public boolean isHet() { + return !isHom(); + } + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + public GenomeLoc getLocation() { + return this.mLocation; + } + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * + * @return true is a SNP + */ + public boolean isPointGenotype() { + return true; + } + + /** + * given the reference, are we a variant? (non-ref) + * + * @param ref the reference base or bases + * + * @return true if we're a variant + */ + public boolean isVariant(char ref) { + return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString()); + } + + /** + * are we a variant? (non-ref) + * + * @return true if we're a variant + */ + public boolean isVariant() { + return isVariant(mRefBase); + } + + /** + * + * @return return this genotype as a variant + */ + public Variation toVariation() { + double bestRef = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]); + return new BasicVariation(this.getBases(), String.valueOf(this.getReference()), 0, this.mLocation, bestRef); + } + + /** + * get the SAM records that back this genotype call + * + * @return a list of SAM records + */ + public List getReads() { + return mReads; + } + + /** + * get the count of reads + * + * @return the number of reads we're backed by + */ + public int getReadCount() { + return mReads.size(); + } + + /** + * get the reference character + * + * @return the reference character + */ + public char getReference() { + return mRefBase; + } + + /** + * get the posteriors + * + * @return the posteriors + */ + public double[] getPosteriors() { + return mPosteriors; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 34cbe9931..b3d52a136 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.genotype.geli; import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.genotype.*; import java.io.File; @@ -46,48 +45,41 @@ public class GeliTextWriter implements GenotypeWriter { public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT"; /** - * Add a genotype, given a genotype locus + * Add a genotype, given a call * - * @param locus the locus to add + * @param call the call to add */ - public void addGenotypeCall(Genotype locus) { - double posteriors[]; - int readDepth = -1; - double nextVrsBest = 0; + public void addGenotypeCall(Genotype call) { + if ( !(call instanceof GeliGenotypeCall) ) + throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); + GeliGenotypeCall gCall = (GeliGenotypeCall)call; + + char ref = gCall.getReference(); + + double[] posteriors = gCall.getPosteriors(); + double[] lks; + lks = Arrays.copyOf(posteriors, posteriors.length); + Arrays.sort(lks); + + double nextVrsBest = lks[9] - lks[8]; double nextVrsRef = 0; + if (ref != 'X') + nextVrsRef = lks[9] - posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()]; - char ref = locus.getReference(); - - - if (!(locus instanceof PosteriorsBacked)) { - posteriors = new double[10]; - Arrays.fill(posteriors, 0.0); - } else { - posteriors = ((PosteriorsBacked) locus).getPosteriors(); - double[] lks; - lks = Arrays.copyOf(posteriors, posteriors.length); - Arrays.sort(lks); - nextVrsBest = lks[9] - lks[8]; - if (ref != 'X') { - int index = (DiploidGenotype.valueOf(Utils.dupString(ref, 2)).ordinal()); - nextVrsRef = lks[9] - posteriors[index]; - } - } double maxMappingQual = 0; - if (locus instanceof ReadBacked) { - List recs = ((ReadBacked) locus).getReads(); - readDepth = recs.size(); - for (SAMRecord rec : recs) { - if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality(); - } + List recs = gCall.getReads(); + int readDepth = recs.size(); + for (SAMRecord rec : recs) { + if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality(); } + mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f", - locus.getLocation().getContig(), - locus.getLocation().getStart(), + gCall.getLocation().getContig(), + gCall.getLocation().getStart(), ref, readDepth, maxMappingQual, - locus.getBases(), + gCall.getBases(), nextVrsRef, nextVrsBest, posteriors[0], @@ -107,13 +99,11 @@ public class GeliTextWriter implements GenotypeWriter { * * @param position the position to add the no call at */ - @Override public void addNoCall(int position) { throw new UnsupportedOperationException("Geli text format doesn't support a no-call call."); } /** finish writing, closing any open files. */ - @Override public void close() { mWriter.close(); } @@ -121,15 +111,13 @@ public class GeliTextWriter implements GenotypeWriter { /** * add a multi-sample call if we support it * - * @param genotypes the list of genotypes, that are backed by sample information + * @param genotypes the list of genotypes */ - @Override public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { throw new UnsupportedOperationException("Geli text doesn't support multisample calls"); } /** @return true if we support multisample, false otherwise */ - @Override public boolean supportsMultiSample() { return false; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java new file mode 100755 index 000000000..e0dd8b733 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFGenotypeCall.java @@ -0,0 +1,208 @@ +package org.broadinstitute.sting.utils.genotype.glf; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.*; + +import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; + + +/** + * @author aaron + *

+ * Class GLFGenotypeCall + *

+ * The implementation of the genotype interface, specific to GLF + */ +public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked { + private char mRefBase; + private GenomeLoc mLocation; + + private List mReads; + private double[] mLikelihoods; + + private double mNegLog10PError; + private String mGenotype; + + /** + * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object + * + */ + public GLFGenotypeCall() { + // fill in empty data + mRefBase = 'N'; + mGenotype = "NN"; + mLikelihoods = new double[10]; + Arrays.fill(mLikelihoods, Double.MIN_VALUE); + mReads = new ArrayList(); + mNegLog10PError = Double.MIN_VALUE; + } + + public void setReference(char refBase) { + mRefBase = Character.toUpperCase(refBase); + } + + public void setLocation(GenomeLoc loc) { + mLocation = loc; + } + + public void setReads(List reads) { + mReads = new ArrayList(reads); + } + + public void setLikelihoods(double[] likelihoods) { + mLikelihoods = likelihoods; + } + + public void setNegLog10PError(double negLog10PError) { + mNegLog10PError = negLog10PError; + } + + public void setGenotype(String genotype) { + mGenotype = genotype; + } + + @Override + public boolean equals(Object other) { + if (other == null || !(other instanceof GLFGenotypeCall)) + return false; + return (this.mRefBase == ((GLFGenotypeCall)other).mRefBase); + } + + public String toString() { + return String.format("%s ref=%s depth=%d negLog10PError=%.2f", + getLocation(), mRefBase, mReads.size(), getNegLog10PError()); + } + + /** + * get the confidence we have + * + * @return get the one minus error value + */ + public double getNegLog10PError() { + return mNegLog10PError; + } + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + public String getBases() { + return Character.toString(mRefBase); + } + + /** + * get the ploidy + * + * @return the ploidy value + */ + public int getPloidy() { + return 2; + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return true if we're homozygous, false otherwise + */ + public boolean isHom() { + return true; + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return true if we're het, false otherwise + */ + public boolean isHet() { + return !isHom(); + } + + /** + * + * @return return this genotype as a variant + */ + public Variation toVariation() { + throw new UnsupportedOperationException("GLF call doesn't support conversion to Variation"); + } + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + public GenomeLoc getLocation() { + return this.mLocation; + } + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * + * @return true is a SNP + */ + public boolean isPointGenotype() { + return true; + } + + /** + * given the reference, are we a variant? (non-ref) + * + * @param ref the reference base or bases + * + * @return true if we're a variant + */ + public boolean isVariant(char ref) { + return !Utils.dupString(mRefBase, 2).equals(mGenotype); + } + + /** + * are we a variant? (non-ref) + * + * @return true if we're a variant + */ + public boolean isVariant() { + return isVariant(mRefBase); + } + + /** + * get the SAM records that back this genotype call + * + * @return a list of SAM records + */ + public List getReads() { + return mReads; + } + + /** + * get the count of reads + * + * @return the number of reads we're backed by + */ + public int getReadCount() { + return mReads.size(); + } + + /** + * get the reference character + * + * @return the reference character + */ + public char getReference() { + return mRefBase; + } + + /** + * get the posteriors + * + * @return the posteriors + */ + public double[] getLikelihoods() { + return mLikelihoods; + } + +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 1534dc98a..6b625d4f8 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -11,7 +11,6 @@ import org.broadinstitute.sting.utils.genotype.*; import java.io.DataOutputStream; import java.io.File; import java.io.OutputStream; -import java.util.Arrays; import java.util.List; /* * Copyright (c) 2009 The Broad Institute @@ -118,33 +117,25 @@ public class GLFWriter implements GenotypeWriter { } /** - * Add a genotype, given a genotype locus + * Add a genotype, given a genotype call * - * @param locus the genotype called at a locus + * @param call the genotype call */ - @Override - public void addGenotypeCall(Genotype locus) { - char ref = locus.getReference(); + public void addGenotypeCall(Genotype call) { + if ( !(call instanceof GLFGenotypeCall) ) + throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers"); + GLFGenotypeCall gCall = (GLFGenotypeCall)call; + + char ref = gCall.getReference(); // get likelihood information if available - LikelihoodObject obj; - if (locus instanceof LikelihoodsBacked) { - obj = new LikelihoodObject(((LikelihoodsBacked) locus).getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); - } else { - double values[] = new double[10]; - Arrays.fill(values,-255.0); - obj = new LikelihoodObject(values, LikelihoodObject.LIKELIHOOD_TYPE.LOG); - } + LikelihoodObject obj = new LikelihoodObject(gCall.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); // transform! ... to negitive log likelihoods - double rms = 0; - int readCount = 0; // calculate the RMS mapping qualities and the read depth - if (locus instanceof ReadBacked) { - rms = calculateRMS(((ReadBacked)locus).getReads()); - readCount = ((ReadBacked)locus).getReadCount(); - } - this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,readCount,obj); + double rms = calculateRMS(gCall.getReads()); + int readCount = gCall.getReadCount(); + this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()),(int)gCall.getLocation().getStart(),(float)rms,ref,readCount,obj); } @@ -207,7 +198,6 @@ public class GLFWriter implements GenotypeWriter { * * @param position the position */ - @Override public void addNoCall(int position) { // glf doesn't support this operation throw new UnsupportedOperationException("GLF doesn't support a 'no call' call."); @@ -277,7 +267,6 @@ public class GLFWriter implements GenotypeWriter { * close the file. You must close the file to ensure any remaining data gets * written out. */ - @Override public void close() { writeEndRecord(); outputBinaryCodec.close(); @@ -286,7 +275,7 @@ public class GLFWriter implements GenotypeWriter { /** * get the reference sequence * - * @return + * @return the reference sequence */ public String getReferenceSequenceName() { return referenceSequenceName; @@ -296,15 +285,13 @@ public class GLFWriter implements GenotypeWriter { /** * add a multi-sample call if we support it * - * @param genotypes the list of genotypes, that are backed by sample information + * @param genotypes the list of genotypes */ - @Override public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { throw new UnsupportedOperationException("GLF writer doesn't support multisample calls"); } /** @return true if we support multisample, false otherwise */ - @Override public boolean supportsMultiSample() { return false; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java new file mode 100755 index 000000000..339dc41da --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeCall.java @@ -0,0 +1,255 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import net.sf.samtools.SAMRecord; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.*; + +import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; + + +/** + * @author ebanks + *

+ * Class VCFGenotypeCall + *

+ * The implementation of the genotype interface, specific to VCF + */ +public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, SampleBacked { + private char mRefBase; + private GenomeLoc mLocation; + + private List mReads; + private double[] mPosteriors; + + + // the reference genotype, the best genotype, and the next best genotype, lazy loaded + private DiploidGenotype mRefGenotype = null; + private DiploidGenotype mBestGenotype = null; + private DiploidGenotype mNextGenotype = null; + + // the sample name, used to propulate the SampleBacked interface + private String mSampleName; + + /** + * Generate a single sample genotype object + * + */ + public VCFGenotypeCall() { + // fill in empty data + mRefBase = 'N'; + mPosteriors = new double[10]; + Arrays.fill(mPosteriors, Double.MIN_VALUE); + mSampleName = ""; + mReads = new ArrayList(); + } + + public void setReference(char refBase) { + mRefBase = Character.toUpperCase(refBase); + } + + public void setLocation(GenomeLoc loc) { + mLocation = loc; + } + + public void setReads(List reads) { + mReads = new ArrayList(reads); + } + + public void setPosteriors(double[] posteriors) { + mPosteriors = posteriors; + } + + public void setSampleName(String name) { + mSampleName = name; + } + + + @Override + public boolean equals(Object other) { + lazyEval(); + + if (other == null) + return false; + if (other instanceof VCFGenotypeCall) { + VCFGenotypeCall otherCall = (VCFGenotypeCall) other; + + if (!this.mBestGenotype.equals(otherCall.mBestGenotype)) + return false; + return (this.mRefBase == otherCall.mRefBase); + } + return false; + } + + public String toString() { + lazyEval(); + return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f", + getLocation(), mBestGenotype, mRefGenotype, mRefBase, mReads.size(), getNegLog10PError()); + } + + private void lazyEval() { + if (mBestGenotype == null) { + Integer sorted[] = Utils.SortPermutation(mPosteriors); + mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]]; + mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]]; + mRefGenotype = DiploidGenotype.createHomGenotype(this.getReference()); + } + } + + + /** + * get the confidence we have + * + * @return get the one minus error value + */ + public double getNegLog10PError() { + return Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getNextBest().ordinal()]); + } + + // get the best genotype + private DiploidGenotype getBestGenotype() { + lazyEval(); + return mBestGenotype; + } + + // get the alternate genotype + private DiploidGenotype getNextBest() { + lazyEval(); + return mNextGenotype; + } + + // get the ref genotype + private DiploidGenotype getRefGenotype() { + lazyEval(); + return mRefGenotype; + } + + /** + * get the bases that represent this + * + * @return the bases, as a string + */ + public String getBases() { + return getBestGenotype().toString(); + } + + /** + * get the ploidy + * + * @return the ploidy value + */ + public int getPloidy() { + return 2; + } + + /** + * Returns true if both observed alleles are the same (regardless of whether they are ref or alt) + * + * @return true if we're homozygous, false otherwise + */ + public boolean isHom() { + return getBestGenotype().isHom(); + } + + /** + * Returns true if observed alleles differ (regardless of whether they are ref or alt) + * + * @return true if we're het, false otherwise + */ + public boolean isHet() { + return !isHom(); + } + + /** + * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base + * is located right after the specified location + * + * @return position on the genome wrapped in GenomeLoc object + */ + public GenomeLoc getLocation() { + return this.mLocation; + } + + /** + * returns true if the genotype is a point genotype, false if it's a indel / deletion + * + * @return true is a SNP + */ + public boolean isPointGenotype() { + return true; + } + + /** + * given the reference, are we a variant? (non-ref) + * + * @param ref the reference base or bases + * + * @return true if we're a variant + */ + public boolean isVariant(char ref) { + return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString()); + } + + /** + * are we a variant? (non-ref) + * + * @return true if we're a variant + */ + public boolean isVariant() { + return isVariant(mRefBase); + } + + /** + * + * @return return this genotype as a variant + */ + public Variation toVariation() { + double bestRef = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]); + return new BasicVariation(this.getBases(), String.valueOf(this.getReference()), 0, this.mLocation, bestRef); + } + + /** + * get the SAM records that back this genotype call + * + * @return a list of SAM records + */ + public List getReads() { + return mReads; + } + + /** + * get the count of reads + * + * @return the number of reads we're backed by + */ + public int getReadCount() { + return mReads.size(); + } + + /** + * get the reference character + * + * @return the reference character + */ + public char getReference() { + return mRefBase; + } + + /** + * get the posteriors + * + * @return the posteriors + */ + public double[] getPosteriors() { + return mPosteriors; + } + + /** + * @return returns the sample name for this genotype + */ + public String getSampleName() { + return mSampleName; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 6cebf4489..18380a232 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -52,10 +52,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { /** * initialize this VCF writer * - * @param genotypes the genotypes * @param file the file location to write to + * @param stream the output stream */ - private void lazyInitialize(List genotypes, File file, OutputStream stream) { + private void lazyInitialize(File file, OutputStream stream) { Map hInfo = new HashMap(); // setup the header fields @@ -82,9 +82,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { private static List getSampleNames(List genotypes) { List strings = new ArrayList(); for (Genotype genotype : genotypes) { - if (!(genotype instanceof SampleBacked)) + if (!(genotype instanceof VCFGenotypeCall)) throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface"); - strings.add(((SampleBacked) genotype).getSampleName()); + strings.add(((VCFGenotypeCall) genotype).getSampleName()); } return strings; } @@ -94,9 +94,8 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * * @param call the locus to add */ - @Override public void addGenotypeCall(Genotype call) { - addMultiSampleCall(Arrays.asList(call), null); + throw new UnsupportedOperationException("VCF calls require metadata; use the addMultiSampleCall method instead"); } /** @@ -104,13 +103,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * * @param position the position to add the no call at */ - @Override public void addNoCall(int position) { throw new UnsupportedOperationException("We don't currently support no-calls in VCF"); } /** finish writing, closing any open files. */ - @Override public void close() { if (mInitialized) mWriter.close(); @@ -119,12 +116,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { /** * add a multi-sample call if we support it * - * @param genotypes the list of genotypes, that are backed by sample information + * @param genotypes the list of genotypes */ - @Override public void addMultiSampleCall(List genotypes, GenotypeMetaData metadata) { if (!mInitialized) - lazyInitialize(genotypes, mFile, mStream); + lazyInitialize(mFile, mStream); VCFParameters params = new VCFParameters(); @@ -132,22 +128,22 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { // mapping of our sample names to genotypes if (genotypes.size() < 1) { - throw new IllegalArgumentException("Unable to parse out the current location: genotype array must at least contain one entry"); + throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry"); } // get the location and reference params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference()); - Map genotypeMap = genotypeListToSampleNameMap(genotypes); + Map genotypeMap = genotypeListToSampleNameMap(genotypes); for (String name : mHeader.getGenotypeSamples()) { if (genotypeMap.containsKey(name)) { Genotype gtype = genotypeMap.get(name); - VCFGenotypeRecord record = createVCFGenotypeRecord(params, gtype); + VCFGenotypeRecord record = createVCFGenotypeRecord(params, (VCFGenotypeCall)gtype); params.addGenotypeRecord(record); genotypeMap.remove(name); } else { - VCFGenotypeRecord record = createNoCallRecord(params, name); + VCFGenotypeRecord record = createNoCallRecord(name); params.addGenotypeRecord(record); } } @@ -161,14 +157,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { Map infoFields = getInfoFields(metadata, params); double qual = (metadata == null) ? 0 : (metadata.getLOD()) * 10; - - /** - * TODO: Eric fix the next line when our LOD scores are 0->Inf based instead - * of -3 to Inf based. - */ - if (qual < 0.0) { - qual = 0.0; - } + // maintain 0-99 based Q-scores + qual = Math.min(qual, 99); + qual = Math.max(qual, 0); VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), params.getContig(), @@ -176,7 +167,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { ".", params.getAlternateBases(), qual, - ".", + "0", infoFields, params.getFormatString(), params.getGenotypesRecords()); @@ -210,18 +201,13 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * * @return a VCFGenotypeRecord */ - private VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, Genotype gtype) { + private VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, VCFGenotypeCall gtype) { Map map = new HashMap(); - if (!(gtype instanceof SampleBacked)) { - throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface"); - } // calculate the RMS mapping qualities and the read depth - if (gtype instanceof ReadBacked) { - int readDepth = ((ReadBacked) gtype).getReadCount(); - map.put("RD", String.valueOf(readDepth)); - params.addFormatItem("RD"); - } + int readDepth = gtype.getReadCount(); + map.put("RD", String.valueOf(readDepth)); + params.addFormatItem("RD"); double qual = gtype.getNegLog10PError(); map.put("GQ", String.format("%.2f", qual)); params.addFormatItem("GQ"); @@ -231,7 +217,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { params.addAlternateBase(allele); } - VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(), + VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(), alleles, VCFGenotypeRecord.PHASE.UNPHASED, map); @@ -241,12 +227,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { /** * create a no call record * - * @param params the VCF parameters object * @param sampleName the sample name * * @return a VCFGenotypeRecord for the no call situation */ - private VCFGenotypeRecord createNoCallRecord(VCFParameters params, String sampleName) { + private VCFGenotypeRecord createNoCallRecord(String sampleName) { Map map = new HashMap(); @@ -277,24 +262,24 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { } /** @return true if we support multisample, false otherwise */ - @Override public boolean supportsMultiSample() { return true; } /** * create a genotype mapping from a list and their sample names + * while we're at it, checks that all genotypes are VCF-based * * @param list a list of genotype samples * * @return a mapping of the sample name to genotype fields */ - private static Map genotypeListToSampleNameMap(List list) { - Map map = new HashMap(); + private static Map genotypeListToSampleNameMap(List list) { + Map map = new HashMap(); for (Genotype rec : list) { - if (!(rec instanceof SampleBacked)) - throw new IllegalArgumentException("Genotype must be backed by sample information"); - map.put(((SampleBacked) rec).getSampleName(), rec); + if ( !(rec instanceof VCFGenotypeCall) ) + throw new IllegalArgumentException("Only VCFGenotypeCalls should be passed in to the VCF writers"); + map.put(((VCFGenotypeCall) rec).getSampleName(), (VCFGenotypeCall) rec); } return map; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java deleted file mode 100644 index 27de8d5ff..000000000 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java +++ /dev/null @@ -1,121 +0,0 @@ -package org.broadinstitute.sting.gatk.walkers.genotyper; - -import net.sf.samtools.SAMFileHeader; -import net.sf.samtools.SAMRecord; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Pair; -import org.broadinstitute.sting.utils.ReadBackedPileup; -import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; -import org.junit.Assert; -import org.junit.Test; - -import java.util.ArrayList; -import java.util.List; - - -/** - * @author aaron - *

- * Class SSGenotypeCallTest - *

- * test the SS Genotype call class - */ -public class GenotypeCallTest extends BaseTest { - - // we need a fake GenotypeLikelihoods class - public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods { - public boolean cacheIsEnabled() { return false; } - - protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy, - SAMRecord read, int offset, GenotypeLikelihoods val ) { - return null; - } - - /** - * Must be overridden by concrete subclasses - * - * @param observedBase - * @param chromBase - * @param read - * @param offset - * - * @return - */ - @Override - protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) { - return 0; //To change body of implemented methods use File | Settings | File Templates. - } - - public GenotypeLikelihoodsImpl() { - this.posteriors = new double[10]; - for (int x = 0; x < 10; x++) { - posteriors[x] = -(x); - } - this.likelihoods = new double[10]; - for (int x = 0; x < 10; x++) { - likelihoods[x] = -(x); - } - } - } - - - // make a fake read pile-up - public Pair makePileup() { - List reads = new ArrayList(); - List offset = new ArrayList(); - SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10000); - for (int x = 0; x < 10; x++) { - reads.add(ArtificialSAMUtils.createArtificialRead(header, "test", 0, 1, 100)); - offset.add(10 - x); - } - GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary()); - GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1, 10); - return new Pair(new ReadBackedPileup(loc, 'A', reads, offset), loc); - } - - - @Test - public void testBestVrsRefSame() { - Pair myPair = makePileup(); - GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); - Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001); - } - - @Test - public void testBestVrsRef2() { - Pair myPair = makePileup(); - GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first); - Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001); - } - - @Test - public void testBestVrsRef3() { - Pair myPair = makePileup(); - GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first); - Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001); - } - - - @Test - public void testBestVrsNextSame() { - Pair myPair = makePileup(); - GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); - Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001); - } - - @Test - public void testBestVrsNext2() { - Pair myPair = makePileup(); - GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); - Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001); - } - - @Test - public void testBestVrsNext3() { - Pair myPair = makePileup(); - GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first); - Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001); - } -} diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 527253876..2c0af0a6c 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm EM_POINT_ESTIMATE -lod 5", 1, - Arrays.asList("beb07ff5cbf60febfb451e4248f2b013")); + Arrays.asList("d41d8cd98f00b204e9800998ecf8427e")); executeTest("testMultiSamplePilot1", spec); } @@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -lod 5", 1, - Arrays.asList("8cde0c2aa4d9ad51c603db4b64f3933f")); + Arrays.asList("394f009c9bad34eb584fa10d133d79e0")); executeTest("testMultiSamplePilot2", spec); } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java index cdee2b4d7..aa042e3d2 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/glf/GLFWriterTest.java @@ -132,7 +132,7 @@ public class GLFWriterTest extends BaseTest { } -class FakeGenotype extends BasicGenotype implements LikelihoodsBacked, Comparable { +class FakeGenotype extends GLFGenotypeCall implements Comparable { private double[] likelihoods; @@ -145,8 +145,12 @@ class FakeGenotype extends BasicGenotype implements LikelihoodsBacked, Comparabl * @param negLog10PError the confidence score */ public FakeGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError, double likelihoods[]) { - super(location, genotype, ref, negLog10PError); - this.likelihoods = likelihoods; + setLocation(location); + setReference(ref); + setLikelihoods(likelihoods); + setGenotype(genotype); + setNegLog10PError(negLog10PError); + } /** @@ -159,6 +163,10 @@ class FakeGenotype extends BasicGenotype implements LikelihoodsBacked, Comparabl return likelihoods; } + public void setLikelihoods(double[] likelihoods) { + this.likelihoods = likelihoods; + } + @Override public int compareTo(FakeGenotype that) {