diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java index 3bb73c55b..4689d31fe 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java @@ -1,15 +1,16 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import org.broadinstitute.sting.utils.genotype.DiploidGenotype; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.ReadBackedPileup; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.DiploidGenotype; -import java.util.*; - -import net.sf.samtools.SAMRecord; -import net.sf.samtools.SAMReadGroupRecord; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; public class EMGenotypeCalculationModel extends GenotypeCalculationModel { @@ -79,14 +80,15 @@ public class EMGenotypeCalculationModel extends GenotypeCalculationModel { // for now, we need to special-case single sample mode if ( samples.size() == 1 ) { - UnifiedGenotypeLikelihoods UGL = GLs.get(samples.iterator().next()); + String sample = samples.iterator().next(); + UnifiedGenotypeLikelihoods UGL = GLs.get(sample); // if there were no good bases, the likelihoods object wouldn't exist if ( UGL == null ) return false; callsMetrics.nCalledBases++; UGL.setPriors(priors); - SSGenotypeCall call = new SSGenotypeCall(context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context)); + GenotypeCall call = new GenotypeCall(sample,context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context)); if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) { double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError()); @@ -183,7 +185,7 @@ public class EMGenotypeCalculationModel extends GenotypeCalculationModel { */ - //return new SSGenotypeCall(context.getLocation(), ref,gl, pileup); + //return new GenotypeCall(context.getLocation(), ref,gl, pileup); return true; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java similarity index 78% rename from java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java rename to java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java index 74d51053b..88274e596 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java @@ -15,10 +15,10 @@ import java.util.List; *

* Class SSGenotypeCall *

- * The single sample implementation of the genotype interface, which contains + * The mplementation of the genotype interface, which contains * extra information for the various genotype outputs */ -public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked { +public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked { private final char mRefBase; private final GenotypeLikelihoods mGenotypeLikelihoods; @@ -33,18 +33,20 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li private DiploidGenotype mRefGenotype = null; private DiploidGenotype mNextGenotype = null; - // are we best vrs ref or best vrs next - for internal consumption only - //private final boolean mBestVrsRef; + // the sample name, used to propulate the SampleBacked interface + private String mSampleName; /** * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object * - * @param location the location we're working with - * @param refBase the ref base - * @param gtlh the genotype likelihoods object - * @param pileup the pile-up of reads at the specified locus + * @param sampleName the sample name + * @param location the location we're working with + * @param refBase the ref base + * @param gtlh the genotype likelihoods object + * @param pileup the pile-up of reads at the specified locus */ - public SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) { + public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) { + mSampleName = sampleName; mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case mGenotypeLikelihoods = gtlh; mLocation = location; @@ -54,12 +56,14 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li /** * Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object * - * @param location the location we're working with - * @param refBase the ref base - * @param gtlh the genotype likelihoods object - * @param pileup the pile-up of reads at the specified locus + * @param sampleName the sample name + * @param location the location we're working with + * @param refBase the ref base + * @param gtlh the genotype likelihoods object + * @param pileup the pile-up of reads at the specified locus */ - SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) { + GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) { + mSampleName = sampleName; mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case mGenotypeLikelihoods = gtlh; mLocation = location; @@ -73,8 +77,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li if (other == null) return false; - if (other instanceof SSGenotypeCall) { - SSGenotypeCall otherCall = (SSGenotypeCall) other; + if (other instanceof GenotypeCall) { + GenotypeCall otherCall = (GenotypeCall) other; if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) { return false; @@ -90,8 +94,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li public String toString() { lazyEval(); return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s", - getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(), - getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods())); + getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(), + getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods())); } private void lazyEval() { @@ -122,25 +126,19 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest())); } - /** - * get the best genotype - */ + /** get the best genotype */ private DiploidGenotype getBestGenotype() { lazyEval(); return mGenotype; } - /** - * get the alternate genotype - */ + /** get the alternate genotype */ private DiploidGenotype getNextBest() { lazyEval(); return mNextGenotype; } - /** - * get the alternate genotype - */ + /** get the alternate genotype */ private DiploidGenotype getRefGenotype() { lazyEval(); return mRefGenotype; @@ -211,6 +209,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li * given the reference, are we a variant? (non-ref) * * @param ref the reference base or bases + * * @return true if we're a variant */ @Override @@ -270,7 +269,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li * @return an array in lexigraphical order of the likelihoods */ public Genotype getGenotype(DiploidGenotype x) { - return new SSGenotypeCall(mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x); + return new GenotypeCall(mSampleName, mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x); } /** @@ -292,6 +291,22 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li public double[] getPosteriors() { return this.mGenotypeLikelihoods.getPosteriors(); } + + /** @return returns the sample name for this genotype */ + @Override + public String getSampleName() { + return this.mSampleName; + } + + /** + * get the filtering string for this genotype + * + * @return a string, representing the genotyping value + */ + @Override + public String getFilteringValue() { + return "0"; + } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java index dbd58589a..731026c13 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java @@ -10,13 +10,15 @@ import org.broadinstitute.sting.gatk.walkers.ReadFilters; import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.ReadBackedPileup; import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import java.io.File; +import java.util.Arrays; @ReadFilters(ZeroMappingQualityReadFilter.class) -public class SingleSampleGenotyper extends LocusWalker { +public class SingleSampleGenotyper extends LocusWalker { // Control output settings @Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false) public File VARIANTS_FILE = null; @@ -47,6 +49,8 @@ public class SingleSampleGenotyper extends LocusWalker= LOD_THRESHOLD) { sum.nConfidentCalls++; //System.out.printf("Call %s%n", call); - sum.writer.addGenotypeCall(call); + if (sum.writer.supportsMulitSample()) { + sum.writer.addMultiSampleCall(Arrays.asList((Genotype)call)); + } else { + sum.writer.addGenotypeCall(call); + } } else sum.nNonConfidentCalls++; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 49b50928e..3a1606ff0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -25,19 +25,25 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; +import net.sf.samtools.SAMReadGroupRecord; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; -import org.broadinstitute.sting.gatk.contexts.*; -import org.broadinstitute.sting.gatk.filters.*; -import org.broadinstitute.sting.gatk.refdata.*; -import org.broadinstitute.sting.gatk.walkers.*; -import org.broadinstitute.sting.utils.*; -import org.broadinstitute.sting.utils.cmdLine.*; -import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.MissingReadGroupFilter; +import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.rodDbSNP; +import org.broadinstitute.sting.gatk.walkers.LocusWalker; +import org.broadinstitute.sting.gatk.walkers.ReadFilters; +import org.broadinstitute.sting.utils.BaseUtils; +import org.broadinstitute.sting.utils.cmdLine.Argument; +import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import java.io.File; -import java.util.*; - -import net.sf.samtools.SAMReadGroupRecord; +import java.util.HashSet; +import java.util.List; @ReadFilters({ZeroMappingQualityReadFilter.class, MissingReadGroupFilter.class}) @@ -91,7 +97,9 @@ public class UnifiedGenotyper extends LocusWalker { // create the output writer stream if ( VARIANTS_FILE != null ) - writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE); + writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, + "UnifiedGenotyper", + this.getToolkit().getArguments().referenceFile.getName()); else writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java index 5d5c3ea04..b79bb4716 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java @@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF; import org.broadinstitute.sting.gatk.walkers.LocusWalker; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall; import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.BaseUtils; @@ -80,7 +80,7 @@ public class CoverageEvalWalker extends LocusWalker, String> { List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets); AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets); - SSGenotypeCall call = SSG.map(tracker, ref, subContext); + GenotypeCall call = SSG.map(tracker, ref, subContext); String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference"; if (call != null) { diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java index 961014981..c1c6d163b 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java @@ -1,22 +1,19 @@ package org.broadinstitute.sting.playground.gatk.walkers; +import net.sf.samtools.SAMRecord; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.VariationRod; import org.broadinstitute.sting.gatk.walkers.*; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall; import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; -import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter; -import org.broadinstitute.sting.utils.StingException; -import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import org.broadinstitute.sting.utils.genotype.Variation; -import java.util.Set; -import java.util.List; import java.util.ArrayList; - -import net.sf.samtools.SAMRecord; +import java.util.List; +import java.util.Set; /** * Created by IntelliJ IDEA. @@ -73,10 +70,10 @@ public class DeNovoSNPWalker extends RefWalker{ } AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets); - SSGenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext); + GenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext); AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets); - SSGenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext); + GenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext); if (!parent1.isVariant(parent1.getReference()) && parent1.getNegLog10PError() > 5 && diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java deleted file mode 100644 index 9e087c77c..000000000 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java +++ /dev/null @@ -1,53 +0,0 @@ -package org.broadinstitute.sting.utils.genotype; - -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.genotype.Genotype; - -import java.util.List; - -/** - * @author aaron - *

- * Interface GenotypeCall - *

- * Genotype call interface, for indicating that a genotype is - * also a genotype call. - */ -public interface GenotypeCall extends Genotype { - /** - * gets the reference base - * - * @return the reference base we represent - */ - public char getReferencebase(); - - /** - * check to see if this called genotype is a variant, i.e. not homozygous reference - * @return true if it's not hom ref, false otherwise - */ - public boolean isVariation(); - - /** - * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base - * is located right after the specified location - * - * @return position on the genome wrapped in GenomeLoc object - */ - public GenomeLoc getLocation(); - - /** - * get the genotypes, sorted in asscending order by their ConfidenceScores (the best - * to the worst ConfidenceScores) - * - * @return a list of the genotypes, sorted by ConfidenceScores - */ - public List getGenotypes(); - - /** - * get the genotypes sorted lexigraphically - * - * @return a list of the genotypes sorted lexi - */ - public List getLexigraphicallySortedGenotypes(); - -} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 90d3210b8..8ae61f08a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -1,5 +1,7 @@ package org.broadinstitute.sting.utils.genotype; +import java.util.List; + /* * Copyright (c) 2009 The Broad Institute * @@ -50,4 +52,15 @@ public interface GenotypeWriter { /** finish writing, closing any open files. */ public void close(); + /** + * add a multi-sample call if we support it + * @param genotypes the list of genotypes, that are backed by sample information + */ + public void addMultiSampleCall(List genotypes); + + /** + * @return true if we support multisample, false otherwise + */ + public boolean supportsMulitSample(); + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index e2ad1968b..94c36da8f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter; import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriterAdapter; import java.io.File; import java.io.PrintStream; @@ -20,7 +21,7 @@ import java.io.PrintStream; public class GenotypeWriterFactory { /** available genotype writers */ public enum GENOTYPE_FORMAT { - GELI, GLF, GFF, TABULAR, GELI_BINARY; + GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF; } /** @@ -30,7 +31,7 @@ public class GenotypeWriterFactory { * @param destination the destination file * @return the genotype writer object */ - public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination) { + public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination, String source, String referenceName ) { switch (format) { case GLF: return new GLFWriter(header.toString(), destination); @@ -38,6 +39,8 @@ public class GenotypeWriterFactory { return new GeliTextWriter(destination); case GELI_BINARY: return new GeliAdapter(destination, header); + case VCF: + return new VCFGenotypeWriterAdapter(source, referenceName, destination); default: throw new StingException("Genotype writer " + format.toString() + " is not implemented"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java new file mode 100644 index 000000000..583e20dfb --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java @@ -0,0 +1,23 @@ +package org.broadinstitute.sting.utils.genotype; + +/** + * @author aaron + *

+ * Interface SampleBacked + *

+ * A descriptions should go here. Blame aaron if it's missing. + */ +public interface SampleBacked { + + /** + * + * @return returns the sample name for this genotype + */ + public String getSampleName(); + + /** + * get the filtering string for this genotype + * @return a string, representing the genotyping value + */ + public String getFilteringValue(); +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index c56ecfa87..c3fc7c1a3 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -5,7 +5,6 @@ import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMSequenceRecord; -import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.genotype.*; @@ -111,7 +110,7 @@ public class GeliAdapter implements GenotypeWriter { int readDepth = -1; double nextVrsBest = 0; double nextVrsRef = 0; - if (!(locus instanceof GenotypesBacked)) { + if (!(locus instanceof PosteriorsBacked)) { posteriors = new double[10]; Arrays.fill(posteriors, Double.MIN_VALUE); } else { @@ -128,8 +127,8 @@ public class GeliAdapter implements GenotypeWriter { if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality(); } } - SSGenotypeCall call = (SSGenotypeCall)locus; - LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG); + + LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG); this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()), (int)locus.getLocation().getStart(), ref, @@ -155,4 +154,20 @@ public class GeliAdapter implements GenotypeWriter { this.writer.close(); } } + + /** + * add a multi-sample call if we support it + * + * @param genotypes the list of genotypes, that are backed by sample information + */ + @Override + public void addMultiSampleCall( List genotypes) { + throw new UnsupportedOperationException("Geli binary doesn't support multisample calls"); + } + + /** @return true if we support multisample, false otherwise */ + @Override + public boolean supportsMulitSample() { + return false; + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index 58727e134..912d60a1d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -1,4 +1,3 @@ - package org.broadinstitute.sting.utils.genotype.geli; import net.sf.samtools.SAMRecord; @@ -66,17 +65,17 @@ public class GeliTextWriter implements GenotypeWriter { } else { posteriors = ((PosteriorsBacked) locus).getPosteriors(); double[] lks; - lks = Arrays.copyOf(posteriors,posteriors.length); + lks = Arrays.copyOf(posteriors, posteriors.length); Arrays.sort(lks); nextVrsBest = lks[9] - lks[8]; - if (ref != 'X') { - int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal()); + if (ref != 'X') { + int index = (DiploidGenotype.valueOf(Utils.dupString(ref, 2)).ordinal()); nextVrsRef = lks[9] - posteriors[index]; } } double maxMappingQual = 0; if (locus instanceof ReadBacked) { - List recs = ((ReadBacked)locus).getReads(); + List recs = ((ReadBacked) locus).getReads(); readDepth = recs.size(); for (SAMRecord rec : recs) { if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality(); @@ -118,4 +117,20 @@ public class GeliTextWriter implements GenotypeWriter { public void close() { mWriter.close(); } + + /** + * add a multi-sample call if we support it + * + * @param genotypes the list of genotypes, that are backed by sample information + */ + @Override + public void addMultiSampleCall(List genotypes) { + throw new UnsupportedOperationException("Geli text doesn't support multisample calls"); + } + + /** @return true if we support multisample, false otherwise */ + @Override + public boolean supportsMulitSample() { + return false; + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index 9f6e096c8..96583f2d3 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -278,6 +278,21 @@ public class GLFWriter implements GenotypeWriter { } + /** + * add a multi-sample call if we support it + * + * @param genotypes the list of genotypes, that are backed by sample information + */ + @Override + public void addMultiSampleCall(List genotypes) { + throw new UnsupportedOperationException("GLF writer doesn't support multisample calls"); + } + + /** @return true if we support multisample, false otherwise */ + @Override + public boolean supportsMulitSample() { + return false; + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java index 879e3f08b..ccf94193a 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java @@ -7,6 +7,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.PrintStream; import java.util.Arrays; +import java.util.List; /** @@ -95,4 +96,21 @@ public class TabularLFWriter implements GenotypeWriter { outStream.close(); } } + + + /** + * add a multi-sample call if we support it + * + * @param genotypes the list of genotypes, that are backed by sample information + */ + @Override + public void addMultiSampleCall(List genotypes) { + throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls"); + } + + /** @return true if we support multisample, false otherwise */ + @Override + public boolean supportsMulitSample() { + return false; + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java new file mode 100644 index 000000000..a76ea29ad --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -0,0 +1,247 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.GenotypeWriter; +import org.broadinstitute.sting.utils.genotype.ReadBacked; +import org.broadinstitute.sting.utils.genotype.SampleBacked; + +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + + +/** + * @author aaron + *

+ * Class VCFGenotypeWriterAdapter + *

+ * Adapt the VCF writter to the genotype output system + */ +public class VCFGenotypeWriterAdapter implements GenotypeWriter { + // our VCF objects + private VCFWriter mWriter = null; + private VCFHeader mHeader = null; + private String mSource; + private String mReferenceName; + private final Map mSampleNames = new HashMap(); + private boolean mInitialized = false; + private final File mFile; + + public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo) { + mReferenceName = referenceName; + mSource = source; + mFile = writeTo; + } + + + /** + * initialize this VCF writer + * + * @param genotypes the genotypes + * @param file the file location to write to + */ + private void lazyInitialize(List genotypes, File file) { + Map hInfo = new HashMap(); + List sampleNames = getSampleNames(genotypes); + + // setup the header fields + hInfo.put("format", "VCRv3.2"); + hInfo.put("source", mSource); + + // setup the sample names + mHeader = new VCFHeader(hInfo, sampleNames); + mWriter = new VCFWriter(mHeader, file); + mInitialized = true; + } + + /** + * get the samples names from genotype objects + * + * @param genotypes the genotype list + * + * @return a list of strings representing the sample names + */ + private static List getSampleNames(List genotypes) { + List strings = new ArrayList(); + for (Genotype genotype : genotypes) { + if (!(genotype instanceof SampleBacked)) + throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface"); + strings.add(((SampleBacked) genotype).getSampleName()); + } + return strings; + } + + /** + * Add a genotype, given a genotype locus + * + * @param call the locus to add + */ + @Override + public void addGenotypeCall(Genotype call) { + throw new UnsupportedOperationException("VCF is a multi sample fomat"); + } + + /** + * add a no call to the genotype file, if supported. + * + * @param position the position to add the no call at + */ + @Override + public void addNoCall(int position) { + throw new UnsupportedOperationException("VCF is a multi sample fomat"); + } + + /** finish writing, closing any open files. */ + @Override + public void close() { + mWriter.close(); + } + + /** + * add a multi-sample call if we support it + * + * @param genotypes the list of genotypes, that are backed by sample information + */ + @Override + public void addMultiSampleCall(List genotypes) { + if (!mInitialized) + lazyInitialize(genotypes, mFile); + + + VCFParamters params = new VCFParamters(); + params.addFormatItem("GT"); + + for (Genotype gtype : genotypes) { + // setup the parameters + params.setLocations(gtype.getLocation(), gtype.getReference()); + + Map map = new HashMap(); + if (!(gtype instanceof SampleBacked)) { + throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface"); + } + + // calculate the RMS mapping qualities and the read depth + if (gtype instanceof ReadBacked) { + int readDepth = ((ReadBacked) gtype).getReadCount(); + map.put("RD", String.valueOf(readDepth)); + params.addFormatItem("RD"); + } + double qual = gtype.getNegLog10PError(); + map.put("GQ", String.format("%.2f", qual)); + params.addFormatItem("GQ"); + + List alleles = new ArrayList(); + for (char allele : gtype.getBases().toCharArray()) { + alleles.add(String.valueOf(allele)); + params.addAlternateBase(allele); + } + + VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(), + alleles, + VCFGenotypeRecord.PHASE.UNPHASED, + map); + params.addGenotypeRecord(record); + } + + Map infoFields = new HashMap(); + + VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), + params.getContig(), + params.getPosition(), + ".", + params.getAlternateBases(), + 0, /* BETTER VALUE HERE */ + ".", + infoFields, + params.getFormatString(), + params.getGenotypesRecords()); + + mWriter.addRecord(vcfRecord); + } + + /** @return true if we support multisample, false otherwise */ + @Override + public boolean supportsMulitSample() { + return true; + } + + + /** + * a helper class, which performs a lot of the safety checks on the parameters + * we feed to the VCF (like ensuring the same position for each genotype in a call). + */ + class VCFParamters { + private char referenceBase = '0'; + private int position = 0; + private String contig = null; + private boolean initialized = false; + private List genotypesRecord = new ArrayList(); + private List formatList = new ArrayList(); + private List alternateBases = new ArrayList(); + + public void setLocations(GenomeLoc location, char refBase) { + // if we haven't set it up, we initialize the object + if (!initialized) { + initialized = true; + this.contig = location.getContig(); + this.position = (int)location.getStart(); + if (location.getStart() != location.getStop()) { + throw new IllegalArgumentException("The start and stop locations must be the same"); + } + this.referenceBase = refBase; + } else { + if (contig.equals(this.contig)) + throw new IllegalArgumentException("The contig name has to be the same at a single locus"); + if (position != this.position) + throw new IllegalArgumentException("The position has to be the same at a single locus"); + if (refBase != this.referenceBase) + throw new IllegalArgumentException("The reference base name has to be the same at a single locus"); + } + } + + /** @return get the position */ + public int getPosition() { + return position; + } + + /** @return get the contig name */ + public String getContig() { + return contig; + } + + /** @return get the reference base */ + public char getReferenceBase() { + return referenceBase; + } + + public void addGenotypeRecord(VCFGenotypeRecord record) { + this.genotypesRecord.add(record); + } + + public void addFormatItem(String item) { + if (!formatList.contains(item)) + formatList.add(item); + } + + public void addAlternateBase(char base) { + if (!alternateBases.contains(String.valueOf(base)) && base != this.getReferenceBase()) + alternateBases.add(String.valueOf(base)); + } + + public List getAlternateBases() { + return alternateBases; + } + + public String getFormatString() { + return Utils.join(";", formatList); + } + + public List getGenotypesRecords() { + return genotypesRecord; + } + } +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 2682e5a65..f2b1d65f7 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -216,8 +216,8 @@ public class VCFRecord { */ public Map getInfoValues() { if (this.mInfoFields.size() < 1) { - Map map = new HashMap(); - map.put(".",""); + Map map = new HashMap(); + map.put(".", ""); return map; } return this.mInfoFields; @@ -305,6 +305,10 @@ public class VCFRecord { this.mInfoFields.put(key, value); } + /** + * the generation of a string representation, which is used by the VCF writer + * @return a string + */ public String toString() { StringBuilder builder = new StringBuilder(); @@ -340,7 +344,7 @@ public class VCFRecord { if (rec.getFields().get(s).equals("")) continue; builder.append(":"); builder.append(rec.getFields().get(s)); - } + } } } return builder.toString(); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index 3da5c2757..773bf05f3 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -70,14 +70,16 @@ public class VCFWriter { } + + /** attempt to close the VCF file */ public void close() { try { + mWriter.flush(); mWriter.close(); } catch (IOException e) { throw new RuntimeException("Unable to close VCFFile"); } } - } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCallTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java similarity index 82% rename from java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCallTest.java rename to java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java index 03c623283..27de8d5ff 100644 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCallTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java @@ -22,7 +22,7 @@ import java.util.List; *

* test the SS Genotype call class */ -public class SSGenotypeCallTest extends BaseTest { +public class GenotypeCallTest extends BaseTest { // we need a fake GenotypeLikelihoods class public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods { @@ -79,21 +79,21 @@ public class SSGenotypeCallTest extends BaseTest { @Test public void testBestVrsRefSame() { Pair myPair = makePileup(); - SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); + GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001); } @Test public void testBestVrsRef2() { Pair myPair = makePileup(); - SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first); + GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first); Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001); } @Test public void testBestVrsRef3() { Pair myPair = makePileup(); - SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first); + GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first); Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001); } @@ -101,21 +101,21 @@ public class SSGenotypeCallTest extends BaseTest { @Test public void testBestVrsNextSame() { Pair myPair = makePileup(); - SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); + GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001); } @Test public void testBestVrsNext2() { Pair myPair = makePileup(); - SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); + GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first); Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001); } @Test public void testBestVrsNext3() { Pair myPair = makePileup(); - SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first); + GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first); Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001); } }