diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java
index 3bb73c55b..4689d31fe 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/EMGenotypeCalculationModel.java
@@ -1,15 +1,16 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
-import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
-import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
+import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.ReadBackedPileup;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
-import java.util.*;
-
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMReadGroupRecord;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
@@ -79,14 +80,15 @@ public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
// for now, we need to special-case single sample mode
if ( samples.size() == 1 ) {
- UnifiedGenotypeLikelihoods UGL = GLs.get(samples.iterator().next());
+ String sample = samples.iterator().next();
+ UnifiedGenotypeLikelihoods UGL = GLs.get(sample);
// if there were no good bases, the likelihoods object wouldn't exist
if ( UGL == null )
return false;
callsMetrics.nCalledBases++;
UGL.setPriors(priors);
- SSGenotypeCall call = new SSGenotypeCall(context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context));
+ GenotypeCall call = new GenotypeCall(sample,context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context));
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
@@ -183,7 +185,7 @@ public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
*/
- //return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
+ //return new GenotypeCall(context.getLocation(), ref,gl, pileup);
return true;
}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java
similarity index 78%
rename from java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java
rename to java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java
index 74d51053b..88274e596 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCall.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCall.java
@@ -15,10 +15,10 @@ import java.util.List;
*
* Class SSGenotypeCall
*
- * The single sample implementation of the genotype interface, which contains
+ * The mplementation of the genotype interface, which contains
* extra information for the various genotype outputs
*/
-public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked {
+public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked {
private final char mRefBase;
private final GenotypeLikelihoods mGenotypeLikelihoods;
@@ -33,18 +33,20 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
private DiploidGenotype mRefGenotype = null;
private DiploidGenotype mNextGenotype = null;
- // are we best vrs ref or best vrs next - for internal consumption only
- //private final boolean mBestVrsRef;
+ // the sample name, used to propulate the SampleBacked interface
+ private String mSampleName;
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
- * @param location the location we're working with
- * @param refBase the ref base
- * @param gtlh the genotype likelihoods object
- * @param pileup the pile-up of reads at the specified locus
+ * @param sampleName the sample name
+ * @param location the location we're working with
+ * @param refBase the ref base
+ * @param gtlh the genotype likelihoods object
+ * @param pileup the pile-up of reads at the specified locus
*/
- public SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
+ public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
+ mSampleName = sampleName;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh;
mLocation = location;
@@ -54,12 +56,14 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
- * @param location the location we're working with
- * @param refBase the ref base
- * @param gtlh the genotype likelihoods object
- * @param pileup the pile-up of reads at the specified locus
+ * @param sampleName the sample name
+ * @param location the location we're working with
+ * @param refBase the ref base
+ * @param gtlh the genotype likelihoods object
+ * @param pileup the pile-up of reads at the specified locus
*/
- SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
+ GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
+ mSampleName = sampleName;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh;
mLocation = location;
@@ -73,8 +77,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
if (other == null)
return false;
- if (other instanceof SSGenotypeCall) {
- SSGenotypeCall otherCall = (SSGenotypeCall) other;
+ if (other instanceof GenotypeCall) {
+ GenotypeCall otherCall = (GenotypeCall) other;
if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) {
return false;
@@ -90,8 +94,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
public String toString() {
lazyEval();
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
- getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
- getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
+ getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
+ getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
}
private void lazyEval() {
@@ -122,25 +126,19 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
}
- /**
- * get the best genotype
- */
+ /** get the best genotype */
private DiploidGenotype getBestGenotype() {
lazyEval();
return mGenotype;
}
- /**
- * get the alternate genotype
- */
+ /** get the alternate genotype */
private DiploidGenotype getNextBest() {
lazyEval();
return mNextGenotype;
}
- /**
- * get the alternate genotype
- */
+ /** get the alternate genotype */
private DiploidGenotype getRefGenotype() {
lazyEval();
return mRefGenotype;
@@ -211,6 +209,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
+ *
* @return true if we're a variant
*/
@Override
@@ -270,7 +269,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* @return an array in lexigraphical order of the likelihoods
*/
public Genotype getGenotype(DiploidGenotype x) {
- return new SSGenotypeCall(mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
+ return new GenotypeCall(mSampleName, mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
}
/**
@@ -292,6 +291,22 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
public double[] getPosteriors() {
return this.mGenotypeLikelihoods.getPosteriors();
}
+
+ /** @return returns the sample name for this genotype */
+ @Override
+ public String getSampleName() {
+ return this.mSampleName;
+ }
+
+ /**
+ * get the filtering string for this genotype
+ *
+ * @return a string, representing the genotyping value
+ */
+ @Override
+ public String getFilteringValue() {
+ return "0";
+ }
}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java
index dbd58589a..731026c13 100644
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SingleSampleGenotyper.java
@@ -10,13 +10,15 @@ import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.cmdLine.Argument;
+import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import java.io.File;
+import java.util.Arrays;
@ReadFilters(ZeroMappingQualityReadFilter.class)
-public class SingleSampleGenotyper extends LocusWalker {
+public class SingleSampleGenotyper extends LocusWalker {
// Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
public File VARIANTS_FILE = null;
@@ -47,6 +49,8 @@ public class SingleSampleGenotyper extends LocusWalker= LOD_THRESHOLD) {
sum.nConfidentCalls++;
//System.out.printf("Call %s%n", call);
- sum.writer.addGenotypeCall(call);
+ if (sum.writer.supportsMulitSample()) {
+ sum.writer.addMultiSampleCall(Arrays.asList((Genotype)call));
+ } else {
+ sum.writer.addGenotypeCall(call);
+ }
} else
sum.nNonConfidentCalls++;
}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index 49b50928e..3a1606ff0 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -25,19 +25,25 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
+import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.gatk.contexts.*;
-import org.broadinstitute.sting.gatk.filters.*;
-import org.broadinstitute.sting.gatk.refdata.*;
-import org.broadinstitute.sting.gatk.walkers.*;
-import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.cmdLine.*;
-import org.broadinstitute.sting.utils.genotype.*;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.filters.MissingReadGroupFilter;
+import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.gatk.walkers.ReadFilters;
+import org.broadinstitute.sting.utils.BaseUtils;
+import org.broadinstitute.sting.utils.cmdLine.Argument;
+import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
+import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
+import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import java.io.File;
-import java.util.*;
-
-import net.sf.samtools.SAMReadGroupRecord;
+import java.util.HashSet;
+import java.util.List;
@ReadFilters({ZeroMappingQualityReadFilter.class, MissingReadGroupFilter.class})
@@ -91,7 +97,9 @@ public class UnifiedGenotyper extends LocusWalker {
// create the output writer stream
if ( VARIANTS_FILE != null )
- writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE);
+ writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
+ "UnifiedGenotyper",
+ this.getToolkit().getArguments().referenceFile.getName());
else
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out);
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java
index 5d5c3ea04..b79bb4716 100755
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/CoverageEvalWalker.java
@@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
-import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
+import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.BaseUtils;
@@ -80,7 +80,7 @@ public class CoverageEvalWalker extends LocusWalker, String> {
List sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
- SSGenotypeCall call = SSG.map(tracker, ref, subContext);
+ GenotypeCall call = SSG.map(tracker, ref, subContext);
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
if (call != null) {
diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java
index 961014981..c1c6d163b 100755
--- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java
+++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/DeNovoSNPWalker.java
@@ -1,22 +1,19 @@
package org.broadinstitute.sting.playground.gatk.walkers;
+import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariationRod;
import org.broadinstitute.sting.gatk.walkers.*;
+import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
-import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
-import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
-import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
-import java.util.Set;
-import java.util.List;
import java.util.ArrayList;
-
-import net.sf.samtools.SAMRecord;
+import java.util.List;
+import java.util.Set;
/**
* Created by IntelliJ IDEA.
@@ -73,10 +70,10 @@ public class DeNovoSNPWalker extends RefWalker{
}
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
- SSGenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext);
+ GenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext);
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
- SSGenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext);
+ GenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext);
if (!parent1.isVariant(parent1.getReference()) &&
parent1.getNegLog10PError() > 5 &&
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java
deleted file mode 100644
index 9e087c77c..000000000
--- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeCall.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package org.broadinstitute.sting.utils.genotype;
-
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.genotype.Genotype;
-
-import java.util.List;
-
-/**
- * @author aaron
- *
- * Interface GenotypeCall
- *
- * Genotype call interface, for indicating that a genotype is
- * also a genotype call.
- */
-public interface GenotypeCall extends Genotype {
- /**
- * gets the reference base
- *
- * @return the reference base we represent
- */
- public char getReferencebase();
-
- /**
- * check to see if this called genotype is a variant, i.e. not homozygous reference
- * @return true if it's not hom ref, false otherwise
- */
- public boolean isVariation();
-
- /**
- * Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
- * is located right after the specified location
- *
- * @return position on the genome wrapped in GenomeLoc object
- */
- public GenomeLoc getLocation();
-
- /**
- * get the genotypes, sorted in asscending order by their ConfidenceScores (the best
- * to the worst ConfidenceScores)
- *
- * @return a list of the genotypes, sorted by ConfidenceScores
- */
- public List getGenotypes();
-
- /**
- * get the genotypes sorted lexigraphically
- *
- * @return a list of the genotypes sorted lexi
- */
- public List getLexigraphicallySortedGenotypes();
-
-}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java
index 90d3210b8..8ae61f08a 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java
@@ -1,5 +1,7 @@
package org.broadinstitute.sting.utils.genotype;
+import java.util.List;
+
/*
* Copyright (c) 2009 The Broad Institute
*
@@ -50,4 +52,15 @@ public interface GenotypeWriter {
/** finish writing, closing any open files. */
public void close();
+ /**
+ * add a multi-sample call if we support it
+ * @param genotypes the list of genotypes, that are backed by sample information
+ */
+ public void addMultiSampleCall(List genotypes);
+
+ /**
+ * @return true if we support multisample, false otherwise
+ */
+ public boolean supportsMulitSample();
+
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java
index e2ad1968b..94c36da8f 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java
@@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
+import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriterAdapter;
import java.io.File;
import java.io.PrintStream;
@@ -20,7 +21,7 @@ import java.io.PrintStream;
public class GenotypeWriterFactory {
/** available genotype writers */
public enum GENOTYPE_FORMAT {
- GELI, GLF, GFF, TABULAR, GELI_BINARY;
+ GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF;
}
/**
@@ -30,7 +31,7 @@ public class GenotypeWriterFactory {
* @param destination the destination file
* @return the genotype writer object
*/
- public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination) {
+ public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination, String source, String referenceName ) {
switch (format) {
case GLF:
return new GLFWriter(header.toString(), destination);
@@ -38,6 +39,8 @@ public class GenotypeWriterFactory {
return new GeliTextWriter(destination);
case GELI_BINARY:
return new GeliAdapter(destination, header);
+ case VCF:
+ return new VCFGenotypeWriterAdapter(source, referenceName, destination);
default:
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java
new file mode 100644
index 000000000..583e20dfb
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/genotype/SampleBacked.java
@@ -0,0 +1,23 @@
+package org.broadinstitute.sting.utils.genotype;
+
+/**
+ * @author aaron
+ *
+ * Interface SampleBacked
+ *
+ * A descriptions should go here. Blame aaron if it's missing.
+ */
+public interface SampleBacked {
+
+ /**
+ *
+ * @return returns the sample name for this genotype
+ */
+ public String getSampleName();
+
+ /**
+ * get the filtering string for this genotype
+ * @return a string, representing the genotyping value
+ */
+ public String getFilteringValue();
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java
index c56ecfa87..c3fc7c1a3 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java
@@ -5,7 +5,6 @@ import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
-import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.*;
@@ -111,7 +110,7 @@ public class GeliAdapter implements GenotypeWriter {
int readDepth = -1;
double nextVrsBest = 0;
double nextVrsRef = 0;
- if (!(locus instanceof GenotypesBacked)) {
+ if (!(locus instanceof PosteriorsBacked)) {
posteriors = new double[10];
Arrays.fill(posteriors, Double.MIN_VALUE);
} else {
@@ -128,8 +127,8 @@ public class GeliAdapter implements GenotypeWriter {
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
}
}
- SSGenotypeCall call = (SSGenotypeCall)locus;
- LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
+
+ LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
(int)locus.getLocation().getStart(),
ref,
@@ -155,4 +154,20 @@ public class GeliAdapter implements GenotypeWriter {
this.writer.close();
}
}
+
+ /**
+ * add a multi-sample call if we support it
+ *
+ * @param genotypes the list of genotypes, that are backed by sample information
+ */
+ @Override
+ public void addMultiSampleCall( List genotypes) {
+ throw new UnsupportedOperationException("Geli binary doesn't support multisample calls");
+ }
+
+ /** @return true if we support multisample, false otherwise */
+ @Override
+ public boolean supportsMulitSample() {
+ return false;
+ }
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java
index 58727e134..912d60a1d 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java
@@ -1,4 +1,3 @@
-
package org.broadinstitute.sting.utils.genotype.geli;
import net.sf.samtools.SAMRecord;
@@ -66,17 +65,17 @@ public class GeliTextWriter implements GenotypeWriter {
} else {
posteriors = ((PosteriorsBacked) locus).getPosteriors();
double[] lks;
- lks = Arrays.copyOf(posteriors,posteriors.length);
+ lks = Arrays.copyOf(posteriors, posteriors.length);
Arrays.sort(lks);
nextVrsBest = lks[9] - lks[8];
- if (ref != 'X') {
- int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal());
+ if (ref != 'X') {
+ int index = (DiploidGenotype.valueOf(Utils.dupString(ref, 2)).ordinal());
nextVrsRef = lks[9] - posteriors[index];
}
}
double maxMappingQual = 0;
if (locus instanceof ReadBacked) {
- List recs = ((ReadBacked)locus).getReads();
+ List recs = ((ReadBacked) locus).getReads();
readDepth = recs.size();
for (SAMRecord rec : recs) {
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
@@ -118,4 +117,20 @@ public class GeliTextWriter implements GenotypeWriter {
public void close() {
mWriter.close();
}
+
+ /**
+ * add a multi-sample call if we support it
+ *
+ * @param genotypes the list of genotypes, that are backed by sample information
+ */
+ @Override
+ public void addMultiSampleCall(List genotypes) {
+ throw new UnsupportedOperationException("Geli text doesn't support multisample calls");
+ }
+
+ /** @return true if we support multisample, false otherwise */
+ @Override
+ public boolean supportsMulitSample() {
+ return false;
+ }
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
index 9f6e096c8..96583f2d3 100755
--- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java
@@ -278,6 +278,21 @@ public class GLFWriter implements GenotypeWriter {
}
+ /**
+ * add a multi-sample call if we support it
+ *
+ * @param genotypes the list of genotypes, that are backed by sample information
+ */
+ @Override
+ public void addMultiSampleCall(List genotypes) {
+ throw new UnsupportedOperationException("GLF writer doesn't support multisample calls");
+ }
+
+ /** @return true if we support multisample, false otherwise */
+ @Override
+ public boolean supportsMulitSample() {
+ return false;
+ }
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java
index 879e3f08b..ccf94193a 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/tabular/TabularLFWriter.java
@@ -7,6 +7,7 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Arrays;
+import java.util.List;
/**
@@ -95,4 +96,21 @@ public class TabularLFWriter implements GenotypeWriter {
outStream.close();
}
}
+
+
+ /**
+ * add a multi-sample call if we support it
+ *
+ * @param genotypes the list of genotypes, that are backed by sample information
+ */
+ @Override
+ public void addMultiSampleCall(List genotypes) {
+ throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls");
+ }
+
+ /** @return true if we support multisample, false otherwise */
+ @Override
+ public boolean supportsMulitSample() {
+ return false;
+ }
}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java
new file mode 100644
index 000000000..a76ea29ad
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java
@@ -0,0 +1,247 @@
+package org.broadinstitute.sting.utils.genotype.vcf;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.genotype.Genotype;
+import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
+import org.broadinstitute.sting.utils.genotype.ReadBacked;
+import org.broadinstitute.sting.utils.genotype.SampleBacked;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * @author aaron
+ *
+ * Class VCFGenotypeWriterAdapter
+ *
+ * Adapt the VCF writter to the genotype output system
+ */
+public class VCFGenotypeWriterAdapter implements GenotypeWriter {
+ // our VCF objects
+ private VCFWriter mWriter = null;
+ private VCFHeader mHeader = null;
+ private String mSource;
+ private String mReferenceName;
+ private final Map mSampleNames = new HashMap();
+ private boolean mInitialized = false;
+ private final File mFile;
+
+ public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo) {
+ mReferenceName = referenceName;
+ mSource = source;
+ mFile = writeTo;
+ }
+
+
+ /**
+ * initialize this VCF writer
+ *
+ * @param genotypes the genotypes
+ * @param file the file location to write to
+ */
+ private void lazyInitialize(List genotypes, File file) {
+ Map hInfo = new HashMap();
+ List sampleNames = getSampleNames(genotypes);
+
+ // setup the header fields
+ hInfo.put("format", "VCRv3.2");
+ hInfo.put("source", mSource);
+
+ // setup the sample names
+ mHeader = new VCFHeader(hInfo, sampleNames);
+ mWriter = new VCFWriter(mHeader, file);
+ mInitialized = true;
+ }
+
+ /**
+ * get the samples names from genotype objects
+ *
+ * @param genotypes the genotype list
+ *
+ * @return a list of strings representing the sample names
+ */
+ private static List getSampleNames(List genotypes) {
+ List strings = new ArrayList();
+ for (Genotype genotype : genotypes) {
+ if (!(genotype instanceof SampleBacked))
+ throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
+ strings.add(((SampleBacked) genotype).getSampleName());
+ }
+ return strings;
+ }
+
+ /**
+ * Add a genotype, given a genotype locus
+ *
+ * @param call the locus to add
+ */
+ @Override
+ public void addGenotypeCall(Genotype call) {
+ throw new UnsupportedOperationException("VCF is a multi sample fomat");
+ }
+
+ /**
+ * add a no call to the genotype file, if supported.
+ *
+ * @param position the position to add the no call at
+ */
+ @Override
+ public void addNoCall(int position) {
+ throw new UnsupportedOperationException("VCF is a multi sample fomat");
+ }
+
+ /** finish writing, closing any open files. */
+ @Override
+ public void close() {
+ mWriter.close();
+ }
+
+ /**
+ * add a multi-sample call if we support it
+ *
+ * @param genotypes the list of genotypes, that are backed by sample information
+ */
+ @Override
+ public void addMultiSampleCall(List genotypes) {
+ if (!mInitialized)
+ lazyInitialize(genotypes, mFile);
+
+
+ VCFParamters params = new VCFParamters();
+ params.addFormatItem("GT");
+
+ for (Genotype gtype : genotypes) {
+ // setup the parameters
+ params.setLocations(gtype.getLocation(), gtype.getReference());
+
+ Map map = new HashMap();
+ if (!(gtype instanceof SampleBacked)) {
+ throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
+ }
+
+ // calculate the RMS mapping qualities and the read depth
+ if (gtype instanceof ReadBacked) {
+ int readDepth = ((ReadBacked) gtype).getReadCount();
+ map.put("RD", String.valueOf(readDepth));
+ params.addFormatItem("RD");
+ }
+ double qual = gtype.getNegLog10PError();
+ map.put("GQ", String.format("%.2f", qual));
+ params.addFormatItem("GQ");
+
+ List alleles = new ArrayList();
+ for (char allele : gtype.getBases().toCharArray()) {
+ alleles.add(String.valueOf(allele));
+ params.addAlternateBase(allele);
+ }
+
+ VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
+ alleles,
+ VCFGenotypeRecord.PHASE.UNPHASED,
+ map);
+ params.addGenotypeRecord(record);
+ }
+
+ Map infoFields = new HashMap();
+
+ VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
+ params.getContig(),
+ params.getPosition(),
+ ".",
+ params.getAlternateBases(),
+ 0, /* BETTER VALUE HERE */
+ ".",
+ infoFields,
+ params.getFormatString(),
+ params.getGenotypesRecords());
+
+ mWriter.addRecord(vcfRecord);
+ }
+
+ /** @return true if we support multisample, false otherwise */
+ @Override
+ public boolean supportsMulitSample() {
+ return true;
+ }
+
+
+ /**
+ * a helper class, which performs a lot of the safety checks on the parameters
+ * we feed to the VCF (like ensuring the same position for each genotype in a call).
+ */
+ class VCFParamters {
+ private char referenceBase = '0';
+ private int position = 0;
+ private String contig = null;
+ private boolean initialized = false;
+ private List genotypesRecord = new ArrayList();
+ private List formatList = new ArrayList();
+ private List alternateBases = new ArrayList();
+
+ public void setLocations(GenomeLoc location, char refBase) {
+ // if we haven't set it up, we initialize the object
+ if (!initialized) {
+ initialized = true;
+ this.contig = location.getContig();
+ this.position = (int)location.getStart();
+ if (location.getStart() != location.getStop()) {
+ throw new IllegalArgumentException("The start and stop locations must be the same");
+ }
+ this.referenceBase = refBase;
+ } else {
+ if (contig.equals(this.contig))
+ throw new IllegalArgumentException("The contig name has to be the same at a single locus");
+ if (position != this.position)
+ throw new IllegalArgumentException("The position has to be the same at a single locus");
+ if (refBase != this.referenceBase)
+ throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
+ }
+ }
+
+ /** @return get the position */
+ public int getPosition() {
+ return position;
+ }
+
+ /** @return get the contig name */
+ public String getContig() {
+ return contig;
+ }
+
+ /** @return get the reference base */
+ public char getReferenceBase() {
+ return referenceBase;
+ }
+
+ public void addGenotypeRecord(VCFGenotypeRecord record) {
+ this.genotypesRecord.add(record);
+ }
+
+ public void addFormatItem(String item) {
+ if (!formatList.contains(item))
+ formatList.add(item);
+ }
+
+ public void addAlternateBase(char base) {
+ if (!alternateBases.contains(String.valueOf(base)) && base != this.getReferenceBase())
+ alternateBases.add(String.valueOf(base));
+ }
+
+ public List getAlternateBases() {
+ return alternateBases;
+ }
+
+ public String getFormatString() {
+ return Utils.join(";", formatList);
+ }
+
+ public List getGenotypesRecords() {
+ return genotypesRecord;
+ }
+ }
+}
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
index 2682e5a65..f2b1d65f7 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java
@@ -216,8 +216,8 @@ public class VCFRecord {
*/
public Map getInfoValues() {
if (this.mInfoFields.size() < 1) {
- Map map = new HashMap();
- map.put(".","");
+ Map map = new HashMap();
+ map.put(".", "");
return map;
}
return this.mInfoFields;
@@ -305,6 +305,10 @@ public class VCFRecord {
this.mInfoFields.put(key, value);
}
+ /**
+ * the generation of a string representation, which is used by the VCF writer
+ * @return a string
+ */
public String toString() {
StringBuilder builder = new StringBuilder();
@@ -340,7 +344,7 @@ public class VCFRecord {
if (rec.getFields().get(s).equals("")) continue;
builder.append(":");
builder.append(rec.getFields().get(s));
- }
+ }
}
}
return builder.toString();
diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
index 3da5c2757..773bf05f3 100644
--- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
+++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java
@@ -70,14 +70,16 @@ public class VCFWriter {
}
+
+
/** attempt to close the VCF file */
public void close() {
try {
+ mWriter.flush();
mWriter.close();
} catch (IOException e) {
throw new RuntimeException("Unable to close VCFFile");
}
}
-
}
diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCallTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java
similarity index 82%
rename from java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCallTest.java
rename to java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java
index 03c623283..27de8d5ff 100644
--- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/SSGenotypeCallTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeCallTest.java
@@ -22,7 +22,7 @@ import java.util.List;
*
* test the SS Genotype call class
*/
-public class SSGenotypeCallTest extends BaseTest {
+public class GenotypeCallTest extends BaseTest {
// we need a fake GenotypeLikelihoods class
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
@@ -79,21 +79,21 @@ public class SSGenotypeCallTest extends BaseTest {
@Test
public void testBestVrsRefSame() {
Pair myPair = makePileup();
- SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
+ GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsRef2() {
Pair myPair = makePileup();
- SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
+ GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsRef3() {
Pair myPair = makePileup();
- SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
+ GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
}
@@ -101,21 +101,21 @@ public class SSGenotypeCallTest extends BaseTest {
@Test
public void testBestVrsNextSame() {
Pair myPair = makePileup();
- SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
+ GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsNext2() {
Pair myPair = makePileup();
- SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
+ GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsNext3() {
Pair myPair = makePileup();
- SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
+ GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
}
}