VCF can now be emitted from SSG. The basic's are there (the genotype, read depth, our error estimate), but more fields need to be added for each record as nessasary.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1797 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-10-08 19:50:04 +00:00
parent 95f24d671d
commit 98e3a0bf1a
18 changed files with 494 additions and 153 deletions

View File

@ -1,15 +1,16 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import java.util.*;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMReadGroupRecord;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
@ -79,14 +80,15 @@ public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
// for now, we need to special-case single sample mode
if ( samples.size() == 1 ) {
UnifiedGenotypeLikelihoods UGL = GLs.get(samples.iterator().next());
String sample = samples.iterator().next();
UnifiedGenotypeLikelihoods UGL = GLs.get(sample);
// if there were no good bases, the likelihoods object wouldn't exist
if ( UGL == null )
return false;
callsMetrics.nCalledBases++;
UGL.setPriors(priors);
SSGenotypeCall call = new SSGenotypeCall(context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context));
GenotypeCall call = new GenotypeCall(sample,context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context));
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
@ -183,7 +185,7 @@ public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
*/
//return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
//return new GenotypeCall(context.getLocation(), ref,gl, pileup);
return true;
}

View File

@ -15,10 +15,10 @@ import java.util.List;
* <p/>
* Class SSGenotypeCall
* <p/>
* The single sample implementation of the genotype interface, which contains
* The mplementation of the genotype interface, which contains
* extra information for the various genotype outputs
*/
public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked {
public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked {
private final char mRefBase;
private final GenotypeLikelihoods mGenotypeLikelihoods;
@ -33,18 +33,20 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
private DiploidGenotype mRefGenotype = null;
private DiploidGenotype mNextGenotype = null;
// are we best vrs ref or best vrs next - for internal consumption only
//private final boolean mBestVrsRef;
// the sample name, used to propulate the SampleBacked interface
private String mSampleName;
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
* @param sampleName the sample name
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
*/
public SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
mSampleName = sampleName;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh;
mLocation = location;
@ -54,12 +56,14 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
/**
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
*
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
* @param sampleName the sample name
* @param location the location we're working with
* @param refBase the ref base
* @param gtlh the genotype likelihoods object
* @param pileup the pile-up of reads at the specified locus
*/
SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
mSampleName = sampleName;
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
mGenotypeLikelihoods = gtlh;
mLocation = location;
@ -73,8 +77,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
if (other == null)
return false;
if (other instanceof SSGenotypeCall) {
SSGenotypeCall otherCall = (SSGenotypeCall) other;
if (other instanceof GenotypeCall) {
GenotypeCall otherCall = (GenotypeCall) other;
if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) {
return false;
@ -90,8 +94,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
public String toString() {
lazyEval();
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
}
private void lazyEval() {
@ -122,25 +126,19 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
}
/**
* get the best genotype
*/
/** get the best genotype */
private DiploidGenotype getBestGenotype() {
lazyEval();
return mGenotype;
}
/**
* get the alternate genotype
*/
/** get the alternate genotype */
private DiploidGenotype getNextBest() {
lazyEval();
return mNextGenotype;
}
/**
* get the alternate genotype
*/
/** get the alternate genotype */
private DiploidGenotype getRefGenotype() {
lazyEval();
return mRefGenotype;
@ -211,6 +209,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* given the reference, are we a variant? (non-ref)
*
* @param ref the reference base or bases
*
* @return true if we're a variant
*/
@Override
@ -270,7 +269,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
* @return an array in lexigraphical order of the likelihoods
*/
public Genotype getGenotype(DiploidGenotype x) {
return new SSGenotypeCall(mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
return new GenotypeCall(mSampleName, mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
}
/**
@ -292,6 +291,22 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
public double[] getPosteriors() {
return this.mGenotypeLikelihoods.getPosteriors();
}
/** @return returns the sample name for this genotype */
@Override
public String getSampleName() {
return this.mSampleName;
}
/**
* get the filtering string for this genotype
*
* @return a string, representing the genotyping value
*/
@Override
public String getFilteringValue() {
return "0";
}
}

View File

@ -10,13 +10,15 @@ import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import java.io.File;
import java.util.Arrays;
@ReadFilters(ZeroMappingQualityReadFilter.class)
public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSampleGenotyper.CallResult> {
public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, SingleSampleGenotyper.CallResult> {
// Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
public File VARIANTS_FILE = null;
@ -47,6 +49,8 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
@Argument(fullName = "disableCache", doc = "[ADVANCED] If true, we won't use the caching system. This argument is for testing purposes only", required = false)
public boolean disableCache = false;
@Argument(fullName = "sample_name", shortName = "sn", doc = "What the label should be for the emited call track. Used by some genotype formats", required = false)
public String SAMPLE_NAME = null;
public class CallResult {
long nConfidentCalls = 0;
@ -57,14 +61,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
CallResult(GenotypeWriter writer) {
this.writer = writer;
}
public String toString() {
return String.format("SSG: %d confident and %d non-confident calls were made at %d bases",
nConfidentCalls, nNonConfidentCalls, nCalledBases);
return String.format("SSG: %d confident and %d non-confident calls were made at %d bases",
nConfidentCalls, nNonConfidentCalls, nCalledBases);
}
}
/**
/**
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
*
* @param tracker the meta data tracker
@ -79,32 +83,37 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
/** Initialize the walker with some sensible defaults */
public void initialize() {
//GenotypeLikelihoods.clearCache();
// nothing to do
if (SAMPLE_NAME == null) {
if (GenomeAnalysisEngine.instance.getArguments().samFiles.size() == 1) {
SAMPLE_NAME = GenomeAnalysisEngine.instance.getArguments().samFiles.get(0).getName();
} else {
SAMPLE_NAME = "Combined_BAM";
}
}
}
/**
* Compute at a given locus.
*
* @param tracker the meta data tracker
* @param tracker the meta data tracker
* @param refContext the reference base
* @param context contextual information around the locus
* @param context contextual information around the locus
*/
public SSGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
public GenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
char ref = refContext.getBase();
if ( BaseUtils.isRegularBase(ref) ) {
if (BaseUtils.isRegularBase(ref)) {
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
// setup GenotypeLike object
GenotypeLikelihoods gl = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
gl.setVerbose(VERBOSE);
gl.setEnableCacheFlag(! disableCache);
gl.setEnableCacheFlag(!disableCache);
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
gl.add(pileup, true);
gl.validate();
return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
return new GenotypeCall(SAMPLE_NAME,context.getLocation(), ref, gl, pileup);
} else {
return null;
}
@ -138,8 +147,12 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
* @return an empty string
*/
public CallResult reduceInit() {
if ( VARIANTS_FILE != null )
return new CallResult(GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE));
if (VARIANTS_FILE != null)
return new CallResult(GenotypeWriterFactory.create(VAR_FORMAT,
GenomeAnalysisEngine.instance.getSAMFileHeader(),
VARIANTS_FILE,
"SingleSampleGenotyper",
this.getToolkit().getArguments().referenceFile.getName()));
else
return new CallResult(GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out));
}
@ -152,7 +165,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
*
* @return an empty string
*/
public CallResult reduce(SSGenotypeCall call, CallResult sum) {
public CallResult reduce(GenotypeCall call, CallResult sum) {
sum.nCalledBases++;
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
@ -160,7 +173,11 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
if (confidence >= LOD_THRESHOLD) {
sum.nConfidentCalls++;
//System.out.printf("Call %s%n", call);
sum.writer.addGenotypeCall(call);
if (sum.writer.supportsMulitSample()) {
sum.writer.addMultiSampleCall(Arrays.asList((Genotype)call));
} else {
sum.writer.addGenotypeCall(call);
}
} else
sum.nNonConfidentCalls++;
}

View File

@ -25,19 +25,25 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.*;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.*;
import org.broadinstitute.sting.utils.genotype.*;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.MissingReadGroupFilter;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import java.io.File;
import java.util.*;
import net.sf.samtools.SAMReadGroupRecord;
import java.util.HashSet;
import java.util.List;
@ReadFilters({ZeroMappingQualityReadFilter.class, MissingReadGroupFilter.class})
@ -91,7 +97,9 @@ public class UnifiedGenotyper extends LocusWalker<Integer, Integer> {
// create the output writer stream
if ( VARIANTS_FILE != null )
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE);
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
"UnifiedGenotyper",
this.getToolkit().getArguments().referenceFile.getName());
else
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out);

View File

@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.BaseUtils;
@ -80,7 +80,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
SSGenotypeCall call = SSG.map(tracker, ref, subContext);
GenotypeCall call = SSG.map(tracker, ref, subContext);
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
if (call != null) {

View File

@ -1,22 +1,19 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariationRod;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
import org.broadinstitute.sting.utils.genotype.Variation;
import java.util.Set;
import java.util.List;
import java.util.ArrayList;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.Set;
/**
* Created by IntelliJ IDEA.
@ -73,10 +70,10 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
}
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
SSGenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext);
GenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext);
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
SSGenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext);
GenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext);
if (!parent1.isVariant(parent1.getReference()) &&
parent1.getNegLog10PError() > 5 &&

View File

@ -1,53 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.Genotype;
import java.util.List;
/**
* @author aaron
* <p/>
* Interface GenotypeCall
* <p/>
* Genotype call interface, for indicating that a genotype is
* also a genotype call.
*/
public interface GenotypeCall extends Genotype {
/**
* gets the reference base
*
* @return the reference base we represent
*/
public char getReferencebase();
/**
* check to see if this called genotype is a variant, i.e. not homozygous reference
* @return true if it's not hom ref, false otherwise
*/
public boolean isVariation();
/**
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location
*
* @return position on the genome wrapped in GenomeLoc object
*/
public GenomeLoc getLocation();
/**
* get the genotypes, sorted in asscending order by their ConfidenceScores (the best
* to the worst ConfidenceScores)
*
* @return a list of the genotypes, sorted by ConfidenceScores
*/
public List<Genotype> getGenotypes();
/**
* get the genotypes sorted lexigraphically
*
* @return a list of the genotypes sorted lexi
*/
public List<Genotype> getLexigraphicallySortedGenotypes();
}

View File

@ -1,5 +1,7 @@
package org.broadinstitute.sting.utils.genotype;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -50,4 +52,15 @@ public interface GenotypeWriter {
/** finish writing, closing any open files. */
public void close();
/**
* add a multi-sample call if we support it
* @param genotypes the list of genotypes, that are backed by sample information
*/
public void addMultiSampleCall(List<Genotype> genotypes);
/**
* @return true if we support multisample, false otherwise
*/
public boolean supportsMulitSample();
}

View File

@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriterAdapter;
import java.io.File;
import java.io.PrintStream;
@ -20,7 +21,7 @@ import java.io.PrintStream;
public class GenotypeWriterFactory {
/** available genotype writers */
public enum GENOTYPE_FORMAT {
GELI, GLF, GFF, TABULAR, GELI_BINARY;
GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF;
}
/**
@ -30,7 +31,7 @@ public class GenotypeWriterFactory {
* @param destination the destination file
* @return the genotype writer object
*/
public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination) {
public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination, String source, String referenceName ) {
switch (format) {
case GLF:
return new GLFWriter(header.toString(), destination);
@ -38,6 +39,8 @@ public class GenotypeWriterFactory {
return new GeliTextWriter(destination);
case GELI_BINARY:
return new GeliAdapter(destination, header);
case VCF:
return new VCFGenotypeWriterAdapter(source, referenceName, destination);
default:
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
}

View File

@ -0,0 +1,23 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author aaron
* <p/>
* Interface SampleBacked
* <p/>
* A descriptions should go here. Blame aaron if it's missing.
*/
public interface SampleBacked {
/**
*
* @return returns the sample name for this genotype
*/
public String getSampleName();
/**
* get the filtering string for this genotype
* @return a string, representing the genotyping value
*/
public String getFilteringValue();
}

View File

@ -5,7 +5,6 @@ import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.*;
@ -111,7 +110,7 @@ public class GeliAdapter implements GenotypeWriter {
int readDepth = -1;
double nextVrsBest = 0;
double nextVrsRef = 0;
if (!(locus instanceof GenotypesBacked)) {
if (!(locus instanceof PosteriorsBacked)) {
posteriors = new double[10];
Arrays.fill(posteriors, Double.MIN_VALUE);
} else {
@ -128,8 +127,8 @@ public class GeliAdapter implements GenotypeWriter {
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
}
}
SSGenotypeCall call = (SSGenotypeCall)locus;
LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
(int)locus.getLocation().getStart(),
ref,
@ -155,4 +154,20 @@ public class GeliAdapter implements GenotypeWriter {
this.writer.close();
}
}
/**
* add a multi-sample call if we support it
*
* @param genotypes the list of genotypes, that are backed by sample information
*/
@Override
public void addMultiSampleCall( List<Genotype> genotypes) {
throw new UnsupportedOperationException("Geli binary doesn't support multisample calls");
}
/** @return true if we support multisample, false otherwise */
@Override
public boolean supportsMulitSample() {
return false;
}
}

View File

@ -1,4 +1,3 @@
package org.broadinstitute.sting.utils.genotype.geli;
import net.sf.samtools.SAMRecord;
@ -66,17 +65,17 @@ public class GeliTextWriter implements GenotypeWriter {
} else {
posteriors = ((PosteriorsBacked) locus).getPosteriors();
double[] lks;
lks = Arrays.copyOf(posteriors,posteriors.length);
lks = Arrays.copyOf(posteriors, posteriors.length);
Arrays.sort(lks);
nextVrsBest = lks[9] - lks[8];
if (ref != 'X') {
int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal());
if (ref != 'X') {
int index = (DiploidGenotype.valueOf(Utils.dupString(ref, 2)).ordinal());
nextVrsRef = lks[9] - posteriors[index];
}
}
double maxMappingQual = 0;
if (locus instanceof ReadBacked) {
List<SAMRecord> recs = ((ReadBacked)locus).getReads();
List<SAMRecord> recs = ((ReadBacked) locus).getReads();
readDepth = recs.size();
for (SAMRecord rec : recs) {
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
@ -118,4 +117,20 @@ public class GeliTextWriter implements GenotypeWriter {
public void close() {
mWriter.close();
}
/**
* add a multi-sample call if we support it
*
* @param genotypes the list of genotypes, that are backed by sample information
*/
@Override
public void addMultiSampleCall(List<Genotype> genotypes) {
throw new UnsupportedOperationException("Geli text doesn't support multisample calls");
}
/** @return true if we support multisample, false otherwise */
@Override
public boolean supportsMulitSample() {
return false;
}
}

View File

@ -278,6 +278,21 @@ public class GLFWriter implements GenotypeWriter {
}
/**
* add a multi-sample call if we support it
*
* @param genotypes the list of genotypes, that are backed by sample information
*/
@Override
public void addMultiSampleCall(List<Genotype> genotypes) {
throw new UnsupportedOperationException("GLF writer doesn't support multisample calls");
}
/** @return true if we support multisample, false otherwise */
@Override
public boolean supportsMulitSample() {
return false;
}
}

View File

@ -7,6 +7,7 @@ import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.List;
/**
@ -95,4 +96,21 @@ public class TabularLFWriter implements GenotypeWriter {
outStream.close();
}
}
/**
* add a multi-sample call if we support it
*
* @param genotypes the list of genotypes, that are backed by sample information
*/
@Override
public void addMultiSampleCall(List<Genotype> genotypes) {
throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls");
}
/** @return true if we support multisample, false otherwise */
@Override
public boolean supportsMulitSample() {
return false;
}
}

View File

@ -0,0 +1,247 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.ReadBacked;
import org.broadinstitute.sting.utils.genotype.SampleBacked;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @author aaron
* <p/>
* Class VCFGenotypeWriterAdapter
* <p/>
* Adapt the VCF writter to the genotype output system
*/
public class VCFGenotypeWriterAdapter implements GenotypeWriter {
// our VCF objects
private VCFWriter mWriter = null;
private VCFHeader mHeader = null;
private String mSource;
private String mReferenceName;
private final Map<String, String> mSampleNames = new HashMap<String, String>();
private boolean mInitialized = false;
private final File mFile;
public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo) {
mReferenceName = referenceName;
mSource = source;
mFile = writeTo;
}
/**
* initialize this VCF writer
*
* @param genotypes the genotypes
* @param file the file location to write to
*/
private void lazyInitialize(List<Genotype> genotypes, File file) {
Map<String, String> hInfo = new HashMap<String, String>();
List<String> sampleNames = getSampleNames(genotypes);
// setup the header fields
hInfo.put("format", "VCRv3.2");
hInfo.put("source", mSource);
// setup the sample names
mHeader = new VCFHeader(hInfo, sampleNames);
mWriter = new VCFWriter(mHeader, file);
mInitialized = true;
}
/**
* get the samples names from genotype objects
*
* @param genotypes the genotype list
*
* @return a list of strings representing the sample names
*/
private static List<String> getSampleNames(List<Genotype> genotypes) {
List<String> strings = new ArrayList<String>();
for (Genotype genotype : genotypes) {
if (!(genotype instanceof SampleBacked))
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
strings.add(((SampleBacked) genotype).getSampleName());
}
return strings;
}
/**
* Add a genotype, given a genotype locus
*
* @param call the locus to add
*/
@Override
public void addGenotypeCall(Genotype call) {
throw new UnsupportedOperationException("VCF is a multi sample fomat");
}
/**
* add a no call to the genotype file, if supported.
*
* @param position the position to add the no call at
*/
@Override
public void addNoCall(int position) {
throw new UnsupportedOperationException("VCF is a multi sample fomat");
}
/** finish writing, closing any open files. */
@Override
public void close() {
mWriter.close();
}
/**
* add a multi-sample call if we support it
*
* @param genotypes the list of genotypes, that are backed by sample information
*/
@Override
public void addMultiSampleCall(List<Genotype> genotypes) {
if (!mInitialized)
lazyInitialize(genotypes, mFile);
VCFParamters params = new VCFParamters();
params.addFormatItem("GT");
for (Genotype gtype : genotypes) {
// setup the parameters
params.setLocations(gtype.getLocation(), gtype.getReference());
Map<String, String> map = new HashMap<String, String>();
if (!(gtype instanceof SampleBacked)) {
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
}
// calculate the RMS mapping qualities and the read depth
if (gtype instanceof ReadBacked) {
int readDepth = ((ReadBacked) gtype).getReadCount();
map.put("RD", String.valueOf(readDepth));
params.addFormatItem("RD");
}
double qual = gtype.getNegLog10PError();
map.put("GQ", String.format("%.2f", qual));
params.addFormatItem("GQ");
List<String> alleles = new ArrayList<String>();
for (char allele : gtype.getBases().toCharArray()) {
alleles.add(String.valueOf(allele));
params.addAlternateBase(allele);
}
VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
alleles,
VCFGenotypeRecord.PHASE.UNPHASED,
map);
params.addGenotypeRecord(record);
}
Map<String, String> infoFields = new HashMap<String, String>();
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
params.getContig(),
params.getPosition(),
".",
params.getAlternateBases(),
0, /* BETTER VALUE HERE */
".",
infoFields,
params.getFormatString(),
params.getGenotypesRecords());
mWriter.addRecord(vcfRecord);
}
/** @return true if we support multisample, false otherwise */
@Override
public boolean supportsMulitSample() {
return true;
}
/**
* a helper class, which performs a lot of the safety checks on the parameters
* we feed to the VCF (like ensuring the same position for each genotype in a call).
*/
class VCFParamters {
private char referenceBase = '0';
private int position = 0;
private String contig = null;
private boolean initialized = false;
private List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
private List<String> formatList = new ArrayList<String>();
private List<String> alternateBases = new ArrayList<String>();
public void setLocations(GenomeLoc location, char refBase) {
// if we haven't set it up, we initialize the object
if (!initialized) {
initialized = true;
this.contig = location.getContig();
this.position = (int)location.getStart();
if (location.getStart() != location.getStop()) {
throw new IllegalArgumentException("The start and stop locations must be the same");
}
this.referenceBase = refBase;
} else {
if (contig.equals(this.contig))
throw new IllegalArgumentException("The contig name has to be the same at a single locus");
if (position != this.position)
throw new IllegalArgumentException("The position has to be the same at a single locus");
if (refBase != this.referenceBase)
throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
}
}
/** @return get the position */
public int getPosition() {
return position;
}
/** @return get the contig name */
public String getContig() {
return contig;
}
/** @return get the reference base */
public char getReferenceBase() {
return referenceBase;
}
public void addGenotypeRecord(VCFGenotypeRecord record) {
this.genotypesRecord.add(record);
}
public void addFormatItem(String item) {
if (!formatList.contains(item))
formatList.add(item);
}
public void addAlternateBase(char base) {
if (!alternateBases.contains(String.valueOf(base)) && base != this.getReferenceBase())
alternateBases.add(String.valueOf(base));
}
public List<String> getAlternateBases() {
return alternateBases;
}
public String getFormatString() {
return Utils.join(";", formatList);
}
public List<VCFGenotypeRecord> getGenotypesRecords() {
return genotypesRecord;
}
}
}

View File

@ -216,8 +216,8 @@ public class VCFRecord {
*/
public Map<String, String> getInfoValues() {
if (this.mInfoFields.size() < 1) {
Map<String,String> map = new HashMap<String, String>();
map.put(".","");
Map<String, String> map = new HashMap<String, String>();
map.put(".", "");
return map;
}
return this.mInfoFields;
@ -305,6 +305,10 @@ public class VCFRecord {
this.mInfoFields.put(key, value);
}
/**
* the generation of a string representation, which is used by the VCF writer
* @return a string
*/
public String toString() {
StringBuilder builder = new StringBuilder();
@ -340,7 +344,7 @@ public class VCFRecord {
if (rec.getFields().get(s).equals("")) continue;
builder.append(":");
builder.append(rec.getFields().get(s));
}
}
}
}
return builder.toString();

View File

@ -70,14 +70,16 @@ public class VCFWriter {
}
/** attempt to close the VCF file */
public void close() {
try {
mWriter.flush();
mWriter.close();
} catch (IOException e) {
throw new RuntimeException("Unable to close VCFFile");
}
}
}

View File

@ -22,7 +22,7 @@ import java.util.List;
* <p/>
* test the SS Genotype call class
*/
public class SSGenotypeCallTest extends BaseTest {
public class GenotypeCallTest extends BaseTest {
// we need a fake GenotypeLikelihoods class
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
@ -79,21 +79,21 @@ public class SSGenotypeCallTest extends BaseTest {
@Test
public void testBestVrsRefSame() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsRef2() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsRef3() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
}
@ -101,21 +101,21 @@ public class SSGenotypeCallTest extends BaseTest {
@Test
public void testBestVrsNextSame() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsNext2() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
}
@Test
public void testBestVrsNext3() {
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
}
}