VCF can now be emitted from SSG. The basic's are there (the genotype, read depth, our error estimate), but more fields need to be added for each record as nessasary.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1797 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
95f24d671d
commit
98e3a0bf1a
|
|
@ -1,15 +1,16 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.*;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
import net.sf.samtools.SAMRecord;
|
import java.util.List;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
|
||||||
|
|
||||||
public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
|
public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||||
|
|
||||||
|
|
@ -79,14 +80,15 @@ public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||||
|
|
||||||
// for now, we need to special-case single sample mode
|
// for now, we need to special-case single sample mode
|
||||||
if ( samples.size() == 1 ) {
|
if ( samples.size() == 1 ) {
|
||||||
UnifiedGenotypeLikelihoods UGL = GLs.get(samples.iterator().next());
|
String sample = samples.iterator().next();
|
||||||
|
UnifiedGenotypeLikelihoods UGL = GLs.get(sample);
|
||||||
// if there were no good bases, the likelihoods object wouldn't exist
|
// if there were no good bases, the likelihoods object wouldn't exist
|
||||||
if ( UGL == null )
|
if ( UGL == null )
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
callsMetrics.nCalledBases++;
|
callsMetrics.nCalledBases++;
|
||||||
UGL.setPriors(priors);
|
UGL.setPriors(priors);
|
||||||
SSGenotypeCall call = new SSGenotypeCall(context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context));
|
GenotypeCall call = new GenotypeCall(sample,context.getLocation(), ref, UGL.getGenotypeLikelihoods(), new ReadBackedPileup(ref, context));
|
||||||
|
|
||||||
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
|
if ( GENOTYPE_MODE || call.isVariant(call.getReference()) ) {
|
||||||
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
|
double confidence = (GENOTYPE_MODE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
|
||||||
|
|
@ -183,7 +185,7 @@ public class EMGenotypeCalculationModel extends GenotypeCalculationModel {
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
|
//return new GenotypeCall(context.getLocation(), ref,gl, pileup);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,10 +15,10 @@ import java.util.List;
|
||||||
* <p/>
|
* <p/>
|
||||||
* Class SSGenotypeCall
|
* Class SSGenotypeCall
|
||||||
* <p/>
|
* <p/>
|
||||||
* The single sample implementation of the genotype interface, which contains
|
* The mplementation of the genotype interface, which contains
|
||||||
* extra information for the various genotype outputs
|
* extra information for the various genotype outputs
|
||||||
*/
|
*/
|
||||||
public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked {
|
public class GenotypeCall implements Genotype, ReadBacked, GenotypesBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked {
|
||||||
private final char mRefBase;
|
private final char mRefBase;
|
||||||
private final GenotypeLikelihoods mGenotypeLikelihoods;
|
private final GenotypeLikelihoods mGenotypeLikelihoods;
|
||||||
|
|
||||||
|
|
@ -33,18 +33,20 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
private DiploidGenotype mRefGenotype = null;
|
private DiploidGenotype mRefGenotype = null;
|
||||||
private DiploidGenotype mNextGenotype = null;
|
private DiploidGenotype mNextGenotype = null;
|
||||||
|
|
||||||
// are we best vrs ref or best vrs next - for internal consumption only
|
// the sample name, used to propulate the SampleBacked interface
|
||||||
//private final boolean mBestVrsRef;
|
private String mSampleName;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||||
*
|
*
|
||||||
* @param location the location we're working with
|
* @param sampleName the sample name
|
||||||
* @param refBase the ref base
|
* @param location the location we're working with
|
||||||
* @param gtlh the genotype likelihoods object
|
* @param refBase the ref base
|
||||||
* @param pileup the pile-up of reads at the specified locus
|
* @param gtlh the genotype likelihoods object
|
||||||
|
* @param pileup the pile-up of reads at the specified locus
|
||||||
*/
|
*/
|
||||||
public SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
||||||
|
mSampleName = sampleName;
|
||||||
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
||||||
mGenotypeLikelihoods = gtlh;
|
mGenotypeLikelihoods = gtlh;
|
||||||
mLocation = location;
|
mLocation = location;
|
||||||
|
|
@ -54,12 +56,14 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
/**
|
/**
|
||||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||||
*
|
*
|
||||||
* @param location the location we're working with
|
* @param sampleName the sample name
|
||||||
* @param refBase the ref base
|
* @param location the location we're working with
|
||||||
* @param gtlh the genotype likelihoods object
|
* @param refBase the ref base
|
||||||
* @param pileup the pile-up of reads at the specified locus
|
* @param gtlh the genotype likelihoods object
|
||||||
|
* @param pileup the pile-up of reads at the specified locus
|
||||||
*/
|
*/
|
||||||
SSGenotypeCall(GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
||||||
|
mSampleName = sampleName;
|
||||||
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
mRefBase = String.valueOf(refBase).toUpperCase().charAt(0); // a round about way to make sure the ref base is up-case
|
||||||
mGenotypeLikelihoods = gtlh;
|
mGenotypeLikelihoods = gtlh;
|
||||||
mLocation = location;
|
mLocation = location;
|
||||||
|
|
@ -73,8 +77,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
|
|
||||||
if (other == null)
|
if (other == null)
|
||||||
return false;
|
return false;
|
||||||
if (other instanceof SSGenotypeCall) {
|
if (other instanceof GenotypeCall) {
|
||||||
SSGenotypeCall otherCall = (SSGenotypeCall) other;
|
GenotypeCall otherCall = (GenotypeCall) other;
|
||||||
|
|
||||||
if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) {
|
if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) {
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -90,8 +94,8 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
public String toString() {
|
public String toString() {
|
||||||
lazyEval();
|
lazyEval();
|
||||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
|
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
|
||||||
getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
|
getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
|
||||||
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
|
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void lazyEval() {
|
private void lazyEval() {
|
||||||
|
|
@ -122,25 +126,19 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
|
return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** get the best genotype */
|
||||||
* get the best genotype
|
|
||||||
*/
|
|
||||||
private DiploidGenotype getBestGenotype() {
|
private DiploidGenotype getBestGenotype() {
|
||||||
lazyEval();
|
lazyEval();
|
||||||
return mGenotype;
|
return mGenotype;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** get the alternate genotype */
|
||||||
* get the alternate genotype
|
|
||||||
*/
|
|
||||||
private DiploidGenotype getNextBest() {
|
private DiploidGenotype getNextBest() {
|
||||||
lazyEval();
|
lazyEval();
|
||||||
return mNextGenotype;
|
return mNextGenotype;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** get the alternate genotype */
|
||||||
* get the alternate genotype
|
|
||||||
*/
|
|
||||||
private DiploidGenotype getRefGenotype() {
|
private DiploidGenotype getRefGenotype() {
|
||||||
lazyEval();
|
lazyEval();
|
||||||
return mRefGenotype;
|
return mRefGenotype;
|
||||||
|
|
@ -211,6 +209,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
* given the reference, are we a variant? (non-ref)
|
* given the reference, are we a variant? (non-ref)
|
||||||
*
|
*
|
||||||
* @param ref the reference base or bases
|
* @param ref the reference base or bases
|
||||||
|
*
|
||||||
* @return true if we're a variant
|
* @return true if we're a variant
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -270,7 +269,7 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
* @return an array in lexigraphical order of the likelihoods
|
* @return an array in lexigraphical order of the likelihoods
|
||||||
*/
|
*/
|
||||||
public Genotype getGenotype(DiploidGenotype x) {
|
public Genotype getGenotype(DiploidGenotype x) {
|
||||||
return new SSGenotypeCall(mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
|
return new GenotypeCall(mSampleName, mLocation, mRefBase, mGenotypeLikelihoods, mPileup, x);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -292,6 +291,22 @@ public class SSGenotypeCall implements Genotype, ReadBacked, GenotypesBacked, Li
|
||||||
public double[] getPosteriors() {
|
public double[] getPosteriors() {
|
||||||
return this.mGenotypeLikelihoods.getPosteriors();
|
return this.mGenotypeLikelihoods.getPosteriors();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return returns the sample name for this genotype */
|
||||||
|
@Override
|
||||||
|
public String getSampleName() {
|
||||||
|
return this.mSampleName;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the filtering string for this genotype
|
||||||
|
*
|
||||||
|
* @return a string, representing the genotyping value
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public String getFilteringValue() {
|
||||||
|
return "0";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -10,13 +10,15 @@ import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||||
public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSampleGenotyper.CallResult> {
|
public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, SingleSampleGenotyper.CallResult> {
|
||||||
// Control output settings
|
// Control output settings
|
||||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
|
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
|
||||||
public File VARIANTS_FILE = null;
|
public File VARIANTS_FILE = null;
|
||||||
|
|
@ -47,6 +49,8 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
@Argument(fullName = "disableCache", doc = "[ADVANCED] If true, we won't use the caching system. This argument is for testing purposes only", required = false)
|
@Argument(fullName = "disableCache", doc = "[ADVANCED] If true, we won't use the caching system. This argument is for testing purposes only", required = false)
|
||||||
public boolean disableCache = false;
|
public boolean disableCache = false;
|
||||||
|
|
||||||
|
@Argument(fullName = "sample_name", shortName = "sn", doc = "What the label should be for the emited call track. Used by some genotype formats", required = false)
|
||||||
|
public String SAMPLE_NAME = null;
|
||||||
|
|
||||||
public class CallResult {
|
public class CallResult {
|
||||||
long nConfidentCalls = 0;
|
long nConfidentCalls = 0;
|
||||||
|
|
@ -57,14 +61,14 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
CallResult(GenotypeWriter writer) {
|
CallResult(GenotypeWriter writer) {
|
||||||
this.writer = writer;
|
this.writer = writer;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return String.format("SSG: %d confident and %d non-confident calls were made at %d bases",
|
return String.format("SSG: %d confident and %d non-confident calls were made at %d bases",
|
||||||
nConfidentCalls, nNonConfidentCalls, nCalledBases);
|
nConfidentCalls, nNonConfidentCalls, nCalledBases);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
||||||
*
|
*
|
||||||
* @param tracker the meta data tracker
|
* @param tracker the meta data tracker
|
||||||
|
|
@ -79,32 +83,37 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
|
|
||||||
/** Initialize the walker with some sensible defaults */
|
/** Initialize the walker with some sensible defaults */
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
//GenotypeLikelihoods.clearCache();
|
if (SAMPLE_NAME == null) {
|
||||||
// nothing to do
|
if (GenomeAnalysisEngine.instance.getArguments().samFiles.size() == 1) {
|
||||||
|
SAMPLE_NAME = GenomeAnalysisEngine.instance.getArguments().samFiles.get(0).getName();
|
||||||
|
} else {
|
||||||
|
SAMPLE_NAME = "Combined_BAM";
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compute at a given locus.
|
* Compute at a given locus.
|
||||||
*
|
*
|
||||||
* @param tracker the meta data tracker
|
* @param tracker the meta data tracker
|
||||||
* @param refContext the reference base
|
* @param refContext the reference base
|
||||||
* @param context contextual information around the locus
|
* @param context contextual information around the locus
|
||||||
*/
|
*/
|
||||||
public SSGenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
public GenotypeCall map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||||
char ref = refContext.getBase();
|
char ref = refContext.getBase();
|
||||||
if ( BaseUtils.isRegularBase(ref) ) {
|
if (BaseUtils.isRegularBase(ref)) {
|
||||||
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
DiploidGenotypePriors priors = new DiploidGenotypePriors(ref, heterozygosity, DiploidGenotypePriors.PROB_OF_TRISTATE_GENOTYPE);
|
||||||
|
|
||||||
// setup GenotypeLike object
|
// setup GenotypeLike object
|
||||||
GenotypeLikelihoods gl = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
GenotypeLikelihoods gl = GenotypeLikelihoodsFactory.makeGenotypeLikelihoods(baseModel, priors, defaultPlatform);
|
||||||
|
|
||||||
gl.setVerbose(VERBOSE);
|
gl.setVerbose(VERBOSE);
|
||||||
gl.setEnableCacheFlag(! disableCache);
|
gl.setEnableCacheFlag(!disableCache);
|
||||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||||
gl.add(pileup, true);
|
gl.add(pileup, true);
|
||||||
gl.validate();
|
gl.validate();
|
||||||
|
|
||||||
return new SSGenotypeCall(context.getLocation(), ref,gl, pileup);
|
return new GenotypeCall(SAMPLE_NAME,context.getLocation(), ref, gl, pileup);
|
||||||
} else {
|
} else {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
@ -138,8 +147,12 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
* @return an empty string
|
* @return an empty string
|
||||||
*/
|
*/
|
||||||
public CallResult reduceInit() {
|
public CallResult reduceInit() {
|
||||||
if ( VARIANTS_FILE != null )
|
if (VARIANTS_FILE != null)
|
||||||
return new CallResult(GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE));
|
return new CallResult(GenotypeWriterFactory.create(VAR_FORMAT,
|
||||||
|
GenomeAnalysisEngine.instance.getSAMFileHeader(),
|
||||||
|
VARIANTS_FILE,
|
||||||
|
"SingleSampleGenotyper",
|
||||||
|
this.getToolkit().getArguments().referenceFile.getName()));
|
||||||
else
|
else
|
||||||
return new CallResult(GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out));
|
return new CallResult(GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out));
|
||||||
}
|
}
|
||||||
|
|
@ -152,7 +165,7 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
*
|
*
|
||||||
* @return an empty string
|
* @return an empty string
|
||||||
*/
|
*/
|
||||||
public CallResult reduce(SSGenotypeCall call, CallResult sum) {
|
public CallResult reduce(GenotypeCall call, CallResult sum) {
|
||||||
sum.nCalledBases++;
|
sum.nCalledBases++;
|
||||||
|
|
||||||
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
|
if (call != null && (GENOTYPE || call.isVariant(call.getReference()))) {
|
||||||
|
|
@ -160,7 +173,11 @@ public class SingleSampleGenotyper extends LocusWalker<SSGenotypeCall, SingleSam
|
||||||
if (confidence >= LOD_THRESHOLD) {
|
if (confidence >= LOD_THRESHOLD) {
|
||||||
sum.nConfidentCalls++;
|
sum.nConfidentCalls++;
|
||||||
//System.out.printf("Call %s%n", call);
|
//System.out.printf("Call %s%n", call);
|
||||||
sum.writer.addGenotypeCall(call);
|
if (sum.writer.supportsMulitSample()) {
|
||||||
|
sum.writer.addMultiSampleCall(Arrays.asList((Genotype)call));
|
||||||
|
} else {
|
||||||
|
sum.writer.addGenotypeCall(call);
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
sum.nNonConfidentCalls++;
|
sum.nNonConfidentCalls++;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,19 +25,25 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.*;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.*;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.*;
|
import org.broadinstitute.sting.gatk.filters.MissingReadGroupFilter;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.cmdLine.*;
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
import org.broadinstitute.sting.utils.genotype.*;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.*;
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
|
||||||
|
|
||||||
|
|
||||||
@ReadFilters({ZeroMappingQualityReadFilter.class, MissingReadGroupFilter.class})
|
@ReadFilters({ZeroMappingQualityReadFilter.class, MissingReadGroupFilter.class})
|
||||||
|
|
@ -91,7 +97,9 @@ public class UnifiedGenotyper extends LocusWalker<Integer, Integer> {
|
||||||
|
|
||||||
// create the output writer stream
|
// create the output writer stream
|
||||||
if ( VARIANTS_FILE != null )
|
if ( VARIANTS_FILE != null )
|
||||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE);
|
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
||||||
|
"UnifiedGenotyper",
|
||||||
|
this.getToolkit().getArguments().referenceFile.getName());
|
||||||
else
|
else
|
||||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out);
|
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
|
import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
@ -80,7 +80,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
||||||
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
|
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
|
||||||
|
|
||||||
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
|
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
|
||||||
SSGenotypeCall call = SSG.map(tracker, ref, subContext);
|
GenotypeCall call = SSG.map(tracker, ref, subContext);
|
||||||
|
|
||||||
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
||||||
if (call != null) {
|
if (call != null) {
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,19 @@
|
||||||
package org.broadinstitute.sting.playground.gatk.walkers;
|
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.VariationRod;
|
import org.broadinstitute.sting.gatk.refdata.VariationRod;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.SingleSampleGenotyper;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
|
|
||||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||||
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import net.sf.samtools.SAMRecord;
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -73,10 +70,10 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
|
||||||
}
|
}
|
||||||
|
|
||||||
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
|
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
|
||||||
SSGenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext);
|
GenotypeCall parent1 = SSG.map(tracker, ref, parent1_subContext);
|
||||||
|
|
||||||
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
|
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
|
||||||
SSGenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext);
|
GenotypeCall parent2 = SSG.map(tracker, ref, parent2_subContext);
|
||||||
|
|
||||||
if (!parent1.isVariant(parent1.getReference()) &&
|
if (!parent1.isVariant(parent1.getReference()) &&
|
||||||
parent1.getNegLog10PError() > 5 &&
|
parent1.getNegLog10PError() > 5 &&
|
||||||
|
|
|
||||||
|
|
@ -1,53 +0,0 @@
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author aaron
|
|
||||||
* <p/>
|
|
||||||
* Interface GenotypeCall
|
|
||||||
* <p/>
|
|
||||||
* Genotype call interface, for indicating that a genotype is
|
|
||||||
* also a genotype call.
|
|
||||||
*/
|
|
||||||
public interface GenotypeCall extends Genotype {
|
|
||||||
/**
|
|
||||||
* gets the reference base
|
|
||||||
*
|
|
||||||
* @return the reference base we represent
|
|
||||||
*/
|
|
||||||
public char getReferencebase();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* check to see if this called genotype is a variant, i.e. not homozygous reference
|
|
||||||
* @return true if it's not hom ref, false otherwise
|
|
||||||
*/
|
|
||||||
public boolean isVariation();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
|
||||||
* is located right <i>after</i> the specified location
|
|
||||||
*
|
|
||||||
* @return position on the genome wrapped in GenomeLoc object
|
|
||||||
*/
|
|
||||||
public GenomeLoc getLocation();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the genotypes, sorted in asscending order by their ConfidenceScores (the best
|
|
||||||
* to the worst ConfidenceScores)
|
|
||||||
*
|
|
||||||
* @return a list of the genotypes, sorted by ConfidenceScores
|
|
||||||
*/
|
|
||||||
public List<Genotype> getGenotypes();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the genotypes sorted lexigraphically
|
|
||||||
*
|
|
||||||
* @return a list of the genotypes sorted lexi
|
|
||||||
*/
|
|
||||||
public List<Genotype> getLexigraphicallySortedGenotypes();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
|
|
@ -50,4 +52,15 @@ public interface GenotypeWriter {
|
||||||
/** finish writing, closing any open files. */
|
/** finish writing, closing any open files. */
|
||||||
public void close();
|
public void close();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a multi-sample call if we support it
|
||||||
|
* @param genotypes the list of genotypes, that are backed by sample information
|
||||||
|
*/
|
||||||
|
public void addMultiSampleCall(List<Genotype> genotypes);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if we support multisample, false otherwise
|
||||||
|
*/
|
||||||
|
public boolean supportsMulitSample();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import org.broadinstitute.sting.utils.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter;
|
import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter;
|
||||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
||||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriterAdapter;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -20,7 +21,7 @@ import java.io.PrintStream;
|
||||||
public class GenotypeWriterFactory {
|
public class GenotypeWriterFactory {
|
||||||
/** available genotype writers */
|
/** available genotype writers */
|
||||||
public enum GENOTYPE_FORMAT {
|
public enum GENOTYPE_FORMAT {
|
||||||
GELI, GLF, GFF, TABULAR, GELI_BINARY;
|
GELI, GLF, GFF, TABULAR, GELI_BINARY, VCF;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -30,7 +31,7 @@ public class GenotypeWriterFactory {
|
||||||
* @param destination the destination file
|
* @param destination the destination file
|
||||||
* @return the genotype writer object
|
* @return the genotype writer object
|
||||||
*/
|
*/
|
||||||
public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination) {
|
public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination, String source, String referenceName ) {
|
||||||
switch (format) {
|
switch (format) {
|
||||||
case GLF:
|
case GLF:
|
||||||
return new GLFWriter(header.toString(), destination);
|
return new GLFWriter(header.toString(), destination);
|
||||||
|
|
@ -38,6 +39,8 @@ public class GenotypeWriterFactory {
|
||||||
return new GeliTextWriter(destination);
|
return new GeliTextWriter(destination);
|
||||||
case GELI_BINARY:
|
case GELI_BINARY:
|
||||||
return new GeliAdapter(destination, header);
|
return new GeliAdapter(destination, header);
|
||||||
|
case VCF:
|
||||||
|
return new VCFGenotypeWriterAdapter(source, referenceName, destination);
|
||||||
default:
|
default:
|
||||||
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
|
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* <p/>
|
||||||
|
* Interface SampleBacked
|
||||||
|
* <p/>
|
||||||
|
* A descriptions should go here. Blame aaron if it's missing.
|
||||||
|
*/
|
||||||
|
public interface SampleBacked {
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return returns the sample name for this genotype
|
||||||
|
*/
|
||||||
|
public String getSampleName();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the filtering string for this genotype
|
||||||
|
* @return a string, representing the genotyping value
|
||||||
|
*/
|
||||||
|
public String getFilteringValue();
|
||||||
|
}
|
||||||
|
|
@ -5,7 +5,6 @@ import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.SSGenotypeCall;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.genotype.*;
|
import org.broadinstitute.sting.utils.genotype.*;
|
||||||
|
|
||||||
|
|
@ -111,7 +110,7 @@ public class GeliAdapter implements GenotypeWriter {
|
||||||
int readDepth = -1;
|
int readDepth = -1;
|
||||||
double nextVrsBest = 0;
|
double nextVrsBest = 0;
|
||||||
double nextVrsRef = 0;
|
double nextVrsRef = 0;
|
||||||
if (!(locus instanceof GenotypesBacked)) {
|
if (!(locus instanceof PosteriorsBacked)) {
|
||||||
posteriors = new double[10];
|
posteriors = new double[10];
|
||||||
Arrays.fill(posteriors, Double.MIN_VALUE);
|
Arrays.fill(posteriors, Double.MIN_VALUE);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -128,8 +127,8 @@ public class GeliAdapter implements GenotypeWriter {
|
||||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SSGenotypeCall call = (SSGenotypeCall)locus;
|
|
||||||
LikelihoodObject obj = new LikelihoodObject(call.getProbabilities(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
|
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
|
||||||
(int)locus.getLocation().getStart(),
|
(int)locus.getLocation().getStart(),
|
||||||
ref,
|
ref,
|
||||||
|
|
@ -155,4 +154,20 @@ public class GeliAdapter implements GenotypeWriter {
|
||||||
this.writer.close();
|
this.writer.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a multi-sample call if we support it
|
||||||
|
*
|
||||||
|
* @param genotypes the list of genotypes, that are backed by sample information
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addMultiSampleCall( List<Genotype> genotypes) {
|
||||||
|
throw new UnsupportedOperationException("Geli binary doesn't support multisample calls");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return true if we support multisample, false otherwise */
|
||||||
|
@Override
|
||||||
|
public boolean supportsMulitSample() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,3 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.genotype.geli;
|
package org.broadinstitute.sting.utils.genotype.geli;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
@ -66,17 +65,17 @@ public class GeliTextWriter implements GenotypeWriter {
|
||||||
} else {
|
} else {
|
||||||
posteriors = ((PosteriorsBacked) locus).getPosteriors();
|
posteriors = ((PosteriorsBacked) locus).getPosteriors();
|
||||||
double[] lks;
|
double[] lks;
|
||||||
lks = Arrays.copyOf(posteriors,posteriors.length);
|
lks = Arrays.copyOf(posteriors, posteriors.length);
|
||||||
Arrays.sort(lks);
|
Arrays.sort(lks);
|
||||||
nextVrsBest = lks[9] - lks[8];
|
nextVrsBest = lks[9] - lks[8];
|
||||||
if (ref != 'X') {
|
if (ref != 'X') {
|
||||||
int index = (DiploidGenotype.valueOf(Utils.dupString(ref,2)).ordinal());
|
int index = (DiploidGenotype.valueOf(Utils.dupString(ref, 2)).ordinal());
|
||||||
nextVrsRef = lks[9] - posteriors[index];
|
nextVrsRef = lks[9] - posteriors[index];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
double maxMappingQual = 0;
|
double maxMappingQual = 0;
|
||||||
if (locus instanceof ReadBacked) {
|
if (locus instanceof ReadBacked) {
|
||||||
List<SAMRecord> recs = ((ReadBacked)locus).getReads();
|
List<SAMRecord> recs = ((ReadBacked) locus).getReads();
|
||||||
readDepth = recs.size();
|
readDepth = recs.size();
|
||||||
for (SAMRecord rec : recs) {
|
for (SAMRecord rec : recs) {
|
||||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||||
|
|
@ -118,4 +117,20 @@ public class GeliTextWriter implements GenotypeWriter {
|
||||||
public void close() {
|
public void close() {
|
||||||
mWriter.close();
|
mWriter.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a multi-sample call if we support it
|
||||||
|
*
|
||||||
|
* @param genotypes the list of genotypes, that are backed by sample information
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addMultiSampleCall(List<Genotype> genotypes) {
|
||||||
|
throw new UnsupportedOperationException("Geli text doesn't support multisample calls");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return true if we support multisample, false otherwise */
|
||||||
|
@Override
|
||||||
|
public boolean supportsMulitSample() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -278,6 +278,21 @@ public class GLFWriter implements GenotypeWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a multi-sample call if we support it
|
||||||
|
*
|
||||||
|
* @param genotypes the list of genotypes, that are backed by sample information
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addMultiSampleCall(List<Genotype> genotypes) {
|
||||||
|
throw new UnsupportedOperationException("GLF writer doesn't support multisample calls");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return true if we support multisample, false otherwise */
|
||||||
|
@Override
|
||||||
|
public boolean supportsMulitSample() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -95,4 +96,21 @@ public class TabularLFWriter implements GenotypeWriter {
|
||||||
outStream.close();
|
outStream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a multi-sample call if we support it
|
||||||
|
*
|
||||||
|
* @param genotypes the list of genotypes, that are backed by sample information
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addMultiSampleCall(List<Genotype> genotypes) {
|
||||||
|
throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return true if we support multisample, false otherwise */
|
||||||
|
@Override
|
||||||
|
public boolean supportsMulitSample() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,247 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.ReadBacked;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.SampleBacked;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* <p/>
|
||||||
|
* Class VCFGenotypeWriterAdapter
|
||||||
|
* <p/>
|
||||||
|
* Adapt the VCF writter to the genotype output system
|
||||||
|
*/
|
||||||
|
public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
||||||
|
// our VCF objects
|
||||||
|
private VCFWriter mWriter = null;
|
||||||
|
private VCFHeader mHeader = null;
|
||||||
|
private String mSource;
|
||||||
|
private String mReferenceName;
|
||||||
|
private final Map<String, String> mSampleNames = new HashMap<String, String>();
|
||||||
|
private boolean mInitialized = false;
|
||||||
|
private final File mFile;
|
||||||
|
|
||||||
|
public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo) {
|
||||||
|
mReferenceName = referenceName;
|
||||||
|
mSource = source;
|
||||||
|
mFile = writeTo;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* initialize this VCF writer
|
||||||
|
*
|
||||||
|
* @param genotypes the genotypes
|
||||||
|
* @param file the file location to write to
|
||||||
|
*/
|
||||||
|
private void lazyInitialize(List<Genotype> genotypes, File file) {
|
||||||
|
Map<String, String> hInfo = new HashMap<String, String>();
|
||||||
|
List<String> sampleNames = getSampleNames(genotypes);
|
||||||
|
|
||||||
|
// setup the header fields
|
||||||
|
hInfo.put("format", "VCRv3.2");
|
||||||
|
hInfo.put("source", mSource);
|
||||||
|
|
||||||
|
// setup the sample names
|
||||||
|
mHeader = new VCFHeader(hInfo, sampleNames);
|
||||||
|
mWriter = new VCFWriter(mHeader, file);
|
||||||
|
mInitialized = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the samples names from genotype objects
|
||||||
|
*
|
||||||
|
* @param genotypes the genotype list
|
||||||
|
*
|
||||||
|
* @return a list of strings representing the sample names
|
||||||
|
*/
|
||||||
|
private static List<String> getSampleNames(List<Genotype> genotypes) {
|
||||||
|
List<String> strings = new ArrayList<String>();
|
||||||
|
for (Genotype genotype : genotypes) {
|
||||||
|
if (!(genotype instanceof SampleBacked))
|
||||||
|
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
|
||||||
|
strings.add(((SampleBacked) genotype).getSampleName());
|
||||||
|
}
|
||||||
|
return strings;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a genotype, given a genotype locus
|
||||||
|
*
|
||||||
|
* @param call the locus to add
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addGenotypeCall(Genotype call) {
|
||||||
|
throw new UnsupportedOperationException("VCF is a multi sample fomat");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a no call to the genotype file, if supported.
|
||||||
|
*
|
||||||
|
* @param position the position to add the no call at
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addNoCall(int position) {
|
||||||
|
throw new UnsupportedOperationException("VCF is a multi sample fomat");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** finish writing, closing any open files. */
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
mWriter.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a multi-sample call if we support it
|
||||||
|
*
|
||||||
|
* @param genotypes the list of genotypes, that are backed by sample information
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addMultiSampleCall(List<Genotype> genotypes) {
|
||||||
|
if (!mInitialized)
|
||||||
|
lazyInitialize(genotypes, mFile);
|
||||||
|
|
||||||
|
|
||||||
|
VCFParamters params = new VCFParamters();
|
||||||
|
params.addFormatItem("GT");
|
||||||
|
|
||||||
|
for (Genotype gtype : genotypes) {
|
||||||
|
// setup the parameters
|
||||||
|
params.setLocations(gtype.getLocation(), gtype.getReference());
|
||||||
|
|
||||||
|
Map<String, String> map = new HashMap<String, String>();
|
||||||
|
if (!(gtype instanceof SampleBacked)) {
|
||||||
|
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculate the RMS mapping qualities and the read depth
|
||||||
|
if (gtype instanceof ReadBacked) {
|
||||||
|
int readDepth = ((ReadBacked) gtype).getReadCount();
|
||||||
|
map.put("RD", String.valueOf(readDepth));
|
||||||
|
params.addFormatItem("RD");
|
||||||
|
}
|
||||||
|
double qual = gtype.getNegLog10PError();
|
||||||
|
map.put("GQ", String.format("%.2f", qual));
|
||||||
|
params.addFormatItem("GQ");
|
||||||
|
|
||||||
|
List<String> alleles = new ArrayList<String>();
|
||||||
|
for (char allele : gtype.getBases().toCharArray()) {
|
||||||
|
alleles.add(String.valueOf(allele));
|
||||||
|
params.addAlternateBase(allele);
|
||||||
|
}
|
||||||
|
|
||||||
|
VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
|
||||||
|
alleles,
|
||||||
|
VCFGenotypeRecord.PHASE.UNPHASED,
|
||||||
|
map);
|
||||||
|
params.addGenotypeRecord(record);
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, String> infoFields = new HashMap<String, String>();
|
||||||
|
|
||||||
|
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
|
||||||
|
params.getContig(),
|
||||||
|
params.getPosition(),
|
||||||
|
".",
|
||||||
|
params.getAlternateBases(),
|
||||||
|
0, /* BETTER VALUE HERE */
|
||||||
|
".",
|
||||||
|
infoFields,
|
||||||
|
params.getFormatString(),
|
||||||
|
params.getGenotypesRecords());
|
||||||
|
|
||||||
|
mWriter.addRecord(vcfRecord);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return true if we support multisample, false otherwise */
|
||||||
|
@Override
|
||||||
|
public boolean supportsMulitSample() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* a helper class, which performs a lot of the safety checks on the parameters
|
||||||
|
* we feed to the VCF (like ensuring the same position for each genotype in a call).
|
||||||
|
*/
|
||||||
|
class VCFParamters {
|
||||||
|
private char referenceBase = '0';
|
||||||
|
private int position = 0;
|
||||||
|
private String contig = null;
|
||||||
|
private boolean initialized = false;
|
||||||
|
private List<VCFGenotypeRecord> genotypesRecord = new ArrayList<VCFGenotypeRecord>();
|
||||||
|
private List<String> formatList = new ArrayList<String>();
|
||||||
|
private List<String> alternateBases = new ArrayList<String>();
|
||||||
|
|
||||||
|
public void setLocations(GenomeLoc location, char refBase) {
|
||||||
|
// if we haven't set it up, we initialize the object
|
||||||
|
if (!initialized) {
|
||||||
|
initialized = true;
|
||||||
|
this.contig = location.getContig();
|
||||||
|
this.position = (int)location.getStart();
|
||||||
|
if (location.getStart() != location.getStop()) {
|
||||||
|
throw new IllegalArgumentException("The start and stop locations must be the same");
|
||||||
|
}
|
||||||
|
this.referenceBase = refBase;
|
||||||
|
} else {
|
||||||
|
if (contig.equals(this.contig))
|
||||||
|
throw new IllegalArgumentException("The contig name has to be the same at a single locus");
|
||||||
|
if (position != this.position)
|
||||||
|
throw new IllegalArgumentException("The position has to be the same at a single locus");
|
||||||
|
if (refBase != this.referenceBase)
|
||||||
|
throw new IllegalArgumentException("The reference base name has to be the same at a single locus");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return get the position */
|
||||||
|
public int getPosition() {
|
||||||
|
return position;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return get the contig name */
|
||||||
|
public String getContig() {
|
||||||
|
return contig;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return get the reference base */
|
||||||
|
public char getReferenceBase() {
|
||||||
|
return referenceBase;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addGenotypeRecord(VCFGenotypeRecord record) {
|
||||||
|
this.genotypesRecord.add(record);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addFormatItem(String item) {
|
||||||
|
if (!formatList.contains(item))
|
||||||
|
formatList.add(item);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addAlternateBase(char base) {
|
||||||
|
if (!alternateBases.contains(String.valueOf(base)) && base != this.getReferenceBase())
|
||||||
|
alternateBases.add(String.valueOf(base));
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getAlternateBases() {
|
||||||
|
return alternateBases;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getFormatString() {
|
||||||
|
return Utils.join(";", formatList);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<VCFGenotypeRecord> getGenotypesRecords() {
|
||||||
|
return genotypesRecord;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -216,8 +216,8 @@ public class VCFRecord {
|
||||||
*/
|
*/
|
||||||
public Map<String, String> getInfoValues() {
|
public Map<String, String> getInfoValues() {
|
||||||
if (this.mInfoFields.size() < 1) {
|
if (this.mInfoFields.size() < 1) {
|
||||||
Map<String,String> map = new HashMap<String, String>();
|
Map<String, String> map = new HashMap<String, String>();
|
||||||
map.put(".","");
|
map.put(".", "");
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
return this.mInfoFields;
|
return this.mInfoFields;
|
||||||
|
|
@ -305,6 +305,10 @@ public class VCFRecord {
|
||||||
this.mInfoFields.put(key, value);
|
this.mInfoFields.put(key, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the generation of a string representation, which is used by the VCF writer
|
||||||
|
* @return a string
|
||||||
|
*/
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder builder = new StringBuilder();
|
StringBuilder builder = new StringBuilder();
|
||||||
|
|
||||||
|
|
@ -340,7 +344,7 @@ public class VCFRecord {
|
||||||
if (rec.getFields().get(s).equals("")) continue;
|
if (rec.getFields().get(s).equals("")) continue;
|
||||||
builder.append(":");
|
builder.append(":");
|
||||||
builder.append(rec.getFields().get(s));
|
builder.append(rec.getFields().get(s));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return builder.toString();
|
return builder.toString();
|
||||||
|
|
|
||||||
|
|
@ -70,14 +70,16 @@ public class VCFWriter {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/** attempt to close the VCF file */
|
/** attempt to close the VCF file */
|
||||||
public void close() {
|
public void close() {
|
||||||
try {
|
try {
|
||||||
|
mWriter.flush();
|
||||||
mWriter.close();
|
mWriter.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException("Unable to close VCFFile");
|
throw new RuntimeException("Unable to close VCFFile");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ import java.util.List;
|
||||||
* <p/>
|
* <p/>
|
||||||
* test the SS Genotype call class
|
* test the SS Genotype call class
|
||||||
*/
|
*/
|
||||||
public class SSGenotypeCallTest extends BaseTest {
|
public class GenotypeCallTest extends BaseTest {
|
||||||
|
|
||||||
// we need a fake GenotypeLikelihoods class
|
// we need a fake GenotypeLikelihoods class
|
||||||
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
|
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
|
||||||
|
|
@ -79,21 +79,21 @@ public class SSGenotypeCallTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsRefSame() {
|
public void testBestVrsRefSame() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsRef2() {
|
public void testBestVrsRef2() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
|
GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsRef3() {
|
public void testBestVrsRef3() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -101,21 +101,21 @@ public class SSGenotypeCallTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsNextSame() {
|
public void testBestVrsNextSame() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsNext2() {
|
public void testBestVrsNext2() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call2 = new SSGenotypeCall(myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testBestVrsNext3() {
|
public void testBestVrsNext3() {
|
||||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||||
SSGenotypeCall call3 = new SSGenotypeCall(myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||||
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
|
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
Reference in New Issue