2nd stage of the genotyper output refactoring is complete.

Now, all output is generalized and all of the intelligence lies where it is supposed to.
Next stage is syncing up old and new models and making sure we're outputting exactly what we should.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1960 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2009-11-02 22:43:08 +00:00
parent ba67c7f02b
commit 3a33401822
23 changed files with 334 additions and 140 deletions

View File

@ -17,7 +17,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
protected EMGenotypeCalculationModel() {}
public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
public Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// keep track of the context for each sample, overall and separated by strand
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
@ -27,24 +27,35 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
// run the EM calculation
EMOutput overall = runEM(ref, contexts, priors, StratifiedContext.OVERALL);
double lod = overall.getPofD() - overall.getPofNull();
logger.debug(String.format("LOD=%f", lod));
// return a null call if we don't pass the lod cutoff
if ( !ALL_BASE_MODE && lod < LOD_THRESHOLD )
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
// calculate strand score
EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD);
EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE);
double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore));
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
// generate the calls
GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
return new Pair<List<Genotype>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation());
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(lod);
}
if ( locusdata instanceof SLODBacked ) {
// calculate strand score
EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD);
EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE);
double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
logger.debug(String.format("SLOD=%f", lod, strandScore));
((SLODBacked)locusdata).setSLOD(strandScore);
}
if ( locusdata instanceof AlleleFrequencyBacked ) {
((AlleleFrequencyBacked)locusdata).setAlleleFrequency(overall.getMAF());
}
}
return new Pair<List<Genotype>, GenotypeLocusData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), locusdata);
}
protected List<Genotype> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {

View File

@ -96,7 +96,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
*
* @return calls
*/
public abstract Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker,
public abstract Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker,
char ref,
AlignmentContext context,
DiploidGenotypePriors priors);

View File

@ -37,7 +37,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
private enum GenotypeType { REF, HET, HOM }
public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
public Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// keep track of the context for each sample, overall and separated by strand
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
@ -49,24 +49,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
// run joint estimation for the full GL contexts
initializeGenotypeLikelihoods(ref, contexts, StratifiedContext.OVERALL);
calculateAlleleFrequencyPosteriors(ref, context.getLocation());
return createCalls(ref, contexts);
//double lod = overall.getPofD() - overall.getPofNull();
//logger.debug("lod=" + lod);
// calculate strand score
//EMOutput forward = calculate(ref, contexts, priors, StratifiedContext.FORWARD);
//EMOutput reverse = calculate(ref, contexts, priors, StratifiedContext.REVERSE);
//double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
//double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
//logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
//double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
//logger.debug(String.format("LOD=%f, SLOD=%f", lod, strandScore));
// generate the calls
//GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
//return new Pair<List<GenotypeCall>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
return createCalls(ref, contexts, context.getLocation());
}
private void initializeAlleleFrequencies(int numSamples) {
@ -292,7 +275,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
verboseWriter.println();
}
private Pair<List<Genotype>, GenotypeMetaData> createCalls(char ref, HashMap<String, AlignmentContextBySample> contexts) {
private Pair<List<Genotype>, GenotypeLocusData> createCalls(char ref, HashMap<String, AlignmentContextBySample> contexts, GenomeLoc loc) {
// first, find the alt allele with maximum confidence
int indexOfMax = 0;
char baseOfMax = ref;
@ -312,14 +295,35 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
// return a null call if we don't pass the confidence cutoff
if ( !ALL_BASE_MODE && phredScaledConfidence < CONFIDENCE_THRESHOLD )
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
ArrayList<Genotype> calls = new ArrayList<Genotype>();
// TODO -- generate strand score
double strandScore = 0.0;
GenotypeMetaData metadata = new GenotypeMetaData(phredScaledConfidence, strandScore, bestAFguess);
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, loc);
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(phredScaledConfidence);
}
if ( locusdata instanceof SLODBacked ) {
// TODO -- generate strand score
double strandScore = 0.0;
// calculate strand score
//EMOutput forward = runEM(ref, contexts, priors, StratifiedContext.FORWARD);
//EMOutput reverse = runEM(ref, contexts, priors, StratifiedContext.REVERSE);
//double forwardLod = (forward.getPofD() + reverse.getPofNull()) - overall.getPofNull();
//double reverseLod = (reverse.getPofD() + forward.getPofNull()) - overall.getPofNull();
//logger.debug("forward lod=" + forwardLod + ", reverse lod=" + reverseLod);
//double strandScore = Math.max(forwardLod - lod, reverseLod - lod);
//logger.debug(String.format("SLOD=%f", lod, strandScore));
((SLODBacked)locusdata).setSLOD(strandScore);
}
if ( locusdata instanceof AlleleFrequencyBacked ) {
((AlleleFrequencyBacked)locusdata).setAlleleFrequency(bestAFguess);
}
}
for ( String sample : GLs.keySet() ) {
@ -343,6 +347,6 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
calls.add(call);
}
return new Pair<List<Genotype>, GenotypeMetaData>(calls, metadata);
return new Pair<List<Genotype>, GenotypeLocusData>(calls, locusdata);
}
}

View File

@ -24,7 +24,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
// overload this method so we can special-case the single sample
public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
public Pair<List<Genotype>, GenotypeLocusData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
// we don't actually want to run EM for single samples
if ( samples.size() == 1 ) {
@ -45,18 +45,17 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
// get the genotype likelihoods
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);
// are we above the lod threshold for emitting calls (and not in all-bases-mode)?
if ( !ALL_BASE_MODE ) {
double[] posteriors = discoveryGL.second.getPosteriors();
Integer sortedPosteriors[] = Utils.SortPermutation(posteriors);
double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]];
double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]];
double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype));
// calculate the lod threshold
double[] posteriors = discoveryGL.second.getPosteriors();
Integer sortedPosteriors[] = Utils.SortPermutation(posteriors);
double bestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 1]];
double nextBestGenotype = posteriors[sortedPosteriors[sortedPosteriors.length - 2]];
double refGenotype = posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
double lodConfidence = (GENOTYPE_MODE ? (bestGenotype - nextBestGenotype) : (bestGenotype - refGenotype));
// return a null call
if ( lodConfidence < LOD_THRESHOLD )
return new Pair<List<Genotype>, GenotypeMetaData>(null, null);
// are we above the lod threshold for emitting calls (and not in all-bases mode)?
if ( !ALL_BASE_MODE && lodConfidence < LOD_THRESHOLD ) {
return new Pair<List<Genotype>, GenotypeLocusData>(null, null);
}
// we can now create the genotype call object
@ -75,7 +74,14 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors());
}
return new Pair<List<Genotype>, GenotypeMetaData>(Arrays.asList(call), null);
GenotypeLocusData locusdata = GenotypeWriterFactory.createSupportedGenotypeLocusData(OUTPUT_FORMAT, ref, context.getLocation());
if ( locusdata != null ) {
if ( locusdata instanceof ConfidenceBacked ) {
((ConfidenceBacked)locusdata).setConfidence(lodConfidence);
}
}
return new Pair<List<Genotype>, GenotypeLocusData>(Arrays.asList(call), locusdata);
}
return super.calculateGenotype(tracker, ref, context, priors);

View File

@ -43,7 +43,7 @@ import org.broadinstitute.sting.utils.cmdLine.ArgumentCollection;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
import org.broadinstitute.sting.utils.genotype.GenotypeLocusData;
import java.io.File;
import java.util.HashSet;
@ -52,7 +52,7 @@ import java.util.ArrayList;
@ReadFilters({ZeroMappingQualityReadFilter.class})
public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeMetaData>, Integer> {
public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeLocusData>, Integer> {
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
@ -149,7 +149,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeM
* @param refContext the reference base
* @param context contextual information around the locus
*/
public Pair<List<Genotype>, GenotypeMetaData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
public Pair<List<Genotype>, GenotypeLocusData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
char ref = Character.toUpperCase(refContext.getBase());
if ( !BaseUtils.isRegularBase(ref) )
return null;
@ -211,7 +211,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeM
public Integer reduceInit() { return 0; }
public Integer reduce(Pair<List<Genotype>, GenotypeMetaData> value, Integer sum) {
public Integer reduce(Pair<List<Genotype>, GenotypeLocusData> value, Integer sum) {
// can't call the locus because of no coverage
if ( value == null )
return sum;

View File

@ -11,8 +11,6 @@ import org.broadinstitute.sting.utils.genotype.*;
import java.util.*;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.OutputStream;
import java.io.FileNotFoundException;
import net.sf.samtools.SAMRecord;
@ -374,7 +372,7 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
}
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
Pair<List<Genotype>, GenotypeMetaData> calls = ug.map(tracker,ref,context);
Pair<List<Genotype>, GenotypeLocusData> calls = ug.map(tracker,ref,context);
if (calls == null || calls.first == null)
return false;
return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase());

View File

@ -80,7 +80,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
Pair<List<Genotype>, GenotypeMetaData> calls = UG.map(tracker, ref, subContext);
Pair<List<Genotype>, GenotypeLocusData> calls = UG.map(tracker, ref, subContext);
if (calls != null && calls.first != null) {
Genotype call = calls.first.get(0);
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";

View File

@ -70,10 +70,10 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
}
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
Pair<List<Genotype>, GenotypeMetaData> parent1 = UG.map(tracker, ref, parent1_subContext);
Pair<List<Genotype>, GenotypeLocusData> parent1 = UG.map(tracker, ref, parent1_subContext);
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
Pair<List<Genotype>, GenotypeMetaData> parent2 = UG.map(tracker, ref, parent2_subContext);
Pair<List<Genotype>, GenotypeLocusData> parent2 = UG.map(tracker, ref, parent2_subContext);
if ( parent1 != null && parent1.first != null && parent2 != null && parent2.first != null ) {
Genotype parent1call = parent1.first.get(0);

View File

@ -257,7 +257,7 @@ public class ArtificialPoolContext {
public Genotype getGenotype(int group) {
AlignmentContext alicon = this.getAlignmentContext();
Pair<List<SAMRecord>[],List<Integer>[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets());
Pair<List<Genotype>, GenotypeMetaData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(),
Pair<List<Genotype>, GenotypeLocusData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(),
new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group]));
return (result.first == null ? null : result.first.get(0));
}

View File

@ -0,0 +1,24 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* Interface AlleleFrequencyBacked
*
* this interface indicates that the genotype is
* backed up by allele frequency information.
*/
public interface AlleleFrequencyBacked {
/**
*
* @return returns the allele frequency for this genotype
*/
public double getAlleleFrequency();
/**
*
* @param frequency the allele frequency for this genotype
*/
public void setAlleleFrequency(double frequency);
}

View File

@ -0,0 +1,24 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* Interface ConfidenceBacked
*
* this interface indicates that the genotype is
* backed up by sample information.
*/
public interface ConfidenceBacked {
/**
*
* @return returns the confidence for this genotype
*/
public double getConfidence();
/**
*
* @param confidence the confidence for this genotype
*/
public void setConfidence(double confidence);
}

View File

@ -0,0 +1,28 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
/**
* @author ebanks
* <p/>
* Class GenotypeLocusData
* <p/>
* represents the locus specific data associated with a genotype object.
*/
public interface GenotypeLocusData {
/**
* get the reference base.
* @return a character, representing the reference base
*/
public char getReference();
/**
* get the genotype's location
*
* @return a GenomeLoc representing the location
*/
public GenomeLoc getLocation();
}

View File

@ -1,61 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* <p/>
* Class GenotypeMetaData
* <p/>
* represents the meta data for a genotype object.
*/
public class GenotypeMetaData {
// the discovery lod score
private double mLOD;
// the strand score lod
private double mSLOD;
// the allele frequency
private double mAlleleFrequency;
/**
* create a basic genotype meta data pbject, given the following fields
*
* @param discoveryLOD the discovery lod
* @param strandLOD the strand score lod
* @param alleleFrequency the allele frequency
*/
public GenotypeMetaData(double discoveryLOD, double strandLOD, double alleleFrequency) {
mLOD = discoveryLOD;
mSLOD = strandLOD;
mAlleleFrequency = alleleFrequency;
}
/**
* get the discovery lod
*
* @return the discovery lod
*/
public double getLOD() {
return mLOD;
}
/**
* get the strand lod
*
* @return the strand lod
*/
public double getSLOD() {
return mSLOD;
}
/**
* get the allele frequency
*
* @return the allele frequency
*/
public double getAlleleFrequency() {
return mAlleleFrequency;
}
}

View File

@ -56,7 +56,7 @@ public interface GenotypeWriter {
* add a multi-sample call if we support it
* @param genotypes the list of genotypes, that are backed by sample information
*/
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeMetaData metadata);
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData metadata);
/**
* @return true if we support multisample, false otherwise

View File

@ -30,6 +30,9 @@ public class GenotypeWriterFactory {
* @param format the format
* @param header the sam file header
* @param destination the destination file
* @param source the source
* @param referenceName the ref name
* @param sampleNames the sample names
* @return the genotype writer object
*/
public static GenotypeWriter create(GENOTYPE_FORMAT format,
@ -73,6 +76,8 @@ public class GenotypeWriterFactory {
/**
* create a genotype call
* @param format the format
* @param ref the reference base
* @param loc the location
* @return an unpopulated genotype call object
*/
public static Genotype createSupportedCall(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) {
@ -88,4 +93,25 @@ public class GenotypeWriterFactory {
throw new StingException("Genotype format " + format.toString() + " is not implemented");
}
}
/**
* create a genotype locus data object
* @param format the format
* @param ref the reference base
* @param loc the location
* @return an unpopulated genotype locus data object
*/
public static GenotypeLocusData createSupportedGenotypeLocusData(GENOTYPE_FORMAT format, char ref, GenomeLoc loc) {
switch (format) {
case VCF:
return new VCFGenotypeLocusData(ref, loc);
case GELI:
case GELI_BINARY:
return null;
case GLF:
return null;
default:
throw new StingException("Genotype format " + format.toString() + " is not implemented");
}
}
}

View File

@ -0,0 +1,24 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author ebanks
* Interface SLODBacked
*
* this interface indicates that the genotype is
* backed up by strand lod information.
*/
public interface SLODBacked {
/**
*
* @return returns the strand lod for this genotype
*/
public double getSLOD();
/**
*
* @param slod the strand lod for this genotype
*/
public void setSLOD(double slod);
}

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.genotype;
/**
* @author aaron
* Interface PosteriorsBacked
* Interface SampleBacked
*
* this interface indicates that the genotype is
* backed up by sample information.

View File

@ -147,7 +147,7 @@ public class GeliAdapter implements GenotypeWriter {
*
* @param genotypes the list of genotypes
*/
public void addMultiSampleCall( List<Genotype> genotypes, GenotypeMetaData metadata) {
public void addMultiSampleCall( List<Genotype> genotypes, GenotypeLocusData metadata) {
throw new UnsupportedOperationException("Geli binary doesn't support multisample calls");
}

View File

@ -113,7 +113,7 @@ public class GeliTextWriter implements GenotypeWriter {
*
* @param genotypes the list of genotypes
*/
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeMetaData metadata) {
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData metadata) {
throw new UnsupportedOperationException("Geli text doesn't support multisample calls");
}

View File

@ -287,7 +287,7 @@ public class GLFWriter implements GenotypeWriter {
*
* @param genotypes the list of genotypes
*/
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeMetaData metadata) {
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData metadata) {
throw new UnsupportedOperationException("GLF writer doesn't support multisample calls");
}

View File

@ -104,7 +104,7 @@ public class TabularLFWriter implements GenotypeWriter {
* @param genotypes the list of genotypes, that are backed by sample information
*/
@Override
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeMetaData metadata) {
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData metadata) {
throw new UnsupportedOperationException("Tabular LF doesn't support multisample calls");
}

View File

@ -0,0 +1,108 @@
package org.broadinstitute.sting.utils.genotype.vcf;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.genotype.*;
/**
* @author ebanks
* <p/>
* Class VCFGenotypeLocusData
* <p/>
* represents the meta data for a genotype object.
*/
public class VCFGenotypeLocusData implements GenotypeLocusData, ConfidenceBacked, SLODBacked, AlleleFrequencyBacked {
// the discovery lod score
private double mConfidence = 0.0;
// the strand score lod
private double mSLOD = 0.0;
// the allele frequency
private double mAlleleFrequency = 0.0;
// the location
private GenomeLoc mLoc;
// the ref base
private char mRefBase;
/**
* create a basic genotype meta data pbject, given the following fields
*
* @param ref the reference base
* @param loc the locus
*/
public VCFGenotypeLocusData(char ref, GenomeLoc loc) {
mRefBase = ref;
mLoc = loc;
}
/**
* get the reference base.
* @return a character, representing the reference base
*/
public char getReference() {
return mRefBase;
}
/**
* get the genotype's location
*
* @return a GenomeLoc representing the location
*/
public GenomeLoc getLocation() {
return mLoc;
}
/**
* get the confidence
*
* @return the confidence
*/
public double getConfidence() {
return mConfidence;
}
/**
*
* @param confidence the confidence for this genotype
*/
public void setConfidence(double confidence) {
mConfidence = confidence;
}
/**
* get the strand lod
*
* @return the strand lod
*/
public double getSLOD() {
return mSLOD;
}
/**
*
* @param slod the strand lod for this genotype
*/
public void setSLOD(double slod) {
mSLOD = slod;
}
/**
* get the allele frequency
*
* @return the allele frequency
*/
public double getAlleleFrequency() {
return mAlleleFrequency;
}
/**
*
* @param frequency the allele frequency for this genotype
*/
public void setAlleleFrequency(double frequency) {
mAlleleFrequency = frequency;
}
}

View File

@ -95,7 +95,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
* @param call the locus to add
*/
public void addGenotypeCall(Genotype call) {
throw new UnsupportedOperationException("VCF calls require metadata; use the addMultiSampleCall method instead");
throw new UnsupportedOperationException("VCF calls require locusdata; use the addMultiSampleCall method instead");
}
/**
@ -118,10 +118,12 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
*
* @param genotypes the list of genotypes
*/
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeMetaData metadata) {
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeLocusData locusdata) {
if (!mInitialized)
lazyInitialize(mFile, mStream);
if ( locusdata != null && !(locusdata instanceof VCFGenotypeLocusData) )
throw new IllegalArgumentException("Only VCFGenotypeLocusData objects should be passed in to the VCF writers");
VCFParameters params = new VCFParameters();
params.addFormatItem("GT");
@ -154,9 +156,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
throw new IllegalArgumentException("Genotype array passed to VCFGenotypeWriterAdapter contained Genotypes not in the VCF header");
}
Map<String, String> infoFields = getInfoFields(metadata, params);
Map<String, String> infoFields = getInfoFields((VCFGenotypeLocusData)locusdata, params);
double qual = (metadata == null) ? 0 : metadata.getLOD();
double qual = (locusdata == null) ? 0 : ((VCFGenotypeLocusData)locusdata).getConfidence();
// maintain 0-99 based Q-scores
qual = Math.min(qual, 99);
qual = Math.max(qual, 0);
@ -178,16 +180,16 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
/**
* get the information fields of the VCF record, given the meta data and parameters
*
* @param metadata the metadata associated with this multi sample call
* @param locusdata the metadata associated with this multi sample call
* @param params the parameters
*
* @return a mapping of info field to value
*/
private Map<String, String> getInfoFields(GenotypeMetaData metadata, VCFParameters params) {
private Map<String, String> getInfoFields(VCFGenotypeLocusData locusdata, VCFParameters params) {
Map<String, String> infoFields = new HashMap<String, String>();
if (metadata != null) {
infoFields.put("SB", String.format("%.2f", metadata.getSLOD()));
infoFields.put("AF", String.format("%.2f", metadata.getAlleleFrequency()));
if ( locusdata != null ) {
infoFields.put("SB", String.format("%.2f", locusdata.getSLOD()));
infoFields.put("AF", String.format("%.2f", locusdata.getAlleleFrequency()));
}
infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size()));
return infoFields;