Sweeping changes to the genotype output system, as per several discussions with Matt & Aaron.
Some things still need to be changed, but it will entail some more design decisions first (which means I get to bug M&A again tomorrow!). git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1930 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
86573177d1
commit
3091443dc7
|
|
@ -18,7 +18,7 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
|
||||
protected EMGenotypeCalculationModel() {}
|
||||
|
||||
public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
|
||||
// keep track of the context for each sample, overall and separated by strand
|
||||
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
|
||||
|
|
@ -42,26 +42,42 @@ public abstract class EMGenotypeCalculationModel extends GenotypeCalculationMode
|
|||
|
||||
// generate the calls
|
||||
GenotypeMetaData metadata = new GenotypeMetaData(lod, strandScore, overall.getMAF());
|
||||
return new Pair<List<GenotypeCall>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
|
||||
return new Pair<List<Genotype>, GenotypeMetaData>(genotypeCallsFromGenotypeLikelihoods(overall, ref, contexts), metadata);
|
||||
}
|
||||
|
||||
protected List<GenotypeCall> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {
|
||||
protected List<Genotype> genotypeCallsFromGenotypeLikelihoods(EMOutput results, char ref, HashMap<String, AlignmentContextBySample> contexts) {
|
||||
HashMap<String, GenotypeLikelihoods> GLs = results.getGenotypeLikelihoods();
|
||||
|
||||
ArrayList<GenotypeCall> calls = new ArrayList<GenotypeCall>();
|
||||
ArrayList<Genotype> calls = new ArrayList<Genotype>();
|
||||
int variantCalls = 0;
|
||||
|
||||
for ( String sample : GLs.keySet() ) {
|
||||
// get the pileup
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
pileup.setIncludeDeletionsInPileupString(true);
|
||||
|
||||
// create the call
|
||||
GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, GLs.get(sample), pileup);
|
||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT);
|
||||
call.setReference(ref);
|
||||
|
||||
// get the context
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
|
||||
call.setLocation(context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(context.getReads());
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
((SampleBacked)call).setSampleName(sample);
|
||||
}
|
||||
if ( call instanceof LikelihoodsBacked ) {
|
||||
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
|
||||
}
|
||||
if ( call instanceof PosteriorsBacked ) {
|
||||
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
|
||||
}
|
||||
|
||||
calls.add(call);
|
||||
|
||||
// increment the variant count if it's non-ref
|
||||
if ( call.isVariant() )
|
||||
if ( call.isVariant(ref) )
|
||||
variantCalls++;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,9 +2,9 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
|
|
@ -30,6 +30,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
protected Logger logger;
|
||||
protected double heterozygosity;
|
||||
protected EmpiricalSubstitutionGenotypeLikelihoods.SequencerPlatform defaultPlatform;
|
||||
protected GenotypeWriterFactory.GENOTYPE_FORMAT OUTPUT_FORMAT;
|
||||
protected boolean ALL_BASE_MODE;
|
||||
protected boolean GENOTYPE_MODE;
|
||||
protected boolean POOLED_INPUT;
|
||||
|
|
@ -62,6 +63,7 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
baseModel = UAC.baseModel;
|
||||
heterozygosity = UAC.heterozygosity;
|
||||
defaultPlatform = UAC.defaultPlatform;
|
||||
OUTPUT_FORMAT = UAC.VAR_FORMAT;
|
||||
ALL_BASE_MODE = UAC.ALL_BASES;
|
||||
GENOTYPE_MODE = UAC.GENOTYPE;
|
||||
POOLED_INPUT = UAC.POOLED;
|
||||
|
|
@ -80,12 +82,6 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
}
|
||||
}
|
||||
|
||||
public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) {
|
||||
// just close and re-initialize
|
||||
close();
|
||||
initialize(this.samples, this.logger, UAC);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
if ( verboseWriter != null )
|
||||
verboseWriter.close();
|
||||
|
|
@ -100,10 +96,10 @@ public abstract class GenotypeCalculationModel implements Cloneable {
|
|||
*
|
||||
* @return calls
|
||||
*/
|
||||
public abstract Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker,
|
||||
char ref,
|
||||
AlignmentContext context,
|
||||
DiploidGenotypePriors priors);
|
||||
public abstract Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker,
|
||||
char ref,
|
||||
AlignmentContext context,
|
||||
DiploidGenotypePriors priors);
|
||||
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,302 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeCall
|
||||
* <p/>
|
||||
* The implementation of the genotype interface, which contains
|
||||
* extra information for the various genotype outputs
|
||||
*/
|
||||
public class GenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked, PosteriorsBacked, SampleBacked {
|
||||
private final char mRefBase;
|
||||
private final GenotypeLikelihoods mGenotypeLikelihoods;
|
||||
|
||||
// the next two values are filled in on-demand. Default to -1 since they can never be negitive
|
||||
private final GenomeLoc mLocation;
|
||||
private final ReadBackedPileup mPileup;
|
||||
|
||||
// if this is null, we were constructed with the intention that we'd represent the best genotype
|
||||
private DiploidGenotype mGenotype = null;
|
||||
|
||||
// the reference genotype and the next best genotype, lazy loaded
|
||||
private DiploidGenotype mRefGenotype = null;
|
||||
private DiploidGenotype mNextGenotype = null;
|
||||
|
||||
// the sample name, used to propulate the SampleBacked interface
|
||||
private String mSampleName;
|
||||
|
||||
/**
|
||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||
*
|
||||
* @param sampleName the sample name
|
||||
* @param location the location we're working with
|
||||
* @param refBase the ref base
|
||||
* @param gtlh the genotype likelihoods object
|
||||
* @param pileup the pile-up of reads at the specified locus
|
||||
*/
|
||||
public GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup) {
|
||||
mSampleName = sampleName;
|
||||
mRefBase = Character.toUpperCase(refBase);
|
||||
mGenotypeLikelihoods = gtlh;
|
||||
mLocation = location;
|
||||
mPileup = pileup;
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||
*
|
||||
* @param sampleName the sample name
|
||||
* @param location the location we're working with
|
||||
* @param refBase the ref base
|
||||
* @param gtlh the genotype likelihoods object
|
||||
* @param pileup the pile-up of reads at the specified locus
|
||||
*/
|
||||
GenotypeCall(String sampleName, GenomeLoc location, char refBase, GenotypeLikelihoods gtlh, ReadBackedPileup pileup, DiploidGenotype genotype) {
|
||||
mSampleName = sampleName;
|
||||
mRefBase = Character.toUpperCase(refBase);
|
||||
mGenotypeLikelihoods = gtlh;
|
||||
mLocation = location;
|
||||
mGenotype = genotype;
|
||||
mPileup = pileup;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
lazyEval();
|
||||
|
||||
if (other == null)
|
||||
return false;
|
||||
if (other instanceof GenotypeCall) {
|
||||
GenotypeCall otherCall = (GenotypeCall) other;
|
||||
|
||||
if (!this.mGenotypeLikelihoods.equals(otherCall.mGenotypeLikelihoods)) {
|
||||
return false;
|
||||
}
|
||||
if (!this.mGenotype.equals(otherCall.mGenotype))
|
||||
return false;
|
||||
return (this.mRefBase == otherCall.mRefBase) &&
|
||||
this.mPileup.equals(mPileup);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
lazyEval();
|
||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError = %.2f, likelihoods=%s",
|
||||
getLocation(), mGenotype, mRefGenotype, mRefBase, mPileup.getReads().size(),
|
||||
getNegLog10PError(), Arrays.toString(mGenotypeLikelihoods.getLikelihoods()));
|
||||
}
|
||||
|
||||
private void lazyEval() {
|
||||
// us
|
||||
if (mGenotype == null) {
|
||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||
mGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
}
|
||||
|
||||
// our comparison
|
||||
if (mRefGenotype == null) {
|
||||
mRefGenotype = DiploidGenotype.valueOf(Utils.dupString(this.getReference(), 2));
|
||||
}
|
||||
if (mNextGenotype == null) {
|
||||
Integer sorted[] = Utils.SortPermutation(mGenotypeLikelihoods.getPosteriors());
|
||||
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the confidence we have
|
||||
*
|
||||
* @return get the one minus error value
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getNextBest()));
|
||||
}
|
||||
|
||||
/** get the best genotype */
|
||||
private DiploidGenotype getBestGenotype() {
|
||||
lazyEval();
|
||||
return mGenotype;
|
||||
}
|
||||
|
||||
/** get the alternate genotype */
|
||||
private DiploidGenotype getNextBest() {
|
||||
lazyEval();
|
||||
return mNextGenotype;
|
||||
}
|
||||
|
||||
/** get the alternate genotype */
|
||||
private DiploidGenotype getRefGenotype() {
|
||||
lazyEval();
|
||||
return mRefGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
public String getBases() {
|
||||
return getBestGenotype().toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy
|
||||
*
|
||||
* @return the ploidy value
|
||||
*/
|
||||
public int getPloidy() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
public boolean isHom() {
|
||||
return getBestGenotype().isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
public boolean isHet() {
|
||||
return !isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return this.mLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
* @return true is a SNP
|
||||
*/
|
||||
public boolean isPointGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant(char ref) {
|
||||
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a variant? (non-ref)
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant() {
|
||||
return isVariant(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Variation toVariation() {
|
||||
double bestRef = Math.abs(mGenotypeLikelihoods.getPosterior(getBestGenotype()) - mGenotypeLikelihoods.getPosterior(getRefGenotype()));
|
||||
return new BasicVariation(this.getBases(), String.valueOf(this.getReference()), 0, this.mLocation, bestRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* return the likelihoods as a double array, in lexographic order
|
||||
*
|
||||
* @return the likelihoods
|
||||
*/
|
||||
public double[] getProbabilities() {
|
||||
return this.mGenotypeLikelihoods.getPosteriors();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the SAM records that back this genotype call
|
||||
*
|
||||
* @return a list of SAM records
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
return this.mPileup.getReads();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the count of reads
|
||||
*
|
||||
* @return the number of reads we're backed by
|
||||
*/
|
||||
public int getReadCount() {
|
||||
return this.mPileup.getReads().size();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference character
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public char getReference() {
|
||||
return this.mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the likelihood information for this call
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double[] getLikelihoods() {
|
||||
return this.mGenotypeLikelihoods.getLikelihoods();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the posteriors information for this call
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double[] getPosteriors() {
|
||||
return this.mGenotypeLikelihoods.getPosteriors();
|
||||
}
|
||||
|
||||
/** @return returns the sample name for this genotype */
|
||||
public String getSampleName() {
|
||||
return this.mSampleName;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the filtering string for this genotype
|
||||
*
|
||||
* @return a string, representing the genotyping value
|
||||
*/
|
||||
public String getFilteringValue() {
|
||||
return "0";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
private enum GenotypeType { REF, HET, HOM }
|
||||
|
||||
|
||||
public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
|
||||
// keep track of the context for each sample, overall and separated by strand
|
||||
HashMap<String, AlignmentContextBySample> contexts = splitContextBySample(context);
|
||||
|
|
@ -292,7 +292,7 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
verboseWriter.println();
|
||||
}
|
||||
|
||||
private Pair<List<GenotypeCall>, GenotypeMetaData> createCalls(char ref, HashMap<String, AlignmentContextBySample> contexts) {
|
||||
private Pair<List<Genotype>, GenotypeMetaData> createCalls(char ref, HashMap<String, AlignmentContextBySample> contexts) {
|
||||
// first, find the alt allele with maximum confidence
|
||||
int indexOfMax = 0;
|
||||
char baseOfMax = ref;
|
||||
|
|
@ -308,9 +308,9 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
}
|
||||
}
|
||||
double phredScaledConfidence = -10.0 * Math.log10(alleleFrequencyPosteriors[indexOfMax][0]);
|
||||
double bestAFguess = (double)findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
|
||||
double bestAFguess = findMaxEntry(alleleFrequencyPosteriors[indexOfMax]).second / (double)(frequencyEstimationPoints-1);
|
||||
|
||||
ArrayList<GenotypeCall> calls = new ArrayList<GenotypeCall>();
|
||||
ArrayList<Genotype> calls = new ArrayList<Genotype>();
|
||||
// TODO -- generate strand score
|
||||
double strandScore = 0.0;
|
||||
GenotypeMetaData metadata = new GenotypeMetaData(phredScaledConfidence, strandScore, bestAFguess);
|
||||
|
|
@ -319,21 +319,31 @@ public class JointEstimateGenotypeCalculationModel extends GenotypeCalculationMo
|
|||
if ( phredScaledConfidence >= CONFIDENCE_THRESHOLD || ALL_BASE_MODE ) {
|
||||
|
||||
for ( String sample : GLs.keySet() ) {
|
||||
// get the pileup
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
pileup.setIncludeDeletionsInPileupString(true);
|
||||
// TODO -- fix GenotypeCall code so that each call doesn't need its own pileup
|
||||
|
||||
// create the call
|
||||
GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, GLs.get(sample), pileup);
|
||||
calls.add(call);
|
||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT);
|
||||
call.setReference(ref);
|
||||
|
||||
// TODO -- fix GenotypeCall code so that UG tells it which genotypes to use
|
||||
// TODO -- all of the intelligence for making calls should be in UG
|
||||
AlignmentContext context = contexts.get(sample).getContext(StratifiedContext.OVERALL);
|
||||
call.setLocation(context.getLocation());
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(context.getReads());
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
((SampleBacked)call).setSampleName(sample);
|
||||
}
|
||||
if ( call instanceof LikelihoodsBacked ) {
|
||||
((LikelihoodsBacked)call).setLikelihoods(GLs.get(sample).getLikelihoods());
|
||||
}
|
||||
if ( call instanceof PosteriorsBacked ) {
|
||||
((PosteriorsBacked)call).setPosteriors(GLs.get(sample).getPosteriors());
|
||||
}
|
||||
|
||||
calls.add(call);
|
||||
}
|
||||
}
|
||||
|
||||
return new Pair<List<GenotypeCall>, GenotypeMetaData>(calls, metadata);
|
||||
return new Pair<List<Genotype>, GenotypeMetaData>(calls, metadata);
|
||||
}
|
||||
}
|
||||
|
|
@ -3,8 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -25,7 +24,7 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
|
||||
|
||||
// overload this method so we can special-case the single sample
|
||||
public Pair<List<GenotypeCall>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
public Pair<List<Genotype>, GenotypeMetaData> calculateGenotype(RefMetaDataTracker tracker, char ref, AlignmentContext context, DiploidGenotypePriors priors) {
|
||||
|
||||
// we don't actually want to run EM for single samples
|
||||
if ( samples.size() == 1 ) {
|
||||
|
|
@ -44,9 +43,27 @@ public class PointEstimateGenotypeCalculationModel extends EMGenotypeCalculation
|
|||
return null;
|
||||
|
||||
// create the genotype call object
|
||||
// create the call
|
||||
Genotype call = GenotypeWriterFactory.createSupportedCall(OUTPUT_FORMAT);
|
||||
call.setReference(ref);
|
||||
call.setLocation(context.getLocation());
|
||||
|
||||
Pair<ReadBackedPileup, GenotypeLikelihoods> discoveryGL = getSingleSampleLikelihoods(ref, sampleContext, priors, StratifiedContext.OVERALL);
|
||||
GenotypeCall call = new GenotypeCall(sample, context.getLocation(), ref, discoveryGL.second, discoveryGL.first);
|
||||
return new Pair<List<GenotypeCall>, GenotypeMetaData>(Arrays.asList(call), null);
|
||||
|
||||
if ( call instanceof ReadBacked ) {
|
||||
((ReadBacked)call).setReads(discoveryGL.first.getReads());
|
||||
}
|
||||
if ( call instanceof SampleBacked ) {
|
||||
((SampleBacked)call).setSampleName(sample);
|
||||
}
|
||||
if ( call instanceof LikelihoodsBacked ) {
|
||||
((LikelihoodsBacked)call).setLikelihoods(discoveryGL.second.getLikelihoods());
|
||||
}
|
||||
if ( call instanceof PosteriorsBacked ) {
|
||||
((PosteriorsBacked)call).setPosteriors(discoveryGL.second.getPosteriors());
|
||||
}
|
||||
|
||||
return new Pair<List<Genotype>, GenotypeMetaData>(Arrays.asList(call), null);
|
||||
}
|
||||
|
||||
return super.calculateGenotype(tracker, ref, context, priors);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
|
||||
|
||||
public class UnifiedArgumentCollection {
|
||||
|
|
@ -48,6 +49,9 @@ public class UnifiedArgumentCollection {
|
|||
|
||||
|
||||
// control the output
|
||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "Format to be used to represent variants; default is VCF", required = false)
|
||||
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||
|
||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confident genotypes or just the variants?", required = false)
|
||||
public boolean GENOTYPE = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ import java.util.ArrayList;
|
|||
|
||||
|
||||
@ReadFilters({ZeroMappingQualityReadFilter.class})
|
||||
public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, GenotypeMetaData>, Integer> {
|
||||
public class UnifiedGenotyper extends LocusWalker<Pair<List<Genotype>, GenotypeMetaData>, Integer> {
|
||||
|
||||
@ArgumentCollection private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
|
||||
|
||||
|
|
@ -60,9 +60,6 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
|||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = false)
|
||||
public File VARIANTS_FILE = null;
|
||||
|
||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File format to be used; default is VCF", required = false)
|
||||
public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.VCF;
|
||||
|
||||
|
||||
// the model used for calculating genotypes
|
||||
private GenotypeCalculationModel gcm;
|
||||
|
|
@ -88,9 +85,13 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
|||
* To be used with walkers that call the UnifiedGenotyper's map function
|
||||
* and consequently can't set these arguments on the command-line
|
||||
*
|
||||
* @param UAC the UnifiedArgumentCollection
|
||||
*
|
||||
**/
|
||||
public void setUnifiedArgumentCollection(UnifiedArgumentCollection UAC) {
|
||||
gcm.setUnifiedArgumentCollection(UAC);
|
||||
gcm.close();
|
||||
this.UAC = UAC;
|
||||
initialize();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -130,12 +131,12 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
|||
|
||||
// create the output writer stream
|
||||
if ( VARIANTS_FILE != null )
|
||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
||||
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE,
|
||||
"UnifiedGenotyper",
|
||||
this.getToolkit().getArguments().referenceFile.getName(),
|
||||
samples);
|
||||
else
|
||||
writer = GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
|
||||
writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper",
|
||||
this.getToolkit().getArguments().referenceFile.getName(),
|
||||
samples);
|
||||
callsMetrics = new CallMetrics();
|
||||
|
|
@ -148,7 +149,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
|||
* @param refContext the reference base
|
||||
* @param context contextual information around the locus
|
||||
*/
|
||||
public Pair<List<GenotypeCall>, GenotypeMetaData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||
public Pair<List<Genotype>, GenotypeMetaData> map(RefMetaDataTracker tracker, ReferenceContext refContext, AlignmentContext context) {
|
||||
char ref = Character.toUpperCase(refContext.getBase());
|
||||
if ( !BaseUtils.isRegularBase(ref) )
|
||||
return null;
|
||||
|
|
@ -210,7 +211,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
|||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Pair<List<GenotypeCall>, GenotypeMetaData> value, Integer sum) {
|
||||
public Integer reduce(Pair<List<Genotype>, GenotypeMetaData> value, Integer sum) {
|
||||
if ( value == null || value.first == null )
|
||||
return sum;
|
||||
|
||||
|
|
@ -221,7 +222,7 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
|||
|
||||
// special-case for single-sample using PointEstimate model
|
||||
if ( value.second == null ) {
|
||||
GenotypeCall call = value.first.get(0);
|
||||
Genotype call = value.first.get(0);
|
||||
if ( UAC.GENOTYPE || call.isVariant(call.getReference()) ) {
|
||||
double confidence = (UAC.GENOTYPE ? call.getNegLog10PError() : call.toVariation().getNegLog10PError());
|
||||
if ( confidence >= UAC.LOD_THRESHOLD ) {
|
||||
|
|
@ -236,13 +237,12 @@ public class UnifiedGenotyper extends LocusWalker<Pair<List<GenotypeCall>, Genot
|
|||
// use multi-sample mode if we have multiple samples or the output type allows it
|
||||
else if ( writer.supportsMultiSample() || samples.size() > 1 ) {
|
||||
|
||||
// annoying hack to get around Java generics
|
||||
ArrayList<Genotype> callList = new ArrayList<Genotype>();
|
||||
for ( GenotypeCall call : value.first )
|
||||
callList.add(call);
|
||||
|
||||
callsMetrics.nConfidentCalls++;
|
||||
writer.addMultiSampleCall(callList, value.second);
|
||||
if ( UAC.CONFIDENCE_THRESHOLD <= value.second.getLOD() && UAC.LOD_THRESHOLD <= value.second.getLOD() ) {
|
||||
callsMetrics.nConfidentCalls++;
|
||||
writer.addMultiSampleCall(value.first, value.second);
|
||||
} else {
|
||||
callsMetrics.nNonConfidentCalls++;
|
||||
}
|
||||
}
|
||||
|
||||
// otherwise, use single sample mode
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -373,10 +373,10 @@ public class BaseTransitionTableCalculatorJavaWalker extends LocusWalker<Set<Bas
|
|||
}
|
||||
|
||||
public boolean baseIsConfidentRef( RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context ) {
|
||||
Pair<List<GenotypeCall>, GenotypeMetaData> calls = ug.map(tracker,ref,context);
|
||||
Pair<List<Genotype>, GenotypeMetaData> calls = ug.map(tracker,ref,context);
|
||||
if (calls == null)
|
||||
return false;
|
||||
return (! calls.first.get(0).isVariant()) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase());
|
||||
return (! calls.first.get(0).isVariant(ref.getBase())) && calls.first.get(0).getNegLog10PError() > confidentRefThreshold && BaseUtils.isRegularBase(ref.getBase());
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.RodGenotypeChipAsGFF;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -81,9 +80,9 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
|
||||
|
||||
AlignmentContext subContext = new AlignmentContext(context.getLocation(), sub_reads, sub_offsets);
|
||||
Pair<List<GenotypeCall>, GenotypeMetaData> calls = UG.map(tracker, ref, subContext);
|
||||
Pair<List<Genotype>, GenotypeMetaData> calls = UG.map(tracker, ref, subContext);
|
||||
if (calls != null && calls.first != null) {
|
||||
GenotypeCall call = calls.first.get(0);
|
||||
Genotype call = calls.first.get(0);
|
||||
String callType = (call.isVariant(call.getReference())) ? ((call.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
||||
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,10 +7,8 @@ import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariationRod;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeCall;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper;
|
||||
import org.broadinstitute.sting.utils.genotype.Variation;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeMetaData;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -72,14 +70,14 @@ public class DeNovoSNPWalker extends RefWalker<String, Integer>{
|
|||
}
|
||||
|
||||
AlignmentContext parent1_subContext = new AlignmentContext(context.getLocation(), parent1_reads, parent1_offsets);
|
||||
Pair<List<GenotypeCall>, GenotypeMetaData> parent1 = UG.map(tracker, ref, parent1_subContext);
|
||||
Pair<List<Genotype>, GenotypeMetaData> parent1 = UG.map(tracker, ref, parent1_subContext);
|
||||
|
||||
AlignmentContext parent2_subContext = new AlignmentContext(context.getLocation(), parent2_reads, parent2_offsets);
|
||||
Pair<List<GenotypeCall>, GenotypeMetaData> parent2 = UG.map(tracker, ref, parent2_subContext);
|
||||
Pair<List<Genotype>, GenotypeMetaData> parent2 = UG.map(tracker, ref, parent2_subContext);
|
||||
|
||||
if ( parent1 != null && parent1.first != null && parent2 != null && parent2.first != null ) {
|
||||
GenotypeCall parent1call = parent1.first.get(0);
|
||||
GenotypeCall parent2call = parent2.first.get(0);
|
||||
Genotype parent1call = parent1.first.get(0);
|
||||
Genotype parent2call = parent2.first.get(0);
|
||||
|
||||
if (!parent1call.isVariant(parent1call.getReference()) &&
|
||||
parent1call.getNegLog10PError() > 5 &&
|
||||
|
|
|
|||
|
|
@ -257,7 +257,7 @@ public class ArtificialPoolContext {
|
|||
public Genotype getGenotype(int group) {
|
||||
AlignmentContext alicon = this.getAlignmentContext();
|
||||
Pair<List<SAMRecord>[],List<Integer>[]> byGroupSplitPair = this.splitByGroup(alicon.getReads(),alicon.getOffsets());
|
||||
Pair<List<GenotypeCall>, GenotypeMetaData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(),
|
||||
Pair<List<Genotype>, GenotypeMetaData> result = ug.map(this.getRefMetaDataTracker(),this.getReferenceContext(),
|
||||
new AlignmentContext(this.getAlignmentContext().getLocation(), byGroupSplitPair.first[group],byGroupSplitPair.second[group]));
|
||||
return (result.first == null ? null : result.first.get(0));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -113,6 +113,11 @@ public class BasicGenotype implements Genotype {
|
|||
return mLocation;
|
||||
}
|
||||
|
||||
// set the location
|
||||
public void setLocation(GenomeLoc loc) {
|
||||
mLocation = loc;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
|
|
@ -145,6 +150,11 @@ public class BasicGenotype implements Genotype {
|
|||
return mRef;
|
||||
}
|
||||
|
||||
// set the reference base
|
||||
public void setReference(char refBase) {
|
||||
mRef = Character.toUpperCase(refBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
*
|
||||
|
|
|
|||
|
|
@ -52,6 +52,12 @@ public interface Genotype {
|
|||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* set the location.
|
||||
* @param loc the location
|
||||
*/
|
||||
public void setLocation(GenomeLoc loc);
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
|
|
@ -74,6 +80,12 @@ public interface Genotype {
|
|||
*/
|
||||
public char getReference();
|
||||
|
||||
/**
|
||||
* set the reference base.
|
||||
* @param ref the ref base
|
||||
*/
|
||||
public void setReference(char ref);
|
||||
|
||||
/**
|
||||
* return this genotype as a variant
|
||||
*
|
||||
|
|
|
|||
|
|
@ -2,14 +2,12 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriterAdapter;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.*;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.*;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
|
|
@ -70,4 +68,23 @@ public class GenotypeWriterFactory {
|
|||
throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype call
|
||||
* @param format the format
|
||||
* @return an unpopulated genotype call object
|
||||
*/
|
||||
public static Genotype createSupportedCall(GENOTYPE_FORMAT format) {
|
||||
switch (format) {
|
||||
case VCF:
|
||||
return new VCFGenotypeCall();
|
||||
case GELI:
|
||||
case GELI_BINARY:
|
||||
return new GeliGenotypeCall();
|
||||
case GLF:
|
||||
return new GLFGenotypeCall();
|
||||
default:
|
||||
throw new StingException("Genotype format " + format.toString() + " is not implemented");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,8 +10,15 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
public interface LikelihoodsBacked {
|
||||
|
||||
/**
|
||||
* get the likelihood information for this
|
||||
* @return
|
||||
*
|
||||
* @return the likelihood information for this genotype
|
||||
*/
|
||||
public double[] getLikelihoods();
|
||||
|
||||
/**
|
||||
*
|
||||
* @param likelihoods the likelihoods for this genotype
|
||||
*/
|
||||
public void setLikelihoods(double[] likelihoods);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,17 +2,24 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface PosteriorsBacked
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
* Interface PosteriorsBacked
|
||||
*
|
||||
* this interface indicates that the genotype is
|
||||
* backed up by genotype posterior information.
|
||||
*/
|
||||
public interface PosteriorsBacked {
|
||||
|
||||
/**
|
||||
* get the likelihood information for this
|
||||
* @return
|
||||
*
|
||||
* @return the likelihood information for this genotype
|
||||
*/
|
||||
public double[] getPosteriors();
|
||||
|
||||
/**
|
||||
*
|
||||
* @param posteriors the posteriors for this genotype
|
||||
*/
|
||||
public void setPosteriors(double[] posteriors);
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,6 +18,12 @@ public interface ReadBacked {
|
|||
*/
|
||||
public List<SAMRecord> getReads();
|
||||
|
||||
/**
|
||||
*
|
||||
* @param reads the reads for this genotype
|
||||
*/
|
||||
public void setReads(List<SAMRecord> reads);
|
||||
|
||||
/**
|
||||
* get the count of reads
|
||||
* @return the number of reads we're backed by
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface SampleBacked
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
* Interface PosteriorsBacked
|
||||
*
|
||||
* this interface indicates that the genotype is
|
||||
* backed up by sample information.
|
||||
*/
|
||||
public interface SampleBacked {
|
||||
|
||||
|
|
@ -16,8 +16,9 @@ public interface SampleBacked {
|
|||
public String getSampleName();
|
||||
|
||||
/**
|
||||
* get the filtering string for this genotype
|
||||
* @return a string, representing the genotyping value
|
||||
*
|
||||
* @param name the sample name for this genotype
|
||||
*/
|
||||
public String getFilteringValue();
|
||||
public void setSampleName(String name);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
|
|||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
|
|
@ -79,7 +78,7 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
GenotypeLikelihoods lk = likelihoods.convertToGenotypeLikelihoods(writer.getFileHeader(), contig.getSequenceIndex(), position, (byte) referenceBase);
|
||||
lk.setNumReads(readCount);
|
||||
|
||||
lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? (short)Short.MAX_VALUE : (short)Math.round(maxMappingQuality));
|
||||
lk.setMaxMappingQuality(maxMappingQuality > Short.MAX_VALUE ? Short.MAX_VALUE : (short)Math.round(maxMappingQuality));
|
||||
writer.addGenotypeLikelihoods(lk);
|
||||
}
|
||||
|
||||
|
|
@ -100,37 +99,27 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
}
|
||||
|
||||
/**
|
||||
* Add a genotype, given a genotype locus
|
||||
* Add a genotype, given a genotype call
|
||||
*
|
||||
* @param locus the locus to add
|
||||
* @param call the call to add
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(Genotype locus) {
|
||||
double posteriors[];
|
||||
int readDepth = -1;
|
||||
double nextVrsBest = 0;
|
||||
double nextVrsRef = 0;
|
||||
if (!(locus instanceof PosteriorsBacked)) {
|
||||
posteriors = new double[10];
|
||||
Arrays.fill(posteriors, Double.MIN_VALUE);
|
||||
} else {
|
||||
posteriors = ((PosteriorsBacked) locus).getPosteriors();
|
||||
public void addGenotypeCall(Genotype call) {
|
||||
if ( !(call instanceof GeliGenotypeCall) )
|
||||
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
|
||||
GeliGenotypeCall gCall = (GeliGenotypeCall)call;
|
||||
|
||||
}
|
||||
char ref = locus.getReference();
|
||||
int readCount = 0;
|
||||
char ref = gCall.getReference();
|
||||
List<SAMRecord> recs = gCall.getReads();
|
||||
int readCount = recs.size();
|
||||
double maxMappingQual = 0;
|
||||
if (locus instanceof ReadBacked) {
|
||||
List<SAMRecord> recs = ((ReadBacked)locus).getReads();
|
||||
readCount = recs.size();
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
}
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
}
|
||||
|
||||
double[] posteriors = gCall.getPosteriors();
|
||||
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),
|
||||
(int)locus.getLocation().getStart(),
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()),
|
||||
(int)gCall.getLocation().getStart(),
|
||||
ref,
|
||||
maxMappingQual,
|
||||
readCount,
|
||||
|
|
@ -142,13 +131,11 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
*
|
||||
* @param position
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position) {
|
||||
throw new UnsupportedOperationException("Geli format does not support no-calls");
|
||||
}
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
@Override
|
||||
public void close() {
|
||||
if (this.writer != null) {
|
||||
this.writer.close();
|
||||
|
|
@ -158,15 +145,13 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
/**
|
||||
* add a multi-sample call if we support it
|
||||
*
|
||||
* @param genotypes the list of genotypes, that are backed by sample information
|
||||
* @param genotypes the list of genotypes
|
||||
*/
|
||||
@Override
|
||||
public void addMultiSampleCall( List<Genotype> genotypes, GenotypeMetaData metadata) {
|
||||
throw new UnsupportedOperationException("Geli binary doesn't support multisample calls");
|
||||
}
|
||||
|
||||
/** @return true if we support multisample, false otherwise */
|
||||
@Override
|
||||
public boolean supportsMultiSample() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,240 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class GeliGenotypeCall
|
||||
* <p/>
|
||||
* The implementation of the genotype interface, specific to Geli
|
||||
*/
|
||||
public class GeliGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked {
|
||||
private char mRefBase;
|
||||
private GenomeLoc mLocation;
|
||||
|
||||
private List<SAMRecord> mReads;
|
||||
private double[] mPosteriors;
|
||||
|
||||
|
||||
// the reference genotype, the best genotype, and the next best genotype, lazy loaded
|
||||
private DiploidGenotype mRefGenotype = null;
|
||||
private DiploidGenotype mBestGenotype = null;
|
||||
private DiploidGenotype mNextGenotype = null;
|
||||
|
||||
/**
|
||||
* Generate a single sample genotype object
|
||||
*
|
||||
*/
|
||||
public GeliGenotypeCall() {
|
||||
// fill in empty data
|
||||
mRefBase = 'N';
|
||||
mPosteriors = new double[10];
|
||||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public void setReference(char refBase) {
|
||||
mRefBase = Character.toUpperCase(refBase);
|
||||
}
|
||||
|
||||
public void setLocation(GenomeLoc loc) {
|
||||
mLocation = loc;
|
||||
}
|
||||
|
||||
public void setReads(List<SAMRecord> reads) {
|
||||
mReads = new ArrayList<SAMRecord>(reads);
|
||||
}
|
||||
|
||||
public void setPosteriors(double[] posteriors) {
|
||||
mPosteriors = posteriors;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
lazyEval();
|
||||
|
||||
if (other == null)
|
||||
return false;
|
||||
if (other instanceof GeliGenotypeCall) {
|
||||
GeliGenotypeCall otherCall = (GeliGenotypeCall) other;
|
||||
|
||||
if (!this.mBestGenotype.equals(otherCall.mBestGenotype))
|
||||
return false;
|
||||
return (this.mRefBase == otherCall.mRefBase);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
lazyEval();
|
||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mBestGenotype, mRefGenotype, mRefBase, mReads.size(), getNegLog10PError());
|
||||
}
|
||||
|
||||
private void lazyEval() {
|
||||
if (mBestGenotype == null) {
|
||||
Integer sorted[] = Utils.SortPermutation(mPosteriors);
|
||||
mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(this.getReference());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the confidence we have
|
||||
*
|
||||
* @return get the one minus error value
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getNextBest().ordinal()]);
|
||||
}
|
||||
|
||||
// get the best genotype
|
||||
private DiploidGenotype getBestGenotype() {
|
||||
lazyEval();
|
||||
return mBestGenotype;
|
||||
}
|
||||
|
||||
// get the alternate genotype
|
||||
private DiploidGenotype getNextBest() {
|
||||
lazyEval();
|
||||
return mNextGenotype;
|
||||
}
|
||||
|
||||
// get the ref genotype
|
||||
private DiploidGenotype getRefGenotype() {
|
||||
lazyEval();
|
||||
return mRefGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
public String getBases() {
|
||||
return getBestGenotype().toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy
|
||||
*
|
||||
* @return the ploidy value
|
||||
*/
|
||||
public int getPloidy() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
public boolean isHom() {
|
||||
return getBestGenotype().isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
public boolean isHet() {
|
||||
return !isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return this.mLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
* @return true is a SNP
|
||||
*/
|
||||
public boolean isPointGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant(char ref) {
|
||||
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a variant? (non-ref)
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant() {
|
||||
return isVariant(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return return this genotype as a variant
|
||||
*/
|
||||
public Variation toVariation() {
|
||||
double bestRef = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]);
|
||||
return new BasicVariation(this.getBases(), String.valueOf(this.getReference()), 0, this.mLocation, bestRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the SAM records that back this genotype call
|
||||
*
|
||||
* @return a list of SAM records
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
return mReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the count of reads
|
||||
*
|
||||
* @return the number of reads we're backed by
|
||||
*/
|
||||
public int getReadCount() {
|
||||
return mReads.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference character
|
||||
*
|
||||
* @return the reference character
|
||||
*/
|
||||
public char getReference() {
|
||||
return mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the posteriors
|
||||
*
|
||||
* @return the posteriors
|
||||
*/
|
||||
public double[] getPosteriors() {
|
||||
return mPosteriors;
|
||||
}
|
||||
}
|
||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.genotype.geli;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -46,48 +45,41 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
public final static String headerLine = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod AA AC AG AT CC CG CT GG GT TT";
|
||||
|
||||
/**
|
||||
* Add a genotype, given a genotype locus
|
||||
* Add a genotype, given a call
|
||||
*
|
||||
* @param locus the locus to add
|
||||
* @param call the call to add
|
||||
*/
|
||||
public void addGenotypeCall(Genotype locus) {
|
||||
double posteriors[];
|
||||
int readDepth = -1;
|
||||
double nextVrsBest = 0;
|
||||
public void addGenotypeCall(Genotype call) {
|
||||
if ( !(call instanceof GeliGenotypeCall) )
|
||||
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
|
||||
GeliGenotypeCall gCall = (GeliGenotypeCall)call;
|
||||
|
||||
char ref = gCall.getReference();
|
||||
|
||||
double[] posteriors = gCall.getPosteriors();
|
||||
double[] lks;
|
||||
lks = Arrays.copyOf(posteriors, posteriors.length);
|
||||
Arrays.sort(lks);
|
||||
|
||||
double nextVrsBest = lks[9] - lks[8];
|
||||
double nextVrsRef = 0;
|
||||
if (ref != 'X')
|
||||
nextVrsRef = lks[9] - posteriors[DiploidGenotype.createHomGenotype(ref).ordinal()];
|
||||
|
||||
char ref = locus.getReference();
|
||||
|
||||
|
||||
if (!(locus instanceof PosteriorsBacked)) {
|
||||
posteriors = new double[10];
|
||||
Arrays.fill(posteriors, 0.0);
|
||||
} else {
|
||||
posteriors = ((PosteriorsBacked) locus).getPosteriors();
|
||||
double[] lks;
|
||||
lks = Arrays.copyOf(posteriors, posteriors.length);
|
||||
Arrays.sort(lks);
|
||||
nextVrsBest = lks[9] - lks[8];
|
||||
if (ref != 'X') {
|
||||
int index = (DiploidGenotype.valueOf(Utils.dupString(ref, 2)).ordinal());
|
||||
nextVrsRef = lks[9] - posteriors[index];
|
||||
}
|
||||
}
|
||||
double maxMappingQual = 0;
|
||||
if (locus instanceof ReadBacked) {
|
||||
List<SAMRecord> recs = ((ReadBacked) locus).getReads();
|
||||
readDepth = recs.size();
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
}
|
||||
List<SAMRecord> recs = gCall.getReads();
|
||||
int readDepth = recs.size();
|
||||
for (SAMRecord rec : recs) {
|
||||
if (maxMappingQual < rec.getMappingQuality()) maxMappingQual = rec.getMappingQuality();
|
||||
}
|
||||
|
||||
mWriter.println(String.format("%s %16d %c %8d %.0f %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
|
||||
locus.getLocation().getContig(),
|
||||
locus.getLocation().getStart(),
|
||||
gCall.getLocation().getContig(),
|
||||
gCall.getLocation().getStart(),
|
||||
ref,
|
||||
readDepth,
|
||||
maxMappingQual,
|
||||
locus.getBases(),
|
||||
gCall.getBases(),
|
||||
nextVrsRef,
|
||||
nextVrsBest,
|
||||
posteriors[0],
|
||||
|
|
@ -107,13 +99,11 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
*
|
||||
* @param position the position to add the no call at
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position) {
|
||||
throw new UnsupportedOperationException("Geli text format doesn't support a no-call call.");
|
||||
}
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
@Override
|
||||
public void close() {
|
||||
mWriter.close();
|
||||
}
|
||||
|
|
@ -121,15 +111,13 @@ public class GeliTextWriter implements GenotypeWriter {
|
|||
/**
|
||||
* add a multi-sample call if we support it
|
||||
*
|
||||
* @param genotypes the list of genotypes, that are backed by sample information
|
||||
* @param genotypes the list of genotypes
|
||||
*/
|
||||
@Override
|
||||
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeMetaData metadata) {
|
||||
throw new UnsupportedOperationException("Geli text doesn't support multisample calls");
|
||||
}
|
||||
|
||||
/** @return true if we support multisample, false otherwise */
|
||||
@Override
|
||||
public boolean supportsMultiSample() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,208 @@
|
|||
package org.broadinstitute.sting.utils.genotype.glf;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GLFGenotypeCall
|
||||
* <p/>
|
||||
* The implementation of the genotype interface, specific to GLF
|
||||
*/
|
||||
public class GLFGenotypeCall implements Genotype, ReadBacked, LikelihoodsBacked {
|
||||
private char mRefBase;
|
||||
private GenomeLoc mLocation;
|
||||
|
||||
private List<SAMRecord> mReads;
|
||||
private double[] mLikelihoods;
|
||||
|
||||
private double mNegLog10PError;
|
||||
private String mGenotype;
|
||||
|
||||
/**
|
||||
* Generate a single sample genotype object, containing everything we need to represent calls out of a genotyper object
|
||||
*
|
||||
*/
|
||||
public GLFGenotypeCall() {
|
||||
// fill in empty data
|
||||
mRefBase = 'N';
|
||||
mGenotype = "NN";
|
||||
mLikelihoods = new double[10];
|
||||
Arrays.fill(mLikelihoods, Double.MIN_VALUE);
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
mNegLog10PError = Double.MIN_VALUE;
|
||||
}
|
||||
|
||||
public void setReference(char refBase) {
|
||||
mRefBase = Character.toUpperCase(refBase);
|
||||
}
|
||||
|
||||
public void setLocation(GenomeLoc loc) {
|
||||
mLocation = loc;
|
||||
}
|
||||
|
||||
public void setReads(List<SAMRecord> reads) {
|
||||
mReads = new ArrayList<SAMRecord>(reads);
|
||||
}
|
||||
|
||||
public void setLikelihoods(double[] likelihoods) {
|
||||
mLikelihoods = likelihoods;
|
||||
}
|
||||
|
||||
public void setNegLog10PError(double negLog10PError) {
|
||||
mNegLog10PError = negLog10PError;
|
||||
}
|
||||
|
||||
public void setGenotype(String genotype) {
|
||||
mGenotype = genotype;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == null || !(other instanceof GLFGenotypeCall))
|
||||
return false;
|
||||
return (this.mRefBase == ((GLFGenotypeCall)other).mRefBase);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return String.format("%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mRefBase, mReads.size(), getNegLog10PError());
|
||||
}
|
||||
|
||||
/**
|
||||
* get the confidence we have
|
||||
*
|
||||
* @return get the one minus error value
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return mNegLog10PError;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
public String getBases() {
|
||||
return Character.toString(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy
|
||||
*
|
||||
* @return the ploidy value
|
||||
*/
|
||||
public int getPloidy() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
public boolean isHom() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
public boolean isHet() {
|
||||
return !isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return return this genotype as a variant
|
||||
*/
|
||||
public Variation toVariation() {
|
||||
throw new UnsupportedOperationException("GLF call doesn't support conversion to Variation");
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return this.mLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
* @return true is a SNP
|
||||
*/
|
||||
public boolean isPointGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant(char ref) {
|
||||
return !Utils.dupString(mRefBase, 2).equals(mGenotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a variant? (non-ref)
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant() {
|
||||
return isVariant(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the SAM records that back this genotype call
|
||||
*
|
||||
* @return a list of SAM records
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
return mReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the count of reads
|
||||
*
|
||||
* @return the number of reads we're backed by
|
||||
*/
|
||||
public int getReadCount() {
|
||||
return mReads.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference character
|
||||
*
|
||||
* @return the reference character
|
||||
*/
|
||||
public char getReference() {
|
||||
return mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the posteriors
|
||||
*
|
||||
* @return the posteriors
|
||||
*/
|
||||
public double[] getLikelihoods() {
|
||||
return mLikelihoods;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -11,7 +11,6 @@ import org.broadinstitute.sting.utils.genotype.*;
|
|||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -118,33 +117,25 @@ public class GLFWriter implements GenotypeWriter {
|
|||
}
|
||||
|
||||
/**
|
||||
* Add a genotype, given a genotype locus
|
||||
* Add a genotype, given a genotype call
|
||||
*
|
||||
* @param locus the genotype called at a locus
|
||||
* @param call the genotype call
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(Genotype locus) {
|
||||
char ref = locus.getReference();
|
||||
public void addGenotypeCall(Genotype call) {
|
||||
if ( !(call instanceof GLFGenotypeCall) )
|
||||
throw new IllegalArgumentException("Only GeliGenotypeCalls should be passed in to the Geli writers");
|
||||
GLFGenotypeCall gCall = (GLFGenotypeCall)call;
|
||||
|
||||
char ref = gCall.getReference();
|
||||
|
||||
// get likelihood information if available
|
||||
LikelihoodObject obj;
|
||||
if (locus instanceof LikelihoodsBacked) {
|
||||
obj = new LikelihoodObject(((LikelihoodsBacked) locus).getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
} else {
|
||||
double values[] = new double[10];
|
||||
Arrays.fill(values,-255.0);
|
||||
obj = new LikelihoodObject(values, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
}
|
||||
LikelihoodObject obj = new LikelihoodObject(gCall.getLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGATIVE_LOG); // transform! ... to negitive log likelihoods
|
||||
|
||||
double rms = 0;
|
||||
int readCount = 0;
|
||||
// calculate the RMS mapping qualities and the read depth
|
||||
if (locus instanceof ReadBacked) {
|
||||
rms = calculateRMS(((ReadBacked)locus).getReads());
|
||||
readCount = ((ReadBacked)locus).getReadCount();
|
||||
}
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)rms,ref,readCount,obj);
|
||||
double rms = calculateRMS(gCall.getReads());
|
||||
int readCount = gCall.getReadCount();
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(gCall.getLocation().getContig()),(int)gCall.getLocation().getStart(),(float)rms,ref,readCount,obj);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -207,7 +198,6 @@ public class GLFWriter implements GenotypeWriter {
|
|||
*
|
||||
* @param position the position
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position) {
|
||||
// glf doesn't support this operation
|
||||
throw new UnsupportedOperationException("GLF doesn't support a 'no call' call.");
|
||||
|
|
@ -277,7 +267,6 @@ public class GLFWriter implements GenotypeWriter {
|
|||
* close the file. You must close the file to ensure any remaining data gets
|
||||
* written out.
|
||||
*/
|
||||
@Override
|
||||
public void close() {
|
||||
writeEndRecord();
|
||||
outputBinaryCodec.close();
|
||||
|
|
@ -286,7 +275,7 @@ public class GLFWriter implements GenotypeWriter {
|
|||
/**
|
||||
* get the reference sequence
|
||||
*
|
||||
* @return
|
||||
* @return the reference sequence
|
||||
*/
|
||||
public String getReferenceSequenceName() {
|
||||
return referenceSequenceName;
|
||||
|
|
@ -296,15 +285,13 @@ public class GLFWriter implements GenotypeWriter {
|
|||
/**
|
||||
* add a multi-sample call if we support it
|
||||
*
|
||||
* @param genotypes the list of genotypes, that are backed by sample information
|
||||
* @param genotypes the list of genotypes
|
||||
*/
|
||||
@Override
|
||||
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeMetaData metadata) {
|
||||
throw new UnsupportedOperationException("GLF writer doesn't support multisample calls");
|
||||
}
|
||||
|
||||
/** @return true if we support multisample, false otherwise */
|
||||
@Override
|
||||
public boolean supportsMultiSample() {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,255 @@
|
|||
package org.broadinstitute.sting.utils.genotype.vcf;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* <p/>
|
||||
* Class VCFGenotypeCall
|
||||
* <p/>
|
||||
* The implementation of the genotype interface, specific to VCF
|
||||
*/
|
||||
public class VCFGenotypeCall implements Genotype, ReadBacked, PosteriorsBacked, SampleBacked {
|
||||
private char mRefBase;
|
||||
private GenomeLoc mLocation;
|
||||
|
||||
private List<SAMRecord> mReads;
|
||||
private double[] mPosteriors;
|
||||
|
||||
|
||||
// the reference genotype, the best genotype, and the next best genotype, lazy loaded
|
||||
private DiploidGenotype mRefGenotype = null;
|
||||
private DiploidGenotype mBestGenotype = null;
|
||||
private DiploidGenotype mNextGenotype = null;
|
||||
|
||||
// the sample name, used to propulate the SampleBacked interface
|
||||
private String mSampleName;
|
||||
|
||||
/**
|
||||
* Generate a single sample genotype object
|
||||
*
|
||||
*/
|
||||
public VCFGenotypeCall() {
|
||||
// fill in empty data
|
||||
mRefBase = 'N';
|
||||
mPosteriors = new double[10];
|
||||
Arrays.fill(mPosteriors, Double.MIN_VALUE);
|
||||
mSampleName = "";
|
||||
mReads = new ArrayList<SAMRecord>();
|
||||
}
|
||||
|
||||
public void setReference(char refBase) {
|
||||
mRefBase = Character.toUpperCase(refBase);
|
||||
}
|
||||
|
||||
public void setLocation(GenomeLoc loc) {
|
||||
mLocation = loc;
|
||||
}
|
||||
|
||||
public void setReads(List<SAMRecord> reads) {
|
||||
mReads = new ArrayList<SAMRecord>(reads);
|
||||
}
|
||||
|
||||
public void setPosteriors(double[] posteriors) {
|
||||
mPosteriors = posteriors;
|
||||
}
|
||||
|
||||
public void setSampleName(String name) {
|
||||
mSampleName = name;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
lazyEval();
|
||||
|
||||
if (other == null)
|
||||
return false;
|
||||
if (other instanceof VCFGenotypeCall) {
|
||||
VCFGenotypeCall otherCall = (VCFGenotypeCall) other;
|
||||
|
||||
if (!this.mBestGenotype.equals(otherCall.mBestGenotype))
|
||||
return false;
|
||||
return (this.mRefBase == otherCall.mRefBase);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
lazyEval();
|
||||
return String.format("%s best=%s cmp=%s ref=%s depth=%d negLog10PError=%.2f",
|
||||
getLocation(), mBestGenotype, mRefGenotype, mRefBase, mReads.size(), getNegLog10PError());
|
||||
}
|
||||
|
||||
private void lazyEval() {
|
||||
if (mBestGenotype == null) {
|
||||
Integer sorted[] = Utils.SortPermutation(mPosteriors);
|
||||
mBestGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 1]];
|
||||
mNextGenotype = DiploidGenotype.values()[sorted[DiploidGenotype.values().length - 2]];
|
||||
mRefGenotype = DiploidGenotype.createHomGenotype(this.getReference());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* get the confidence we have
|
||||
*
|
||||
* @return get the one minus error value
|
||||
*/
|
||||
public double getNegLog10PError() {
|
||||
return Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getNextBest().ordinal()]);
|
||||
}
|
||||
|
||||
// get the best genotype
|
||||
private DiploidGenotype getBestGenotype() {
|
||||
lazyEval();
|
||||
return mBestGenotype;
|
||||
}
|
||||
|
||||
// get the alternate genotype
|
||||
private DiploidGenotype getNextBest() {
|
||||
lazyEval();
|
||||
return mNextGenotype;
|
||||
}
|
||||
|
||||
// get the ref genotype
|
||||
private DiploidGenotype getRefGenotype() {
|
||||
lazyEval();
|
||||
return mRefGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return the bases, as a string
|
||||
*/
|
||||
public String getBases() {
|
||||
return getBestGenotype().toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy
|
||||
*
|
||||
* @return the ploidy value
|
||||
*/
|
||||
public int getPloidy() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're homozygous, false otherwise
|
||||
*/
|
||||
public boolean isHom() {
|
||||
return getBestGenotype().isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return true if we're het, false otherwise
|
||||
*/
|
||||
public boolean isHet() {
|
||||
return !isHom();
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
public GenomeLoc getLocation() {
|
||||
return this.mLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns true if the genotype is a point genotype, false if it's a indel / deletion
|
||||
*
|
||||
* @return true is a SNP
|
||||
*/
|
||||
public boolean isPointGenotype() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* given the reference, are we a variant? (non-ref)
|
||||
*
|
||||
* @param ref the reference base or bases
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant(char ref) {
|
||||
return !Utils.dupString(this.getReference(), 2).equals(getBestGenotype().toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* are we a variant? (non-ref)
|
||||
*
|
||||
* @return true if we're a variant
|
||||
*/
|
||||
public boolean isVariant() {
|
||||
return isVariant(mRefBase);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return return this genotype as a variant
|
||||
*/
|
||||
public Variation toVariation() {
|
||||
double bestRef = Math.abs(mPosteriors[getBestGenotype().ordinal()] - mPosteriors[getRefGenotype().ordinal()]);
|
||||
return new BasicVariation(this.getBases(), String.valueOf(this.getReference()), 0, this.mLocation, bestRef);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the SAM records that back this genotype call
|
||||
*
|
||||
* @return a list of SAM records
|
||||
*/
|
||||
public List<SAMRecord> getReads() {
|
||||
return mReads;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the count of reads
|
||||
*
|
||||
* @return the number of reads we're backed by
|
||||
*/
|
||||
public int getReadCount() {
|
||||
return mReads.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference character
|
||||
*
|
||||
* @return the reference character
|
||||
*/
|
||||
public char getReference() {
|
||||
return mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the posteriors
|
||||
*
|
||||
* @return the posteriors
|
||||
*/
|
||||
public double[] getPosteriors() {
|
||||
return mPosteriors;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return returns the sample name for this genotype
|
||||
*/
|
||||
public String getSampleName() {
|
||||
return mSampleName;
|
||||
}
|
||||
}
|
||||
|
|
@ -52,10 +52,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
/**
|
||||
* initialize this VCF writer
|
||||
*
|
||||
* @param genotypes the genotypes
|
||||
* @param file the file location to write to
|
||||
* @param stream the output stream
|
||||
*/
|
||||
private void lazyInitialize(List<Genotype> genotypes, File file, OutputStream stream) {
|
||||
private void lazyInitialize(File file, OutputStream stream) {
|
||||
Map<String, String> hInfo = new HashMap<String, String>();
|
||||
|
||||
// setup the header fields
|
||||
|
|
@ -82,9 +82,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
private static List<String> getSampleNames(List<Genotype> genotypes) {
|
||||
List<String> strings = new ArrayList<String>();
|
||||
for (Genotype genotype : genotypes) {
|
||||
if (!(genotype instanceof SampleBacked))
|
||||
if (!(genotype instanceof VCFGenotypeCall))
|
||||
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
|
||||
strings.add(((SampleBacked) genotype).getSampleName());
|
||||
strings.add(((VCFGenotypeCall) genotype).getSampleName());
|
||||
}
|
||||
return strings;
|
||||
}
|
||||
|
|
@ -94,9 +94,8 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
*
|
||||
* @param call the locus to add
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(Genotype call) {
|
||||
addMultiSampleCall(Arrays.asList(call), null);
|
||||
throw new UnsupportedOperationException("VCF calls require metadata; use the addMultiSampleCall method instead");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -104,13 +103,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
*
|
||||
* @param position the position to add the no call at
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position) {
|
||||
throw new UnsupportedOperationException("We don't currently support no-calls in VCF");
|
||||
}
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
@Override
|
||||
public void close() {
|
||||
if (mInitialized)
|
||||
mWriter.close();
|
||||
|
|
@ -119,12 +116,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
/**
|
||||
* add a multi-sample call if we support it
|
||||
*
|
||||
* @param genotypes the list of genotypes, that are backed by sample information
|
||||
* @param genotypes the list of genotypes
|
||||
*/
|
||||
@Override
|
||||
public void addMultiSampleCall(List<Genotype> genotypes, GenotypeMetaData metadata) {
|
||||
if (!mInitialized)
|
||||
lazyInitialize(genotypes, mFile, mStream);
|
||||
lazyInitialize(mFile, mStream);
|
||||
|
||||
|
||||
VCFParameters params = new VCFParameters();
|
||||
|
|
@ -132,22 +128,22 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
|
||||
// mapping of our sample names to genotypes
|
||||
if (genotypes.size() < 1) {
|
||||
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must at least contain one entry");
|
||||
throw new IllegalArgumentException("Unable to parse out the current location: genotype array must contain at least one entry");
|
||||
}
|
||||
|
||||
// get the location and reference
|
||||
params.setLocations(genotypes.get(0).getLocation(), genotypes.get(0).getReference());
|
||||
|
||||
Map<String, Genotype> genotypeMap = genotypeListToSampleNameMap(genotypes);
|
||||
Map<String, VCFGenotypeCall> genotypeMap = genotypeListToSampleNameMap(genotypes);
|
||||
|
||||
for (String name : mHeader.getGenotypeSamples()) {
|
||||
if (genotypeMap.containsKey(name)) {
|
||||
Genotype gtype = genotypeMap.get(name);
|
||||
VCFGenotypeRecord record = createVCFGenotypeRecord(params, gtype);
|
||||
VCFGenotypeRecord record = createVCFGenotypeRecord(params, (VCFGenotypeCall)gtype);
|
||||
params.addGenotypeRecord(record);
|
||||
genotypeMap.remove(name);
|
||||
} else {
|
||||
VCFGenotypeRecord record = createNoCallRecord(params, name);
|
||||
VCFGenotypeRecord record = createNoCallRecord(name);
|
||||
params.addGenotypeRecord(record);
|
||||
}
|
||||
}
|
||||
|
|
@ -161,14 +157,9 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
Map<String, String> infoFields = getInfoFields(metadata, params);
|
||||
|
||||
double qual = (metadata == null) ? 0 : (metadata.getLOD()) * 10;
|
||||
|
||||
/**
|
||||
* TODO: Eric fix the next line when our LOD scores are 0->Inf based instead
|
||||
* of -3 to Inf based.
|
||||
*/
|
||||
if (qual < 0.0) {
|
||||
qual = 0.0;
|
||||
}
|
||||
// maintain 0-99 based Q-scores
|
||||
qual = Math.min(qual, 99);
|
||||
qual = Math.max(qual, 0);
|
||||
|
||||
VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(),
|
||||
params.getContig(),
|
||||
|
|
@ -176,7 +167,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
".",
|
||||
params.getAlternateBases(),
|
||||
qual,
|
||||
".",
|
||||
"0",
|
||||
infoFields,
|
||||
params.getFormatString(),
|
||||
params.getGenotypesRecords());
|
||||
|
|
@ -210,18 +201,13 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
*
|
||||
* @return a VCFGenotypeRecord
|
||||
*/
|
||||
private VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, Genotype gtype) {
|
||||
private VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, VCFGenotypeCall gtype) {
|
||||
Map<String, String> map = new HashMap<String, String>();
|
||||
if (!(gtype instanceof SampleBacked)) {
|
||||
throw new IllegalArgumentException("Genotypes passed to VCF must be backed by SampledBacked interface");
|
||||
}
|
||||
|
||||
// calculate the RMS mapping qualities and the read depth
|
||||
if (gtype instanceof ReadBacked) {
|
||||
int readDepth = ((ReadBacked) gtype).getReadCount();
|
||||
map.put("RD", String.valueOf(readDepth));
|
||||
params.addFormatItem("RD");
|
||||
}
|
||||
int readDepth = gtype.getReadCount();
|
||||
map.put("RD", String.valueOf(readDepth));
|
||||
params.addFormatItem("RD");
|
||||
double qual = gtype.getNegLog10PError();
|
||||
map.put("GQ", String.format("%.2f", qual));
|
||||
params.addFormatItem("GQ");
|
||||
|
|
@ -231,7 +217,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
params.addAlternateBase(allele);
|
||||
}
|
||||
|
||||
VCFGenotypeRecord record = new VCFGenotypeRecord(((SampleBacked) gtype).getSampleName(),
|
||||
VCFGenotypeRecord record = new VCFGenotypeRecord(gtype.getSampleName(),
|
||||
alleles,
|
||||
VCFGenotypeRecord.PHASE.UNPHASED,
|
||||
map);
|
||||
|
|
@ -241,12 +227,11 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
/**
|
||||
* create a no call record
|
||||
*
|
||||
* @param params the VCF parameters object
|
||||
* @param sampleName the sample name
|
||||
*
|
||||
* @return a VCFGenotypeRecord for the no call situation
|
||||
*/
|
||||
private VCFGenotypeRecord createNoCallRecord(VCFParameters params, String sampleName) {
|
||||
private VCFGenotypeRecord createNoCallRecord(String sampleName) {
|
||||
Map<String, String> map = new HashMap<String, String>();
|
||||
|
||||
|
||||
|
|
@ -277,24 +262,24 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter {
|
|||
}
|
||||
|
||||
/** @return true if we support multisample, false otherwise */
|
||||
@Override
|
||||
public boolean supportsMultiSample() {
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genotype mapping from a list and their sample names
|
||||
* while we're at it, checks that all genotypes are VCF-based
|
||||
*
|
||||
* @param list a list of genotype samples
|
||||
*
|
||||
* @return a mapping of the sample name to genotype fields
|
||||
*/
|
||||
private static Map<String, Genotype> genotypeListToSampleNameMap(List<Genotype> list) {
|
||||
Map<String, Genotype> map = new HashMap<String, Genotype>();
|
||||
private static Map<String, VCFGenotypeCall> genotypeListToSampleNameMap(List<Genotype> list) {
|
||||
Map<String, VCFGenotypeCall> map = new HashMap<String, VCFGenotypeCall>();
|
||||
for (Genotype rec : list) {
|
||||
if (!(rec instanceof SampleBacked))
|
||||
throw new IllegalArgumentException("Genotype must be backed by sample information");
|
||||
map.put(((SampleBacked) rec).getSampleName(), rec);
|
||||
if ( !(rec instanceof VCFGenotypeCall) )
|
||||
throw new IllegalArgumentException("Only VCFGenotypeCalls should be passed in to the VCF writers");
|
||||
map.put(((VCFGenotypeCall) rec).getSampleName(), (VCFGenotypeCall) rec);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,121 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class SSGenotypeCallTest
|
||||
* <p/>
|
||||
* test the SS Genotype call class
|
||||
*/
|
||||
public class GenotypeCallTest extends BaseTest {
|
||||
|
||||
// we need a fake GenotypeLikelihoods class
|
||||
public class GenotypeLikelihoodsImpl extends GenotypeLikelihoods {
|
||||
public boolean cacheIsEnabled() { return false; }
|
||||
|
||||
protected GenotypeLikelihoods getSetCache( char observedBase, byte qualityScore, int ploidy,
|
||||
SAMRecord read, int offset, GenotypeLikelihoods val ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
*
|
||||
* @param observedBase
|
||||
* @param chromBase
|
||||
* @param read
|
||||
* @param offset
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
protected double log10PofTrueBaseGivenMiscall(char observedBase, char chromBase, SAMRecord read, int offset) {
|
||||
return 0; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
public GenotypeLikelihoodsImpl() {
|
||||
this.posteriors = new double[10];
|
||||
for (int x = 0; x < 10; x++) {
|
||||
posteriors[x] = -(x);
|
||||
}
|
||||
this.likelihoods = new double[10];
|
||||
for (int x = 0; x < 10; x++) {
|
||||
likelihoods[x] = -(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// make a fake read pile-up
|
||||
public Pair<ReadBackedPileup, GenomeLoc> makePileup() {
|
||||
List<SAMRecord> reads = new ArrayList<SAMRecord>();
|
||||
List<Integer> offset = new ArrayList<Integer>();
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10000);
|
||||
for (int x = 0; x < 10; x++) {
|
||||
reads.add(ArtificialSAMUtils.createArtificialRead(header, "test", 0, 1, 100));
|
||||
offset.add(10 - x);
|
||||
}
|
||||
GenomeLocParser.setupRefContigOrdering(header.getSequenceDictionary());
|
||||
GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1", 1, 10);
|
||||
return new Pair(new ReadBackedPileup(loc, 'A', reads, offset), loc);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testBestVrsRefSame() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(0, call.toVariation().getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBestVrsRef2() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'T', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(9, call2.toVariation().getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBestVrsRef3() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(4, call3.toVariation().getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testBestVrsNextSame() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
GenotypeCall call = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(1, call.getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBestVrsNext2() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
GenotypeCall call2 = new GenotypeCall("TESTSAMPLE",myPair.second, 'A', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(1, call2.getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBestVrsNext3() {
|
||||
Pair<ReadBackedPileup, GenomeLoc> myPair = makePileup();
|
||||
GenotypeCall call3 = new GenotypeCall("TESTSAMPLE",myPair.second, 'C', new GenotypeLikelihoodsImpl(), myPair.first);
|
||||
Assert.assertEquals(1, call3.getNegLog10PError(), 0.0000001);
|
||||
}
|
||||
}
|
||||
|
|
@ -43,7 +43,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,000,000-10,001,000 -bm empirical -gm EM_POINT_ESTIMATE -lod 5", 1,
|
||||
Arrays.asList("beb07ff5cbf60febfb451e4248f2b013"));
|
||||
Arrays.asList("d41d8cd98f00b204e9800998ecf8427e"));
|
||||
executeTest("testMultiSamplePilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -lod 5", 1,
|
||||
Arrays.asList("8cde0c2aa4d9ad51c603db4b64f3933f"));
|
||||
Arrays.asList("394f009c9bad34eb584fa10d133d79e0"));
|
||||
executeTest("testMultiSamplePilot2", spec);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ public class GLFWriterTest extends BaseTest {
|
|||
|
||||
}
|
||||
|
||||
class FakeGenotype extends BasicGenotype implements LikelihoodsBacked, Comparable<FakeGenotype> {
|
||||
class FakeGenotype extends GLFGenotypeCall implements Comparable<FakeGenotype> {
|
||||
|
||||
private double[] likelihoods;
|
||||
|
||||
|
|
@ -145,8 +145,12 @@ class FakeGenotype extends BasicGenotype implements LikelihoodsBacked, Comparabl
|
|||
* @param negLog10PError the confidence score
|
||||
*/
|
||||
public FakeGenotype(GenomeLoc location, String genotype, char ref, double negLog10PError, double likelihoods[]) {
|
||||
super(location, genotype, ref, negLog10PError);
|
||||
this.likelihoods = likelihoods;
|
||||
setLocation(location);
|
||||
setReference(ref);
|
||||
setLikelihoods(likelihoods);
|
||||
setGenotype(genotype);
|
||||
setNegLog10PError(negLog10PError);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -159,6 +163,10 @@ class FakeGenotype extends BasicGenotype implements LikelihoodsBacked, Comparabl
|
|||
return likelihoods;
|
||||
}
|
||||
|
||||
public void setLikelihoods(double[] likelihoods) {
|
||||
this.likelihoods = likelihoods;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(FakeGenotype that) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue