Adding the intial changes for the new Genotyping interface. The bullet points are:

- SSG is much simpler now
- GeliText has been added as a GenotypeWriter
- AlleleFrequencyWalker will be deleted when I untangle the AlleleMetric's dependance on it
- GenotypeLikelihoods now implements GenotypeGenerator, but could still use cleanup

There is still a lot more work to do, but this is a good initial check-in.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1335 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-07-29 19:43:59 +00:00
parent c5c11d5d1c
commit bca894ebce
24 changed files with 1154 additions and 632 deletions

View File

@ -1,22 +1,21 @@
package org.broadinstitute.sting.playground.gatk.walkers;
//import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.*;
import org.apache.log4j.Logger;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.List;
import java.util.Arrays;
import java.util.Random;
import java.io.PrintStream;
import java.io.DataInputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>// implements AllelicVariant
{
@ -140,7 +139,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
logger.debug(String.format(" => result is %s", alleleFreq));
if (LOG_METRICS) metrics.nextPosition(alleleFreq, tracker);
//if (LOG_METRICS) metrics.nextPosition(alleleFreq, tracker);
return alleleFreq;
}

View File

@ -1,19 +1,21 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.ListUtils;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import java.util.List;
import java.util.ArrayList;
import java.io.PrintStream;
import java.io.FileNotFoundException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -49,7 +51,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
System.exit(-1);
}
String header = GELI_OUTPUT_FORMAT ? AlleleFrequencyEstimate.geliHeaderString() : AlleleFrequencyEstimate.asTabularStringHeader();
String header = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT";
variantsOut.println("DownsampledCoverage\tAvailableCoverage\tHapmapChipGenotype\tGenotypeCallType\t"+header.substring(1));
}
@ -86,10 +88,11 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
LocusContext subContext = new LocusContext(context.getLocation(), sub_reads, sub_offsets);
AlleleFrequencyEstimate alleleFreq = SSG.map(tracker, ref, subContext);
GenotypeCall call = SSG.map(tracker, ref, subContext);
if (alleleFreq != null) {
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+alleleFreq.callType()+"\t"+alleleFreq.asGeliString());
String callType = (call.isVariation()) ? ((call.getBestVrsRef().first.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
if (call != null) {
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
}
}
}
@ -110,6 +113,41 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
return "";
}
// a method to support getting the geli string, since the AlleleFrequencyEstimate is going away
public String toGeliString (GenotypeCall locus) {
if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)");
// this is to perserve the format string that we used to use
double[] likelihoods = new double[10];
int index = 0;
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
for (Genotype G: lt) {
likelihoods[index] = G.getLikelihood();
index++;
}
return String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
locus.getLocation().getContig(),
locus.getLocation().getStart(),
locus.getReferencebase(),
locus.getReadDepth(),
-1,
locus.getGenotypes().get(0).getBases(),
locus.getBestVrsRef().second.getScore(),
locus.getBestVrsNext().second.getScore(),
likelihoods[0],
likelihoods[1],
likelihoods[2],
likelihoods[3],
likelihoods[4],
likelihoods[5],
likelihoods[6],
likelihoods[7],
likelihoods[8],
likelihoods[9]);
}
}

View File

@ -1,124 +0,0 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import java.io.File;
import java.util.List;
/**
*
* @author aaron
*
* Class LikelihoodWalker
*
* a simple walker to calculate the genotype likelihoods
*/
@ReadFilters(ZeroMappingQualityReadFilter.class)
public class GenotypeLikelihoodsWalker extends LocusWalker<LikelihoodWrapper, GenotypeWriter> {
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
@Override
public LikelihoodWrapper map(RefMetaDataTracker tracker, char ref, LocusContext context) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
String bases = pileup.getBases();
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
double rmsSum = 0.0;
// Handle single-base polymorphisms.
GenotypeLikelihoods G = new GenotypeLikelihoods();
for (int i = 0; i < reads.size(); i++) {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
rmsSum += (read.getMappingQuality() * read.getMappingQuality());
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
}
// our return
LikelihoodWrapper wrap = new LikelihoodWrapper();
return wrapLikelihoods(ref, context, reads, rmsSum, G, wrap);
}
/**
* wrap the likelihood values in with the other data we'll need
* @param ref the ref base
* @param context the locus context
* @param reads the reads
* @param rmsSum the rms square total (not the actual rms yet)
* @param g the genotypeLikelihoods
* @param wrap the object to place the values into
* @return a likelihood wrapper
*/
private LikelihoodWrapper wrapLikelihoods(char ref, LocusContext context, List<SAMRecord> reads, double rmsSum, GenotypeLikelihoods g, LikelihoodWrapper wrap) {
wrap.obj = new LikelihoodObject();
wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.LOG);
for (int x = 0; x < GenotypeLikelihoods.genotypes.length; x++) {
wrap.obj.setLikelihood(LikelihoodObject.GENOTYPE.valueOf(GenotypeLikelihoods.genotypes[x]), g.likelihoods[x]*10.0);
}
wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
wrap.loc = GenomeLocParser.getContigInfo(context.getContig());
wrap.readDepth = reads.size();
float rms = (float)(Math.sqrt(rmsSum/reads.size()));
wrap.rms = (rms > 255) ? 255 : rms;
wrap.ref = ref;
wrap.position = context.getLocation().getStart();
return wrap;
}
/**
* Provide an initial value for reduce computations.
*
* @return Initial value of reduce.
*/
@Override
public GenotypeWriter reduceInit() {
return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(),VARIANTS_FILE);
}
/**
* Reduces a single map with the accumulator provided as the ReduceType.
*
* @param value result of the map.
* @param sum accumulator for the reduce.
*
* @return accumulator with result of the map taken into account.
*/
@Override
public GenotypeWriter reduce(LikelihoodWrapper value, GenotypeWriter sum) {
sum.addGenotypeCall(value.loc,(int)value.position,value.rms,value.ref,value.readDepth,value.obj);
return sum;
}
/** Close the variant file. */
public void onTraversalDone(GenotypeWriter result) {
result.close();
}
}
class LikelihoodWrapper {
public LikelihoodObject obj;
public SAMSequenceRecord loc;
public long position;
public float rms;
public char ref;
public int readDepth;
}

View File

@ -2,32 +2,27 @@ package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
import org.broadinstitute.sting.playground.utils.IndelLikelihood;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.BasicPileup;
import org.broadinstitute.sting.utils.ReadBackedPileup;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
import org.broadinstitute.sting.utils.genotype.*;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.List;
import java.util.ArrayList;
@ReadFilters(ZeroMappingQualityReadFilter.class)
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, String> {
public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, GenotypeWriter> {
// Control output settings
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
@ -35,11 +30,9 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
// Control what goes into the variants file and what format that file should have
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold on which variants should be filtered", required = false)public Double LOD_THRESHOLD = Double.MIN_VALUE;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false)
public boolean GENOTYPE = false;
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false) public boolean GENOTYPE = false;
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
public boolean THREE_BASE_ERRORS = false;
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false) public boolean THREE_BASE_ERRORS = false;
// Control periodic reporting features
@Argument(fullName = "metrics_interval", shortName = "metint", doc = "Number of loci to process between metrics reports", required = false) public Integer METRICS_INTERVAL = 50000;
@ -63,9 +56,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
public boolean keepQ0Bases = false;
public AlleleMetrics metricsOut;
public PrintStream variantsOut;
public String sampleName;
private GenotypeWriter mGenotypeWriter;
public double[] plocus;
public double[] phapmap;
@ -118,23 +109,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
/** Initialize the walker with some sensible defaults */
public void initialize() {
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
mGenotypeWriter = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GLF, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(), VARIANTS_FILE);
} else {
try {
variantsOut = new PrintStream(VARIANTS_FILE);
} catch (FileNotFoundException e) {
err.format("Unable to open file '%s'. Perhaps the parent directory does not exist or is read-only.\n", VARIANTS_FILE.getAbsolutePath());
System.exit(-1);
}
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GELI) {
variantsOut.println(AlleleFrequencyEstimate.geliHeaderString());
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR) {
variantsOut.println(AlleleFrequencyEstimate.asTabularStringHeader());
} else {
throw new StingException("Unsupported single sample genotyper output format: " + this.VAR_FORMAT.toString());
}
}
plocus = priorsArray(PRIORS_ANY_LOCUS);
phapmap = priorsArray(PRIORS_HAPMAP);
pdbsnp = priorsArray(PRIORS_DBSNP);
@ -151,19 +126,22 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
*
* @return an AlleleFrequencyEstimate object
*/
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
public GenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
rationalizeSampleName(context.getReads().get(0));
AlleleFrequencyEstimate freq = getAlleleFrequency(tracker, Character.toUpperCase(ref), context, sampleName);
if (freq != null) {
metricsOut.nextPosition(freq, tracker);
if (context.getLocation().getStart() == 73) {
int stop = 1;
}
GenotypeLocus genotype = getGenotype(tracker, Character.toUpperCase(ref), context, sampleName);
GenotypeCall call = null;
if (genotype != null) {
call = new GenotypeCallImpl(genotype, ref,
new ConfidenceScore(this.LOD_THRESHOLD, (GENOTYPE ? ConfidenceScore.SCORE_METHOD.BEST_NEXT : ConfidenceScore.SCORE_METHOD.BEST_REF)));
metricsOut.nextPosition(call, tracker);
}
if (!SUPPRESS_METRICS) {
metricsOut.printMetricsAtLocusIntervals(METRICS_INTERVAL);
}
return freq;
return call;
}
/**
@ -204,69 +182,32 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
*
* @return the allele frequency estimate
*/
private AlleleFrequencyEstimate getAlleleFrequency(RefMetaDataTracker tracker, char ref, LocusContext context, String sample_name) {
private GenotypeLocus getGenotype(RefMetaDataTracker tracker, char ref, LocusContext context, String sample_name) {
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
String bases = pileup.getBases();
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
// Handle indels, but don't do anything with the result yet.
IndelLikelihood I = (CALL_INDELS) ? callIndel(context, reads, offsets) : null;
//IndelLikelihood I = (CALL_INDELS) ? callIndel(context, context.getReads(), context.getOffsets()) : null;
// Handle single-base polymorphisms.
GenotypeLikelihoods G = callGenotype(tracker, ref, pileup, reads, offsets);
//GenotypeLikelihoods G = callGenotype(tracker, ref, context);
GenotypeLikelihoods G = callGenotype(tracker);
GenotypeLocus geno = G.callGenotypes(tracker, ref, pileup);
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods, sample_name);
return geno;
}
/**
* Calls the underlying, single locus genotype of the sample
*
* @param tracker the meta data tracker
* @param ref the reference base
* @param pileup the pileup object for the given locus
* @param reads the reads that overlap this locus
* @param offsets the offsets per read that identify the base at this locus
*
* @return the likelihoods per genotype
*/
private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup, List<SAMRecord> reads, List<Integer> offsets) {
private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker) {
GenotypeLikelihoods G = null;
if (isHapmapSite(tracker)) {
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, phapmap[0], phapmap[1], phapmap[2], p2ndon, p2ndoff);
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, phapmap[0], phapmap[1], phapmap[2], p2ndon, p2ndoff, keepQ0Bases);
} else if (isDbSNPSite(tracker)) {
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, pdbsnp[0], pdbsnp[1], pdbsnp[2], p2ndon, p2ndoff);
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, pdbsnp[0], pdbsnp[1], pdbsnp[2], p2ndon, p2ndoff, keepQ0Bases);
} else {
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff);
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff, keepQ0Bases);
}
G.filterQ0Bases(! keepQ0Bases); // Set the filtering / keeping flag
for (int i = 0; i < reads.size(); i++) {
SAMRecord read = reads.get(i);
int offset = offsets.get(i);
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
int nBasesAdded = G.add(ref, base, qual);
if ( nBasesAdded == 0 ) {
nFilteredQ0Bases++;
//System.out.printf("Filtering Q0 base from %s at %s %d: %c %c %d%n",
// read.getReadName(), GenomeLocParser.createGenomeLoc(read), offset, ref, base, qual);
}
}
G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior);
//if (!IGNORE_SECONDARY_BASES && pileup.getBases().length() < 750) {
// G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
//}
G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
return G;
}
@ -312,69 +253,38 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
return tracker.lookup("dbsnp", null) != null;
}
double allele_frequency_prior = -1;
char alt_allele;
/**
* Accessor for PoolCaller to set the allele frequency prior for this sample.
*
* @param freq the allele frequency
* @param alt the alternate allele
*/
public void setAlleleFrequencyPrior(double freq, char alt) {
this.allele_frequency_prior = freq;
this.alt_allele = alt;
}
/**
* Initialize values appropriately for the reduce step.
*
* @return an empty string
*/
public String reduceInit() {
return "";
public GenotypeWriter reduceInit() {
return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(), VARIANTS_FILE);
}
/**
* If we've found a LOD >= 5 variant, output it to disk.
*
* @param alleleFreq an AlleleFrequencyEstimage object for the variant.
* @param sum accumulator for the reduce.
* @param call an GenotypeCall object for the variant.
* @param sum accumulator for the reduce.
*
* @return an empty string
*/
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum) {
//System.out.printf("AlleleFreqEstimate %s %f %f %f%n", alleleFreq,alleleFreq.lodVsNextBest, alleleFreq.lodVsRef, LOD_THRESHOLD );
if (alleleFreq != null && (GENOTYPE ? alleleFreq.lodVsNextBest : alleleFreq.lodVsRef) >= LOD_THRESHOLD) {
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GELI) {
variantsOut.println(alleleFreq.asGeliString());
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR) {
variantsOut.println(alleleFreq.asTabularString());
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
SAMSequenceRecord rec = GenomeLocParser.getContigInfo(alleleFreq.location.getContig());
double[] likelihoods = new double[10];
for (int x = 0; x < alleleFreq.posteriors.length; x++) {
likelihoods[x] = GLFRecord.LIKELIHOOD_SCALE_FACTOR * alleleFreq.genotypeLikelihoods.likelihoods[x];
}
LikelihoodObject obj = new LikelihoodObject(likelihoods, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
this.mGenotypeWriter.addGenotypeCall(rec,(int)alleleFreq.location.getStart(),0.0f,alleleFreq.ref,alleleFreq.depth,obj);
}
public GenotypeWriter reduce(GenotypeCall call, GenotypeWriter sum) {
if (call != null && call.isVariation()) {
if ((GENOTYPE && call.getBestVrsNext().second.getScore() > LOD_THRESHOLD) ||
(call.getBestVrsRef().second.getScore() > LOD_THRESHOLD))
sum.addGenotypeCall(call);
}
return "";
return sum;
}
/** Close the variant file. */
public void onTraversalDone(String sum) {
public void onTraversalDone(GenotypeWriter sum) {
logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases));
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
mGenotypeWriter.close();
} else {
this.variantsOut.close();
}
sum.close();
}
}

View File

@ -1,15 +1,17 @@
package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.genotype.ConfidenceScore;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import java.util.List;
import java.io.PrintStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintStream;
public class AlleleMetrics {
private double LOD_cutoff = 5;
@ -54,7 +56,7 @@ public class AlleleMetrics {
this.LOD_cutoff = lodThresold;
}
public void nextPosition(AlleleFrequencyEstimate alleleFreq, RefMetaDataTracker tracker) {
public void nextPosition(GenotypeCall call, RefMetaDataTracker tracker) {
num_loci_total += 1;
boolean is_dbSNP_SNP = false;
@ -78,10 +80,10 @@ public class AlleleMetrics {
}
}
}
Pair<Genotype, ConfidenceScore> result = call.getBestVrsRef();
if (Math.abs(call.getBestVrsNext().second.getScore()) >= LOD_cutoff) { num_loci_confident += 1; }
if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
if (call.isVariation() && result.second.getScore() >= LOD_cutoff)
{
// Confident variant.
@ -99,10 +101,12 @@ public class AlleleMetrics {
String hapmap_genotype = hapmap_chip_genotype.getFeature();
long refs=0, alts=0;
double hapmap_q;
String str = call.getBestVrsRef().first.getBases();
char alt = str.charAt(0);
if (str.charAt(0) == call.getReferencebase()) alt = str.charAt(1);
for (char c : hapmap_genotype.toCharArray()) {
if (c == alleleFreq.ref) { refs++; }
if (c == alleleFreq.alt) { alts++; }
if (c == call.getReferencebase()) { refs++; }
if (c == alt) { alts++; }
}
if (refs+alts > 0) {
@ -117,7 +121,7 @@ public class AlleleMetrics {
//out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
//System.out.printf("DBG %f %s\n", LOD_cutoff, alleleFreq.asTabularString());
if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
if (call.getBestVrsNext().second.getScore() >= LOD_cutoff) {
/*
System.out.printf("DBG %f %f %f %f\n",
@ -129,22 +133,22 @@ public class AlleleMetrics {
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
//if (hapmap_q != -1 && hapmap_q == alleleFreq.qstar) {
if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && Math.abs(hapmap_q - alleleFreq.qstar) <= dbl_cmp_precision) {
/*if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && Math.abs(hapmap_q - alleleFreq.qstar) <= dbl_cmp_precision) {
hapmap_genotype_correct++;
}else{
hapmap_genotype_incorrect++;
//System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
//out.printf(" INCORRECT GENOTYPE");
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
}
}*/
}
if (alleleFreq.lodVsRef >= LOD_cutoff || -1 * alleleFreq.lodVsRef >= LOD_cutoff) {
if (result.second.getScore() >= LOD_cutoff || -1 * result.second.getScore() >= LOD_cutoff) {
// Now calculate ref / var performance - did we correctly classify the site as
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
boolean hapmap_var = hapmap_q != 0.0;
boolean called_var = alleleFreq.qstar != 0.0;
boolean called_var = call.isVariation();
//if (hapmap_q != -1 && hapmap_var != called_var) {
if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && hapmap_var != called_var) {
hapmap_refvar_incorrect++;

View File

@ -1,14 +1,15 @@
package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.Utils;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.genotype.*;
import static java.lang.Math.log10;
import static java.lang.Math.pow;
import java.util.HashMap;
public class GenotypeLikelihoods {
public class GenotypeLikelihoods implements GenotypeGenerator {
// precalculate these for performance (pow/log10 is expensive!)
/**
@ -20,7 +21,7 @@ public class GenotypeLikelihoods {
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
private static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
private static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
private final boolean keepQ0Bases;
private static final double log10Of1_3 = log10(1.0 / 3);
private static final double log10Of2_3 = log10(2.0 / 3);
@ -62,7 +63,7 @@ public class GenotypeLikelihoods {
genotypes[8] = "GT";
genotypes[9] = "TT";
}
public int coverage;
public int coverage;
// The genotype priors;
private double priorHomRef;
@ -80,18 +81,19 @@ public class GenotypeLikelihoods {
public GenotypeLikelihoods() {
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
keepQ0Bases = true;
initialize(false, 1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff);
}
public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar) {
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
keepQ0Bases = true;
initialize(threeBaseErrors, priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
}
public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff, boolean keepQ0Bases) {
this.keepQ0Bases = keepQ0Bases;
initialize(threeBaseErrors, priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
}
@ -276,62 +278,35 @@ public class GenotypeLikelihoods {
return s;
}
public void ApplyPrior(char ref, double[] allele_likelihoods)
{
int k = 0;
for (int i = 0; i < 4; i++)
{
for (int j = i; j < 4; j++)
{
if (i == j)
{
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
}
else
{
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
}
k++;
}
}
this.sort();
}
public void ApplyPrior(char ref, double[] allele_likelihoods) {
int k = 0;
for (int i = 0; i < 4; i++) {
for (int j = i; j < 4; j++) {
if (i == j) {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
} else {
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
}
k++;
}
}
this.sort();
}
public void ApplyPrior(char ref, char alt, double p_alt) {
public void ApplyPrior(char ref) {
for (int i = 0; i < genotypes.length; i++) {
if ((p_alt == -1) || (p_alt <= 1e-6)) {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
} else {
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += Math.log10(priorHomRef);
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
// hom-nonref
likelihoods[i] += Math.log10(priorHomVar);
} else {
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
// hom-ref
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
} else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) {
// hom-nonref
likelihoods[i] += 2.0 * Math.log10(p_alt);
} else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) {
// het
likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0);
} else {
// something else (noise!)
likelihoods[i] += Math.log10(1e-5);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
// het
likelihoods[i] += Math.log10(priorHet);
}
if (Double.isInfinite(likelihoods[i])) {
likelihoods[i] = -1000;
}
}
this.sort();
@ -410,51 +385,66 @@ public class GenotypeLikelihoods {
return this.sorted_likelihoods[0];
}
public double RefPosterior(char ref)
{
this.LodVsRef(ref);
return this.ref_likelihood;
}
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors, String sample_name) {
this.sort();
double qhat = Double.NaN;
double qstar = Double.NaN;
char alt = 'N';
if ((sorted_genotypes[0].charAt(0) == ref) && (sorted_genotypes[0].charAt(1) == ref)) {
// hom-ref
qhat = 0.0;
qstar = 0.0;
alt = 'N';
} else if ((sorted_genotypes[0].charAt(0) != ref) && (sorted_genotypes[0].charAt(1) != ref)) {
// hom-nonref
qhat = 1.0;
qstar = 1.0;
alt = sorted_genotypes[0].charAt(0);
} else {
// het
qhat = 0.5;
qstar = 0.5;
if (sorted_genotypes[0].charAt(0) != ref) {
alt = sorted_genotypes[0].charAt(0);
}
if (sorted_genotypes[0].charAt(1) != ref) {
alt = sorted_genotypes[0].charAt(1);
}
}
this.LodVsRef(ref); //HACK
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
AlleleFrequencyEstimate AFE = new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name);
AFE.genotypeLikelihoods = this;
return AFE;
public double RefPosterior(char ref) {
this.LodVsRef(ref);
return this.ref_likelihood;
}
private IndelLikelihood indel_likelihood;
public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; }
public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; }
private IndelLikelihood indel_likelihood;
public void addIndelLikelihood(IndelLikelihood indel_likelihood) {
this.indel_likelihood = indel_likelihood;
}
public IndelLikelihood getIndelLikelihood() {
return this.indel_likelihood;
}
/**
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
*
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
* @param ref the reference base
* @param pileup a pileup of the reads, containing the reads and their offsets
*
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
*/
@Override
public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
// for calculating the rms of the mapping qualities
double squared = 0.0;
for (int i = 0; i < pileup.getReads().size(); i++) {
SAMRecord read = pileup.getReads().get(i);
squared += read.getMappingQuality() * read.getMappingQuality();
int offset = pileup.getOffsets().get(i);
char base = read.getReadString().charAt(offset);
byte qual = read.getBaseQualities()[offset];
add(ref, base, qual);
}
// save off the likelihoods
if (likelihoods == null || likelihoods.length == 0) return null;
double lklihoods[] = new double[likelihoods.length];
System.arraycopy(likelihoods, 0, lklihoods, 0, likelihoods.length);
ApplyPrior(ref);
applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
// lets setup the locus
GenotypeLocus locus = new GenotypeLocusImpl(pileup.getLocation(), pileup.getReads().size(),Math.sqrt(squared/pileup.getReads().size()));
for (int x = 0; x < this.likelihoods.length; x++) {
try {
locus.addGenotype(new Genotype(this.genotypes[x],lklihoods[x],this.likelihoods[x]));
} catch (InvalidGenotypeException e) {
throw new StingException("Invalid Genotype value",e);
}
}
return locus;
}
}

View File

@ -0,0 +1,65 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author aaron
* <p/>
* Class ConfidenceScore
* <p/>
* this class represents the confidence in a genotype, and the method we used to obtain it
*/
public class ConfidenceScore implements Comparable<ConfidenceScore> {
private static final Double EPSILON = 1.0e-15;
public enum SCORE_METHOD {
BEST_NEXT, BEST_REF, OTHER;
}
private Double mScore;
private SCORE_METHOD mMethod;
public ConfidenceScore(double score, SCORE_METHOD method) {
this.mScore = score;
this.mMethod = method;
}
/**
* generate a confidence score, given the two likelihoods, and the method used
*
* @param likelihoodOne the first likelihood
* @param likelihoodTwo the second likelihood
* @param method the method used to determine the likelihood
*/
public ConfidenceScore(double likelihoodOne, double likelihoodTwo, SCORE_METHOD method) {
this.mScore = likelihoodOne / likelihoodTwo;
this.mMethod = method;
}
/**
* compare this ConfidenceScore to another, throwing an exception if they're not the same scoring method
* @param o the other confidence score if
* @return 0 if equal
*/
@Override
public int compareTo(ConfidenceScore o) {
if (o.mMethod != this.mMethod) {
throw new UnsupportedOperationException("Attemped to compare Confidence scores with different methods");
}
double diff = mScore - o.mScore;
if (Math.abs(diff) < (EPSILON * Math.abs(mScore)))
return 0;
else if (diff < 0)
return -1;
else
return 1;
}
public Double getScore() {
return mScore;
}
public SCORE_METHOD getMethod() {
return mMethod;
}
}

View File

@ -0,0 +1,113 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author aaron
* <p/>
* Class GenotypeLikelihood
* <p/>
* This class encompasses all the information that is associated with a genotype
* and it's likelihood, mainly:
* <p/>
* Likelihood value
*/
public class Genotype {
private double mLikelihood = 0.0;
private double mPosteriorProb = 0.0;
private String mBases = "";
private int mPloidy = 2; // assume diploid
/**
* construct a genotypeLikelihood, given the bases, the posterior, and the likelihood
*
* @param bases the bases that make up this genotype
* @param posterior the posterior probability of this genotype
* @param likelihood the likelihood of this genotype
* @param ploidy the ploidy of this genotype
*/
public Genotype(String bases, double posterior, double likelihood, int ploidy) {
this.mPloidy = ploidy;
if (bases.length() != ploidy) {
throw new IllegalArgumentException("The number of bases should match the ploidy");
}
this.mLikelihood = likelihood;
this.mBases = bases;
this.mPosteriorProb = posterior;
}
/**
* construct a genotypeLikelihood, given the bases, the posterior, and the likelihood
*
* @param bases the bases that make up this genotype
* @param posterior the posterior probability of this genotype
* @param likelihood the likelihood of this genotype
*/
public Genotype(String bases, double posterior, double likelihood) {
if (bases.length() != mPloidy) {
throw new IllegalArgumentException("The number of bases should match the ploidy");
}
this.mLikelihood = likelihood;
this.mBases = bases;
this.mPosteriorProb = posterior;
}
/**
* get the likelihood value
*
* @return a double, representing the likelihood
*/
public double getLikelihood() {
return mLikelihood;
}
/**
* get the posterior value
*
* @return a double, representing the posterior
*/
public double getPosteriorProb() {
return mPosteriorProb;
}
/**
* get the bases that represent this
*
* @return
*/
public String getBases() {
return mBases;
}
public int getPloidy() {
return mPloidy;
}
/**
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
*
* @return
*/
public boolean isHom() {
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
char first = mBases.charAt(0);
for (char c: mBases.toCharArray()) {
if (c != first) return false;
}
return true;
}
/**
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
*
* @return
*/
public boolean isHet() {
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
char first = mBases.charAt(0);
for (char c: mBases.toCharArray()) {
if (c != first) return true;
}
return false;
}
}

View File

@ -0,0 +1,52 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.Pair;
/**
* @author aaron
* <p/>
* Interface GenotypeCall
* <p/>
* Genotype call interface, for indicating that a genotype is
* also a genotype call.
*/
public interface GenotypeCall extends GenotypeLocus{
/**
* get the confidence
*
* @return a ConfidenceScore representing the LOD score that this genotype was called with
*/
public ConfidenceScore getConfidence();
/**
* gets the reference base
*
* @return the reference base we represent
*/
public char getReferencebase();
/**
* get the best vrs the next best genotype LOD score
* @return the genotype, and a LOD for best - next
*/
public Pair<Genotype,ConfidenceScore> getBestVrsNext();
/**
* get the best vrs the reference allele.
* @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked
* with is variation
*/
public Pair<Genotype,ConfidenceScore> getBestVrsRef();
/**
* check to see if this call is a variant, i.e. not homozygous reference
* @return true if it's not hom ref, false otherwise
*/
public boolean isVariation();
/**
* return genotype locus, with our data
*/
public GenotypeLocus toGenotypeLocus();
}

View File

@ -0,0 +1,225 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.Pair;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
/**
* @author aaron
* <p/>
* Class GenotypeCallImpl
* <p/>
* A descriptions should go here. Blame aaron if it's missing.
*/
public class GenotypeCallImpl implements GenotypeCall {
// our stored genotype locus
private final GenotypeLocus mLocus;
private final char mRefBase;
private final ConfidenceScore mScore;
/**
* generate a GenotypeCall object with the specified locus info and reference base
*
* @param mLocus the locus
* @param mRefBase the reference base to use
*/
public GenotypeCallImpl(GenotypeLocus mLocus, char mRefBase, ConfidenceScore mScore) {
if (mLocus.getGenotypes().size() < 1) throw new StingException("Genotype Locus is empty");
this.mLocus = mLocus;
this.mRefBase = String.valueOf(mRefBase).toUpperCase().charAt(0);
this.mScore = mScore;
}
/**
* get the confidence
*
* @return a ConfidenceScore representing the LOD score that this genotype was called with
*/
@Override
public ConfidenceScore getConfidence() {
return mScore;
}
/**
* gets the reference base
*
* @return the reference base we represent
*/
@Override
public char getReferencebase() {
return mRefBase;
}
/**
* get the best vrs the next best genotype LOD score
*
* @return the genotype, and a LOD for best - next
*/
@Override
public Pair<Genotype, ConfidenceScore> getBestVrsNext() {
List<Genotype> genos = this.mLocus.getGenotypes();
if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes");
return new Pair<Genotype, ConfidenceScore>(genos.get(0),
new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - genos.get(1).getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT));
}
/**
* get the best vrs the reference allele.
*
* @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked
* with is variation
*/
@Override
public Pair<Genotype, ConfidenceScore> getBestVrsRef() {
List<Genotype> genos = this.mLocus.getGenotypes();
// find the reference allele
String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase();
Genotype refGenotype = findRefGenotype(ref, genos);
if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes");
return new Pair<Genotype, ConfidenceScore>(genos.get(0),
new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - refGenotype.getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT));
}
/**
* get the reference genotype object
*
* @param ref the reference as a ploidy count homozygous string
* @param genos the genotype list
*
* @return a genotype for the
*/
private static Genotype findRefGenotype(String ref, List<Genotype> genos) {
Genotype refGenotype = null;
for (Genotype g : genos) {
if (g.getBases().equals(ref)) refGenotype = g;
}
if (refGenotype == null) {
for (Genotype g : genos) {
System.err.println(g.getBases());
}
throw new StingException("Unable to find the reference genotype + " + ref + " size of genotype list = " + genos.size());
}
return refGenotype;
}
/**
* check to see if this call is a variant, i.e. not homozygous reference
*
* @return true if it's not hom ref, false otherwise
*/
@Override
public boolean isVariation() {
List<Genotype> genos = this.mLocus.getGenotypes();
String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase();
return !(genos.get(0).getBases().equals(ref));
}
/** return genotype locus, with our data */
@Override
public GenotypeLocus toGenotypeLocus() {
return mLocus;
}
/**
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location
*
* @return position on the genome wrapped in GenomeLoc object
*/
@Override
public GenomeLoc getLocation() {
return mLocus.getLocation();
}
/**
* get the ploidy at this locus
*
* @return an integer representing the genotype ploidy at this location
*/
@Override
public int getPloidy() {
return mLocus.getPloidy();
}
/**
* get the genotypes, sorted in asscending order by their likelihoods (the best
* to the worst likelihoods)
*
* @return a list of the likelihoods
*/
@Override
public List<Genotype> getGenotypes() {
return mLocus.getGenotypes();
}
/**
* get the genotypes and their posteriors
*
* @return a list of the poseriors
*/
@Override
public List<Genotype> getPosteriors() {
return mLocus.getPosteriors();
}
/**
* get the genotypes sorted lexigraphically
*
* @return a list of the genotypes sorted lexi
*/
@Override
public List<Genotype> getLexigraphicallySortedGenotypes() {
return mLocus.getLexigraphicallySortedGenotypes();
}
/**
* get the read depth at this position
*
* @return the read depth, -1 if it is unknown
*/
@Override
public int getReadDepth() {
return mLocus.getReadDepth();
}
/**
* add a genotype to the collection
*
* @param genotype
*
* @throws InvalidGenotypeException
*/
@Override
public void addGenotype(Genotype genotype) throws InvalidGenotypeException {
mLocus.addGenotype(genotype);
}
/**
* get the root mean square (RMS) of the mapping qualities
*
* @return the RMS, or a value < 0 if it's not available
*/
@Override
public double getRMSMappingQuals() {
return mLocus.getRMSMappingQuals();
}
/**
* create a variant, given the reference, and a lod score
*
* @param refBase the reference base
* @param score the threshold to use to determine if it's a variant or not
*
* @return a variant object, or null if no genotypes meet the criteria
*/
@Override
public Variant toGenotypeCall(char refBase, ConfidenceScore score) {
return null; //To change body of implemented methods use File | Settings | File Templates.
}
}

View File

@ -0,0 +1,26 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.ReadBackedPileup;
/**
*
* @author aaron
*
* Class GenotypeFactory
*
* Create genotypes, given certain pileup information
*/
public interface GenotypeGenerator {
/**
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
*
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
* @param ref the reference base
* @param pileup a pileup of the reads, containing the reads and their offsets
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
*/
public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup);
}

View File

@ -0,0 +1,132 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
import java.util.Comparator;
/**
* @author aaron
* <p/>
* Interface Genotype
* <p/>
* This interface represents the collection of genotypes at a specific locus
*/
public interface GenotypeLocus {
/**
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location
*
* @return position on the genome wrapped in GenomeLoc object
*/
public GenomeLoc getLocation();
/**
* get the ploidy at this locus
*
* @return an integer representing the genotype ploidy at this location
*/
public int getPloidy();
/**
* get the genotypes, sorted in asscending order by their likelihoods (the best
* to the worst likelihoods)
*
* @return a list of the genotypes, sorted by likelihoods
*/
public List<Genotype> getGenotypes();
/**
* get the genotypes and their posteriors
*
* @return a list of the genotypes, sorted by poseriors
*/
public List<Genotype> getPosteriors();
/**
* get the genotypes sorted lexigraphically
*
* @return a list of the genotypes sorted lexi
*/
public List<Genotype> getLexigraphicallySortedGenotypes();
/**
* get the read depth at this position
*
* @return the read depth, -1 if it is unknown
*/
public int getReadDepth();
/**
* add a genotype to the collection
*
* @param genotype
*
* @throws InvalidGenotypeException
*/
public void addGenotype(Genotype genotype) throws InvalidGenotypeException;
/**
* get the root mean square (RMS) of the mapping qualities
*
* @return the RMS, or a value < 0 if it's not available
*/
public double getRMSMappingQuals();
/**
* create a variant, given the reference, and a lod score
*
* @param refBase the reference base
* @param score the threshold to use to determine if it's a variant or not
*
* @return a variant object, or null if no genotypes meet the criteria
*/
public Variant toGenotypeCall(char refBase, ConfidenceScore score);
}
/**
* the following are helper Comparator classes for the above sort orders, that may be useful
* for anyone implementing the GenotypeLocus interface
*/
class PosteriorComparator implements Comparator<Genotype> {
private final Double EPSILON = 1.0e-15;
@Override
public int compare(Genotype genotype, Genotype genotype1) {
double diff = genotype.getPosteriorProb() - genotype1.getPosteriorProb();
if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getPosteriorProb())))
return 0;
else if (diff < 0)
return 1;
else
return -1; // TODO: BACKWARD NOW
}
}
class LexigraphicalComparator implements Comparator<Genotype> {
private final Double EPSILON = 1.0e-15;
@Override
public int compare(Genotype genotype, Genotype genotype1) {
return genotype.getBases().compareTo(genotype1.getBases());
}
}
class LikelihoodComparator implements Comparator<Genotype> {
private final Double EPSILON = 1.0e-15;
@Override
public int compare(Genotype genotype, Genotype genotype1) {
double diff = genotype.getLikelihood() - genotype1.getLikelihood();
if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getLikelihood())))
return 0;
else if (diff < 0)
return 1; // TODO: BACKWARD NOW
else
return -1;
}
}

View File

@ -0,0 +1,133 @@
package org.broadinstitute.sting.utils.genotype;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* @author aaron
* <p/>
* Class GenotypeBucket
* <p/>
* A descriptions should go here. Blame aaron if it's missing.
*/
public class GenotypeLocusImpl implements GenotypeLocus {
private final List<Genotype> mGenotypes = new ArrayList<Genotype>();
private GenomeLoc mLocation = null;
private int mReadDepth = -1;
private double mRMSMappingQual = -1;
public GenotypeLocusImpl(GenomeLoc location, int readDepth, double rmsMappingQual) {
this.mLocation = location;
mReadDepth = readDepth;
mRMSMappingQual = rmsMappingQual;
}
/**
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
* is located right <i>after</i> the specified location
*
* @return position on the genome wrapped in GenomeLoc object
*/
@Override
public GenomeLoc getLocation() {
return mLocation;
}
/**
* get the ploidy at this locus
*
* @return an integer representing the genotype ploidy at this location
*/
@Override
public int getPloidy() {
return 2;
}
/**
* get the genotypes, sorted in asscending order by their likelihoods (the best
* to the worst likelihoods)
*
* @return a list of the likelihoods
*/
@Override
public List<Genotype> getGenotypes() {
Collections.sort(this.mGenotypes, new LikelihoodComparator());
return this.mGenotypes;
}
/**
* get the genotypes and their posteriors
*
* @return a list of the poseriors
*/
@Override
public List<Genotype> getPosteriors() {
Collections.sort(this.mGenotypes, new PosteriorComparator());
return this.mGenotypes;
}
/**
* get the genotypes sorted lexigraphically
*
* @return a list of the genotypes sorted lexi
*/
@Override
public List<Genotype> getLexigraphicallySortedGenotypes() {
Collections.sort(this.mGenotypes, new LexigraphicalComparator());
return this.mGenotypes;
}
/**
* get the read depth at this position
*
* @return the read depth, -1 if it is unknown
*/
@Override
public int getReadDepth() {
return mReadDepth;
}
/**
* add a genotype to the collection
*
* @param genotype
*
* @throws InvalidGenotypeException
*/
@Override
public void addGenotype(Genotype genotype) throws InvalidGenotypeException {
this.mGenotypes.add(genotype);
}
/**
* get the root mean square (RMS) of the mapping qualities
*
* @return the RMS, or a value < 0 if it's not available
*/
@Override
public double getRMSMappingQuals() {
return mRMSMappingQual;
}
/**
* create a variant, given the reference, and a lod score
*
* @param refBase the reference base
* @param score the threshold to use to determine if it's a variant or not
*
* @return a variant object, or null if no genotypes meet the criteria
*/
@Override
public Variant toGenotypeCall(char refBase, ConfidenceScore score) {
return null; //To change body of implemented methods use File | Settings | File Templates.
}
}

View File

@ -1,7 +1,5 @@
package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMSequenceRecord;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -37,50 +35,17 @@ import net.sf.samtools.SAMSequenceRecord;
public interface GenotypeWriter {
/**
* add a single point genotype call to the
*
* @param contig the contig you're calling in
* @param position the position on the contig
* @param referenceBase the reference base
* @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles
* Add a genotype, given a genotype locus
* @param locus the locus to add
*/
public void addGenotypeCall(SAMSequenceRecord contig,
int position,
float rmsMapQuals,
char referenceBase,
int readDepth,
LikelihoodObject likelihoods);
/**
* add a variable length call to the genotyper
*
* @param contig the contig you're calling in
* @param position the position on the genome
* @param rmsMapQuals the root mean square of the mapping qualities
* @param readDepth the read depth
* @param refBase the reference base
* @param firstHomZyg the first homozygous indel
* @param secondHomZyg the second homozygous indel (if present, null if not)
* @param hetLikelihood the heterozygous likelihood
*/
public void addVariableLengthCall(SAMSequenceRecord contig,
int position,
float rmsMapQuals,
int readDepth,
char refBase,
IndelLikelihood firstHomZyg,
IndelLikelihood secondHomZyg,
byte hetLikelihood);
public void addGenotypeCall(GenotypeCall locus);
/**
* add a no call to the genotype file, if supported.
*
* @param position
* @param readDepth
* @param position the position to add the no call at
*/
public void addNoCall(int position,
int readDepth);
public void addNoCall(int position);
/** finish writing, closing any open files. */
public void close();

View File

@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter;
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
import java.io.File;
@ -17,7 +19,7 @@ import java.io.File;
public class GenotypeWriterFactory {
/** available genotype writers */
public enum GENOTYPE_FORMAT {
GELI, GLF, GFF, TABULAR;
GELI, GLF, GFF, TABULAR, GELI_BINARY;
}
/**
@ -32,6 +34,8 @@ public class GenotypeWriterFactory {
case GLF:
return new GLFWriter(header.toString(), destination);
case GELI:
return new GeliTextWriter(destination);
case GELI_BINARY:
return new GeliAdapter(destination, header);
default:
throw new StingException("Genotype writer " + format.toString() + " is not implemented");

View File

@ -0,0 +1,20 @@
package org.broadinstitute.sting.utils.genotype;
/**
*
* @author aaron
*
* Class GenotypeException
*
* This exception is thrown when a genotype call is passed in that cannot be processed, i.e. invalid.
*/
public class InvalidGenotypeException extends Exception {
public InvalidGenotypeException(String msg) {
super(msg);
}
public InvalidGenotypeException(String message, Throwable throwable) {
super(message, throwable);
}
}

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.StingException;
import java.io.File;
@ -37,55 +36,33 @@ public class TabularLFWriter implements GenotypeWriter {
}
/**
* add a single point genotype call to the
* Add a genotype, given a genotype locus
*
* @param contig the contig you're calling in
* @param position the position on the contig
* @param referenceBase the reference base
* @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles
* @param locus the locus to add
*/
@Override
public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods) {
/**return String.format("%s %s %c %c %s %f %f %f %f %d %s",
location,
contig.getSpecies(),
ref,
alt,
public void addGenotypeCall(GenotypeCall locus) {
/*outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s",
locus.getLocation(),
"NOT OUTPUTED",
locus.getReferencebase(),
locus.getGenotypes().get(1).getBases().,
genotype(),
qhat,
qstar,
lodVsRef,
lodVsNextBest,
depth,
bases);*/
}
/**
* add a variable length call to the genotyper
*
* @param contig the contig you're calling in
* @param position the position on the genome
* @param rmsMapQuals the root mean square of the mapping qualities
* @param readDepth the read depth
* @param refBase the reference base
* @param firstHomZyg the first homozygous indel
* @param secondHomZyg the second homozygous indel (if present, null if not)
* @param hetLikelihood the heterozygous likelihood
*/
@Override
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
throw new StingException("TabularLFWriter doesn't support variable length calls");
bases); */
}
/**
* add a no call to the genotype file, if supported.
*
* @param position
* @param readDepth
*/
@Override
public void addNoCall(int position, int readDepth) {
public void addNoCall(int position) {
throw new StingException("TabularLFWriter doesn't support no-calls");
}

View File

@ -0,0 +1,11 @@
package org.broadinstitute.sting.utils.genotype;
/**
* @author aaron
* <p/>
* Interface Variant
* <p/>
* This class represents a variant
*/
public interface Variant {
}

View File

@ -1,8 +1,12 @@
package org.broadinstitute.sting.utils.genotype;
package org.broadinstitute.sting.utils.genotype.geli;
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import java.io.File;
@ -65,7 +69,6 @@ public class GeliAdapter implements GenotypeWriter {
* @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles
*/
@Override
public void addGenotypeCall(SAMSequenceRecord contig,
int position,
float rmsMapQuals,
@ -87,19 +90,27 @@ public class GeliAdapter implements GenotypeWriter {
* @param secondHomZyg the second homozygous indel (if present, null if not)
* @param hetLikelihood the heterozygous likelihood
*/
@Override
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
}
/**
* Add a genotype, given a genotype locus
*
* @param locus the locus to add
*/
@Override
public void addGenotypeCall(GenotypeCall locus) {
//To change body of implemented methods use File | Settings | File Templates.
}
/**
* add a no call to the genotype file, if supported.
*
* @param position
* @param readDepth
*/
@Override
public void addNoCall(int position, int readDepth) {
public void addNoCall(int position) {
throw new UnsupportedOperationException("Geli format does not support no-calls");
}

View File

@ -0,0 +1,93 @@
package org.broadinstitute.sting.utils.genotype.geli;
import org.broadinstitute.sting.utils.StingException;
import org.broadinstitute.sting.utils.genotype.Genotype;
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.List;
/**
*
* @author aaron
*
* Class GeliTextWriter
*
* A descriptions should go here. Blame aaron if it's missing.
*/
public class GeliTextWriter implements GenotypeWriter {
// where we write to
PrintWriter mWriter;
/**
* create a geli text writer
* @param file
*/
public GeliTextWriter(File file) {
try {
mWriter = new PrintWriter(file);
} catch (FileNotFoundException e) {
throw new StingException("Unable to open file " + file.toURI());
}
mWriter.println("#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT");
}
/**
* Add a genotype, given a genotype locus
*
* @param locus the locus to add
*/
public void addGenotypeCall(GenotypeCall locus) {
if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)");
// this is to perserve the format string that we used to use
double[] likelihoods = new double[10];
int index = 0;
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
for (Genotype G: lt) {
likelihoods[index] = G.getLikelihood();
index++;
}
mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
locus.getLocation().getContig(),
locus.getLocation().getStart(),
locus.getReferencebase(),
locus.getReadDepth(),
-1,
locus.getGenotypes().get(0).getBases(),
locus.getBestVrsRef().second.getScore(),
locus.getBestVrsNext().second.getScore(),
likelihoods[0],
likelihoods[1],
likelihoods[2],
likelihoods[3],
likelihoods[4],
likelihoods[5],
likelihoods[6],
likelihoods[7],
likelihoods[8],
likelihoods[9]));
}
/**
* add a no call to the genotype file, if supported.
*
* @param position the position to add the no call at
*/
@Override
public void addNoCall(int position) {
throw new UnsupportedOperationException("Geli text format doesn't support a no-call call.");
}
/** finish writing, closing any open files. */
@Override
public void close() {
mWriter.close();
}
}

View File

@ -1,66 +0,0 @@
package org.broadinstitute.sting.utils.genotype.gff;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import net.sf.samtools.SAMSequenceRecord;
/**
* @author aaron
* <p/>
* Class GFFWriter
* <p/>
* A descriptions should go here. Blame aaron if it's missing.
*/
public class GFFWriter implements GenotypeWriter {
/**
* add a single point genotype call to the file
*
* @param contig the contig you're calling in
* @param position the position on the contig
* @param referenceBase the reference base
* @param readDepth the read depth at the specified position
* @param likelihoods the likelihoods of each of the possible alleles
*/
@Override
public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods) {
//To change body of implemented methods use File | Settings | File Templates.
}
/**
* add a variable length call to the genotype file
*
* @param contig the contig you're calling in
* @param position the position on the genome
* @param rmsMapQuals the root mean square of the mapping qualities
* @param readDepth the read depth
* @param refBase the reference base
* @param firstHomZyg the first homozygous indel
* @param secondHomZyg the second homozygous indel (if present, null if not)
* @param hetLikelihood the heterozygous likelihood
*/
@Override
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
//To change body of implemented methods use File | Settings | File Templates.
}
/**
* add a no call to the genotype file, if supported.
*
* @param position the position
* @param readDepth the read depth
*/
@Override
public void addNoCall(int position, int readDepth) {
//To change body of implemented methods use File | Settings | File Templates.
}
/** finish writing, closing any open files. */
@Override
public void close() {
//To change body of implemented methods use File | Settings | File Templates.
}
}

View File

@ -1,14 +1,14 @@
package org.broadinstitute.sting.utils.genotype.glf;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.util.BinaryCodec;
import net.sf.samtools.util.BlockCompressedOutputStream;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.*;
import java.io.DataOutputStream;
import java.io.File;
import java.util.List;
/*
* Copyright (c) 2009 The Broad Institute
*
@ -80,7 +80,6 @@ public class GLFWriter implements GenotypeWriter {
* @param rmsMapQ the root mean square of the mapping quality
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
*/
@Override
public void addGenotypeCall(SAMSequenceRecord contig,
int genomicLoc,
float rmsMapQ,
@ -100,6 +99,27 @@ public class GLFWriter implements GenotypeWriter {
call.write(this.outputBinaryCodec);
}
/**
* Add a genotype, given a genotype locus
*
* @param locus
*/
@Override
public void addGenotypeCall(GenotypeCall locus) {
//TODO: CODEWORD
// this is to perserve the format string that we used to use
double[] posteriors = new double[10];
int index = 0;
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
for (Genotype G: lt) {
posteriors[index] = G.getLikelihood();
index++;
}
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRMSMappingQuals(),locus.getReferencebase(),locus.getReadDepth(),obj);
}
/**
* add a variable length (indel, deletion, etc) to the genotype writer
*
@ -112,7 +132,6 @@ public class GLFWriter implements GenotypeWriter {
* @param secondHomZyg the second homozygous call
* @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255
*/
@Override
public void addVariableLengthCall(SAMSequenceRecord contig,
int genomicLoc,
float rmsMapQ,
@ -145,10 +164,10 @@ public class GLFWriter implements GenotypeWriter {
* add a no call to the genotype file, if supported.
*
* @param position the position
* @param readDepth the read depth
*
*/
@Override
public void addNoCall(int position, int readDepth) {
public void addNoCall(int position) {
// glf doesn't support this operation
throw new UnsupportedOperationException("GLF doesn't support a 'no call' call.");
}

View File

@ -5,9 +5,7 @@ import net.sf.picard.reference.ReferenceSequenceFileFactory;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
import org.junit.Assert;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
@ -190,7 +188,7 @@ public class RodGLFTest extends BaseTest {
/**
* create the example glf file for the test, you can uncomment the above test line to have this
* test run, regenerating the file.
*/
*
public void createRodFile() {
GenotypeWriter writer = new GLFWriter("", new File("glfTestFile.glf"));
int location = 1;
@ -199,7 +197,7 @@ public class RodGLFTest extends BaseTest {
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 2, 20, 'A', 5, createLikelihood('T'));
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 3, 20, 'A', 5, createLikelihood('C'));
writer.close();
}
}*/
/**
* create a likelihood object, given the appropriate reference base

View File

@ -1,73 +0,0 @@
package org.broadinstitute.sting.utils.genotype;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.junit.Test;
import java.io.File;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
*
* @author aaron
*
* Class GeliAdapterTest
*
* Tests the GeliAdapter class
*/
public class GeliAdapterTest extends BaseTest {
// private our Geli adapter
private GenotypeWriter adapter = null;
/**
* test out the likelihood object
*/
@Test
public void test1() {
File fl = new File("testFile.txt");
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10);
adapter = new GeliAdapter(fl,header);
LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
SAMSequenceRecord rec = new SAMSequenceRecord("chr1",10);
adapter.addGenotypeCall(rec,100,100,'A',100,obj);
adapter.close();
}
public double[] createFakeLikelihoods() {
double ret[] = new double[10];
for (int x = 0; x < 10; x++) {
ret[x] = (double)(10.0-x) * 10.0;
}
return ret;
}
}