Adding the initial changes for the new Genotyping interface. The bullet points are:
- SSG is much simpler now - GeliText has been added as a GenotypeWriter - AlleleFrequencyWalker will be deleted when I untangle the AlleleMetric's dependence on it - GenotypeLikelihoods now implements GenotypeGenerator, but could still use cleanup There is still a lot more work to do, but this is a good initial check-in. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1335 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c5c11d5d1c
commit
bca894ebce
|
|
@ -1,22 +1,21 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
//import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
import java.io.PrintStream;
|
||||
import java.io.DataInputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate, String>// implements AllelicVariant
|
||||
{
|
||||
|
|
@ -140,7 +139,7 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
logger.debug(String.format(" => result is %s", alleleFreq));
|
||||
|
||||
if (LOG_METRICS) metrics.nextPosition(alleleFreq, tracker);
|
||||
//if (LOG_METRICS) metrics.nextPosition(alleleFreq, tracker);
|
||||
|
||||
return alleleFreq;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,19 +1,21 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.ListUtils;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -49,7 +51,7 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
System.exit(-1);
|
||||
}
|
||||
|
||||
String header = GELI_OUTPUT_FORMAT ? AlleleFrequencyEstimate.geliHeaderString() : AlleleFrequencyEstimate.asTabularStringHeader();
|
||||
String header = "#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT";
|
||||
variantsOut.println("DownsampledCoverage\tAvailableCoverage\tHapmapChipGenotype\tGenotypeCallType\t"+header.substring(1));
|
||||
}
|
||||
|
||||
|
|
@ -86,10 +88,11 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
List<Integer> sub_offsets = ListUtils.sliceListByIndices(subset_indices, offsets);
|
||||
|
||||
LocusContext subContext = new LocusContext(context.getLocation(), sub_reads, sub_offsets);
|
||||
AlleleFrequencyEstimate alleleFreq = SSG.map(tracker, ref, subContext);
|
||||
GenotypeCall call = SSG.map(tracker, ref, subContext);
|
||||
|
||||
if (alleleFreq != null) {
|
||||
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+alleleFreq.callType()+"\t"+alleleFreq.asGeliString());
|
||||
String callType = (call.isVariation()) ? ((call.getBestVrsRef().first.isHom()) ? "HomozygousSNP" : "HeterozygousSNP") : "HomozygousReference";
|
||||
if (call != null) {
|
||||
GenotypeCalls.add(coverage+"\t"+coverage_available+"\t"+hc_genotype+"\t"+callType+"\t"+toGeliString(call));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -110,6 +113,41 @@ public class CoverageEvalWalker extends LocusWalker<List<String>, String> {
|
|||
|
||||
return "";
|
||||
}
|
||||
|
||||
// a method to support getting the geli string, since the AlleleFrequencyEstimate is going away
|
||||
public String toGeliString (GenotypeCall locus) {
|
||||
if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)");
|
||||
|
||||
|
||||
// this is to perserve the format string that we used to use
|
||||
double[] likelihoods = new double[10];
|
||||
int index = 0;
|
||||
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
|
||||
for (Genotype G: lt) {
|
||||
likelihoods[index] = G.getLikelihood();
|
||||
index++;
|
||||
}
|
||||
|
||||
return String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
|
||||
locus.getLocation().getContig(),
|
||||
locus.getLocation().getStart(),
|
||||
locus.getReferencebase(),
|
||||
locus.getReadDepth(),
|
||||
-1,
|
||||
locus.getGenotypes().get(0).getBases(),
|
||||
locus.getBestVrsRef().second.getScore(),
|
||||
locus.getBestVrsNext().second.getScore(),
|
||||
likelihoods[0],
|
||||
likelihoods[1],
|
||||
likelihoods[2],
|
||||
likelihoods[3],
|
||||
likelihoods[4],
|
||||
likelihoods[5],
|
||||
likelihoods[6],
|
||||
likelihoods[7],
|
||||
likelihoods[8],
|
||||
likelihoods[9]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,124 +0,0 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class LikelihoodWalker
|
||||
*
|
||||
* a simple walker to calculate the genotype likelihoods
|
||||
*/
|
||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||
public class GenotypeLikelihoodsWalker extends LocusWalker<LikelihoodWrapper, GenotypeWriter> {
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
|
||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
|
||||
|
||||
|
||||
@Override
|
||||
public LikelihoodWrapper map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
String bases = pileup.getBases();
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
double rmsSum = 0.0;
|
||||
// Handle single-base polymorphisms.
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
rmsSum += (read.getMappingQuality() * read.getMappingQuality());
|
||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
||||
}
|
||||
|
||||
// our return
|
||||
LikelihoodWrapper wrap = new LikelihoodWrapper();
|
||||
|
||||
return wrapLikelihoods(ref, context, reads, rmsSum, G, wrap);
|
||||
}
|
||||
|
||||
/**
|
||||
* wrap the likelihood values in with the other data we'll need
|
||||
* @param ref the ref base
|
||||
* @param context the locus context
|
||||
* @param reads the reads
|
||||
* @param rmsSum the rms square total (not the actual rms yet)
|
||||
* @param g the genotypeLikelihoods
|
||||
* @param wrap the object to place the values into
|
||||
* @return a likelihood wrapper
|
||||
*/
|
||||
private LikelihoodWrapper wrapLikelihoods(char ref, LocusContext context, List<SAMRecord> reads, double rmsSum, GenotypeLikelihoods g, LikelihoodWrapper wrap) {
|
||||
wrap.obj = new LikelihoodObject();
|
||||
wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
for (int x = 0; x < GenotypeLikelihoods.genotypes.length; x++) {
|
||||
wrap.obj.setLikelihood(LikelihoodObject.GENOTYPE.valueOf(GenotypeLikelihoods.genotypes[x]), g.likelihoods[x]*10.0);
|
||||
}
|
||||
wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
|
||||
wrap.loc = GenomeLocParser.getContigInfo(context.getContig());
|
||||
wrap.readDepth = reads.size();
|
||||
float rms = (float)(Math.sqrt(rmsSum/reads.size()));
|
||||
wrap.rms = (rms > 255) ? 255 : rms;
|
||||
wrap.ref = ref;
|
||||
wrap.position = context.getLocation().getStart();
|
||||
return wrap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
@Override
|
||||
public GenotypeWriter reduceInit() {
|
||||
return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(),VARIANTS_FILE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
*
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
@Override
|
||||
public GenotypeWriter reduce(LikelihoodWrapper value, GenotypeWriter sum) {
|
||||
|
||||
sum.addGenotypeCall(value.loc,(int)value.position,value.rms,value.ref,value.readDepth,value.obj);
|
||||
return sum;
|
||||
}
|
||||
|
||||
/** Close the variant file. */
|
||||
public void onTraversalDone(GenotypeWriter result) {
|
||||
result.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class LikelihoodWrapper {
|
||||
public LikelihoodObject obj;
|
||||
public SAMSequenceRecord loc;
|
||||
public long position;
|
||||
public float rms;
|
||||
public char ref;
|
||||
public int readDepth;
|
||||
}
|
||||
|
|
@ -2,32 +2,27 @@ package org.broadinstitute.sting.playground.gatk.walkers;
|
|||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
|
||||
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.playground.utils.IndelLikelihood;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.BasicPileup;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, String> {
|
||||
public class SingleSampleGenotyper extends LocusWalker<GenotypeCall, GenotypeWriter> {
|
||||
// Control output settings
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
|
||||
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
|
||||
|
|
@ -35,11 +30,9 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
// Control what goes into the variants file and what format that file should have
|
||||
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold on which variants should be filtered", required = false)public Double LOD_THRESHOLD = Double.MIN_VALUE;
|
||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false)
|
||||
public boolean GENOTYPE = false;
|
||||
@Argument(fullName = "genotype", shortName = "genotype", doc = "Should we output confidient genotypes or just the variants?", required = false) public boolean GENOTYPE = false;
|
||||
|
||||
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false)
|
||||
public boolean THREE_BASE_ERRORS = false;
|
||||
@Argument(fullName = "3BaseErrors", shortName = "3BaseErrors", doc = "Should we use a 3-base error mode (so that P(b_true != b_called | e) == e / 3?", required = false) public boolean THREE_BASE_ERRORS = false;
|
||||
|
||||
// Control periodic reporting features
|
||||
@Argument(fullName = "metrics_interval", shortName = "metint", doc = "Number of loci to process between metrics reports", required = false) public Integer METRICS_INTERVAL = 50000;
|
||||
|
|
@ -63,9 +56,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
public boolean keepQ0Bases = false;
|
||||
|
||||
public AlleleMetrics metricsOut;
|
||||
public PrintStream variantsOut;
|
||||
public String sampleName;
|
||||
private GenotypeWriter mGenotypeWriter;
|
||||
|
||||
public double[] plocus;
|
||||
public double[] phapmap;
|
||||
|
|
@ -118,23 +109,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
/** Initialize the walker with some sensible defaults */
|
||||
public void initialize() {
|
||||
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
|
||||
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
|
||||
mGenotypeWriter = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GLF, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(), VARIANTS_FILE);
|
||||
} else {
|
||||
try {
|
||||
variantsOut = new PrintStream(VARIANTS_FILE);
|
||||
} catch (FileNotFoundException e) {
|
||||
err.format("Unable to open file '%s'. Perhaps the parent directory does not exist or is read-only.\n", VARIANTS_FILE.getAbsolutePath());
|
||||
System.exit(-1);
|
||||
}
|
||||
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GELI) {
|
||||
variantsOut.println(AlleleFrequencyEstimate.geliHeaderString());
|
||||
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR) {
|
||||
variantsOut.println(AlleleFrequencyEstimate.asTabularStringHeader());
|
||||
} else {
|
||||
throw new StingException("Unsupported single sample genotyper output format: " + this.VAR_FORMAT.toString());
|
||||
}
|
||||
}
|
||||
|
||||
plocus = priorsArray(PRIORS_ANY_LOCUS);
|
||||
phapmap = priorsArray(PRIORS_HAPMAP);
|
||||
pdbsnp = priorsArray(PRIORS_DBSNP);
|
||||
|
|
@ -151,19 +126,22 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
*
|
||||
* @return an AlleleFrequencyEstimate object
|
||||
*/
|
||||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
public GenotypeCall map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
rationalizeSampleName(context.getReads().get(0));
|
||||
|
||||
AlleleFrequencyEstimate freq = getAlleleFrequency(tracker, Character.toUpperCase(ref), context, sampleName);
|
||||
|
||||
if (freq != null) {
|
||||
metricsOut.nextPosition(freq, tracker);
|
||||
if (context.getLocation().getStart() == 73) {
|
||||
int stop = 1;
|
||||
}
|
||||
GenotypeLocus genotype = getGenotype(tracker, Character.toUpperCase(ref), context, sampleName);
|
||||
GenotypeCall call = null;
|
||||
if (genotype != null) {
|
||||
call = new GenotypeCallImpl(genotype, ref,
|
||||
new ConfidenceScore(this.LOD_THRESHOLD, (GENOTYPE ? ConfidenceScore.SCORE_METHOD.BEST_NEXT : ConfidenceScore.SCORE_METHOD.BEST_REF)));
|
||||
metricsOut.nextPosition(call, tracker);
|
||||
}
|
||||
if (!SUPPRESS_METRICS) {
|
||||
metricsOut.printMetricsAtLocusIntervals(METRICS_INTERVAL);
|
||||
}
|
||||
|
||||
return freq;
|
||||
return call;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -204,69 +182,32 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
*
|
||||
* @return the allele frequency estimate
|
||||
*/
|
||||
private AlleleFrequencyEstimate getAlleleFrequency(RefMetaDataTracker tracker, char ref, LocusContext context, String sample_name) {
|
||||
private GenotypeLocus getGenotype(RefMetaDataTracker tracker, char ref, LocusContext context, String sample_name) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
String bases = pileup.getBases();
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
|
||||
// Handle indels, but don't do anything with the result yet.
|
||||
IndelLikelihood I = (CALL_INDELS) ? callIndel(context, reads, offsets) : null;
|
||||
//IndelLikelihood I = (CALL_INDELS) ? callIndel(context, context.getReads(), context.getOffsets()) : null;
|
||||
|
||||
// Handle single-base polymorphisms.
|
||||
GenotypeLikelihoods G = callGenotype(tracker, ref, pileup, reads, offsets);
|
||||
//GenotypeLikelihoods G = callGenotype(tracker, ref, context);
|
||||
GenotypeLikelihoods G = callGenotype(tracker);
|
||||
GenotypeLocus geno = G.callGenotypes(tracker, ref, pileup);
|
||||
|
||||
return G.toAlleleFrequencyEstimate(context.getLocation(), ref, bases.length(), bases, G.likelihoods, sample_name);
|
||||
return geno;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls the underlying, single locus genotype of the sample
|
||||
*
|
||||
* @param tracker the meta data tracker
|
||||
* @param ref the reference base
|
||||
* @param pileup the pileup object for the given locus
|
||||
* @param reads the reads that overlap this locus
|
||||
* @param offsets the offsets per read that identify the base at this locus
|
||||
*
|
||||
* @return the likelihoods per genotype
|
||||
*/
|
||||
private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup, List<SAMRecord> reads, List<Integer> offsets) {
|
||||
private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker) {
|
||||
GenotypeLikelihoods G = null;
|
||||
|
||||
if (isHapmapSite(tracker)) {
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, phapmap[0], phapmap[1], phapmap[2], p2ndon, p2ndoff);
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, phapmap[0], phapmap[1], phapmap[2], p2ndon, p2ndoff, keepQ0Bases);
|
||||
} else if (isDbSNPSite(tracker)) {
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, pdbsnp[0], pdbsnp[1], pdbsnp[2], p2ndon, p2ndoff);
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, pdbsnp[0], pdbsnp[1], pdbsnp[2], p2ndon, p2ndoff, keepQ0Bases);
|
||||
} else {
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff);
|
||||
G = new GenotypeLikelihoods(THREE_BASE_ERRORS, plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff, keepQ0Bases);
|
||||
}
|
||||
|
||||
G.filterQ0Bases(! keepQ0Bases); // Set the filtering / keeping flag
|
||||
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
|
||||
char base = read.getReadString().charAt(offset);
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
int nBasesAdded = G.add(ref, base, qual);
|
||||
if ( nBasesAdded == 0 ) {
|
||||
nFilteredQ0Bases++;
|
||||
//System.out.printf("Filtering Q0 base from %s at %s %d: %c %c %d%n",
|
||||
// read.getReadName(), GenomeLocParser.createGenomeLoc(read), offset, ref, base, qual);
|
||||
}
|
||||
}
|
||||
|
||||
G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior);
|
||||
|
||||
//if (!IGNORE_SECONDARY_BASES && pileup.getBases().length() < 750) {
|
||||
// G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||
//}
|
||||
|
||||
G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||
|
||||
return G;
|
||||
}
|
||||
|
||||
|
|
@ -312,69 +253,38 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
return tracker.lookup("dbsnp", null) != null;
|
||||
}
|
||||
|
||||
double allele_frequency_prior = -1;
|
||||
char alt_allele;
|
||||
|
||||
/**
|
||||
* Accessor for PoolCaller to set the allele frequency prior for this sample.
|
||||
*
|
||||
* @param freq the allele frequency
|
||||
* @param alt the alternate allele
|
||||
*/
|
||||
public void setAlleleFrequencyPrior(double freq, char alt) {
|
||||
this.allele_frequency_prior = freq;
|
||||
this.alt_allele = alt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize values appropriately for the reduce step.
|
||||
*
|
||||
* @return an empty string
|
||||
*/
|
||||
public String reduceInit() {
|
||||
return "";
|
||||
public GenotypeWriter reduceInit() {
|
||||
return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(), VARIANTS_FILE);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* If we've found a LOD >= 5 variant, output it to disk.
|
||||
*
|
||||
* @param alleleFreq an AlleleFrequencyEstimage object for the variant.
|
||||
* @param sum accumulator for the reduce.
|
||||
* @param call an GenotypeCall object for the variant.
|
||||
* @param sum accumulator for the reduce.
|
||||
*
|
||||
* @return an empty string
|
||||
*/
|
||||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum) {
|
||||
//System.out.printf("AlleleFreqEstimate %s %f %f %f%n", alleleFreq,alleleFreq.lodVsNextBest, alleleFreq.lodVsRef, LOD_THRESHOLD );
|
||||
if (alleleFreq != null && (GENOTYPE ? alleleFreq.lodVsNextBest : alleleFreq.lodVsRef) >= LOD_THRESHOLD) {
|
||||
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GELI) {
|
||||
variantsOut.println(alleleFreq.asGeliString());
|
||||
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR) {
|
||||
variantsOut.println(alleleFreq.asTabularString());
|
||||
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
|
||||
SAMSequenceRecord rec = GenomeLocParser.getContigInfo(alleleFreq.location.getContig());
|
||||
|
||||
double[] likelihoods = new double[10];
|
||||
for (int x = 0; x < alleleFreq.posteriors.length; x++) {
|
||||
likelihoods[x] = GLFRecord.LIKELIHOOD_SCALE_FACTOR * alleleFreq.genotypeLikelihoods.likelihoods[x];
|
||||
}
|
||||
|
||||
LikelihoodObject obj = new LikelihoodObject(likelihoods, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
|
||||
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
|
||||
this.mGenotypeWriter.addGenotypeCall(rec,(int)alleleFreq.location.getStart(),0.0f,alleleFreq.ref,alleleFreq.depth,obj);
|
||||
}
|
||||
public GenotypeWriter reduce(GenotypeCall call, GenotypeWriter sum) {
|
||||
if (call != null && call.isVariation()) {
|
||||
if ((GENOTYPE && call.getBestVrsNext().second.getScore() > LOD_THRESHOLD) ||
|
||||
(call.getBestVrsRef().second.getScore() > LOD_THRESHOLD))
|
||||
sum.addGenotypeCall(call);
|
||||
}
|
||||
|
||||
return "";
|
||||
return sum;
|
||||
}
|
||||
|
||||
/** Close the variant file. */
|
||||
public void onTraversalDone(String sum) {
|
||||
public void onTraversalDone(GenotypeWriter sum) {
|
||||
logger.info(String.format("SingleSampleGenotyper filtered %d Q0 bases", nFilteredQ0Bases));
|
||||
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
|
||||
mGenotypeWriter.close();
|
||||
} else {
|
||||
this.variantsOut.close();
|
||||
}
|
||||
sum.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,17 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodGFF;
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.ConfidenceScore;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
|
||||
import java.util.List;
|
||||
import java.io.PrintStream;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
|
||||
public class AlleleMetrics {
|
||||
private double LOD_cutoff = 5;
|
||||
|
|
@ -54,7 +56,7 @@ public class AlleleMetrics {
|
|||
this.LOD_cutoff = lodThresold;
|
||||
}
|
||||
|
||||
public void nextPosition(AlleleFrequencyEstimate alleleFreq, RefMetaDataTracker tracker) {
|
||||
public void nextPosition(GenotypeCall call, RefMetaDataTracker tracker) {
|
||||
num_loci_total += 1;
|
||||
|
||||
boolean is_dbSNP_SNP = false;
|
||||
|
|
@ -78,10 +80,10 @@ public class AlleleMetrics {
|
|||
}
|
||||
}
|
||||
}
|
||||
Pair<Genotype, ConfidenceScore> result = call.getBestVrsRef();
|
||||
if (Math.abs(call.getBestVrsNext().second.getScore()) >= LOD_cutoff) { num_loci_confident += 1; }
|
||||
|
||||
if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
|
||||
|
||||
if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
|
||||
if (call.isVariation() && result.second.getScore() >= LOD_cutoff)
|
||||
{
|
||||
// Confident variant.
|
||||
|
||||
|
|
@ -99,10 +101,12 @@ public class AlleleMetrics {
|
|||
String hapmap_genotype = hapmap_chip_genotype.getFeature();
|
||||
long refs=0, alts=0;
|
||||
double hapmap_q;
|
||||
|
||||
String str = call.getBestVrsRef().first.getBases();
|
||||
char alt = str.charAt(0);
|
||||
if (str.charAt(0) == call.getReferencebase()) alt = str.charAt(1);
|
||||
for (char c : hapmap_genotype.toCharArray()) {
|
||||
if (c == alleleFreq.ref) { refs++; }
|
||||
if (c == alleleFreq.alt) { alts++; }
|
||||
if (c == call.getReferencebase()) { refs++; }
|
||||
if (c == alt) { alts++; }
|
||||
}
|
||||
|
||||
if (refs+alts > 0) {
|
||||
|
|
@ -117,7 +121,7 @@ public class AlleleMetrics {
|
|||
//out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
|
||||
|
||||
//System.out.printf("DBG %f %s\n", LOD_cutoff, alleleFreq.asTabularString());
|
||||
if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
|
||||
if (call.getBestVrsNext().second.getScore() >= LOD_cutoff) {
|
||||
|
||||
/*
|
||||
System.out.printf("DBG %f %f %f %f\n",
|
||||
|
|
@ -129,22 +133,22 @@ public class AlleleMetrics {
|
|||
|
||||
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
|
||||
//if (hapmap_q != -1 && hapmap_q == alleleFreq.qstar) {
|
||||
if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && Math.abs(hapmap_q - alleleFreq.qstar) <= dbl_cmp_precision) {
|
||||
/*if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && Math.abs(hapmap_q - alleleFreq.qstar) <= dbl_cmp_precision) {
|
||||
hapmap_genotype_correct++;
|
||||
}else{
|
||||
hapmap_genotype_incorrect++;
|
||||
//System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
|
||||
//out.printf(" INCORRECT GENOTYPE");
|
||||
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
|
||||
}
|
||||
}*/
|
||||
}
|
||||
|
||||
if (alleleFreq.lodVsRef >= LOD_cutoff || -1 * alleleFreq.lodVsRef >= LOD_cutoff) {
|
||||
if (result.second.getScore() >= LOD_cutoff || -1 * result.second.getScore() >= LOD_cutoff) {
|
||||
|
||||
// Now calculate ref / var performance - did we correctly classify the site as
|
||||
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
|
||||
boolean hapmap_var = hapmap_q != 0.0;
|
||||
boolean called_var = alleleFreq.qstar != 0.0;
|
||||
boolean called_var = call.isVariation();
|
||||
//if (hapmap_q != -1 && hapmap_var != called_var) {
|
||||
if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && hapmap_var != called_var) {
|
||||
hapmap_refvar_incorrect++;
|
||||
|
|
|
|||
|
|
@ -1,14 +1,15 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class GenotypeLikelihoods {
|
||||
public class GenotypeLikelihoods implements GenotypeGenerator {
|
||||
// precalculate these for performance (pow/log10 is expensive!)
|
||||
|
||||
/**
|
||||
|
|
@ -20,7 +21,7 @@ public class GenotypeLikelihoods {
|
|||
private static final double[] oneMinusData = new double[Byte.MAX_VALUE];
|
||||
private static final double[] oneHalfMinusDataArachne = new double[Byte.MAX_VALUE];
|
||||
private static final double[] oneHalfMinusData3Base = new double[Byte.MAX_VALUE];
|
||||
|
||||
private final boolean keepQ0Bases;
|
||||
private static final double log10Of1_3 = log10(1.0 / 3);
|
||||
private static final double log10Of2_3 = log10(2.0 / 3);
|
||||
|
||||
|
|
@ -62,7 +63,7 @@ public class GenotypeLikelihoods {
|
|||
genotypes[8] = "GT";
|
||||
genotypes[9] = "TT";
|
||||
}
|
||||
public int coverage;
|
||||
public int coverage;
|
||||
|
||||
// The genotype priors;
|
||||
private double priorHomRef;
|
||||
|
|
@ -80,18 +81,19 @@ public class GenotypeLikelihoods {
|
|||
public GenotypeLikelihoods() {
|
||||
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
|
||||
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
|
||||
|
||||
keepQ0Bases = true;
|
||||
initialize(false, 1.0 - 1e-3, 1e-3, 1e-5, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar) {
|
||||
double[] p2ndon = {0.000, 0.302, 0.366, 0.142, 0.000, 0.548, 0.370, 0.000, 0.319, 0.000};
|
||||
double[] p2ndoff = {0.480, 0.769, 0.744, 0.538, 0.575, 0.727, 0.768, 0.589, 0.762, 0.505};
|
||||
|
||||
keepQ0Bases = true;
|
||||
initialize(threeBaseErrors, priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff) {
|
||||
public GenotypeLikelihoods(boolean threeBaseErrors , double priorHomRef, double priorHet, double priorHomVar, double[] p2ndon, double[] p2ndoff, boolean keepQ0Bases) {
|
||||
this.keepQ0Bases = keepQ0Bases;
|
||||
initialize(threeBaseErrors, priorHomRef, priorHet, priorHomVar, p2ndon, p2ndoff);
|
||||
}
|
||||
|
||||
|
|
@ -276,62 +278,35 @@ public class GenotypeLikelihoods {
|
|||
return s;
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref, double[] allele_likelihoods)
|
||||
{
|
||||
int k = 0;
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
for (int j = i; j < 4; j++)
|
||||
{
|
||||
if (i == j)
|
||||
{
|
||||
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
|
||||
}
|
||||
else
|
||||
{
|
||||
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
|
||||
}
|
||||
k++;
|
||||
}
|
||||
}
|
||||
this.sort();
|
||||
}
|
||||
public void ApplyPrior(char ref, double[] allele_likelihoods) {
|
||||
int k = 0;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
for (int j = i; j < 4; j++) {
|
||||
if (i == j) {
|
||||
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]);
|
||||
} else {
|
||||
this.likelihoods[k] += Math.log10(allele_likelihoods[i]) + Math.log10(allele_likelihoods[j]) + Math.log10(2);
|
||||
}
|
||||
k++;
|
||||
}
|
||||
}
|
||||
this.sort();
|
||||
}
|
||||
|
||||
public void ApplyPrior(char ref, char alt, double p_alt) {
|
||||
public void ApplyPrior(char ref) {
|
||||
for (int i = 0; i < genotypes.length; i++) {
|
||||
if ((p_alt == -1) || (p_alt <= 1e-6)) {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += Math.log10(priorHomRef);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += Math.log10(priorHomVar);
|
||||
} else {
|
||||
// het
|
||||
likelihoods[i] += Math.log10(priorHet);
|
||||
}
|
||||
if (Double.isInfinite(likelihoods[i])) {
|
||||
likelihoods[i] = -1000;
|
||||
}
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += Math.log10(priorHomRef);
|
||||
} else if ((genotypes[i].charAt(0) != ref) && (genotypes[i].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += Math.log10(priorHomVar);
|
||||
} else {
|
||||
if ((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
likelihoods[i] += 2.0 * Math.log10(1.0 - p_alt);
|
||||
} else if ((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == alt)) {
|
||||
// hom-nonref
|
||||
likelihoods[i] += 2.0 * Math.log10(p_alt);
|
||||
} else if (((genotypes[i].charAt(0) == alt) && (genotypes[i].charAt(1) == ref)) ||
|
||||
((genotypes[i].charAt(0) == ref) && (genotypes[i].charAt(1) == alt))) {
|
||||
// het
|
||||
likelihoods[i] += Math.log10((1.0 - p_alt) * p_alt * 2.0);
|
||||
} else {
|
||||
// something else (noise!)
|
||||
likelihoods[i] += Math.log10(1e-5);
|
||||
}
|
||||
|
||||
if (Double.isInfinite(likelihoods[i])) {
|
||||
likelihoods[i] = -1000;
|
||||
}
|
||||
// het
|
||||
likelihoods[i] += Math.log10(priorHet);
|
||||
}
|
||||
if (Double.isInfinite(likelihoods[i])) {
|
||||
likelihoods[i] = -1000;
|
||||
}
|
||||
}
|
||||
this.sort();
|
||||
|
|
@ -410,51 +385,66 @@ public class GenotypeLikelihoods {
|
|||
return this.sorted_likelihoods[0];
|
||||
}
|
||||
|
||||
public double RefPosterior(char ref)
|
||||
{
|
||||
this.LodVsRef(ref);
|
||||
return this.ref_likelihood;
|
||||
}
|
||||
|
||||
public AlleleFrequencyEstimate toAlleleFrequencyEstimate(GenomeLoc location, char ref, int depth, String bases, double[] posteriors, String sample_name) {
|
||||
this.sort();
|
||||
double qhat = Double.NaN;
|
||||
double qstar = Double.NaN;
|
||||
char alt = 'N';
|
||||
|
||||
if ((sorted_genotypes[0].charAt(0) == ref) && (sorted_genotypes[0].charAt(1) == ref)) {
|
||||
// hom-ref
|
||||
qhat = 0.0;
|
||||
qstar = 0.0;
|
||||
alt = 'N';
|
||||
} else if ((sorted_genotypes[0].charAt(0) != ref) && (sorted_genotypes[0].charAt(1) != ref)) {
|
||||
// hom-nonref
|
||||
qhat = 1.0;
|
||||
qstar = 1.0;
|
||||
alt = sorted_genotypes[0].charAt(0);
|
||||
} else {
|
||||
// het
|
||||
qhat = 0.5;
|
||||
qstar = 0.5;
|
||||
|
||||
if (sorted_genotypes[0].charAt(0) != ref) {
|
||||
alt = sorted_genotypes[0].charAt(0);
|
||||
}
|
||||
if (sorted_genotypes[0].charAt(1) != ref) {
|
||||
alt = sorted_genotypes[0].charAt(1);
|
||||
}
|
||||
}
|
||||
|
||||
this.LodVsRef(ref); //HACK
|
||||
//System.out.printf("DBG: %f %f\n", sorted_likelihoods[0], ref_likelihood);
|
||||
|
||||
AlleleFrequencyEstimate AFE = new AlleleFrequencyEstimate(location, ref, alt, 2, qhat, qstar, this.LodVsRef(ref), this.LodVsNextBest(), sorted_likelihoods[0], ref_likelihood, depth, bases, (double[][]) null, this.likelihoods, sample_name);
|
||||
AFE.genotypeLikelihoods = this;
|
||||
return AFE;
|
||||
public double RefPosterior(char ref) {
|
||||
this.LodVsRef(ref);
|
||||
return this.ref_likelihood;
|
||||
}
|
||||
|
||||
private IndelLikelihood indel_likelihood;
|
||||
public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; }
|
||||
public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; }
|
||||
private IndelLikelihood indel_likelihood;
|
||||
|
||||
public void addIndelLikelihood(IndelLikelihood indel_likelihood) {
|
||||
this.indel_likelihood = indel_likelihood;
|
||||
}
|
||||
|
||||
public IndelLikelihood getIndelLikelihood() {
|
||||
return this.indel_likelihood;
|
||||
}
|
||||
|
||||
/**
|
||||
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
|
||||
*
|
||||
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
|
||||
* @param ref the reference base
|
||||
* @param pileup a pileup of the reads, containing the reads and their offsets
|
||||
*
|
||||
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
||||
*/
|
||||
@Override
|
||||
public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup) {
|
||||
//filterQ0Bases(!keepQ0Bases); // Set the filtering / keeping flag
|
||||
|
||||
|
||||
// for calculating the rms of the mapping qualities
|
||||
double squared = 0.0;
|
||||
for (int i = 0; i < pileup.getReads().size(); i++) {
|
||||
SAMRecord read = pileup.getReads().get(i);
|
||||
squared += read.getMappingQuality() * read.getMappingQuality();
|
||||
int offset = pileup.getOffsets().get(i);
|
||||
char base = read.getReadString().charAt(offset);
|
||||
byte qual = read.getBaseQualities()[offset];
|
||||
add(ref, base, qual);
|
||||
}
|
||||
// save off the likelihoods
|
||||
if (likelihoods == null || likelihoods.length == 0) return null;
|
||||
|
||||
double lklihoods[] = new double[likelihoods.length];
|
||||
|
||||
System.arraycopy(likelihoods, 0, lklihoods, 0, likelihoods.length);
|
||||
|
||||
|
||||
ApplyPrior(ref);
|
||||
|
||||
applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||
|
||||
// lets setup the locus
|
||||
GenotypeLocus locus = new GenotypeLocusImpl(pileup.getLocation(), pileup.getReads().size(),Math.sqrt(squared/pileup.getReads().size()));
|
||||
for (int x = 0; x < this.likelihoods.length; x++) {
|
||||
try {
|
||||
locus.addGenotype(new Genotype(this.genotypes[x],lklihoods[x],this.likelihoods[x]));
|
||||
} catch (InvalidGenotypeException e) {
|
||||
throw new StingException("Invalid Genotype value",e);
|
||||
}
|
||||
}
|
||||
return locus;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class ConfidenceScore
|
||||
* <p/>
|
||||
* this class represents the confidence in a genotype, and the method we used to obtain it
|
||||
*/
|
||||
public class ConfidenceScore implements Comparable<ConfidenceScore> {
|
||||
|
||||
private static final Double EPSILON = 1.0e-15;
|
||||
|
||||
public enum SCORE_METHOD {
|
||||
BEST_NEXT, BEST_REF, OTHER;
|
||||
}
|
||||
|
||||
private Double mScore;
|
||||
private SCORE_METHOD mMethod;
|
||||
|
||||
public ConfidenceScore(double score, SCORE_METHOD method) {
|
||||
this.mScore = score;
|
||||
this.mMethod = method;
|
||||
}
|
||||
|
||||
/**
|
||||
* generate a confidence score, given the two likelihoods, and the method used
|
||||
*
|
||||
* @param likelihoodOne the first likelihood
|
||||
* @param likelihoodTwo the second likelihood
|
||||
* @param method the method used to determine the likelihood
|
||||
*/
|
||||
public ConfidenceScore(double likelihoodOne, double likelihoodTwo, SCORE_METHOD method) {
|
||||
this.mScore = likelihoodOne / likelihoodTwo;
|
||||
this.mMethod = method;
|
||||
}
|
||||
|
||||
/**
|
||||
* compare this ConfidenceScore to another, throwing an exception if they're not the same scoring method
|
||||
* @param o the other confidence score if
|
||||
* @return 0 if equal
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(ConfidenceScore o) {
|
||||
if (o.mMethod != this.mMethod) {
|
||||
throw new UnsupportedOperationException("Attemped to compare Confidence scores with different methods");
|
||||
}
|
||||
double diff = mScore - o.mScore;
|
||||
if (Math.abs(diff) < (EPSILON * Math.abs(mScore)))
|
||||
return 0;
|
||||
else if (diff < 0)
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
public Double getScore() {
|
||||
return mScore;
|
||||
}
|
||||
|
||||
public SCORE_METHOD getMethod() {
|
||||
return mMethod;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeLikelihood
|
||||
* <p/>
|
||||
* This class encompasses all the information that is associated with a genotype
|
||||
* and it's likelihood, mainly:
|
||||
* <p/>
|
||||
* Likelihood value
|
||||
*/
|
||||
public class Genotype {
|
||||
private double mLikelihood = 0.0;
|
||||
private double mPosteriorProb = 0.0;
|
||||
private String mBases = "";
|
||||
private int mPloidy = 2; // assume diploid
|
||||
|
||||
/**
|
||||
* construct a genotypeLikelihood, given the bases, the posterior, and the likelihood
|
||||
*
|
||||
* @param bases the bases that make up this genotype
|
||||
* @param posterior the posterior probability of this genotype
|
||||
* @param likelihood the likelihood of this genotype
|
||||
* @param ploidy the ploidy of this genotype
|
||||
*/
|
||||
public Genotype(String bases, double posterior, double likelihood, int ploidy) {
|
||||
this.mPloidy = ploidy;
|
||||
if (bases.length() != ploidy) {
|
||||
throw new IllegalArgumentException("The number of bases should match the ploidy");
|
||||
}
|
||||
this.mLikelihood = likelihood;
|
||||
this.mBases = bases;
|
||||
this.mPosteriorProb = posterior;
|
||||
}
|
||||
|
||||
/**
|
||||
* construct a genotypeLikelihood, given the bases, the posterior, and the likelihood
|
||||
*
|
||||
* @param bases the bases that make up this genotype
|
||||
* @param posterior the posterior probability of this genotype
|
||||
* @param likelihood the likelihood of this genotype
|
||||
*/
|
||||
public Genotype(String bases, double posterior, double likelihood) {
|
||||
if (bases.length() != mPloidy) {
|
||||
throw new IllegalArgumentException("The number of bases should match the ploidy");
|
||||
}
|
||||
this.mLikelihood = likelihood;
|
||||
this.mBases = bases;
|
||||
this.mPosteriorProb = posterior;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the likelihood value
|
||||
*
|
||||
* @return a double, representing the likelihood
|
||||
*/
|
||||
public double getLikelihood() {
|
||||
return mLikelihood;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the posterior value
|
||||
*
|
||||
* @return a double, representing the posterior
|
||||
*/
|
||||
public double getPosteriorProb() {
|
||||
return mPosteriorProb;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the bases that represent this
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public String getBases() {
|
||||
return mBases;
|
||||
}
|
||||
|
||||
public int getPloidy() {
|
||||
return mPloidy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if both observed alleles are the same (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean isHom() {
|
||||
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
|
||||
char first = mBases.charAt(0);
|
||||
for (char c: mBases.toCharArray()) {
|
||||
if (c != first) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if observed alleles differ (regardless of whether they are ref or alt)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean isHet() {
|
||||
if (mBases.length() < 1) throw new UnsupportedOperationException("isHom requires at least one stored base");
|
||||
char first = mBases.charAt(0);
|
||||
for (char c: mBases.toCharArray()) {
|
||||
if (c != first) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,52 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface GenotypeCall
|
||||
* <p/>
|
||||
* Genotype call interface, for indicating that a genotype is
|
||||
* also a genotype call.
|
||||
*/
|
||||
public interface GenotypeCall extends GenotypeLocus{
|
||||
/**
|
||||
* get the confidence
|
||||
*
|
||||
* @return a ConfidenceScore representing the LOD score that this genotype was called with
|
||||
*/
|
||||
public ConfidenceScore getConfidence();
|
||||
|
||||
/**
|
||||
* gets the reference base
|
||||
*
|
||||
* @return the reference base we represent
|
||||
*/
|
||||
public char getReferencebase();
|
||||
|
||||
/**
|
||||
* get the best vrs the next best genotype LOD score
|
||||
* @return the genotype, and a LOD for best - next
|
||||
*/
|
||||
public Pair<Genotype,ConfidenceScore> getBestVrsNext();
|
||||
|
||||
/**
|
||||
* get the best vrs the reference allele.
|
||||
* @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked
|
||||
* with is variation
|
||||
*/
|
||||
public Pair<Genotype,ConfidenceScore> getBestVrsRef();
|
||||
|
||||
/**
|
||||
* check to see if this call is a variant, i.e. not homozygous reference
|
||||
* @return true if it's not hom ref, false otherwise
|
||||
*/
|
||||
public boolean isVariation();
|
||||
|
||||
/**
|
||||
* return genotype locus, with our data
|
||||
*/
|
||||
public GenotypeLocus toGenotypeLocus();
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,225 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.Pair;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeCallImpl
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class GenotypeCallImpl implements GenotypeCall {
|
||||
|
||||
// our stored genotype locus
|
||||
private final GenotypeLocus mLocus;
|
||||
private final char mRefBase;
|
||||
private final ConfidenceScore mScore;
|
||||
|
||||
/**
|
||||
* generate a GenotypeCall object with the specified locus info and reference base
|
||||
*
|
||||
* @param mLocus the locus
|
||||
* @param mRefBase the reference base to use
|
||||
*/
|
||||
public GenotypeCallImpl(GenotypeLocus mLocus, char mRefBase, ConfidenceScore mScore) {
|
||||
if (mLocus.getGenotypes().size() < 1) throw new StingException("Genotype Locus is empty");
|
||||
this.mLocus = mLocus;
|
||||
this.mRefBase = String.valueOf(mRefBase).toUpperCase().charAt(0);
|
||||
this.mScore = mScore;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the confidence
|
||||
*
|
||||
* @return a ConfidenceScore representing the LOD score that this genotype was called with
|
||||
*/
|
||||
@Override
|
||||
public ConfidenceScore getConfidence() {
|
||||
return mScore;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the reference base
|
||||
*
|
||||
* @return the reference base we represent
|
||||
*/
|
||||
@Override
|
||||
public char getReferencebase() {
|
||||
return mRefBase;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the best vrs the next best genotype LOD score
|
||||
*
|
||||
* @return the genotype, and a LOD for best - next
|
||||
*/
|
||||
@Override
|
||||
public Pair<Genotype, ConfidenceScore> getBestVrsNext() {
|
||||
List<Genotype> genos = this.mLocus.getGenotypes();
|
||||
if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes");
|
||||
return new Pair<Genotype, ConfidenceScore>(genos.get(0),
|
||||
new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - genos.get(1).getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT));
|
||||
}
|
||||
|
||||
/**
|
||||
* get the best vrs the reference allele.
|
||||
*
|
||||
* @return the genotype, and a LOD for best - ref. The best may be ref, unless you've checked
|
||||
* with is variation
|
||||
*/
|
||||
@Override
|
||||
public Pair<Genotype, ConfidenceScore> getBestVrsRef() {
|
||||
List<Genotype> genos = this.mLocus.getGenotypes();
|
||||
|
||||
// find the reference allele
|
||||
String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase();
|
||||
Genotype refGenotype = findRefGenotype(ref, genos);
|
||||
if (mLocus.getGenotypes().size() < 2) throw new StingException("Genotype Locus does not contain two genotypes");
|
||||
return new Pair<Genotype, ConfidenceScore>(genos.get(0),
|
||||
new ConfidenceScore(Math.abs(genos.get(0).getLikelihood() - refGenotype.getLikelihood()), ConfidenceScore.SCORE_METHOD.BEST_NEXT));
|
||||
}
|
||||
|
||||
/**
|
||||
* get the reference genotype object
|
||||
*
|
||||
* @param ref the reference as a ploidy count homozygous string
|
||||
* @param genos the genotype list
|
||||
*
|
||||
* @return a genotype for the
|
||||
*/
|
||||
private static Genotype findRefGenotype(String ref, List<Genotype> genos) {
|
||||
Genotype refGenotype = null;
|
||||
for (Genotype g : genos) {
|
||||
if (g.getBases().equals(ref)) refGenotype = g;
|
||||
}
|
||||
if (refGenotype == null) {
|
||||
for (Genotype g : genos) {
|
||||
System.err.println(g.getBases());
|
||||
}
|
||||
throw new StingException("Unable to find the reference genotype + " + ref + " size of genotype list = " + genos.size());
|
||||
}
|
||||
return refGenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* check to see if this call is a variant, i.e. not homozygous reference
|
||||
*
|
||||
* @return true if it's not hom ref, false otherwise
|
||||
*/
|
||||
@Override
|
||||
public boolean isVariation() {
|
||||
List<Genotype> genos = this.mLocus.getGenotypes();
|
||||
String ref = Utils.dupString(this.mRefBase, mLocus.getPloidy()).toUpperCase();
|
||||
return !(genos.get(0).getBases().equals(ref));
|
||||
}
|
||||
|
||||
/** return genotype locus, with our data */
|
||||
@Override
|
||||
public GenotypeLocus toGenotypeLocus() {
|
||||
return mLocus;
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLocus.getLocation();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy at this locus
|
||||
*
|
||||
* @return an integer representing the genotype ploidy at this location
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() {
|
||||
return mLocus.getPloidy();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes, sorted in asscending order by their likelihoods (the best
|
||||
* to the worst likelihoods)
|
||||
*
|
||||
* @return a list of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
return mLocus.getGenotypes();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes and their posteriors
|
||||
*
|
||||
* @return a list of the poseriors
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getPosteriors() {
|
||||
return mLocus.getPosteriors();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes sorted lexigraphically
|
||||
*
|
||||
* @return a list of the genotypes sorted lexi
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getLexigraphicallySortedGenotypes() {
|
||||
return mLocus.getLexigraphicallySortedGenotypes();
|
||||
}
|
||||
|
||||
/**
|
||||
* get the read depth at this position
|
||||
*
|
||||
* @return the read depth, -1 if it is unknown
|
||||
*/
|
||||
@Override
|
||||
public int getReadDepth() {
|
||||
return mLocus.getReadDepth();
|
||||
}
|
||||
|
||||
/**
|
||||
* add a genotype to the collection
|
||||
*
|
||||
* @param genotype
|
||||
*
|
||||
* @throws InvalidGenotypeException
|
||||
*/
|
||||
@Override
|
||||
public void addGenotype(Genotype genotype) throws InvalidGenotypeException {
|
||||
mLocus.addGenotype(genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the root mean square (RMS) of the mapping qualities
|
||||
*
|
||||
* @return the RMS, or a value < 0 if it's not available
|
||||
*/
|
||||
@Override
|
||||
public double getRMSMappingQuals() {
|
||||
return mLocus.getRMSMappingQuals();
|
||||
}
|
||||
|
||||
/**
|
||||
* create a variant, given the reference, and a lod score
|
||||
*
|
||||
* @param refBase the reference base
|
||||
* @param score the threshold to use to determine if it's a variant or not
|
||||
*
|
||||
* @return a variant object, or null if no genotypes meet the criteria
|
||||
*/
|
||||
@Override
|
||||
public Variant toGenotypeCall(char refBase, ConfidenceScore score) {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class GenotypeFactory
|
||||
*
|
||||
* Create genotypes, given certain pileup information
|
||||
*/
|
||||
public interface GenotypeGenerator {
|
||||
|
||||
/**
|
||||
* given all the data associated with a locus, make a genotypeLocus object containing the likelihoods and posterior probs
|
||||
*
|
||||
* @param tracker contains the reference meta data for this location, which may contain relevent information like dpSNP or hapmap information
|
||||
* @param ref the reference base
|
||||
* @param pileup a pileup of the reads, containing the reads and their offsets
|
||||
* @return a GenotypeLocus, containing each of the genotypes and their associated likelihood and posterior prob values
|
||||
*/
|
||||
public GenotypeLocus callGenotypes(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup);
|
||||
}
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface Genotype
|
||||
* <p/>
|
||||
* This interface represents the collection of genotypes at a specific locus
|
||||
*/
|
||||
public interface GenotypeLocus {
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
public GenomeLoc getLocation();
|
||||
|
||||
/**
|
||||
* get the ploidy at this locus
|
||||
*
|
||||
* @return an integer representing the genotype ploidy at this location
|
||||
*/
|
||||
public int getPloidy();
|
||||
|
||||
/**
|
||||
* get the genotypes, sorted in asscending order by their likelihoods (the best
|
||||
* to the worst likelihoods)
|
||||
*
|
||||
* @return a list of the genotypes, sorted by likelihoods
|
||||
*/
|
||||
public List<Genotype> getGenotypes();
|
||||
|
||||
/**
|
||||
* get the genotypes and their posteriors
|
||||
*
|
||||
* @return a list of the genotypes, sorted by poseriors
|
||||
*/
|
||||
public List<Genotype> getPosteriors();
|
||||
|
||||
/**
|
||||
* get the genotypes sorted lexigraphically
|
||||
*
|
||||
* @return a list of the genotypes sorted lexi
|
||||
*/
|
||||
public List<Genotype> getLexigraphicallySortedGenotypes();
|
||||
|
||||
|
||||
/**
|
||||
* get the read depth at this position
|
||||
*
|
||||
* @return the read depth, -1 if it is unknown
|
||||
*/
|
||||
public int getReadDepth();
|
||||
|
||||
/**
|
||||
* add a genotype to the collection
|
||||
*
|
||||
* @param genotype
|
||||
*
|
||||
* @throws InvalidGenotypeException
|
||||
*/
|
||||
public void addGenotype(Genotype genotype) throws InvalidGenotypeException;
|
||||
|
||||
/**
|
||||
* get the root mean square (RMS) of the mapping qualities
|
||||
*
|
||||
* @return the RMS, or a value < 0 if it's not available
|
||||
*/
|
||||
public double getRMSMappingQuals();
|
||||
|
||||
/**
|
||||
* create a variant, given the reference, and a lod score
|
||||
*
|
||||
* @param refBase the reference base
|
||||
* @param score the threshold to use to determine if it's a variant or not
|
||||
*
|
||||
* @return a variant object, or null if no genotypes meet the criteria
|
||||
*/
|
||||
public Variant toGenotypeCall(char refBase, ConfidenceScore score);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* the following are helper Comparator classes for the above sort orders, that may be useful
|
||||
* for anyone implementing the GenotypeLocus interface
|
||||
*/
|
||||
class PosteriorComparator implements Comparator<Genotype> {
|
||||
private final Double EPSILON = 1.0e-15;
|
||||
|
||||
@Override
|
||||
public int compare(Genotype genotype, Genotype genotype1) {
|
||||
double diff = genotype.getPosteriorProb() - genotype1.getPosteriorProb();
|
||||
if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getPosteriorProb())))
|
||||
return 0;
|
||||
else if (diff < 0)
|
||||
return 1;
|
||||
else
|
||||
return -1; // TODO: BACKWARD NOW
|
||||
}
|
||||
}
|
||||
|
||||
class LexigraphicalComparator implements Comparator<Genotype> {
|
||||
private final Double EPSILON = 1.0e-15;
|
||||
|
||||
@Override
|
||||
public int compare(Genotype genotype, Genotype genotype1) {
|
||||
return genotype.getBases().compareTo(genotype1.getBases());
|
||||
}
|
||||
}
|
||||
|
||||
class LikelihoodComparator implements Comparator<Genotype> {
|
||||
private final Double EPSILON = 1.0e-15;
|
||||
|
||||
@Override
|
||||
public int compare(Genotype genotype, Genotype genotype1) {
|
||||
double diff = genotype.getLikelihood() - genotype1.getLikelihood();
|
||||
if (Math.abs(diff) < (EPSILON * Math.abs(genotype.getLikelihood())))
|
||||
return 0;
|
||||
else if (diff < 0)
|
||||
return 1; // TODO: BACKWARD NOW
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GenotypeBucket
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class GenotypeLocusImpl implements GenotypeLocus {
|
||||
|
||||
private final List<Genotype> mGenotypes = new ArrayList<Genotype>();
|
||||
private GenomeLoc mLocation = null;
|
||||
private int mReadDepth = -1;
|
||||
private double mRMSMappingQual = -1;
|
||||
|
||||
public GenotypeLocusImpl(GenomeLoc location, int readDepth, double rmsMappingQual) {
|
||||
this.mLocation = location;
|
||||
mReadDepth = readDepth;
|
||||
mRMSMappingQual = rmsMappingQual;
|
||||
}
|
||||
|
||||
/**
|
||||
* Location of this genotype on the reference (on the forward strand). If the allele is insertion/deletion, the first inserted/deleted base
|
||||
* is located right <i>after</i> the specified location
|
||||
*
|
||||
* @return position on the genome wrapped in GenomeLoc object
|
||||
*/
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return mLocation;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the ploidy at this locus
|
||||
*
|
||||
* @return an integer representing the genotype ploidy at this location
|
||||
*/
|
||||
@Override
|
||||
public int getPloidy() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes, sorted in asscending order by their likelihoods (the best
|
||||
* to the worst likelihoods)
|
||||
*
|
||||
* @return a list of the likelihoods
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getGenotypes() {
|
||||
Collections.sort(this.mGenotypes, new LikelihoodComparator());
|
||||
return this.mGenotypes;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* get the genotypes and their posteriors
|
||||
*
|
||||
* @return a list of the poseriors
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getPosteriors() {
|
||||
Collections.sort(this.mGenotypes, new PosteriorComparator());
|
||||
return this.mGenotypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the genotypes sorted lexigraphically
|
||||
*
|
||||
* @return a list of the genotypes sorted lexi
|
||||
*/
|
||||
@Override
|
||||
public List<Genotype> getLexigraphicallySortedGenotypes() {
|
||||
Collections.sort(this.mGenotypes, new LexigraphicalComparator());
|
||||
return this.mGenotypes;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the read depth at this position
|
||||
*
|
||||
* @return the read depth, -1 if it is unknown
|
||||
*/
|
||||
@Override
|
||||
public int getReadDepth() {
|
||||
return mReadDepth;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* add a genotype to the collection
|
||||
*
|
||||
* @param genotype
|
||||
*
|
||||
* @throws InvalidGenotypeException
|
||||
*/
|
||||
@Override
|
||||
public void addGenotype(Genotype genotype) throws InvalidGenotypeException {
|
||||
this.mGenotypes.add(genotype);
|
||||
}
|
||||
|
||||
/**
|
||||
* get the root mean square (RMS) of the mapping qualities
|
||||
*
|
||||
* @return the RMS, or a value < 0 if it's not available
|
||||
*/
|
||||
@Override
|
||||
public double getRMSMappingQuals() {
|
||||
return mRMSMappingQual;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a variant, given the reference, and a lod score
|
||||
*
|
||||
* @param refBase the reference base
|
||||
* @param score the threshold to use to determine if it's a variant or not
|
||||
*
|
||||
* @return a variant object, or null if no genotypes meet the criteria
|
||||
*/
|
||||
@Override
|
||||
public Variant toGenotypeCall(char refBase, ConfidenceScore score) {
|
||||
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,5 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -37,50 +35,17 @@ import net.sf.samtools.SAMSequenceRecord;
|
|||
public interface GenotypeWriter {
|
||||
|
||||
/**
|
||||
* add a single point genotype call to the
|
||||
*
|
||||
* @param contig the contig you're calling in
|
||||
* @param position the position on the contig
|
||||
* @param referenceBase the reference base
|
||||
* @param readDepth the read depth at the specified position
|
||||
* @param likelihoods the likelihoods of each of the possible alleles
|
||||
* Add a genotype, given a genotype locus
|
||||
* @param locus the locus to add
|
||||
*/
|
||||
public void addGenotypeCall(SAMSequenceRecord contig,
|
||||
int position,
|
||||
float rmsMapQuals,
|
||||
char referenceBase,
|
||||
int readDepth,
|
||||
LikelihoodObject likelihoods);
|
||||
|
||||
/**
|
||||
* add a variable length call to the genotyper
|
||||
*
|
||||
* @param contig the contig you're calling in
|
||||
* @param position the position on the genome
|
||||
* @param rmsMapQuals the root mean square of the mapping qualities
|
||||
* @param readDepth the read depth
|
||||
* @param refBase the reference base
|
||||
* @param firstHomZyg the first homozygous indel
|
||||
* @param secondHomZyg the second homozygous indel (if present, null if not)
|
||||
* @param hetLikelihood the heterozygous likelihood
|
||||
*/
|
||||
public void addVariableLengthCall(SAMSequenceRecord contig,
|
||||
int position,
|
||||
float rmsMapQuals,
|
||||
int readDepth,
|
||||
char refBase,
|
||||
IndelLikelihood firstHomZyg,
|
||||
IndelLikelihood secondHomZyg,
|
||||
byte hetLikelihood);
|
||||
public void addGenotypeCall(GenotypeCall locus);
|
||||
|
||||
/**
|
||||
* add a no call to the genotype file, if supported.
|
||||
*
|
||||
* @param position
|
||||
* @param readDepth
|
||||
* @param position the position to add the no call at
|
||||
*/
|
||||
public void addNoCall(int position,
|
||||
int readDepth);
|
||||
public void addNoCall(int position);
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
public void close();
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ package org.broadinstitute.sting.utils.genotype;
|
|||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliAdapter;
|
||||
import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -17,7 +19,7 @@ import java.io.File;
|
|||
public class GenotypeWriterFactory {
|
||||
/** available genotype writers */
|
||||
public enum GENOTYPE_FORMAT {
|
||||
GELI, GLF, GFF, TABULAR;
|
||||
GELI, GLF, GFF, TABULAR, GELI_BINARY;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -32,6 +34,8 @@ public class GenotypeWriterFactory {
|
|||
case GLF:
|
||||
return new GLFWriter(header.toString(), destination);
|
||||
case GELI:
|
||||
return new GeliTextWriter(destination);
|
||||
case GELI_BINARY:
|
||||
return new GeliAdapter(destination, header);
|
||||
default:
|
||||
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
|
||||
|
|
|
|||
|
|
@ -0,0 +1,20 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class GenotypeException
|
||||
*
|
||||
* This exception is thrown when a genotype call is passed in that cannot be processed, i.e. invalid.
|
||||
*/
|
||||
public class InvalidGenotypeException extends Exception {
|
||||
public InvalidGenotypeException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public InvalidGenotypeException(String message, Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,5 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -37,55 +36,33 @@ public class TabularLFWriter implements GenotypeWriter {
|
|||
}
|
||||
|
||||
/**
|
||||
* add a single point genotype call to the
|
||||
* Add a genotype, given a genotype locus
|
||||
*
|
||||
* @param contig the contig you're calling in
|
||||
* @param position the position on the contig
|
||||
* @param referenceBase the reference base
|
||||
* @param readDepth the read depth at the specified position
|
||||
* @param likelihoods the likelihoods of each of the possible alleles
|
||||
* @param locus the locus to add
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods) {
|
||||
/**return String.format("%s %s %c %c %s %f %f %f %f %d %s",
|
||||
location,
|
||||
contig.getSpecies(),
|
||||
ref,
|
||||
alt,
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
/*outStream.println(String.format("%s %s %c %s %s %f %f %f %f %d %s",
|
||||
locus.getLocation(),
|
||||
"NOT OUTPUTED",
|
||||
locus.getReferencebase(),
|
||||
locus.getGenotypes().get(1).getBases().,
|
||||
genotype(),
|
||||
qhat,
|
||||
qstar,
|
||||
lodVsRef,
|
||||
lodVsNextBest,
|
||||
depth,
|
||||
bases);*/
|
||||
}
|
||||
|
||||
/**
|
||||
* add a variable length call to the genotyper
|
||||
*
|
||||
* @param contig the contig you're calling in
|
||||
* @param position the position on the genome
|
||||
* @param rmsMapQuals the root mean square of the mapping qualities
|
||||
* @param readDepth the read depth
|
||||
* @param refBase the reference base
|
||||
* @param firstHomZyg the first homozygous indel
|
||||
* @param secondHomZyg the second homozygous indel (if present, null if not)
|
||||
* @param hetLikelihood the heterozygous likelihood
|
||||
*/
|
||||
@Override
|
||||
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
|
||||
throw new StingException("TabularLFWriter doesn't support variable length calls");
|
||||
bases); */
|
||||
}
|
||||
|
||||
/**
|
||||
* add a no call to the genotype file, if supported.
|
||||
*
|
||||
* @param position
|
||||
* @param readDepth
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position, int readDepth) {
|
||||
public void addNoCall(int position) {
|
||||
throw new StingException("TabularLFWriter doesn't support no-calls");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface Variant
|
||||
* <p/>
|
||||
* This class represents a variant
|
||||
*/
|
||||
public interface Variant {
|
||||
}
|
||||
|
|
@ -1,8 +1,12 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
|
@ -65,7 +69,6 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
* @param readDepth the read depth at the specified position
|
||||
* @param likelihoods the likelihoods of each of the possible alleles
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(SAMSequenceRecord contig,
|
||||
int position,
|
||||
float rmsMapQuals,
|
||||
|
|
@ -87,19 +90,27 @@ public class GeliAdapter implements GenotypeWriter {
|
|||
* @param secondHomZyg the second homozygous indel (if present, null if not)
|
||||
* @param hetLikelihood the heterozygous likelihood
|
||||
*/
|
||||
@Override
|
||||
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
|
||||
throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a genotype, given a genotype locus
|
||||
*
|
||||
* @param locus the locus to add
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
//To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* add a no call to the genotype file, if supported.
|
||||
*
|
||||
* @param position
|
||||
* @param readDepth
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position, int readDepth) {
|
||||
public void addNoCall(int position) {
|
||||
throw new UnsupportedOperationException("Geli format does not support no-calls");
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,93 @@
|
|||
package org.broadinstitute.sting.utils.genotype.geli;
|
||||
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.Genotype;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeCall;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class GeliTextWriter
|
||||
*
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class GeliTextWriter implements GenotypeWriter {
|
||||
// where we write to
|
||||
PrintWriter mWriter;
|
||||
|
||||
/**
|
||||
* create a geli text writer
|
||||
* @param file
|
||||
*/
|
||||
public GeliTextWriter(File file) {
|
||||
try {
|
||||
mWriter = new PrintWriter(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new StingException("Unable to open file " + file.toURI());
|
||||
}
|
||||
mWriter.println("#Sequence Position ReferenceBase NumberOfReads MaxMappingQuality BestGenotype BtrLod BtnbLod dbSNP AA AC AG AT CC CG CT GG GT TT");
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a genotype, given a genotype locus
|
||||
*
|
||||
* @param locus the locus to add
|
||||
*/
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
if (locus.getPosteriors().size() != 10) throw new IllegalArgumentException("Geli text only supports SNP calls, with a diploid organism (i.e. posterior array size of 10)");
|
||||
|
||||
|
||||
// this is to perserve the format string that we used to use
|
||||
double[] likelihoods = new double[10];
|
||||
int index = 0;
|
||||
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
|
||||
for (Genotype G: lt) {
|
||||
likelihoods[index] = G.getLikelihood();
|
||||
index++;
|
||||
}
|
||||
|
||||
mWriter.println( String.format("%s %16d %c %8d %d %s %.6f %.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f %6.6f",
|
||||
locus.getLocation().getContig(),
|
||||
locus.getLocation().getStart(),
|
||||
locus.getReferencebase(),
|
||||
locus.getReadDepth(),
|
||||
-1,
|
||||
locus.getGenotypes().get(0).getBases(),
|
||||
locus.getBestVrsRef().second.getScore(),
|
||||
locus.getBestVrsNext().second.getScore(),
|
||||
likelihoods[0],
|
||||
likelihoods[1],
|
||||
likelihoods[2],
|
||||
likelihoods[3],
|
||||
likelihoods[4],
|
||||
likelihoods[5],
|
||||
likelihoods[6],
|
||||
likelihoods[7],
|
||||
likelihoods[8],
|
||||
likelihoods[9]));
|
||||
}
|
||||
|
||||
/**
|
||||
* add a no call to the genotype file, if supported.
|
||||
*
|
||||
* @param position the position to add the no call at
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position) {
|
||||
throw new UnsupportedOperationException("Geli text format doesn't support a no-call call.");
|
||||
}
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
@Override
|
||||
public void close() {
|
||||
mWriter.close();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,66 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype.gff;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Class GFFWriter
|
||||
* <p/>
|
||||
* A descriptions should go here. Blame aaron if it's missing.
|
||||
*/
|
||||
public class GFFWriter implements GenotypeWriter {
|
||||
|
||||
/**
|
||||
* add a single point genotype call to the file
|
||||
*
|
||||
* @param contig the contig you're calling in
|
||||
* @param position the position on the contig
|
||||
* @param referenceBase the reference base
|
||||
* @param readDepth the read depth at the specified position
|
||||
* @param likelihoods the likelihoods of each of the possible alleles
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods) {
|
||||
//To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* add a variable length call to the genotype file
|
||||
*
|
||||
* @param contig the contig you're calling in
|
||||
* @param position the position on the genome
|
||||
* @param rmsMapQuals the root mean square of the mapping qualities
|
||||
* @param readDepth the read depth
|
||||
* @param refBase the reference base
|
||||
* @param firstHomZyg the first homozygous indel
|
||||
* @param secondHomZyg the second homozygous indel (if present, null if not)
|
||||
* @param hetLikelihood the heterozygous likelihood
|
||||
*/
|
||||
@Override
|
||||
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
|
||||
//To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
/**
|
||||
* add a no call to the genotype file, if supported.
|
||||
*
|
||||
* @param position the position
|
||||
* @param readDepth the read depth
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position, int readDepth) {
|
||||
//To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
|
||||
/** finish writing, closing any open files. */
|
||||
@Override
|
||||
public void close() {
|
||||
//To change body of implemented methods use File | Settings | File Templates.
|
||||
}
|
||||
}
|
||||
|
|
@ -1,14 +1,14 @@
|
|||
package org.broadinstitute.sting.utils.genotype.glf;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import net.sf.samtools.util.BinaryCodec;
|
||||
import net.sf.samtools.util.BlockCompressedOutputStream;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.genotype.*;
|
||||
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
|
|
@ -80,7 +80,6 @@ public class GLFWriter implements GenotypeWriter {
|
|||
* @param rmsMapQ the root mean square of the mapping quality
|
||||
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(SAMSequenceRecord contig,
|
||||
int genomicLoc,
|
||||
float rmsMapQ,
|
||||
|
|
@ -100,6 +99,27 @@ public class GLFWriter implements GenotypeWriter {
|
|||
call.write(this.outputBinaryCodec);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a genotype, given a genotype locus
|
||||
*
|
||||
* @param locus
|
||||
*/
|
||||
@Override
|
||||
public void addGenotypeCall(GenotypeCall locus) {
|
||||
//TODO: CODEWORD
|
||||
// this is to perserve the format string that we used to use
|
||||
double[] posteriors = new double[10];
|
||||
int index = 0;
|
||||
List<Genotype> lt = locus.getLexigraphicallySortedGenotypes();
|
||||
for (Genotype G: lt) {
|
||||
posteriors[index] = G.getLikelihood();
|
||||
index++;
|
||||
}
|
||||
|
||||
LikelihoodObject obj = new LikelihoodObject(posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
this.addGenotypeCall(GenomeLocParser.getContigInfo(locus.getLocation().getContig()),(int)locus.getLocation().getStart(),(float)locus.getRMSMappingQuals(),locus.getReferencebase(),locus.getReadDepth(),obj);
|
||||
}
|
||||
|
||||
/**
|
||||
* add a variable length (indel, deletion, etc) to the genotype writer
|
||||
*
|
||||
|
|
@ -112,7 +132,6 @@ public class GLFWriter implements GenotypeWriter {
|
|||
* @param secondHomZyg the second homozygous call
|
||||
* @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255
|
||||
*/
|
||||
@Override
|
||||
public void addVariableLengthCall(SAMSequenceRecord contig,
|
||||
int genomicLoc,
|
||||
float rmsMapQ,
|
||||
|
|
@ -145,10 +164,10 @@ public class GLFWriter implements GenotypeWriter {
|
|||
* add a no call to the genotype file, if supported.
|
||||
*
|
||||
* @param position the position
|
||||
* @param readDepth the read depth
|
||||
*
|
||||
*/
|
||||
@Override
|
||||
public void addNoCall(int position, int readDepth) {
|
||||
public void addNoCall(int position) {
|
||||
// glf doesn't support this operation
|
||||
throw new UnsupportedOperationException("GLF doesn't support a 'no call' call.");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,9 +5,7 @@ import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
|||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
import org.junit.Assert;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
|
|
@ -190,7 +188,7 @@ public class RodGLFTest extends BaseTest {
|
|||
/**
|
||||
* create the example glf file for the test, you can uncomment the above test line to have this
|
||||
* test run, regenerating the file.
|
||||
*/
|
||||
*
|
||||
public void createRodFile() {
|
||||
GenotypeWriter writer = new GLFWriter("", new File("glfTestFile.glf"));
|
||||
int location = 1;
|
||||
|
|
@ -199,7 +197,7 @@ public class RodGLFTest extends BaseTest {
|
|||
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 2, 20, 'A', 5, createLikelihood('T'));
|
||||
writer.addGenotypeCall(r.getSequenceDictionary().getSequence(0), 3, 20, 'A', 5, createLikelihood('C'));
|
||||
writer.close();
|
||||
}
|
||||
}*/
|
||||
|
||||
/**
|
||||
* create a likelihood object, given the appropriate reference base
|
||||
|
|
|
|||
|
|
@ -1,73 +0,0 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class GeliAdapterTest
|
||||
*
|
||||
* Tests the GeliAdapter class
|
||||
*/
|
||||
public class GeliAdapterTest extends BaseTest {
|
||||
|
||||
|
||||
// private our Geli adapter
|
||||
private GenotypeWriter adapter = null;
|
||||
|
||||
/**
|
||||
* test out the likelihood object
|
||||
*/
|
||||
@Test
|
||||
public void test1() {
|
||||
File fl = new File("testFile.txt");
|
||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10);
|
||||
adapter = new GeliAdapter(fl,header);
|
||||
LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
SAMSequenceRecord rec = new SAMSequenceRecord("chr1",10);
|
||||
adapter.addGenotypeCall(rec,100,100,'A',100,obj);
|
||||
adapter.close();
|
||||
}
|
||||
|
||||
|
||||
public double[] createFakeLikelihoods() {
|
||||
double ret[] = new double[10];
|
||||
for (int x = 0; x < 10; x++) {
|
||||
ret[x] = (double)(10.0-x) * 10.0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue