adding the GenotypeLikelihoodsWalker, which generates GLF genotype likelihoods that are pretty much identical to the samtools calls.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1235 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
99f9cd84ed
commit
01fc8da270
|
|
@ -0,0 +1,125 @@
|
|||
package org.broadinstitute.sting.playground.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
|
||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class LikelihoodWalker
|
||||
*
|
||||
* a simple walker to calculate the genotype likelihoods
|
||||
*/
|
||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||
public class GenotypeLikelihoodsWalker extends LocusWalker<LikelihoodWrapper, GenotypeWriter> {
|
||||
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
|
||||
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
|
||||
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
|
||||
|
||||
|
||||
@Override
|
||||
public LikelihoodWrapper map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||
ReadBackedPileup pileup = new ReadBackedPileup(ref, context);
|
||||
String bases = pileup.getBases();
|
||||
List<SAMRecord> reads = context.getReads();
|
||||
List<Integer> offsets = context.getOffsets();
|
||||
double rmsSum = 0.0;
|
||||
// Handle single-base polymorphisms.
|
||||
GenotypeLikelihoods G = new GenotypeLikelihoods();
|
||||
for (int i = 0; i < reads.size(); i++) {
|
||||
SAMRecord read = reads.get(i);
|
||||
int offset = offsets.get(i);
|
||||
rmsSum += (read.getMappingQuality() * read.getMappingQuality());
|
||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
||||
}
|
||||
|
||||
// our return
|
||||
LikelihoodWrapper wrap = new LikelihoodWrapper();
|
||||
|
||||
return wrapLikelihoods(ref, context, reads, rmsSum, G, wrap);
|
||||
}
|
||||
|
||||
/**
|
||||
* wrap the likelihood values in with the other data we'll need
|
||||
* @param ref the ref base
|
||||
* @param context the locus context
|
||||
* @param reads the reads
|
||||
* @param rmsSum the rms square total (not the actual rms yet)
|
||||
* @param g the genotypeLikelihoods
|
||||
* @param wrap the object to place the values into
|
||||
* @return a likelihood wrapper
|
||||
*/
|
||||
private LikelihoodWrapper wrapLikelihoods(char ref, LocusContext context, List<SAMRecord> reads, double rmsSum, GenotypeLikelihoods g, LikelihoodWrapper wrap) {
|
||||
wrap.obj = new LikelihoodObject();
|
||||
wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||
for (int x = 0; x < GenotypeLikelihoods.genotypes.length; x++) {
|
||||
wrap.obj.setLikelihood(LikelihoodObject.GENOTYPE.valueOf(GenotypeLikelihoods.genotypes[x]), g.likelihoods[x]*10.0);
|
||||
}
|
||||
wrap.obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
|
||||
wrap.loc = GenomeLocParser.getContigInfo(context.getContig());
|
||||
wrap.readDepth = reads.size();
|
||||
float rms = (float)(Math.sqrt(rmsSum/reads.size()));
|
||||
wrap.rms = (rms > 255) ? 255 : rms;
|
||||
wrap.ref = ref;
|
||||
wrap.position = context.getLocation().getStart();
|
||||
return wrap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
*
|
||||
* @return Initial value of reduce.
|
||||
*/
|
||||
@Override
|
||||
public GenotypeWriter reduceInit() {
|
||||
return GenotypeWriterFactory.create(VAR_FORMAT, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(),VARIANTS_FILE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduces a single map with the accumulator provided as the ReduceType.
|
||||
*
|
||||
* @param value result of the map.
|
||||
* @param sum accumulator for the reduce.
|
||||
*
|
||||
* @return accumulator with result of the map taken into account.
|
||||
*/
|
||||
@Override
|
||||
public GenotypeWriter reduce(LikelihoodWrapper value, GenotypeWriter sum) {
|
||||
|
||||
sum.addGenotypeCall(value.loc,(int)value.position,value.rms,value.ref,value.readDepth,value.obj);
|
||||
return sum;
|
||||
}
|
||||
|
||||
/** Close the variant file. */
|
||||
public void onTraversalDone(GenotypeWriter result) {
|
||||
result.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
class LikelihoodWrapper {
|
||||
public LikelihoodObject obj;
|
||||
public SAMSequenceRecord loc;
|
||||
public long position;
|
||||
public float rms;
|
||||
public char ref;
|
||||
public int readDepth;
|
||||
}
|
||||
|
|
@ -9,7 +9,6 @@ import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
|
||||
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
|
||||
|
|
@ -21,8 +20,8 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
|||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.List;
|
||||
|
||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||
|
|
@ -106,10 +105,8 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
/** Initialize the walker with some sensible defaults */
|
||||
public void initialize() {
|
||||
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
|
||||
|
||||
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
|
||||
GenotypeLikelihoods.toGLFGenotypePattern();
|
||||
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
|
||||
mGenotypeWriter = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GLF, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(), VARIANTS_FILE);
|
||||
} else {
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
package org.broadinstitute.sting.playground.utils;
|
||||
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import static java.lang.Math.log10;
|
||||
import static java.lang.Math.pow;
|
||||
|
|
@ -408,17 +408,4 @@ public class GenotypeLikelihoods {
|
|||
public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; }
|
||||
public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; }
|
||||
|
||||
// TODO: this is bad, but the formats disagree now.
|
||||
public static void toGLFGenotypePattern() {
|
||||
genotypes[0] = "AA";
|
||||
genotypes[1] = "AT";
|
||||
genotypes[2] = "AC";
|
||||
genotypes[3] = "AG";
|
||||
genotypes[4] = "CC";
|
||||
genotypes[5] = "CT";
|
||||
genotypes[6] = "CG";
|
||||
genotypes[7] = "GG";
|
||||
genotypes[8] = "GT";
|
||||
genotypes[9] = "TT";
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.utils.genotype;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.utils.StingException;
|
||||
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -32,7 +33,8 @@ public class GenotypeWriterFactory {
|
|||
return new GLFWriter(header.toString(), destination);
|
||||
case GELI:
|
||||
return new GeliAdapter(destination, header);
|
||||
default:
|
||||
throw new StingException("Genotype writer " + format.toString() + " is not implemented");
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public class LikelihoodObject {
|
|||
|
||||
// our possible genotypes, in order according to GLFv3
|
||||
public enum GENOTYPE {
|
||||
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
|
||||
AA, AC, AG, AT, CC, CG, CT, GG, GT, TT
|
||||
}
|
||||
|
||||
// our pileup of bases
|
||||
|
|
|
|||
|
|
@ -194,7 +194,7 @@ abstract class GLFRecord {
|
|||
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||
out.writeUInt(((Long) offset).intValue());
|
||||
int write = ((new Long(readDepth).intValue()) | this.minimumLikelihood << 24);
|
||||
int write = ((new Long(readDepth).intValue() & 0xffffff) | this.minimumLikelihood & 0xff << 24);
|
||||
out.writeUInt(write);
|
||||
out.writeUByte((short) rmsMapQ);
|
||||
}
|
||||
|
|
@ -233,14 +233,14 @@ abstract class GLFRecord {
|
|||
*
|
||||
* @return
|
||||
*/
|
||||
protected static double findMin(double vals[]) {
|
||||
protected static short findMin(double vals[]) {
|
||||
if (vals.length < 1) throw new StingException("findMin: an array of size < 1 was passed in");
|
||||
|
||||
double min = vals[0];
|
||||
for (double d : vals) {
|
||||
if (d < min) min = d;
|
||||
}
|
||||
return min;
|
||||
return (min > 255) ? 255 : (short)min;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class GLFWriter implements GenotypeWriter {
|
|||
private long referenceSequenceLength = 0;
|
||||
|
||||
// the last position written
|
||||
private int lastPos = 0;
|
||||
private int lastPos = 1;
|
||||
|
||||
/**
|
||||
* The public constructor for creating a GLF object
|
||||
|
|
|
|||
Loading…
Reference in New Issue