Initial changes to the SSG to output GLF by default
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1231 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0f6bfaaf73
commit
36819ed908
|
|
@ -2,57 +2,61 @@ package org.broadinstitute.sting.playground.gatk.walkers;
|
||||||
|
|
||||||
import net.sf.samtools.SAMReadGroupRecord;
|
import net.sf.samtools.SAMReadGroupRecord;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
import org.broadinstitute.sting.gatk.filters.ZeroMappingQualityReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||||
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
||||||
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
|
import org.broadinstitute.sting.playground.utils.AlleleMetrics;
|
||||||
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
|
import org.broadinstitute.sting.playground.utils.GenotypeLikelihoods;
|
||||||
import org.broadinstitute.sting.playground.utils.IndelLikelihood;
|
import org.broadinstitute.sting.playground.utils.IndelLikelihood;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.BasicPileup;
|
|
||||||
import org.broadinstitute.sting.utils.ReadBackedPileup;
|
|
||||||
import org.broadinstitute.sting.utils.StingException;
|
|
||||||
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
import org.broadinstitute.sting.utils.cmdLine.Argument;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
@ReadFilters(ZeroMappingQualityReadFilter.class)
|
||||||
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, String> {
|
public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate, String> {
|
||||||
// Control output settings
|
// Control output settings
|
||||||
@Argument(fullName="variants_out", shortName="varout", doc="File to which variants should be written", required=true) public File VARIANTS_FILE;
|
@Argument(fullName = "variants_out", shortName = "varout", doc = "File to which variants should be written", required = true) public File VARIANTS_FILE;
|
||||||
@Argument(fullName="metrics_out", shortName="metout", doc="File to which metrics should be written", required=false) public File METRICS_FILE = new File("/dev/stderr");
|
@Argument(fullName = "metrics_out", shortName = "metout", doc = "File to which metrics should be written", required = false) public File METRICS_FILE = new File("/dev/stderr");
|
||||||
|
@Argument(fullName = "variant_output_format", shortName = "vf", doc = "File to which metrics should be written", required = false) public GenotypeWriterFactory.GENOTYPE_FORMAT VAR_FORMAT = GenotypeWriterFactory.GENOTYPE_FORMAT.GLF;
|
||||||
|
|
||||||
// Control what goes into the variants file and what format that file should have
|
// Control what goes into the variants file and what format that file should have
|
||||||
@Argument(fullName="lod_threshold", shortName="lod", doc="The lod threshold on which variants should be filtered", required=false) public Double LOD_THRESHOLD = 5.0;
|
@Argument(fullName = "lod_threshold", shortName = "lod", doc = "The lod threshold on which variants should be filtered", required = false)public Double LOD_THRESHOLD = Double.MIN_VALUE;
|
||||||
@Argument(fullName="format_geli", shortName="geli", doc="Output variant calls in Geli/Picard format", required=false) public boolean GELI_OUTPUT_FORMAT = false;
|
|
||||||
|
|
||||||
// Control periodic reporting features
|
// Control periodic reporting features
|
||||||
@Argument(fullName="metrics_interval", shortName="metint", doc="Number of loci to process between metrics reports", required=false) public Integer METRICS_INTERVAL = 50000;
|
@Argument(fullName = "metrics_interval", shortName = "metint", doc = "Number of loci to process between metrics reports", required = false) public Integer METRICS_INTERVAL = 50000;
|
||||||
@Argument(fullName="suppress_metrics", shortName="printmets", doc="If specified, don't display metrics", required=false) public Boolean SUPPRESS_METRICS = false;
|
@Argument(fullName = "suppress_metrics", shortName = "printmets", doc = "If specified, don't display metrics", required = false) public Boolean SUPPRESS_METRICS = false;
|
||||||
|
|
||||||
// Control what features we use in calling variants
|
// Control what features we use in calling variants
|
||||||
@Argument(fullName="ignore_secondary_bases", shortName="nosb", doc="Ignore secondary base examination", required=false) public Boolean IGNORE_SECONDARY_BASES = false;
|
@Argument(fullName = "ignore_secondary_bases", shortName = "nosb", doc = "Ignore secondary base examination", required = false) public Boolean IGNORE_SECONDARY_BASES = false;
|
||||||
@Argument(fullName="call_indels", shortName="indels", doc="Call indels as well as point mutations", required=false) public Boolean CALL_INDELS = false;
|
@Argument(fullName = "call_indels", shortName = "indels", doc = "Call indels as well as point mutations", required = false) public Boolean CALL_INDELS = false;
|
||||||
|
|
||||||
// Control how the genotype hypotheses are weighed
|
// Control how the genotype hypotheses are weighed
|
||||||
@Argument(fullName="priors_any_locus", shortName="plocus", doc="Comma-separated prior likelihoods for any locus (homref,het,homvar)", required=false) public String PRIORS_ANY_LOCUS = "0.999,1e-3,1e-5";
|
@Argument(fullName = "priors_any_locus", shortName = "plocus", doc = "Comma-separated prior likelihoods for any locus (homref,het,homvar)", required = false) public String PRIORS_ANY_LOCUS = "0.999,1e-3,1e-5";
|
||||||
@Argument(fullName="priors_hapmap", shortName="phapmap", doc="Comma-separated prior likelihoods for Hapmap loci (homref,het,homvar)", required=false) public String PRIORS_HAPMAP = "0.999,1e-3,1e-5";
|
@Argument(fullName = "priors_hapmap", shortName = "phapmap", doc = "Comma-separated prior likelihoods for Hapmap loci (homref,het,homvar)", required = false) public String PRIORS_HAPMAP = "0.999,1e-3,1e-5";
|
||||||
@Argument(fullName="priors_dbsnp", shortName="pdbsnp", doc="Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required=false) public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
|
@Argument(fullName = "priors_dbsnp", shortName = "pdbsnp", doc = "Comma-separated prior likelihoods for dbSNP loci (homref,het,homvar)", required = false) public String PRIORS_DBSNP = "0.999,1e-3,1e-5";
|
||||||
@Argument(fullName="priors_2nd_on", shortName="p2ndon", doc="Comma-separated prior likelihoods for the secondary bases of primary on-genotype bases (AA,AC,AG,AT,CC,CG,CT,GG,GT,TT)", required=false) public String PRIORS_2ND_ON = "0.000,0.302,0.366,0.142,0.000,0.548,0.370,0.000,0.319,0.000";
|
@Argument(fullName = "priors_2nd_on", shortName = "p2ndon", doc = "Comma-separated prior likelihoods for the secondary bases of primary on-genotype bases (AA,AC,AG,AT,CC,CG,CT,GG,GT,TT)", required = false) public String PRIORS_2ND_ON = "0.000,0.302,0.366,0.142,0.000,0.548,0.370,0.000,0.319,0.000";
|
||||||
@Argument(fullName="priors_2nd_off", shortName="p2ndoff", doc="Comma-separated prior likelihoods for the secondary bases of primary off-genotype bases (AA,AC,AG,AT,CC,CG,CT,GG,GT,TT)", required=false) public String PRIORS_2ND_OFF = "0.480,0.769,0.744,0.538,0.575,0.727,0.768,0.589,0.762,0.505";
|
@Argument(fullName = "priors_2nd_off", shortName = "p2ndoff", doc = "Comma-separated prior likelihoods for the secondary bases of primary off-genotype bases (AA,AC,AG,AT,CC,CG,CT,GG,GT,TT)", required = false) public String PRIORS_2ND_OFF = "0.480,0.769,0.744,0.538,0.575,0.727,0.768,0.589,0.762,0.505";
|
||||||
|
|
||||||
// Control various sample-level settings
|
// Control various sample-level settings
|
||||||
@Argument(fullName="sample_name_regex", shortName="sample_name_regex", doc="Replaces the sample name specified in the BAM read group with the value supplied here", required=false) public String SAMPLE_NAME_REGEX = null;
|
@Argument(fullName = "sample_name_regex", shortName = "sample_name_regex", doc = "Replaces the sample name specified in the BAM read group with the value supplied here", required = false) public String SAMPLE_NAME_REGEX = null;
|
||||||
|
|
||||||
public AlleleMetrics metricsOut;
|
public AlleleMetrics metricsOut;
|
||||||
public PrintStream variantsOut;
|
public PrintStream variantsOut;
|
||||||
public String sampleName;
|
public String sampleName;
|
||||||
|
private GenotypeWriter mGenotypeWriter;
|
||||||
|
|
||||||
public double[] plocus;
|
public double[] plocus;
|
||||||
public double[] phapmap;
|
public double[] phapmap;
|
||||||
|
|
@ -65,7 +69,9 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
*
|
*
|
||||||
* @return true
|
* @return true
|
||||||
*/
|
*/
|
||||||
public boolean requiresReads() { return true; }
|
public boolean requiresReads() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
* Filter out loci to ignore (at an ambiguous base in the reference or a locus with zero coverage).
|
||||||
|
|
@ -73,6 +79,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* @param tracker the meta data tracker
|
* @param tracker the meta data tracker
|
||||||
* @param ref the reference base
|
* @param ref the reference base
|
||||||
* @param context contextual information around the locus
|
* @param context contextual information around the locus
|
||||||
|
*
|
||||||
* @return true if we should look at this locus, false otherwise
|
* @return true if we should look at this locus, false otherwise
|
||||||
*/
|
*/
|
||||||
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public boolean filter(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
|
@ -83,6 +90,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* Convert an array string (value1,value2,...,valueN) to a double array
|
* Convert an array string (value1,value2,...,valueN) to a double array
|
||||||
*
|
*
|
||||||
* @param priorsString the array string of priors
|
* @param priorsString the array string of priors
|
||||||
|
*
|
||||||
* @return the same array, but each value is now a double
|
* @return the same array, but each value is now a double
|
||||||
*/
|
*/
|
||||||
private double[] priorsArray(String priorsString) {
|
private double[] priorsArray(String priorsString) {
|
||||||
|
|
@ -96,22 +104,28 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
return pdbls;
|
return pdbls;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Initialize the walker with some sensible defaults */
|
||||||
* Initialize the walker with some sensible defaults
|
|
||||||
*/
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
|
metricsOut = new AlleleMetrics(METRICS_FILE, LOD_THRESHOLD);
|
||||||
|
|
||||||
|
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
|
||||||
|
GenotypeLikelihoods.toGLFGenotypePattern();
|
||||||
|
mGenotypeWriter = GenotypeWriterFactory.create(GenotypeWriterFactory.GENOTYPE_FORMAT.GLF, GenomeAnalysisEngine.instance.getEngine().getSAMHeader(), VARIANTS_FILE);
|
||||||
|
} else {
|
||||||
try {
|
try {
|
||||||
variantsOut = new PrintStream(VARIANTS_FILE);
|
variantsOut = new PrintStream(VARIANTS_FILE);
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
err.format("Unable to open file '%s'. Perhaps the parent directory does not exist or is read-only.\n", VARIANTS_FILE.getAbsolutePath());
|
err.format("Unable to open file '%s'. Perhaps the parent directory does not exist or is read-only.\n", VARIANTS_FILE.getAbsolutePath());
|
||||||
System.exit(-1);
|
System.exit(-1);
|
||||||
}
|
}
|
||||||
|
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GELI) {
|
||||||
String header = GELI_OUTPUT_FORMAT ? AlleleFrequencyEstimate.geliHeaderString() : AlleleFrequencyEstimate.asTabularStringHeader();
|
variantsOut.println(AlleleFrequencyEstimate.geliHeaderString());
|
||||||
variantsOut.println(header);
|
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR) {
|
||||||
|
variantsOut.println(AlleleFrequencyEstimate.asTabularStringHeader());
|
||||||
|
} else {
|
||||||
|
throw new StingException("Unsupported single sample genotyper output format: " + this.VAR_FORMAT.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
plocus = priorsArray(PRIORS_ANY_LOCUS);
|
plocus = priorsArray(PRIORS_ANY_LOCUS);
|
||||||
phapmap = priorsArray(PRIORS_HAPMAP);
|
phapmap = priorsArray(PRIORS_HAPMAP);
|
||||||
pdbsnp = priorsArray(PRIORS_DBSNP);
|
pdbsnp = priorsArray(PRIORS_DBSNP);
|
||||||
|
|
@ -125,6 +139,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* @param tracker the meta data tracker
|
* @param tracker the meta data tracker
|
||||||
* @param ref the reference base
|
* @param ref the reference base
|
||||||
* @param context contextual information around the locus
|
* @param context contextual information around the locus
|
||||||
|
*
|
||||||
* @return an AlleleFrequencyEstimate object
|
* @return an AlleleFrequencyEstimate object
|
||||||
*/
|
*/
|
||||||
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context) {
|
||||||
|
|
@ -132,8 +147,12 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
|
|
||||||
AlleleFrequencyEstimate freq = getAlleleFrequency(tracker, Character.toUpperCase(ref), context, sampleName);
|
AlleleFrequencyEstimate freq = getAlleleFrequency(tracker, Character.toUpperCase(ref), context, sampleName);
|
||||||
|
|
||||||
if (freq != null) { metricsOut.nextPosition(freq, tracker); }
|
if (freq != null) {
|
||||||
if (!SUPPRESS_METRICS) { metricsOut.printMetricsAtLocusIntervals(METRICS_INTERVAL); }
|
metricsOut.nextPosition(freq, tracker);
|
||||||
|
}
|
||||||
|
if (!SUPPRESS_METRICS) {
|
||||||
|
metricsOut.printMetricsAtLocusIntervals(METRICS_INTERVAL);
|
||||||
|
}
|
||||||
|
|
||||||
return freq;
|
return freq;
|
||||||
}
|
}
|
||||||
|
|
@ -142,17 +161,21 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* Sometimes the sample names in the BAM files get screwed up. Fix it here if we can.
|
* Sometimes the sample names in the BAM files get screwed up. Fix it here if we can.
|
||||||
*
|
*
|
||||||
* @param read a read from the pileup (assuming all the reads have the same sample name)
|
* @param read a read from the pileup (assuming all the reads have the same sample name)
|
||||||
|
*
|
||||||
* @return a repaired sample name
|
* @return a repaired sample name
|
||||||
*/
|
*/
|
||||||
private String rationalizeSampleName(SAMRecord read) {
|
private String rationalizeSampleName(SAMRecord read) {
|
||||||
String RG = (String)(read.getAttribute("RG"));
|
String RG = (String) (read.getAttribute("RG"));
|
||||||
SAMReadGroupRecord read_group_record = read.getHeader().getReadGroup(RG);
|
SAMReadGroupRecord read_group_record = read.getHeader().getReadGroup(RG);
|
||||||
|
|
||||||
if (read_group_record != null) {
|
if (read_group_record != null) {
|
||||||
String localSampleName = read.getHeader().getReadGroup(RG).getSample();
|
String localSampleName = read.getHeader().getReadGroup(RG).getSample();
|
||||||
if (SAMPLE_NAME_REGEX != null) { localSampleName = localSampleName.replaceAll(SAMPLE_NAME_REGEX, "$1"); }
|
if (SAMPLE_NAME_REGEX != null) {
|
||||||
if (sampleName == null) { sampleName = localSampleName; }
|
localSampleName = localSampleName.replaceAll(SAMPLE_NAME_REGEX, "$1");
|
||||||
else {
|
}
|
||||||
|
if (sampleName == null) {
|
||||||
|
sampleName = localSampleName;
|
||||||
|
} else {
|
||||||
if (!sampleName.equals(localSampleName)) {
|
if (!sampleName.equals(localSampleName)) {
|
||||||
throw new StingException(String.format("Samples appear to have been mixed up: expected '%s' but found '%s'.", sampleName, localSampleName));
|
throw new StingException(String.format("Samples appear to have been mixed up: expected '%s' but found '%s'.", sampleName, localSampleName));
|
||||||
}
|
}
|
||||||
|
|
@ -169,6 +192,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* @param ref the reference base
|
* @param ref the reference base
|
||||||
* @param context contextual information around the locus
|
* @param context contextual information around the locus
|
||||||
* @param sample_name the name of the sample
|
* @param sample_name the name of the sample
|
||||||
|
*
|
||||||
* @return the allele frequency estimate
|
* @return the allele frequency estimate
|
||||||
*/
|
*/
|
||||||
private AlleleFrequencyEstimate getAlleleFrequency(RefMetaDataTracker tracker, char ref, LocusContext context, String sample_name) {
|
private AlleleFrequencyEstimate getAlleleFrequency(RefMetaDataTracker tracker, char ref, LocusContext context, String sample_name) {
|
||||||
|
|
@ -194,6 +218,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* @param pileup the pileup object for the given locus
|
* @param pileup the pileup object for the given locus
|
||||||
* @param reads the reads that overlap this locus
|
* @param reads the reads that overlap this locus
|
||||||
* @param offsets the offsets per read that identify the base at this locus
|
* @param offsets the offsets per read that identify the base at this locus
|
||||||
|
*
|
||||||
* @return the likelihoods per genotype
|
* @return the likelihoods per genotype
|
||||||
*/
|
*/
|
||||||
private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup, List<SAMRecord> reads, List<Integer> offsets) {
|
private GenotypeLikelihoods callGenotype(RefMetaDataTracker tracker, char ref, ReadBackedPileup pileup, List<SAMRecord> reads, List<Integer> offsets) {
|
||||||
|
|
@ -207,18 +232,18 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
G = new GenotypeLikelihoods(plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff);
|
G = new GenotypeLikelihoods(plocus[0], plocus[1], plocus[2], p2ndon, p2ndoff);
|
||||||
}
|
}
|
||||||
|
|
||||||
for ( int i = 0; i < reads.size(); i++ ) {
|
for (int i = 0; i < reads.size(); i++) {
|
||||||
SAMRecord read = reads.get(i);
|
SAMRecord read = reads.get(i);
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
|
|
||||||
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
G.add(ref, read.getReadString().charAt(offset), read.getBaseQualities()[offset]);
|
||||||
}
|
}
|
||||||
|
if (this.VAR_FORMAT != GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
|
||||||
G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior);
|
G.ApplyPrior(ref, this.alt_allele, this.allele_frequency_prior);
|
||||||
|
|
||||||
if (!IGNORE_SECONDARY_BASES && pileup.getBases().length() < 750) {
|
if (!IGNORE_SECONDARY_BASES && pileup.getBases().length() < 750) {
|
||||||
G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
G.applySecondBaseDistributionPrior(pileup.getBases(), pileup.getSecondaryBasePileup());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return G;
|
return G;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -228,13 +253,14 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* @param context contextual information around the locus
|
* @param context contextual information around the locus
|
||||||
* @param reads the reads that overlap this locus
|
* @param reads the reads that overlap this locus
|
||||||
* @param offsets the offsets per read that identify the base at this locus
|
* @param offsets the offsets per read that identify the base at this locus
|
||||||
|
*
|
||||||
* @return the likelihood of the indel at this location
|
* @return the likelihood of the indel at this location
|
||||||
*/
|
*/
|
||||||
private IndelLikelihood callIndel(LocusContext context, List<SAMRecord> reads, List<Integer> offsets) {
|
private IndelLikelihood callIndel(LocusContext context, List<SAMRecord> reads, List<Integer> offsets) {
|
||||||
String[] indels = BasicPileup.indelPileup(reads, offsets);
|
String[] indels = BasicPileup.indelPileup(reads, offsets);
|
||||||
IndelLikelihood indelCall = new IndelLikelihood(indels, 1e-4);
|
IndelLikelihood indelCall = new IndelLikelihood(indels, 1e-4);
|
||||||
|
|
||||||
if (! indelCall.getType().equals("ref")) {
|
if (!indelCall.getType().equals("ref")) {
|
||||||
System.out.printf("INDEL %s %s\n", context.getLocation(), indelCall);
|
System.out.printf("INDEL %s %s\n", context.getLocation(), indelCall);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -245,6 +271,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* Determine whether we're at a Hapmap site
|
* Determine whether we're at a Hapmap site
|
||||||
*
|
*
|
||||||
* @param tracker the meta data tracker
|
* @param tracker the meta data tracker
|
||||||
|
*
|
||||||
* @return true if we're at a Hapmap site, false if otherwise
|
* @return true if we're at a Hapmap site, false if otherwise
|
||||||
*/
|
*/
|
||||||
private boolean isHapmapSite(RefMetaDataTracker tracker) {
|
private boolean isHapmapSite(RefMetaDataTracker tracker) {
|
||||||
|
|
@ -255,6 +282,7 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
* Determine whether we're at a dbSNP site
|
* Determine whether we're at a dbSNP site
|
||||||
*
|
*
|
||||||
* @param tracker the meta data tracker
|
* @param tracker the meta data tracker
|
||||||
|
*
|
||||||
* @return true if we're at a dbSNP site, false if otherwise
|
* @return true if we're at a dbSNP site, false if otherwise
|
||||||
*/
|
*/
|
||||||
private boolean isDbSNPSite(RefMetaDataTracker tracker) {
|
private boolean isDbSNPSite(RefMetaDataTracker tracker) {
|
||||||
|
|
@ -289,21 +317,32 @@ public class SingleSampleGenotyper extends LocusWalker<AlleleFrequencyEstimate,
|
||||||
*
|
*
|
||||||
* @param alleleFreq an AlleleFrequencyEstimage object for the variant.
|
* @param alleleFreq an AlleleFrequencyEstimage object for the variant.
|
||||||
* @param sum accumulator for the reduce.
|
* @param sum accumulator for the reduce.
|
||||||
|
*
|
||||||
* @return an empty string
|
* @return an empty string
|
||||||
*/
|
*/
|
||||||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum) {
|
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum) {
|
||||||
if (alleleFreq != null && alleleFreq.lodVsRef >= LOD_THRESHOLD) {
|
if (alleleFreq != null && alleleFreq.lodVsRef >= LOD_THRESHOLD) {
|
||||||
String line = GELI_OUTPUT_FORMAT ? alleleFreq.asGeliString() : alleleFreq.asTabularString();
|
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GELI) {
|
||||||
variantsOut.println(line);
|
variantsOut.println(alleleFreq.asGeliString());
|
||||||
|
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.TABULAR) {
|
||||||
|
variantsOut.println(alleleFreq.asTabularString());
|
||||||
|
} else if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
|
||||||
|
SAMSequenceRecord rec = GenomeLocParser.getContigInfo(alleleFreq.location.getContig());
|
||||||
|
LikelihoodObject obj = new LikelihoodObject(alleleFreq.posteriors, LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||||
|
obj.setLikelihoodType(LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
|
||||||
|
this.mGenotypeWriter.addGenotypeCall(rec,(int)alleleFreq.location.getStart(),0.0f,alleleFreq.ref,alleleFreq.depth,obj);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/** Close the variant file. */
|
||||||
* Close the variant file.
|
|
||||||
*/
|
|
||||||
public void onTraversalDone() {
|
public void onTraversalDone() {
|
||||||
variantsOut.close();
|
if (this.VAR_FORMAT == GenotypeWriterFactory.GENOTYPE_FORMAT.GLF) {
|
||||||
|
mGenotypeWriter.close();
|
||||||
|
} else {
|
||||||
|
this.variantsOut.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,26 +1,12 @@
|
||||||
package org.broadinstitute.sting.playground.utils;
|
package org.broadinstitute.sting.playground.utils;
|
||||||
|
|
||||||
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.lang.Math;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class AlleleFrequencyEstimate {
|
public class AlleleFrequencyEstimate {
|
||||||
|
|
||||||
/*
|
|
||||||
static
|
|
||||||
{
|
|
||||||
boolean assertsEnabled = false;
|
|
||||||
assert assertsEnabled = true; // Intentional side effect!!!
|
|
||||||
if (!assertsEnabled)
|
|
||||||
{
|
|
||||||
System.err.printf("\n\n\nERROR: You must run with asserts enabled. \"java -ea\".\n\n\n");
|
|
||||||
throw new RuntimeException("Asserts must be enabled!");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
//AlleleFrequencyEstimate();
|
|
||||||
public GenomeLoc location;
|
public GenomeLoc location;
|
||||||
public char ref;
|
public char ref;
|
||||||
public char alt;
|
public char alt;
|
||||||
|
|
@ -34,7 +20,7 @@ public class AlleleFrequencyEstimate {
|
||||||
public int depth;
|
public int depth;
|
||||||
public String notes;
|
public String notes;
|
||||||
public String bases;
|
public String bases;
|
||||||
public double[][] quals;
|
//public double[][] quals;
|
||||||
public double[] posteriors;
|
public double[] posteriors;
|
||||||
public String sample_name;
|
public String sample_name;
|
||||||
public int n_ref;
|
public int n_ref;
|
||||||
|
|
@ -48,43 +34,6 @@ public class AlleleFrequencyEstimate {
|
||||||
|
|
||||||
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name)
|
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth, String bases, double[][] quals, double[] posteriors, String sample_name)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
if( Double.isNaN(lodVsRef)) { System.out.printf("%s: lodVsRef is NaN\n", location.toString()); }
|
|
||||||
if( Double.isNaN(lodVsNextBest)) { System.out.printf("%s lodVsNextBest is NaN\n", location.toString()); }
|
|
||||||
if( Double.isNaN(qhat)) { System.out.printf("%s qhat is NaN\n", location.toString()); }
|
|
||||||
if( Double.isNaN(qstar)) { System.out.printf("%s qstar is NaN\n", location.toString()); }
|
|
||||||
if( Double.isNaN(pBest)) { System.out.printf("%s pBest is NaN\n", location.toString()); }
|
|
||||||
if( Double.isNaN(pRef)) { System.out.printf("%s pRef is NaN (%c %s)\n", location.toString(), ref, bases); }
|
|
||||||
|
|
||||||
if( Double.isInfinite(lodVsRef))
|
|
||||||
{
|
|
||||||
System.out.printf("lodVsRef is Infinite: %s %c %s\n", location.toString(), ref, bases);
|
|
||||||
for (int i = 0; i < posteriors.length; i++)
|
|
||||||
{
|
|
||||||
System.out.printf("POSTERIOR %d %f\n", i, posteriors[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if( Double.isInfinite(lodVsNextBest)) { System.out.printf("lodVsNextBest is Infinite\n"); }
|
|
||||||
if( Double.isInfinite(qhat)) { System.out.printf("qhat is Infinite\n"); }
|
|
||||||
if( Double.isInfinite(qstar)) { System.out.printf("qstar is Infinite\n"); }
|
|
||||||
if( Double.isInfinite(pBest)) { System.out.printf("pBest is Infinite\n"); }
|
|
||||||
if( Double.isInfinite(pRef)) { System.out.printf("pRef is Infinite\n"); }
|
|
||||||
|
|
||||||
assert(! Double.isNaN(lodVsRef));
|
|
||||||
assert(! Double.isNaN(lodVsNextBest));
|
|
||||||
assert(! Double.isNaN(qhat));
|
|
||||||
assert(! Double.isNaN(qstar));
|
|
||||||
assert(! Double.isNaN(pBest));
|
|
||||||
assert(! Double.isNaN(pRef));
|
|
||||||
|
|
||||||
assert(! Double.isInfinite(lodVsRef));
|
|
||||||
assert(! Double.isInfinite(lodVsNextBest));
|
|
||||||
assert(! Double.isInfinite(qhat));
|
|
||||||
assert(! Double.isInfinite(qstar));
|
|
||||||
assert(! Double.isInfinite(pBest));
|
|
||||||
assert(! Double.isInfinite(pRef));
|
|
||||||
*/
|
|
||||||
|
|
||||||
this.location = location;
|
this.location = location;
|
||||||
this.ref = ref;
|
this.ref = ref;
|
||||||
this.alt = alt;
|
this.alt = alt;
|
||||||
|
|
@ -98,7 +47,7 @@ public class AlleleFrequencyEstimate {
|
||||||
this.depth = depth;
|
this.depth = depth;
|
||||||
this.notes = "";
|
this.notes = "";
|
||||||
this.bases = bases;
|
this.bases = bases;
|
||||||
this.quals = quals;
|
//this.quals = quals;
|
||||||
this.posteriors = posteriors;
|
this.posteriors = posteriors;
|
||||||
this.sample_name = sample_name;
|
this.sample_name = sample_name;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,19 @@ public class GenotypeLikelihoods {
|
||||||
}
|
}
|
||||||
|
|
||||||
public double[] likelihoods;
|
public double[] likelihoods;
|
||||||
public String[] genotypes;
|
public static String[] genotypes = new String[10];
|
||||||
|
static {
|
||||||
|
genotypes[0] = "AA";
|
||||||
|
genotypes[1] = "AC";
|
||||||
|
genotypes[2] = "AG";
|
||||||
|
genotypes[3] = "AT";
|
||||||
|
genotypes[4] = "CC";
|
||||||
|
genotypes[5] = "CG";
|
||||||
|
genotypes[6] = "CT";
|
||||||
|
genotypes[7] = "GG";
|
||||||
|
genotypes[8] = "GT";
|
||||||
|
genotypes[9] = "TT";
|
||||||
|
}
|
||||||
public int coverage;
|
public int coverage;
|
||||||
|
|
||||||
// The genotype priors;
|
// The genotype priors;
|
||||||
|
|
@ -75,21 +87,12 @@ public class GenotypeLikelihoods {
|
||||||
this.priorHomVar = priorHomVar;
|
this.priorHomVar = priorHomVar;
|
||||||
|
|
||||||
likelihoods = new double[10];
|
likelihoods = new double[10];
|
||||||
genotypes = new String[10];
|
|
||||||
coverage = 0;
|
coverage = 0;
|
||||||
|
|
||||||
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
|
for (int i = 0; i < likelihoods.length; i++) { likelihoods[i] = Math.log10(0.1); }
|
||||||
|
|
||||||
genotypes[0] = "AA";
|
|
||||||
genotypes[1] = "AC";
|
|
||||||
genotypes[2] = "AG";
|
|
||||||
genotypes[3] = "AT";
|
|
||||||
genotypes[4] = "CC";
|
|
||||||
genotypes[5] = "CG";
|
|
||||||
genotypes[6] = "CT";
|
|
||||||
genotypes[7] = "GG";
|
|
||||||
genotypes[8] = "GT";
|
|
||||||
genotypes[9] = "TT";
|
|
||||||
|
|
||||||
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
for (int genotypeIndex = 0; genotypeIndex < 10; genotypeIndex++) {
|
||||||
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
|
onNextBestBasePriors.put(genotypes[genotypeIndex], p2ndon[genotypeIndex]);
|
||||||
|
|
@ -405,4 +408,17 @@ public class GenotypeLikelihoods {
|
||||||
public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; }
|
public void addIndelLikelihood(IndelLikelihood indel_likelihood) { this.indel_likelihood = indel_likelihood; }
|
||||||
public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; }
|
public IndelLikelihood getIndelLikelihood() { return this.indel_likelihood; }
|
||||||
|
|
||||||
|
// TODO: this is bad, but the formats disagree now.
|
||||||
|
public static void toGLFGenotypePattern() {
|
||||||
|
genotypes[0] = "AA";
|
||||||
|
genotypes[1] = "AT";
|
||||||
|
genotypes[2] = "AC";
|
||||||
|
genotypes[3] = "AG";
|
||||||
|
genotypes[4] = "CC";
|
||||||
|
genotypes[5] = "CT";
|
||||||
|
genotypes[6] = "CG";
|
||||||
|
genotypes[7] = "GG";
|
||||||
|
genotypes[8] = "GT";
|
||||||
|
genotypes[9] = "TT";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
|
import edu.mit.broad.picard.genotype.geli.GeliFileWriter;
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
|
|
@ -56,18 +57,16 @@ public class GeliAdapter implements GenotypeWriter {
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* add a single point genotype call to the
|
* add a single point genotype call to the genotype likelihood file
|
||||||
*
|
*
|
||||||
* @param contigName the name of the contig you're calling in
|
* @param contig the contig you're calling in
|
||||||
* @param contigLength the contig length
|
|
||||||
* @param position the position on the contig
|
* @param position the position on the contig
|
||||||
* @param referenceBase the reference base
|
* @param referenceBase the reference base
|
||||||
* @param readDepth the read depth at the specified position
|
* @param readDepth the read depth at the specified position
|
||||||
* @param likelihoods the likelihoods of each of the possible alleles
|
* @param likelihoods the likelihoods of each of the possible alleles
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void addGenotypeCall(String contigName,
|
public void addGenotypeCall(SAMSequenceRecord contig,
|
||||||
int contigLength,
|
|
||||||
int position,
|
int position,
|
||||||
float rmsMapQuals,
|
float rmsMapQuals,
|
||||||
char referenceBase,
|
char referenceBase,
|
||||||
|
|
@ -79,8 +78,7 @@ public class GeliAdapter implements GenotypeWriter {
|
||||||
/**
|
/**
|
||||||
* add a variable length call to the genotyper
|
* add a variable length call to the genotyper
|
||||||
*
|
*
|
||||||
* @param contigName the name of the contig you're calling in
|
* @param contig the contig you're calling in
|
||||||
* @param contigLength the contig length
|
|
||||||
* @param position the position on the genome
|
* @param position the position on the genome
|
||||||
* @param rmsMapQuals the root mean square of the mapping qualities
|
* @param rmsMapQuals the root mean square of the mapping qualities
|
||||||
* @param readDepth the read depth
|
* @param readDepth the read depth
|
||||||
|
|
@ -90,7 +88,7 @@ public class GeliAdapter implements GenotypeWriter {
|
||||||
* @param hetLikelihood the heterozygous likelihood
|
* @param hetLikelihood the heterozygous likelihood
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void addVariableLengthCall(String contigName, int contigLength, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
|
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
|
||||||
throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
|
throw new UnsupportedOperationException("Geli format does not support variable length allele calls");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,7 @@
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
*
|
*
|
||||||
|
|
@ -33,18 +35,17 @@ package org.broadinstitute.sting.utils.genotype;
|
||||||
* The interface for storing genotype calls.
|
* The interface for storing genotype calls.
|
||||||
*/
|
*/
|
||||||
public interface GenotypeWriter {
|
public interface GenotypeWriter {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* add a single point genotype call to the
|
* add a single point genotype call to the
|
||||||
*
|
*
|
||||||
* @param contigName the name of the contig you're calling in
|
* @param contig the contig you're calling in
|
||||||
* @param contigLength the contig length
|
|
||||||
* @param position the position on the contig
|
* @param position the position on the contig
|
||||||
* @param referenceBase the reference base
|
* @param referenceBase the reference base
|
||||||
* @param readDepth the read depth at the specified position
|
* @param readDepth the read depth at the specified position
|
||||||
* @param likelihoods the likelihoods of each of the possible alleles
|
* @param likelihoods the likelihoods of each of the possible alleles
|
||||||
*/
|
*/
|
||||||
public void addGenotypeCall(String contigName,
|
public void addGenotypeCall(SAMSequenceRecord contig,
|
||||||
int contigLength,
|
|
||||||
int position,
|
int position,
|
||||||
float rmsMapQuals,
|
float rmsMapQuals,
|
||||||
char referenceBase,
|
char referenceBase,
|
||||||
|
|
@ -54,8 +55,7 @@ public interface GenotypeWriter {
|
||||||
/**
|
/**
|
||||||
* add a variable length call to the genotyper
|
* add a variable length call to the genotyper
|
||||||
*
|
*
|
||||||
* @param contigName the name of the contig you're calling in
|
* @param contig the contig you're calling in
|
||||||
* @param contigLength the contig length
|
|
||||||
* @param position the position on the genome
|
* @param position the position on the genome
|
||||||
* @param rmsMapQuals the root mean square of the mapping qualities
|
* @param rmsMapQuals the root mean square of the mapping qualities
|
||||||
* @param readDepth the read depth
|
* @param readDepth the read depth
|
||||||
|
|
@ -64,8 +64,7 @@ public interface GenotypeWriter {
|
||||||
* @param secondHomZyg the second homozygous indel (if present, null if not)
|
* @param secondHomZyg the second homozygous indel (if present, null if not)
|
||||||
* @param hetLikelihood the heterozygous likelihood
|
* @param hetLikelihood the heterozygous likelihood
|
||||||
*/
|
*/
|
||||||
public void addVariableLengthCall(String contigName,
|
public void addVariableLengthCall(SAMSequenceRecord contig,
|
||||||
int contigLength,
|
|
||||||
int position,
|
int position,
|
||||||
float rmsMapQuals,
|
float rmsMapQuals,
|
||||||
int readDepth,
|
int readDepth,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.glf.GLFWriter;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* <p/>
|
||||||
|
* Class GenotypeWriterFactory
|
||||||
|
* <p/>
|
||||||
|
* A descriptions should go here. Blame aaron if it's missing.
|
||||||
|
*/
|
||||||
|
public class GenotypeWriterFactory {
|
||||||
|
/** available genotype writers */
|
||||||
|
public enum GENOTYPE_FORMAT {
|
||||||
|
GELI, GLF, GFF, TABULAR;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a genotype writer
|
||||||
|
* @param format the format
|
||||||
|
* @param header the sam file header
|
||||||
|
* @param destination the destination file
|
||||||
|
* @return the genotype writer object
|
||||||
|
*/
|
||||||
|
public static GenotypeWriter create(GENOTYPE_FORMAT format, SAMFileHeader header, File destination) {
|
||||||
|
switch (format) {
|
||||||
|
case GLF:
|
||||||
|
return new GLFWriter(header.toString(), destination);
|
||||||
|
case GELI:
|
||||||
|
return new GeliAdapter(destination, header);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,12 +1,12 @@
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
|
||||||
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
import edu.mit.broad.picard.genotype.DiploidGenotype;
|
||||||
|
import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods;
|
||||||
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2009 The Broad Institute
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
|
@ -35,43 +35,52 @@ import net.sf.samtools.SAMFileHeader;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
*
|
* <p/>
|
||||||
* Class LikelyhoodObject
|
* Class LikelyhoodObject
|
||||||
*
|
* <p/>
|
||||||
* An object used to store likelyhood information for genotypes. Genotype
|
* An object used to store likelyhood information for genotypes. Genotype
|
||||||
* likelihoods are assumed to be infinite (negitive log likelihood), unless set.
|
* likelihoods are assumed to be infinite (negitive log likelihood), unless set.
|
||||||
* This allows the consumer to make an empty LikelihoodObject, and just set
|
* This allows the consumer to make an empty LikelihoodObject, and just set
|
||||||
* those values which have associated likelihood values.
|
* those values which have associated likelihood values.
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public class LikelihoodObject {
|
public class LikelihoodObject {
|
||||||
|
|
||||||
|
|
||||||
// our possible genotypes, in order according to GLFv3
|
// our possible genotypes, in order according to GLFv3
|
||||||
public enum GENOTYPE {
|
public enum GENOTYPE {
|
||||||
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
|
AA, AT, AC, AG, CC, CT, CG, GG, GT, TT
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// our pileup of bases
|
||||||
|
//final private String basePileup;
|
||||||
|
|
||||||
// possible types of likihoods to store
|
// possible types of likihoods to store
|
||||||
|
|
||||||
public enum LIKELIHOOD_TYPE {
|
public enum LIKELIHOOD_TYPE {
|
||||||
NEGITIVE_LOG, LOG, RAW;
|
NEGITIVE_LOG, LOG, RAW;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// our qhet and qstar values; wait, what?
|
||||||
|
// TODO: are these really needed here? We have them to support the tabular output format only.
|
||||||
|
//private double qhat;
|
||||||
|
//private double qstar;
|
||||||
|
|
||||||
// our liklihood storage type
|
// our liklihood storage type
|
||||||
protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG;
|
protected LIKELIHOOD_TYPE mLikelihoodType = LIKELIHOOD_TYPE.NEGITIVE_LOG;
|
||||||
|
|
||||||
// default the bestGenotype likelihood to the allele AA
|
// default the bestGenotype likelihoods to the allele AA
|
||||||
protected GENOTYPE bestGenotype = GENOTYPE.AA;
|
protected GENOTYPE bestGenotype = GENOTYPE.AA;
|
||||||
|
|
||||||
// how many genotypes we're storing
|
// how many genotypes we're storing
|
||||||
public static final int genoTypeCount = GENOTYPE.values().length;
|
public static final int genoTypeCount = GENOTYPE.values().length;
|
||||||
|
|
||||||
// the associated negitive log likelihoods for each genotype
|
// the associated negitive log likelihoods for each genotype
|
||||||
protected final HashMap<GENOTYPE, Double> likelihood = new HashMap<GENOTYPE, Double>();
|
protected final HashMap<GENOTYPE, Double> likelihoods = new HashMap<GENOTYPE, Double>();
|
||||||
|
|
||||||
/** create a blank likelihood object */
|
/** create a blank likelihood object */
|
||||||
public LikelihoodObject() {
|
public LikelihoodObject() {
|
||||||
for (GENOTYPE type : GENOTYPE.values()) {
|
for (GENOTYPE type : GENOTYPE.values()) {
|
||||||
likelihood.put(type, Double.MAX_VALUE);
|
likelihoods.put(type, Double.MAX_VALUE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,15 +91,18 @@ public class LikelihoodObject {
|
||||||
*
|
*
|
||||||
* @param lk the likelihood object
|
* @param lk the likelihood object
|
||||||
*/
|
*/
|
||||||
public LikelihoodObject( GenotypeLikelihoods lk ) {
|
public LikelihoodObject(GenotypeLikelihoods lk) {
|
||||||
|
mLikelihoodType = LIKELIHOOD_TYPE.LOG;
|
||||||
Double minValue = Double.MAX_VALUE;
|
Double minValue = Double.MAX_VALUE;
|
||||||
for (GENOTYPE type : GENOTYPE.values()) {
|
for (GENOTYPE type : GENOTYPE.values()) {
|
||||||
byte[] bases = new byte[2];
|
byte[] bases = new byte[2];
|
||||||
bases[0] = (byte) type.toString().charAt(0);
|
bases[0] = (byte) type.toString().charAt(0);
|
||||||
bases[1] = (byte) type.toString().charAt(1);
|
bases[1] = (byte) type.toString().charAt(1);
|
||||||
double val = -1.0d * lk.getLikelihood(DiploidGenotype.fromBases(bases));
|
double val = -1.0d * lk.getLikelihood(DiploidGenotype.fromBases(bases));
|
||||||
likelihood.put(type, val);
|
likelihoods.put(type, val);
|
||||||
if (val < minValue) { bestGenotype = type; }
|
if (val < minValue) {
|
||||||
|
bestGenotype = type;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -98,18 +110,28 @@ public class LikelihoodObject {
|
||||||
* create a likelyhood object, given an array of genotype scores in GLFv3 ordering
|
* create a likelyhood object, given an array of genotype scores in GLFv3 ordering
|
||||||
*
|
*
|
||||||
* @param values an array of int's from 0 to 255, representing the negitive log likelihoods.
|
* @param values an array of int's from 0 to 255, representing the negitive log likelihoods.
|
||||||
|
* @param type the likelihood storage type
|
||||||
*/
|
*/
|
||||||
public LikelihoodObject( double[] values ) {
|
public LikelihoodObject(double[] values, LIKELIHOOD_TYPE type) {
|
||||||
|
mLikelihoodType = type;
|
||||||
if (values.length != GENOTYPE.values().length) {
|
if (values.length != GENOTYPE.values().length) {
|
||||||
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
|
throw new IllegalArgumentException("invalid array passed to LikelihoodObject, should be size " + GENOTYPE.values().length);
|
||||||
}
|
}
|
||||||
|
findBestLikelihood(values);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* find the best likelihood
|
||||||
|
* @param values
|
||||||
|
*/
|
||||||
|
private void findBestLikelihood(double[] values) {
|
||||||
int index = 0;
|
int index = 0;
|
||||||
double lowestScore = Double.MAX_VALUE;
|
double lowestScore = Double.MAX_VALUE;
|
||||||
for (GENOTYPE type : GENOTYPE.values()) {
|
for (GENOTYPE t : GENOTYPE.values()) {
|
||||||
likelihood.put(type, values[index]);
|
likelihoods.put(t, values[index]);
|
||||||
if (values[index] < lowestScore) {
|
if (values[index] < lowestScore) {
|
||||||
lowestScore = values[index];
|
lowestScore = values[index];
|
||||||
bestGenotype = type;
|
bestGenotype = t;
|
||||||
}
|
}
|
||||||
++index;
|
++index;
|
||||||
}
|
}
|
||||||
|
|
@ -119,11 +141,11 @@ public class LikelihoodObject {
|
||||||
* set the likelihood, given it's probability and the genotype
|
* set the likelihood, given it's probability and the genotype
|
||||||
*
|
*
|
||||||
* @param type the genotype
|
* @param type the genotype
|
||||||
* @param lh the likelihood as a double between 0 and 1, which is converted to a byte
|
* @param lh the likelihood as a double
|
||||||
*/
|
*/
|
||||||
public void setLikelihood( GENOTYPE type, double lh ) {
|
public void setLikelihood(GENOTYPE type, double lh) {
|
||||||
likelihood.put(type, lh);
|
likelihoods.put(type, lh);
|
||||||
if (lh < likelihood.get(this.bestGenotype)) {
|
if (lh < likelihoods.get(this.bestGenotype)) {
|
||||||
this.bestGenotype = type;
|
this.bestGenotype = type;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -135,7 +157,7 @@ public class LikelihoodObject {
|
||||||
* @return the min value
|
* @return the min value
|
||||||
*/
|
*/
|
||||||
public double getBestLikelihood() {
|
public double getBestLikelihood() {
|
||||||
return likelihood.get(this.bestGenotype);
|
return likelihoods.get(this.bestGenotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -149,7 +171,7 @@ public class LikelihoodObject {
|
||||||
short ret[] = new short[GENOTYPE.values().length];
|
short ret[] = new short[GENOTYPE.values().length];
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (GENOTYPE type : GENOTYPE.values()) {
|
for (GENOTYPE type : GENOTYPE.values()) {
|
||||||
ret[index] = ( likelihood.get(type).intValue() > 254 ) ? 255 : (short) likelihood.get(type).intValue();
|
ret[index] = (likelihoods.get(type).intValue() > 254) ? 255 : (short) likelihoods.get(type).intValue();
|
||||||
++index;
|
++index;
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
|
@ -166,7 +188,8 @@ public class LikelihoodObject {
|
||||||
double[] ft = new double[10];
|
double[] ft = new double[10];
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (GENOTYPE T : GENOTYPE.values()) {
|
for (GENOTYPE T : GENOTYPE.values()) {
|
||||||
ft[index] = this.likelihood.get(T).floatValue();
|
ft[index] = this.likelihoods.get(T).doubleValue();
|
||||||
|
index++;
|
||||||
}
|
}
|
||||||
return ft;
|
return ft;
|
||||||
}
|
}
|
||||||
|
|
@ -177,16 +200,16 @@ public class LikelihoodObject {
|
||||||
*
|
*
|
||||||
* @return a GenotypeLikelihoods object representing our data
|
* @return a GenotypeLikelihoods object representing our data
|
||||||
*/
|
*/
|
||||||
public GenotypeLikelihoods convert( SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase ) {
|
public GenotypeLikelihoods convert(SAMFileHeader samHeader, int seqIndex, int seqPosition, byte refBase) {
|
||||||
double[] ft = toDoubleArray();
|
double[] ft = toDoubleArray();
|
||||||
float[] db = new float[ft.length];
|
float[] db = new float[ft.length];
|
||||||
int index = 0;
|
int index = 0;
|
||||||
if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
|
if (this.mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
|
||||||
for (;index < ft.length; index++) {
|
for (; index < ft.length; index++) {
|
||||||
db[index] = ((float)ft[index] * -1.0f);
|
db[index] = ((float) ft[index] * -1.0f);
|
||||||
}
|
}
|
||||||
} else if (this.mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
|
} else if (this.mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
|
||||||
for (;index < ft.length; index++) {
|
for (; index < ft.length; index++) {
|
||||||
db[index] = (float) Math.log(ft[index]);
|
db[index] = (float) Math.log(ft[index]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -195,18 +218,45 @@ public class LikelihoodObject {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* getter for the likelihood type
|
* getter for the likelihood type
|
||||||
|
*
|
||||||
* @return our likelihood storage type
|
* @return our likelihood storage type
|
||||||
*/
|
*/
|
||||||
public LIKELIHOOD_TYPE getLikelihoodType() {
|
public LIKELIHOOD_TYPE getLikelihoodType() {
|
||||||
return mLikelihoodType;
|
return mLikelihoodType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* validate a genotype score
|
||||||
|
*
|
||||||
|
* @param score the score to validate
|
||||||
|
*/
|
||||||
|
public void validateScore(double score) {
|
||||||
|
int x = 0;
|
||||||
|
switch (mLikelihoodType) {
|
||||||
|
case NEGITIVE_LOG:
|
||||||
|
if (score < 0)
|
||||||
|
throw new StingException("Likelikhood score of " + score + " is invalid, for NEGITIVE_LOG it must be greater than or equal to 0");
|
||||||
|
break;
|
||||||
|
case LOG:
|
||||||
|
if (score > 0)
|
||||||
|
throw new StingException("Likelikhood score of " + score + " is invalid, for LOG it must be less than or equal to 0");
|
||||||
|
break;
|
||||||
|
case RAW:
|
||||||
|
if (score < 0 || score > 1)
|
||||||
|
throw new StingException("Likelikhood score of " + score + " is invalid, for RAW it must be [0,1]");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* set our likelihood storage type, and adjust our current likelihood values to reflect
|
* set our likelihood storage type, and adjust our current likelihood values to reflect
|
||||||
* the new setting.
|
* the new setting.
|
||||||
|
*
|
||||||
* @param likelihood the type to set the values to.
|
* @param likelihood the type to set the values to.
|
||||||
*/
|
*/
|
||||||
public void setLikelihoodType( LIKELIHOOD_TYPE likelihood ) {
|
public void setLikelihoodType(LIKELIHOOD_TYPE likelihood) {
|
||||||
if (likelihood == mLikelihoodType)
|
if (likelihood == mLikelihoodType)
|
||||||
return;
|
return;
|
||||||
if (mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
|
if (mLikelihoodType == LIKELIHOOD_TYPE.RAW) {
|
||||||
|
|
@ -214,25 +264,27 @@ public class LikelihoodObject {
|
||||||
if (likelihood == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
|
if (likelihood == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
|
||||||
mult = -1.0;
|
mult = -1.0;
|
||||||
}
|
}
|
||||||
for (Double d : this.likelihood.values()) {
|
// one of us in log, the other negitive log, it doesn't matter which
|
||||||
d = mult * Math.log(d);
|
for (GENOTYPE g : likelihoods.keySet()) {
|
||||||
|
likelihoods.put(g, -1.0 * Math.log(likelihoods.get(g)));
|
||||||
}
|
}
|
||||||
}
|
} else if (likelihood == LIKELIHOOD_TYPE.RAW) {
|
||||||
else if (likelihood == LIKELIHOOD_TYPE.RAW) {
|
|
||||||
double mult = 1.0;
|
double mult = 1.0;
|
||||||
if (mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
|
if (mLikelihoodType == LIKELIHOOD_TYPE.NEGITIVE_LOG) {
|
||||||
mult = -1.0;
|
mult = -1.0;
|
||||||
}
|
}
|
||||||
for (Double d : this.likelihood.values()) {
|
|
||||||
d = Math.pow(d*mult,10);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// one of us in log, the other negitive log, it doesn't matter which
|
// one of us in log, the other negitive log, it doesn't matter which
|
||||||
for (Double d : this.likelihood.values()) {
|
for (GENOTYPE g : likelihoods.keySet()) {
|
||||||
d = -1.0 * Math.log(d);
|
likelihoods.put(g, Math.pow(likelihoods.get(g) * mult, 10));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// one of us in log, the other negitive log, it doesn't matter which
|
||||||
|
for (GENOTYPE g : likelihoods.keySet()) {
|
||||||
|
likelihoods.put(g, -1.0 * likelihoods.get(g));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
this.mLikelihoodType = likelihood;
|
this.mLikelihoodType = likelihood;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,99 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import org.broadinstitute.sting.utils.StingException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author aaron
|
||||||
|
*
|
||||||
|
* Class TabularLF
|
||||||
|
*
|
||||||
|
* the tabular likelihood format, as an implementation of the genotype interface
|
||||||
|
*/
|
||||||
|
public class TabularLFWriter implements GenotypeWriter {
|
||||||
|
/**
|
||||||
|
* where to print the tabular genotype likelihood info to
|
||||||
|
*/
|
||||||
|
public PrintStream outStream;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* construct, writing to a specified file
|
||||||
|
* @param writeTo
|
||||||
|
*/
|
||||||
|
public TabularLFWriter(File writeTo) {
|
||||||
|
try {
|
||||||
|
outStream = new PrintStream(writeTo);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
throw new StingException("Unable to write to specified file: " + writeTo.getName());
|
||||||
|
}
|
||||||
|
// print the header out
|
||||||
|
outStream.println("location sample_name ref alt genotype qhat qstar lodVsRef lodVsNextBest depth bases");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a single point genotype call to the
|
||||||
|
*
|
||||||
|
* @param contig the contig you're calling in
|
||||||
|
* @param position the position on the contig
|
||||||
|
* @param referenceBase the reference base
|
||||||
|
* @param readDepth the read depth at the specified position
|
||||||
|
* @param likelihoods the likelihoods of each of the possible alleles
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods) {
|
||||||
|
/**return String.format("%s %s %c %c %s %f %f %f %f %d %s",
|
||||||
|
location,
|
||||||
|
contig.getSpecies(),
|
||||||
|
ref,
|
||||||
|
alt,
|
||||||
|
genotype(),
|
||||||
|
qhat,
|
||||||
|
qstar,
|
||||||
|
lodVsRef,
|
||||||
|
lodVsNextBest,
|
||||||
|
depth,
|
||||||
|
bases);*/
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a variable length call to the genotyper
|
||||||
|
*
|
||||||
|
* @param contig the contig you're calling in
|
||||||
|
* @param position the position on the genome
|
||||||
|
* @param rmsMapQuals the root mean square of the mapping qualities
|
||||||
|
* @param readDepth the read depth
|
||||||
|
* @param refBase the reference base
|
||||||
|
* @param firstHomZyg the first homozygous indel
|
||||||
|
* @param secondHomZyg the second homozygous indel (if present, null if not)
|
||||||
|
* @param hetLikelihood the heterozygous likelihood
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
|
||||||
|
throw new StingException("TabularLFWriter doesn't support variable length calls");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a no call to the genotype file, if supported.
|
||||||
|
*
|
||||||
|
* @param position
|
||||||
|
* @param readDepth
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addNoCall(int position, int readDepth) {
|
||||||
|
throw new StingException("TabularLFWriter doesn't support no-calls");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** finish writing, closing any open files. */
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
if (this.outStream != null) {
|
||||||
|
outStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,66 @@
|
||||||
|
package org.broadinstitute.sting.utils.genotype.gff;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||||
|
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author aaron
|
||||||
|
* <p/>
|
||||||
|
* Class GFFWriter
|
||||||
|
* <p/>
|
||||||
|
* A descriptions should go here. Blame aaron if it's missing.
|
||||||
|
*/
|
||||||
|
public class GFFWriter implements GenotypeWriter {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a single point genotype call to the file
|
||||||
|
*
|
||||||
|
* @param contig the contig you're calling in
|
||||||
|
* @param position the position on the contig
|
||||||
|
* @param referenceBase the reference base
|
||||||
|
* @param readDepth the read depth at the specified position
|
||||||
|
* @param likelihoods the likelihoods of each of the possible alleles
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addGenotypeCall(SAMSequenceRecord contig, int position, float rmsMapQuals, char referenceBase, int readDepth, LikelihoodObject likelihoods) {
|
||||||
|
//To change body of implemented methods use File | Settings | File Templates.
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a variable length call to the genotype file
|
||||||
|
*
|
||||||
|
* @param contig the contig you're calling in
|
||||||
|
* @param position the position on the genome
|
||||||
|
* @param rmsMapQuals the root mean square of the mapping qualities
|
||||||
|
* @param readDepth the read depth
|
||||||
|
* @param refBase the reference base
|
||||||
|
* @param firstHomZyg the first homozygous indel
|
||||||
|
* @param secondHomZyg the second homozygous indel (if present, null if not)
|
||||||
|
* @param hetLikelihood the heterozygous likelihood
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addVariableLengthCall(SAMSequenceRecord contig, int position, float rmsMapQuals, int readDepth, char refBase, IndelLikelihood firstHomZyg, IndelLikelihood secondHomZyg, byte hetLikelihood) {
|
||||||
|
//To change body of implemented methods use File | Settings | File Templates.
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* add a no call to the genotype file, if supported.
|
||||||
|
*
|
||||||
|
* @param position the position
|
||||||
|
* @param readDepth the read depth
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void addNoCall(int position, int readDepth) {
|
||||||
|
//To change body of implemented methods use File | Settings | File Templates.
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** finish writing, closing any open files. */
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
//To change body of implemented methods use File | Settings | File Templates.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -194,7 +194,8 @@ abstract class GLFRecord {
|
||||||
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
short bite = ((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||||
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
out.writeUByte((short) (this.getRecordType().getReadTypeValue() << 4 | (refBase.getBaseHexValue() & 0x0f)));
|
||||||
out.writeUInt(((Long) offset).intValue());
|
out.writeUInt(((Long) offset).intValue());
|
||||||
out.writeUInt((new Long(readDepth).intValue()));
|
int write = ((new Long(readDepth).intValue()) | this.minimumLikelihood << 24);
|
||||||
|
out.writeUInt(write);
|
||||||
out.writeUByte((short) rmsMapQ);
|
out.writeUByte((short) rmsMapQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.utils.genotype.glf;
|
||||||
|
|
||||||
import net.sf.samtools.util.BinaryCodec;
|
import net.sf.samtools.util.BinaryCodec;
|
||||||
import net.sf.samtools.util.BlockCompressedOutputStream;
|
import net.sf.samtools.util.BlockCompressedOutputStream;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
import org.broadinstitute.sting.utils.genotype.GenotypeWriter;
|
||||||
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
import org.broadinstitute.sting.utils.genotype.IndelLikelihood;
|
||||||
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
import org.broadinstitute.sting.utils.genotype.LikelihoodObject;
|
||||||
|
|
@ -53,6 +54,9 @@ public class GLFWriter implements GenotypeWriter {
|
||||||
private String referenceSequenceName = null;
|
private String referenceSequenceName = null;
|
||||||
private long referenceSequenceLength = 0;
|
private long referenceSequenceLength = 0;
|
||||||
|
|
||||||
|
// the last position written
|
||||||
|
private int lastPos = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The public constructor for creating a GLF object
|
* The public constructor for creating a GLF object
|
||||||
*
|
*
|
||||||
|
|
@ -69,17 +73,15 @@ public class GLFWriter implements GenotypeWriter {
|
||||||
/**
|
/**
|
||||||
* add a point genotype to the GLF writer
|
* add a point genotype to the GLF writer
|
||||||
*
|
*
|
||||||
* @param contigName the name of the contig you're calling in
|
* @param contig the name of the contig you're calling in
|
||||||
* @param contigLength the contig length
|
|
||||||
* @param refBase the reference base, as a char
|
* @param refBase the reference base, as a char
|
||||||
* @param genomicLoc the location, as an offset from the previous glf record
|
* @param genomicLoc the location the location on the reference contig
|
||||||
* @param readDepth the read depth at the specified postion
|
* @param readDepth the read depth at the specified postion
|
||||||
* @param rmsMapQ the root mean square of the mapping quality
|
* @param rmsMapQ the root mean square of the mapping quality
|
||||||
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
|
* @param lhValues the GenotypeLikelihoods object, representing the genotype likelyhoods
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void addGenotypeCall(String contigName,
|
public void addGenotypeCall(SAMSequenceRecord contig,
|
||||||
int contigLength,
|
|
||||||
int genomicLoc,
|
int genomicLoc,
|
||||||
float rmsMapQ,
|
float rmsMapQ,
|
||||||
char refBase,
|
char refBase,
|
||||||
|
|
@ -87,23 +89,23 @@ public class GLFWriter implements GenotypeWriter {
|
||||||
LikelihoodObject lhValues) {
|
LikelihoodObject lhValues) {
|
||||||
|
|
||||||
// check if we've jumped to a new contig
|
// check if we've jumped to a new contig
|
||||||
checkSequence(contigName, contigLength);
|
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
|
||||||
|
|
||||||
SinglePointCall call = new SinglePointCall(refBase,
|
SinglePointCall call = new SinglePointCall(refBase,
|
||||||
genomicLoc,
|
genomicLoc - lastPos,
|
||||||
readDepth,
|
readDepth,
|
||||||
(short) rmsMapQ,
|
(short) rmsMapQ,
|
||||||
lhValues.toDoubleArray());
|
lhValues.toDoubleArray());
|
||||||
|
lastPos = genomicLoc;
|
||||||
call.write(this.outputBinaryCodec);
|
call.write(this.outputBinaryCodec);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* add a variable length (indel, deletion, etc) to the genotype writer
|
* add a variable length (indel, deletion, etc) to the genotype writer
|
||||||
*
|
*
|
||||||
* @param contigName the name of the contig you're calling in
|
* @param contig the name of the contig you're calling in
|
||||||
* @param contigLength the contig length
|
|
||||||
* @param refBase the reference base
|
* @param refBase the reference base
|
||||||
* @param genomicLoc the location, as an offset from the previous glf record
|
* @param genomicLoc the location on the reference contig
|
||||||
* @param readDepth the read depth at the specified postion
|
* @param readDepth the read depth at the specified postion
|
||||||
* @param rmsMapQ the root mean square of the mapping quality
|
* @param rmsMapQ the root mean square of the mapping quality
|
||||||
* @param firstHomZyg the first homozygous call
|
* @param firstHomZyg the first homozygous call
|
||||||
|
|
@ -111,8 +113,7 @@ public class GLFWriter implements GenotypeWriter {
|
||||||
* @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255
|
* @param hetLikelihood the negitive log likelihood of the heterozygote, from 0 to 255
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void addVariableLengthCall(String contigName,
|
public void addVariableLengthCall(SAMSequenceRecord contig,
|
||||||
int contigLength,
|
|
||||||
int genomicLoc,
|
int genomicLoc,
|
||||||
float rmsMapQ,
|
float rmsMapQ,
|
||||||
int readDepth,
|
int readDepth,
|
||||||
|
|
@ -122,11 +123,11 @@ public class GLFWriter implements GenotypeWriter {
|
||||||
byte hetLikelihood) {
|
byte hetLikelihood) {
|
||||||
|
|
||||||
// check if we've jumped to a new contig
|
// check if we've jumped to a new contig
|
||||||
checkSequence(contigName, contigLength);
|
checkSequence(contig.getSequenceName(), contig.getSequenceLength());
|
||||||
|
|
||||||
// normalize the two
|
// normalize the two
|
||||||
VariableLengthCall call = new VariableLengthCall(refBase,
|
VariableLengthCall call = new VariableLengthCall(refBase,
|
||||||
genomicLoc,
|
genomicLoc - lastPos,
|
||||||
readDepth,
|
readDepth,
|
||||||
(short) rmsMapQ,
|
(short) rmsMapQ,
|
||||||
firstHomZyg.getLikelihood(),
|
firstHomZyg.getLikelihood(),
|
||||||
|
|
@ -136,7 +137,7 @@ public class GLFWriter implements GenotypeWriter {
|
||||||
firstHomZyg.getIndelSequence(),
|
firstHomZyg.getIndelSequence(),
|
||||||
secondHomZyg.getLengthOfIndel(),
|
secondHomZyg.getLengthOfIndel(),
|
||||||
secondHomZyg.getIndelSequence());
|
secondHomZyg.getIndelSequence());
|
||||||
|
lastPos = genomicLoc;
|
||||||
call.write(this.outputBinaryCodec);
|
call.write(this.outputBinaryCodec);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ import net.sf.samtools.util.BinaryCodec;
|
||||||
*/
|
*/
|
||||||
class SinglePointCall extends GLFRecord {
|
class SinglePointCall extends GLFRecord {
|
||||||
|
|
||||||
// our likelihood object
|
// our likelihoods object
|
||||||
private double likelihoods[];
|
private double likelihoods[];
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.utils.genotype;
|
||||||
|
|
||||||
import net.sf.samtools.SAMFileHeader;
|
import net.sf.samtools.SAMFileHeader;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.BaseTest;
|
import org.broadinstitute.sting.BaseTest;
|
||||||
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
@ -55,8 +56,9 @@ public class GeliAdapterTest extends BaseTest {
|
||||||
File fl = new File("testFile.txt");
|
File fl = new File("testFile.txt");
|
||||||
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10);
|
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(2,1,10);
|
||||||
adapter = new GeliAdapter(fl,header);
|
adapter = new GeliAdapter(fl,header);
|
||||||
LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods());
|
LikelihoodObject obj = new LikelihoodObject(createFakeLikelihoods(), LikelihoodObject.LIKELIHOOD_TYPE.LOG);
|
||||||
adapter.addGenotypeCall("chr1",10,100,100,'A',100,obj);
|
SAMSequenceRecord rec = new SAMSequenceRecord("chr1",10);
|
||||||
|
adapter.addGenotypeCall(rec,100,100,'A',100,obj);
|
||||||
adapter.close();
|
adapter.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ public class LikelihoodObjectTest extends BaseTest {
|
||||||
@Test
|
@Test
|
||||||
public void testBlankConstruction() {
|
public void testBlankConstruction() {
|
||||||
mLO = new LikelihoodObject();
|
mLO = new LikelihoodObject();
|
||||||
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
assertTrue(mLO.likelihoods.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
@ -61,12 +61,12 @@ public class LikelihoodObjectTest extends BaseTest {
|
||||||
for (int x = 0; x < 10; x++) {
|
for (int x = 0; x < 10; x++) {
|
||||||
ray[x] = ( x * 25 );
|
ray[x] = ( x * 25 );
|
||||||
}
|
}
|
||||||
mLO = new LikelihoodObject(ray);
|
mLO = new LikelihoodObject(ray,LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
|
||||||
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
assertTrue(mLO.likelihoods.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
for (LikelihoodObject.GENOTYPE t : LikelihoodObject.GENOTYPE.values()) {
|
for (LikelihoodObject.GENOTYPE t : LikelihoodObject.GENOTYPE.values()) {
|
||||||
assertTrue(ray[index] == mLO.likelihood.get(t));
|
assertTrue(ray[index] == mLO.likelihoods.get(t));
|
||||||
++index;
|
++index;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -77,8 +77,8 @@ public class LikelihoodObjectTest extends BaseTest {
|
||||||
for (int x = 0; x < 10; x++) {
|
for (int x = 0; x < 10; x++) {
|
||||||
ray[x] = ( x * 25.0 );
|
ray[x] = ( x * 25.0 );
|
||||||
}
|
}
|
||||||
mLO = new LikelihoodObject(ray);
|
mLO = new LikelihoodObject(ray,LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
|
||||||
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
assertTrue(mLO.likelihoods.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
short[] ret = mLO.toByteArray();
|
short[] ret = mLO.toByteArray();
|
||||||
|
|
@ -104,8 +104,8 @@ public class LikelihoodObjectTest extends BaseTest {
|
||||||
ray[x] = ( 240.0 );
|
ray[x] = ( 240.0 );
|
||||||
}
|
}
|
||||||
ray [5] = 0;
|
ray [5] = 0;
|
||||||
mLO = new LikelihoodObject(ray);
|
mLO = new LikelihoodObject(ray, LikelihoodObject.LIKELIHOOD_TYPE.NEGITIVE_LOG);
|
||||||
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
assertTrue(mLO.likelihoods.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||||
short smallest = (short)mLO.getBestLikelihood();
|
short smallest = (short)mLO.getBestLikelihood();
|
||||||
assertTrue(smallest == 0);
|
assertTrue(smallest == 0);
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
@ -122,7 +122,7 @@ public class LikelihoodObjectTest extends BaseTest {
|
||||||
for (LikelihoodObject.GENOTYPE t : LikelihoodObject.GENOTYPE.values()) {
|
for (LikelihoodObject.GENOTYPE t : LikelihoodObject.GENOTYPE.values()) {
|
||||||
mLO.setLikelihood(t,128);
|
mLO.setLikelihood(t,128);
|
||||||
}
|
}
|
||||||
assertTrue(mLO.likelihood.size() == LikelihoodObject.GENOTYPE.values().length);
|
assertTrue(mLO.likelihoods.size() == LikelihoodObject.GENOTYPE.values().length);
|
||||||
|
|
||||||
int index = 0;
|
int index = 0;
|
||||||
short[] ret = mLO.toByteArray();
|
short[] ret = mLO.toByteArray();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue