General support for both GL (log10-scaled) and PL (Phred-scaled) genotype likelihoods. All walkers now use the Tribble GenotypeLikelihoods object for parsing VCFs with genotype likelihood fields. Please use the GenotypeLikelihoods object from now on for seamless support of both the GL and PL tags. UGv2 now uses PL by default.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4589 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
15183ed778
commit
cbce3e3c83
|
|
@ -200,17 +200,8 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
/**
|
||||
* Use likelihoods if: is validation, prior is negative; or: is not validation, has genotype key
|
||||
*/
|
||||
if ( (isValidation && prior < 0.0) || genotype.isCalled() && genotype.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
String[] glArray = genotype.getAttributeAsString(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).split(",");
|
||||
|
||||
double[] likeArray = new double[glArray.length];
|
||||
|
||||
// convert to double array so we can normalize
|
||||
int k=0;
|
||||
for (String gl : glArray) {
|
||||
likeArray[k++] = Double.valueOf(gl);
|
||||
}
|
||||
|
||||
if ( (isValidation && prior < 0.0) || genotype.isCalled() && genotype.hasLikelihoods()) {
|
||||
double[] likeArray = genotype.getLikelihoods().getAsVector();
|
||||
double[] normalizedLikelihoods = MathUtils.normalizeFromLog10(likeArray);
|
||||
// see if we need to randomly mask out genotype in this position.
|
||||
Double d = generator.nextDouble();
|
||||
|
|
@ -234,7 +225,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
|
|||
/**
|
||||
* otherwise, use the prior uniformly
|
||||
*/
|
||||
else if (! isValidation && genotype.isCalled() && !genotype.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) {
|
||||
else if (! isValidation && genotype.isCalled() && ! genotype.hasLikelihoods() ) {
|
||||
// hack to deal with input VCFs with no genotype likelihoods. Just assume the called genotype
|
||||
// is confident. This is useful for Hapmap and 1KG release VCFs.
|
||||
double AA = (1.0-prior)/2.0;
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
headerInfo.add(new VCFInfoHeaderLine(VCFConstants.DOWNSAMPLED_KEY, 0, VCFHeaderLineType.Flag, "Were any of the samples downsampled?"));
|
||||
|
||||
// FORMAT and INFO fields
|
||||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
||||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings(VCFConstants.GENOTYPE_LIKELIHOODS_KEY));
|
||||
|
||||
// FILTER fields
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.utils.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -86,6 +87,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
final Interval fromInterval = new Interval(vc.getChr(), vc.getStart(), vc.getStart(), false, String.format("%s:%d", vc.getChr(), vc.getStart()));
|
||||
final int length = vc.getEnd() - vc.getStart();
|
||||
final Interval toInterval = liftOver.liftOver(fromInterval);
|
||||
VariantContext originalVC = vc;
|
||||
|
||||
if ( toInterval != null ) {
|
||||
// check whether the strand flips, and if so reverse complement everything
|
||||
|
|
@ -95,6 +97,14 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
}
|
||||
|
||||
vc = VariantContextUtils.modifyLocation(vc, GenomeLocParser.createPotentiallyInvalidGenomeLoc(toInterval.getSequence(), toInterval.getStart(), toInterval.getStart() + length));
|
||||
VariantContext newVC = VariantContext.createVariantContextWithPaddedAlleles(vc, ref.getBase(), false);
|
||||
|
||||
if ( VariantContextUtils.getSNPSubstitutionType(originalVC) != VariantContextUtils.getSNPSubstitutionType(newVC) ) {
|
||||
logger.warn(String.format("VCF at %s / %d => %s / %d is switching substitution type %s/%s to %s/%s",
|
||||
originalVC.getChr(), originalVC.getStart(), newVC.getChr(), newVC.getStart(),
|
||||
originalVC.getReference(), originalVC.getAlternateAllele(0), newVC.getReference(), newVC.getAlternateAllele(0)));
|
||||
}
|
||||
|
||||
writer.add(vc, ref.getBase());
|
||||
successfulIntervals++;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -258,7 +258,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
attributes.put(VCFConstants.GENOTYPE_QUALITY_KEY,String.format("%4.2f", 10*qual));
|
||||
|
||||
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
||||
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
|
||||
attributes.put(likelihoods.getKey(), likelihoods.getAsString());
|
||||
calls.put(sample, new Genotype(sample, myAlleles, qual, null, attributes, false));
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -128,7 +128,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
|
|||
attributes.put(VCFConstants.DEPTH_KEY, getFilteredDepth(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()));
|
||||
|
||||
GenotypeLikelihoods likelihoods = new GenotypeLikelihoods(GL.getLikelihoods());
|
||||
attributes.put(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, likelihoods.getAsString());
|
||||
attributes.put(likelihoods.getKey(), likelihoods.getAsString());
|
||||
|
||||
calls.put(sample, new Genotype(sample, myAlleles, AFbasedGenotype.second, null, attributes, false));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -156,7 +156,7 @@ public class UnifiedGenotyperV2 extends LocusWalker<VariantCallContext, UnifiedG
|
|||
}
|
||||
|
||||
// FORMAT and INFO fields
|
||||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings());
|
||||
headerInfo.addAll(VCFUtils.getSupportedHeaderStrings(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY));
|
||||
|
||||
// FILTER fields
|
||||
if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING ||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import java.math.BigDecimal;
|
|||
import java.util.*;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.playground.gatk.walkers.genotyper.AlleleFrequencyCalculationModel;
|
||||
|
||||
/**
|
||||
* MathUtils is a static class (no instantiation allowed!) with some useful math methods.
|
||||
|
|
@ -756,4 +757,4 @@ public class MathUtils {
|
|||
|
||||
/**
 * Divides {@code num} by {@code denom} in floating point, treating any
 * denominator smaller than 1 as 1 so the division never hits zero.
 *
 * @param num   the numerator
 * @param denom the denominator; values below 1 are replaced by 1
 * @return {@code num / max(denom, 1)} as a double
 */
public static double ratio(int num, int denom) {
    final int safeDenom = Math.max(denom, 1);
    final double numerator = num;
    return numerator / safeDenom;
}
|
||||
/**
 * Divides {@code num} by {@code denom} in floating point, treating any
 * denominator smaller than 1 as 1 so the division never hits zero.
 *
 * @param num   the numerator
 * @param denom the denominator; values below 1 are replaced by 1
 * @return {@code num / max(denom, 1)} as a double
 */
public static double ratio(long num, long denom) {
    final long safeDenom = Math.max(denom, 1);
    final double numerator = num;
    return numerator / safeDenom;
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,11 +27,13 @@ package org.broadinstitute.sting.utils.vcf;
|
|||
|
||||
import org.broad.tribble.util.variantcontext.Genotype;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.util.variantcontext.GenotypeLikelihoods;
|
||||
import org.broad.tribble.vcf.*;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -165,12 +167,19 @@ public class VCFUtils {
|
|||
* return a set of supported format lines; what we currently support for output in the genotype fields of a VCF
|
||||
* @return a set of VCF format lines
|
||||
*/
|
||||
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings() {
|
||||
public static Set<VCFFormatHeaderLine> getSupportedHeaderStrings(String glType) {
|
||||
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
|
||||
if ( glType == VCFConstants.GENOTYPE_LIKELIHOODS_KEY )
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
else if ( glType == VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY )
|
||||
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Normalized, Phred-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic"));
|
||||
else
|
||||
throw new ReviewedStingException("Unexpected GL type " + glType);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue