From a8eb8c27f037675369a768eb24489da582d7e274 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 4 Aug 2011 15:34:49 -0400 Subject: [PATCH] a) Minor changes to indel consensus scripts to better reflect good default values, b) Fixed up Mills/Devine codec so it always produces correct ref padded bases, and added option to VariantsToVCF to fix reference base --- .../walkers/variantrecalibration/VariantDataManager.java | 8 ++++++++ .../sting/gatk/walkers/variantutils/VariantsToVCF.java | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 7426a7726..b7f71c1ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -240,6 +240,14 @@ public class VariantDataManager { if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) { + // normalize QD by event length for indel case + int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now + if (eventLength > 0) // sanity check + value /= (double)eventLength; + + } + if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } } catch( Exception e ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index c9b63878d..2afa315ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -67,6 +67,9 @@ public class VariantsToVCF extends RodWalker { @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) protected String sampleName = null; + @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false) + protected boolean fixReferenceBase = false; + private Set allowedGenotypeFormatStrings = new HashSet(); private boolean wroteHeader = false; @@ -104,6 +107,10 @@ public class VariantsToVCF extends RodWalker { vc = VariantContext.modifyGenotypes(vc, genotypes); } + // todo - fix me. This may not be the cleanest way to handle features what need correct indel padding + if (fixReferenceBase) { + vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase()); + } writeRecord(vc, tracker, ref.getBase()); }