From f10588420c11e8db007a2c975e61206e68a71299 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 4 Aug 2011 12:36:24 -0400 Subject: [PATCH 1/3] Fixing path to dbSNP file as the other one was replaced --- .../indels/RealignerTargetCreatorIntegrationTest.java | 2 +- .../RecalibrationWalkersIntegrationTest.java | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java index f5ed69476..60312dbd2 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorIntegrationTest.java @@ -17,7 +17,7 @@ public class RealignerTargetCreatorIntegrationTest extends WalkerTest { executeTest("test standard", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", + "-T RealignerTargetCreator -B:dbsnp,vcf " + GATKDataLocation + "dbsnp_129_b36.vcf -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000 -o %s", 1, Arrays.asList("0367d39a122c8ac0899fb868a82ef728")); executeTest("test dbsnp", spec2); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index 049f44845..e81d2670c 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -30,7 +30,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + ( bam.equals( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" ) @@ -97,7 +97,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -standard" + " -OQ" + " -recalFile %s" + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf", + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf", 1, // just one output file Arrays.asList(md5)); executeTest("testCountCovariatesUseOriginalQuals", spec); @@ -144,7 +144,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + " -standard" + @@ -249,7 +249,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { " -B:anyNameABCD,VCF3 " + validationDataLocation + "vcfexample3.vcf" + " -T CountCovariates" + " -I " + bam + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -L 1:10,000,000-10,200,000" + " -cov ReadGroupCovariate" + " -cov QualityScoreCovariate" + @@ -275,7 +275,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + b36KGReference + - " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_132.b36.excluding_sites_after_129.vcf" + + " -B:dbsnp,VCF " + GATKDataLocation + "dbsnp_129_b36.vcf" + " -T CountCovariates" + " -I " + bam + " -cov ReadGroupCovariate" + From e48492f3c3fb0621c894195cecc0d621620f7b2d Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 4 Aug 2011 12:48:56 -0400 Subject: [PATCH 2/3] Validate that the reference padding base for indels is correct. --- .../gatk/walkers/variantutils/ValidateVariants.java | 4 ++-- .../sting/utils/variantcontext/VariantContext.java | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java index 0644c669b..0de405d97 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java @@ -154,10 +154,10 @@ public class ValidateVariants extends RodWalker { try { switch( type ) { case ALL: - vc.extraStrictValidation(observedRefAllele, rsIDs); + vc.extraStrictValidation(observedRefAllele, ref.getBase(), rsIDs); break; case REF: - vc.validateReferenceBases(observedRefAllele); + vc.validateReferenceBases(observedRefAllele, ref.getBase()); break; case IDS: vc.validateRSIDs(rsIDs); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 1712f6f7b..fff1961c6 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1055,11 +1055,12 @@ public class VariantContext implements Feature { // to enable tribble intergrati * Run all extra-strict validation tests on a Variant Context object * * @param reference the true reference allele + * @param paddedRefBase the reference base used for padding indels * @param rsIDs the true dbSNP IDs */ - public void extraStrictValidation(Allele reference, Set rsIDs) { + public void extraStrictValidation(Allele reference, Byte paddedRefBase, Set rsIDs) { // validate the reference - validateReferenceBases(reference); + validateReferenceBases(reference, paddedRefBase); // validate the RS IDs validateRSIDs(rsIDs); @@ -1074,11 +1075,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati //checkReferenceTrack(); } - public void validateReferenceBases(Allele reference) { + public void validateReferenceBases(Allele reference, Byte paddedRefBase) { // don't validate if we're an insertion if ( !reference.isNull() && !reference.basesMatch(getReference()) ) { throw new TribbleException.InternalCodecException(String.format("the REF allele is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), reference.getBaseString(), getReference().getBaseString())); } + + // we also need to validate the padding base for simple indels + if ( hasReferenceBaseForIndel() && !getReferenceBaseForIndel().equals(paddedRefBase) ) + throw new TribbleException.InternalCodecException(String.format("the padded REF base is incorrect for the record at position %s:%d, %s vs. %s", getChr(), getStart(), (char)getReferenceBaseForIndel().byteValue(), (char)paddedRefBase.byteValue())); } public void validateRSIDs(Set rsIDs) { From a8eb8c27f037675369a768eb24489da582d7e274 Mon Sep 17 00:00:00 2001 From: Guillermo del Angel Date: Thu, 4 Aug 2011 15:34:49 -0400 Subject: [PATCH 3/3] a) Minor changes to indel consensus scripts to better reflect good default values, b) Fixed up Mills/Devine codec so it always produces correct ref padded bases, and added option to VariantsToVCF to fix reference base --- .../walkers/variantrecalibration/VariantDataManager.java | 8 ++++++++ .../sting/gatk/walkers/variantutils/VariantsToVCF.java | 7 +++++++ 2 files changed, 15 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 7426a7726..b7f71c1ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -240,6 +240,14 @@ public class VariantDataManager { if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + if (vc.isIndel() && annotationKey.equalsIgnoreCase("QD")) { + // normalize QD by event length for indel case + int eventLength = Math.abs(vc.getAlternateAllele(0).getBaseString().length() - vc.getReference().getBaseString().length()); // ignore multi-allelic complication here for now + if (eventLength > 0) // sanity check + value /= (double)eventLength; + + } + if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } if( jitter && annotationKey.equalsIgnoreCase("FS") && MathUtils.compareDoubles(value, 0.0, 0.001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } } catch( Exception e ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java index c9b63878d..2afa315ff 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java @@ -67,6 +67,9 @@ public class VariantsToVCF extends RodWalker { @Argument(fullName="sample", shortName="sample", doc="The sample name represented by the variant rod (for data like GELI with genotypes)", required=false) protected String sampleName = null; + @Argument(fullName="fixRef", shortName="fixRef", doc="Fix common reference base in case there's an indel without padding", required=false) + protected boolean fixReferenceBase = false; + private Set allowedGenotypeFormatStrings = new HashSet(); private boolean wroteHeader = false; @@ -104,6 +107,10 @@ public class VariantsToVCF extends RodWalker { vc = VariantContext.modifyGenotypes(vc, genotypes); } + // todo - fix me. This may not be the cleanest way to handle features what need correct indel padding + if (fixReferenceBase) { + vc = new VariantContext("Variant",vc.getChr(),vc.getStart(), vc.getEnd(), vc.getAlleles(), vc.getGenotypes(), vc.getNegLog10PError(), vc.getFilters(),vc.getAttributes(), ref.getBase()); + } writeRecord(vc, tracker, ref.getBase()); }