From 36edc60ccc37494d6d0804d4a8451717217a1499 Mon Sep 17 00:00:00 2001 From: ebanks Date: Mon, 5 Jul 2010 13:04:24 +0000 Subject: [PATCH] Connected UG to the new comp track annotation system in VA. Also, when emit confidence is lower than call confidence (so that we emit records filtered with LowQual), add a corresponding FILTER header field to the VCF so that the validator doesn't complain. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3720 348d0f76-0448-11de-a6fe-93d51630548a --- .../annotator/VariantAnnotatorEngine.java | 2 +- ...JointEstimateGenotypeCalculationModel.java | 2 +- .../walkers/genotyper/UnifiedGenotyper.java | 14 ++++++----- .../genotyper/UnifiedGenotyperEngine.java | 24 +++++++------------ .../UnifiedGenotyperIntegrationTest.java | 2 +- 5 files changed, 19 insertions(+), 25 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 16e400669..55f4cef71 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -155,7 +155,7 @@ public class VariantAnnotatorEngine { private void initialize(GenomeAnalysisEngine engine) { - // check to see whether a dbsnp rod was included + // check to see whether comp rods were included List dataSources = engine.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { if ( source.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java index d135c55f1..454be16b9 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/JointEstimateGenotypeCalculationModel.java @@ -44,7 +44,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc protected JointEstimateGenotypeCalculationModel() { - filter.add("LowQual"); + filter.add(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME); } public VariantCallContext callExtendedLocus(RefMetaDataTracker tracker, byte[] ref, GenomeLoc loc, Map stratifiedContexts) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index b12951b84..3a25f3678 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -29,6 +29,7 @@ import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.*; @@ -140,18 +141,19 @@ public class UnifiedGenotyper extends LocusWalker dbSet : UG_engine.dbAnnotations.entrySet() ) + headerInfo.add(new VCFInfoHeaderLine(dbSet.getValue(), 0, VCFHeaderLineType.Flag, (dbSet.getKey().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ? "dbSNP" : dbSet.getValue()) + " Membership")); if ( !UAC.NO_SLOD ) headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias")); // FORMAT and INFO fields headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings(VCFHeaderVersion.VCF3_3)); + // FILTER fields + if ( UAC.STANDARD_CONFIDENCE_FOR_EMITTING < UAC.STANDARD_CONFIDENCE_FOR_CALLING || + UAC.TRIGGER_CONFIDENCE_FOR_EMITTING < UAC.TRIGGER_CONFIDENCE_FOR_CALLING ) + headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality")); + // all of the arguments from the argument collection Set args = new HashSet(); args.add(UAC); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 3e79b7094..cbb9495fc 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -33,7 +33,6 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper; import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine; import org.broadinstitute.sting.utils.sam.AlignmentUtils; @@ -46,6 +45,7 @@ import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; import org.broadinstitute.sting.utils.pileup.*; +import org.broad.tribble.vcf.VCFRecord; import java.io.PrintStream; import java.util.*; @@ -54,13 +54,9 @@ import java.util.*; public class UnifiedGenotyperEngine { public static final String TRIGGER_TRACK_NAME = "trigger"; + public static final String LOW_QUAL_FILTER_NAME = "LowQual"; - // should we annotate dbsnp? - protected boolean annotateDbsnp = false; - // should we annotate hapmap2? - protected boolean annotateHapmap2 = false; - // should we annotate hapmap3? - protected boolean annotateHapmap3 = false; + protected HashMap dbAnnotations = new HashMap(); // the unified argument collection protected UnifiedArgumentCollection UAC = null; @@ -112,18 +108,14 @@ public class UnifiedGenotyperEngine { else this.samples = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader()); - // check to see whether a dbsnp rod was included + // check to see whether comp rods were included List dataSources = toolkit.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { - RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { - this.annotateDbsnp = true; + if ( source.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { + dbAnnotations.put(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, VCFRecord.DBSNP_KEY); } - if ( rod.getName().equals("hapmap2") ) { - this.annotateHapmap2 = true; - } - if ( rod.getName().equals("hapmap3") ) { - this.annotateHapmap3 = true; + else if ( source.getName().startsWith(VariantAnnotatorEngine.dbPrefix) ) { + dbAnnotations.put(source.getName(), source.getName().substring(VariantAnnotatorEngine.dbPrefix.length())); } } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 25300542f..e50e991d4 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("226ca8079db4e46a61367db49bac8b2b")); + Arrays.asList("a38ccaef73e57bed1e5f797b91e7ef38")); executeTest("testConfidence2", spec2); }