From 1539791a0499daa9990c3203af9693f0df9f3834 Mon Sep 17 00:00:00 2001 From: ebanks Date: Thu, 5 Aug 2010 04:45:47 +0000 Subject: [PATCH] Fix for Kiran: when using VCFs for the comp tracks in the Annotator(s), don't put the headers from them into the output VCF. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3950 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/annotator/VariantAnnotator.java | 2 +- .../walkers/variantutils/CombineVariants.java | 2 +- .../variantutils/FilterLiftedVariants.java | 3 +- .../variantutils/LiftoverVariants.java | 3 +- .../walkers/annotator/GenomicAnnotator.java | 2 +- .../sting/utils/SampleUtils.java | 29 +++------------- .../sting/utils/genotype/vcf/VCFUtils.java | 34 +++++++++++++++++++ 7 files changed, 46 insertions(+), 29 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index c7eb45085..671778b04 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -135,7 +135,7 @@ public class VariantAnnotator extends RodWalker { // setup the header fields Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant"))); hInfo.add(new VCFHeaderLine("source", "VariantAnnotator")); hInfo.addAll(engine.getVCFAnnotationDescriptions()); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index dc6b31cb5..fb46189c2 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -80,7 +80,7 @@ public class CombineVariants extends 
RodWalker { vcfWriter = new VCFWriter(out); validateAnnotateUnionArguments(); - Map vcfRods = SampleUtils.getVCFHeadersFromRods(getToolkit(), null); + Map vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null); Set samples = SampleUtils.getSampleList(vcfRods, genotypeMergeOption); String[] annotationsToUse = {}; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java index be8f62654..9bef35d37 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; @@ -48,7 +49,7 @@ public class FilterLiftedVariants extends RodWalker { public void initialize() { Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); - Map vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); writer = new VCFWriter(out); final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? 
vcfHeaders.get("variant").getMetaData() : null, samples); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java index ce900927e..d14270faf 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java @@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -71,7 +72,7 @@ public class LiftoverVariants extends RodWalker { liftOver.validateToSequences(toHeader.getSequenceDictionary()); Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant")); - Map vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); + Map vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant")); writer = new VCFWriter(out); final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? 
vcfHeaders.get("variant").getMetaData() : null, samples); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java index 07c57a585..75a7f8c4f 100644 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotator.java @@ -233,7 +233,7 @@ public class GenomicAnnotator extends RodWalker, Link // setup the header fields Set hInfo = new HashSet(); - hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant"))); hInfo.add(new VCFHeaderLine("source", "Annotator")); hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName())); hInfo.addAll(engine.getVCFAnnotationDescriptions()); diff --git a/java/src/org/broadinstitute/sting/utils/SampleUtils.java b/java/src/org/broadinstitute/sting/utils/SampleUtils.java index d8d966cbe..b8a8af9e6 100755 --- a/java/src/org/broadinstitute/sting/utils/SampleUtils.java +++ b/java/src/org/broadinstitute/sting/utils/SampleUtils.java @@ -30,9 +30,8 @@ import net.sf.samtools.SAMReadGroupRecord; import org.broad.tribble.vcf.VCFHeader; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; -import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils; import java.util.*; @@ -87,36 +86,18 @@ public class SampleUtils { public static Set getUniqueSamplesFromRods(GenomeAnalysisEngine toolkit, Collection rodNames) { Set samples = new TreeSet(); - for ( VCFHeader header : 
getVCFHeadersFromRods(toolkit, rodNames).values() ) + for ( VCFHeader header : VCFUtils.getVCFHeadersFromRods(toolkit, rodNames).values() ) samples.addAll(header.getGenotypeSamples()); return samples; } public static Set getRodNamesWithVCFHeader(GenomeAnalysisEngine toolkit, Collection rodNames) { - return getVCFHeadersFromRods(toolkit, rodNames).keySet(); - } - - public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection rodNames) { - Map data = new HashMap(); - - // iterate to get all of the sample names - List dataSources = toolkit.getRodDataSources(); - for ( ReferenceOrderedDataSource source : dataSources ) { - // ignore the rod if it's not in our list - if ( rodNames != null && !rodNames.contains(source.getName()) ) - continue; - - RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader ) - data.put(rod.getName(), (VCFHeader)rod.getHeader()); - } - - return data; + return VCFUtils.getVCFHeadersFromRods(toolkit, rodNames).keySet(); } public static Set getSampleListWithVCFHeader(GenomeAnalysisEngine toolkit, Collection rodNames) { - return getSampleList(SampleUtils.getVCFHeadersFromRods(toolkit, rodNames)); + return getSampleList(VCFUtils.getVCFHeadersFromRods(toolkit, rodNames)); } public static Set getSampleList(Map headers) { @@ -152,7 +133,7 @@ public class SampleUtils { // iterate to get all of the sample names - for ( Map.Entry pair : getVCFHeadersFromRods(toolkit, null).entrySet() ) { + for ( Map.Entry pair : VCFUtils.getVCFHeadersFromRods(toolkit, null).entrySet() ) { Set vcfSamples = pair.getValue().getGenotypeSamples(); for ( String sample : vcfSamples ) addUniqueSample(samples, sampleOverlapMap, rodNamesToSampleNames, sample, pair.getKey()); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index 6126fbd16..2aebc9e0c 100755 --- 
a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -43,6 +43,24 @@ public class VCFUtils { */ private VCFUtils() { } + public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection rodNames) { + Map data = new HashMap(); + + // iterate over the rod data sources to collect their VCF headers + List dataSources = toolkit.getRodDataSources(); + for ( ReferenceOrderedDataSource source : dataSources ) { + // ignore the rod if it's not in our list + if ( rodNames != null && !rodNames.contains(source.getName()) ) + continue; + + RMDTrack rod = source.getReferenceOrderedData(); + if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader ) + data.put(rod.getName(), (VCFHeader)rod.getHeader()); + } + + return data; + } + /** * Gets the header fields from all VCF rods input by the user * @@ -51,6 +69,18 @@ public class VCFUtils { * @return a set of all fields */ public static Set getHeaderFields(GenomeAnalysisEngine toolkit) { + return getHeaderFields(toolkit, null); + } + + /** + * Gets the header fields from the VCF rods input by the user, optionally restricted to the named rods + * + * @param toolkit GATK engine + * @param rodNames names of rods to use, or null if we should use all possible ones + * + * @return a set of all fields + */ + public static Set getHeaderFields(GenomeAnalysisEngine toolkit, Collection rodNames) { // keep a map of sample name to occurrences encountered TreeSet fields = new TreeSet(); @@ -58,6 +88,10 @@ public class VCFUtils { // iterate to get all of the sample names List dataSources = toolkit.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { + // ignore the rod if it's not in our list + if ( rodNames != null && !rodNames.contains(source.getName()) ) + continue; + RMDTrack rod = source.getReferenceOrderedData(); if ( rod.getRecordType().equals(VariantContext.class)) { fields.addAll(((VCFHeader)rod.getHeader()).getMetaData());