Fix for Kiran: when using VCFs for the comp tracks in the Annotator(s), don't put the headers from them into the output VCF.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3950 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-05 04:45:47 +00:00
parent 227c4b10f0
commit 1539791a04
7 changed files with 46 additions and 29 deletions

View File

@ -135,7 +135,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant")));
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
hInfo.addAll(engine.getVCFAnnotationDescriptions());

View File

@ -80,7 +80,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
vcfWriter = new VCFWriter(out);
validateAnnotateUnionArguments();
Map<String, VCFHeader> vcfRods = SampleUtils.getVCFHeadersFromRods(getToolkit(), null);
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null);
Set<String> samples = SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
String[] annotationsToUse = {};

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
@ -48,7 +49,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
public void initialize() {
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
writer = new VCFWriter(out);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -71,7 +72,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
liftOver.validateToSequences(toHeader.getSequenceDictionary());
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
writer = new VCFWriter(out);
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);

View File

@ -233,7 +233,7 @@ public class GenomicAnnotator extends RodWalker<LinkedList<VariantContext>, Link
// setup the header fields
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant")));
hInfo.add(new VCFHeaderLine("source", "Annotator"));
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
hInfo.addAll(engine.getVCFAnnotationDescriptions());

View File

@ -30,9 +30,8 @@ import net.sf.samtools.SAMReadGroupRecord;
import org.broad.tribble.vcf.VCFHeader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
import java.util.*;
@ -87,36 +86,18 @@ public class SampleUtils {
public static Set<String> getUniqueSamplesFromRods(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
Set<String> samples = new TreeSet<String>();
for ( VCFHeader header : getVCFHeadersFromRods(toolkit, rodNames).values() )
for ( VCFHeader header : VCFUtils.getVCFHeadersFromRods(toolkit, rodNames).values() )
samples.addAll(header.getGenotypeSamples());
return samples;
}
public static Set<String> getRodNamesWithVCFHeader(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
return getVCFHeadersFromRods(toolkit, rodNames).keySet();
}
public static Map<String, VCFHeader> getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
Map<String, VCFHeader> data = new HashMap<String, VCFHeader>();
// iterate to get all of the sample names
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
// ignore the rod if it's not in our list
if ( rodNames != null && !rodNames.contains(source.getName()) )
continue;
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader )
data.put(rod.getName(), (VCFHeader)rod.getHeader());
}
return data;
return VCFUtils.getVCFHeadersFromRods(toolkit, rodNames).keySet();
}
public static Set<String> getSampleListWithVCFHeader(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
return getSampleList(SampleUtils.getVCFHeadersFromRods(toolkit, rodNames));
return getSampleList(VCFUtils.getVCFHeadersFromRods(toolkit, rodNames));
}
public static Set<String> getSampleList(Map<String, VCFHeader> headers) {
@ -152,7 +133,7 @@ public class SampleUtils {
// iterate to get all of the sample names
for ( Map.Entry<String, VCFHeader> pair : getVCFHeadersFromRods(toolkit, null).entrySet() ) {
for ( Map.Entry<String, VCFHeader> pair : VCFUtils.getVCFHeadersFromRods(toolkit, null).entrySet() ) {
Set<String> vcfSamples = pair.getValue().getGenotypeSamples();
for ( String sample : vcfSamples )
addUniqueSample(samples, sampleOverlapMap, rodNamesToSampleNames, sample, pair.getKey());

View File

@ -43,6 +43,24 @@ public class VCFUtils {
*/
private VCFUtils() { }
/**
 * Collects the VCF headers of the reference-ordered data (rod) tracks bound to the engine.
 *
 * @param toolkit  GATK engine whose rod data sources are inspected
 * @param rodNames names of the rods to consider, or null to consider every rod
 *
 * @return a map from rod name to that rod's VCF header; rods whose header is missing
 *         or is not a VCFHeader are left out
 */
public static Map<String, VCFHeader> getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
    final Map<String, VCFHeader> headersByRod = new HashMap<String, VCFHeader>();

    for ( ReferenceOrderedDataSource source : toolkit.getRodDataSources() ) {
        // a null filter accepts every rod; otherwise only the listed names are kept
        final boolean wanted = rodNames == null || rodNames.contains(source.getName());
        if ( wanted ) {
            final RMDTrack track = source.getReferenceOrderedData();
            final Object header = track.getHeader();
            // instanceof is false for null, so tracks without a header are skipped as well
            if ( header instanceof VCFHeader )
                headersByRod.put(track.getName(), (VCFHeader)header);
        }
    }

    return headersByRod;
}
/**
* Gets the header fields from all VCF rods input by the user
*
@ -51,6 +69,18 @@ public class VCFUtils {
* @return a set of all fields
*/
public static Set<VCFHeaderLine> getHeaderFields(GenomeAnalysisEngine toolkit) {
// a null rod-name filter tells the two-argument overload not to exclude any rod
return getHeaderFields(toolkit, null);
}
/**
* Gets the header fields from the VCF rods input by the user, optionally restricted to a named subset of rods
*
* @param toolkit GATK engine
* @param rodNames names of rods to use, or null if we should use all possible ones
*
* @return a set of all fields
*/
public static Set<VCFHeaderLine> getHeaderFields(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
// accumulate the header lines from the selected rods in a sorted set
TreeSet<VCFHeaderLine> fields = new TreeSet<VCFHeaderLine>();
@ -58,6 +88,10 @@ public class VCFUtils {
// iterate over the rod data sources to gather their header lines
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
for ( ReferenceOrderedDataSource source : dataSources ) {
// ignore the rod if it's not in our list
if ( rodNames != null && !rodNames.contains(source.getName()) )
continue;
RMDTrack rod = source.getReferenceOrderedData();
if ( rod.getRecordType().equals(VariantContext.class)) {
fields.addAll(((VCFHeader)rod.getHeader()).getMetaData());