Fix for Kiran: when VCFs are used for the comp tracks in the Annotator(s), don't put their headers into the output VCF.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3950 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
227c4b10f0
commit
1539791a04
|
|
@ -135,7 +135,7 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> {
|
|||
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant")));
|
||||
hInfo.add(new VCFHeaderLine("source", "VariantAnnotator"));
|
||||
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
||||
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> {
|
|||
vcfWriter = new VCFWriter(out);
|
||||
validateAnnotateUnionArguments();
|
||||
|
||||
Map<String, VCFHeader> vcfRods = SampleUtils.getVCFHeadersFromRods(getToolkit(), null);
|
||||
Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), null);
|
||||
Set<String> samples = SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
|
||||
|
||||
String[] annotationsToUse = {};
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantutils;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
|
|
@ -48,7 +49,7 @@ public class FilterLiftedVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
public void initialize() {
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
writer = new VCFWriter(out);
|
||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.walkers.variantutils;
|
|||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -71,7 +72,7 @@ public class LiftoverVariants extends RodWalker<Integer, Integer> {
|
|||
liftOver.validateToSequences(toHeader.getSequenceDictionary());
|
||||
|
||||
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList("variant"));
|
||||
Map<String, VCFHeader> vcfHeaders = SampleUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
Map<String, VCFHeader> vcfHeaders = VCFUtils.getVCFHeadersFromRods(getToolkit(), Arrays.asList("variant"));
|
||||
|
||||
writer = new VCFWriter(out);
|
||||
final VCFHeader vcfHeader = new VCFHeader(vcfHeaders.containsKey("variant") ? vcfHeaders.get("variant").getMetaData() : null, samples);
|
||||
|
|
|
|||
|
|
@ -233,7 +233,7 @@ public class GenomicAnnotator extends RodWalker<LinkedList<VariantContext>, Link
|
|||
|
||||
// setup the header fields
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit()));
|
||||
hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), Arrays.asList("variant")));
|
||||
hInfo.add(new VCFHeaderLine("source", "Annotator"));
|
||||
hInfo.add(new VCFHeaderLine("annotatorReference", getToolkit().getArguments().referenceFile.getName()));
|
||||
hInfo.addAll(engine.getVCFAnnotationDescriptions());
|
||||
|
|
|
|||
|
|
@ -30,9 +30,8 @@ import net.sf.samtools.SAMReadGroupRecord;
|
|||
import org.broad.tribble.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.genotype.vcf.VCFUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -87,36 +86,18 @@ public class SampleUtils {
|
|||
public static Set<String> getUniqueSamplesFromRods(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
|
||||
Set<String> samples = new TreeSet<String>();
|
||||
|
||||
for ( VCFHeader header : getVCFHeadersFromRods(toolkit, rodNames).values() )
|
||||
for ( VCFHeader header : VCFUtils.getVCFHeadersFromRods(toolkit, rodNames).values() )
|
||||
samples.addAll(header.getGenotypeSamples());
|
||||
|
||||
return samples;
|
||||
}
|
||||
|
||||
public static Set<String> getRodNamesWithVCFHeader(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
|
||||
return getVCFHeadersFromRods(toolkit, rodNames).keySet();
|
||||
}
|
||||
|
||||
public static Map<String, VCFHeader> getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
|
||||
Map<String, VCFHeader> data = new HashMap<String, VCFHeader>();
|
||||
|
||||
// iterate to get all of the sample names
|
||||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
// ignore the rod if it's not in our list
|
||||
if ( rodNames != null && !rodNames.contains(source.getName()) )
|
||||
continue;
|
||||
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader )
|
||||
data.put(rod.getName(), (VCFHeader)rod.getHeader());
|
||||
}
|
||||
|
||||
return data;
|
||||
return VCFUtils.getVCFHeadersFromRods(toolkit, rodNames).keySet();
|
||||
}
|
||||
|
||||
public static Set<String> getSampleListWithVCFHeader(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
|
||||
return getSampleList(SampleUtils.getVCFHeadersFromRods(toolkit, rodNames));
|
||||
return getSampleList(VCFUtils.getVCFHeadersFromRods(toolkit, rodNames));
|
||||
}
|
||||
|
||||
public static Set<String> getSampleList(Map<String, VCFHeader> headers) {
|
||||
|
|
@ -152,7 +133,7 @@ public class SampleUtils {
|
|||
|
||||
// iterate to get all of the sample names
|
||||
|
||||
for ( Map.Entry<String, VCFHeader> pair : getVCFHeadersFromRods(toolkit, null).entrySet() ) {
|
||||
for ( Map.Entry<String, VCFHeader> pair : VCFUtils.getVCFHeadersFromRods(toolkit, null).entrySet() ) {
|
||||
Set<String> vcfSamples = pair.getValue().getGenotypeSamples();
|
||||
for ( String sample : vcfSamples )
|
||||
addUniqueSample(samples, sampleOverlapMap, rodNamesToSampleNames, sample, pair.getKey());
|
||||
|
|
|
|||
|
|
@ -43,6 +43,24 @@ public class VCFUtils {
|
|||
*/
|
||||
private VCFUtils() { }
|
||||
|
||||
public static Map<String, VCFHeader> getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
|
||||
Map<String, VCFHeader> data = new HashMap<String, VCFHeader>();
|
||||
|
||||
// iterate over the rod data sources to collect the VCF header of each rod
|
||||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
// ignore the rod if it's not in our list
|
||||
if ( rodNames != null && !rodNames.contains(source.getName()) )
|
||||
continue;
|
||||
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getHeader() != null && rod.getHeader() instanceof VCFHeader )
|
||||
data.put(rod.getName(), (VCFHeader)rod.getHeader());
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header fields from all VCF rods input by the user
|
||||
*
|
||||
|
|
@ -51,6 +69,18 @@ public class VCFUtils {
|
|||
* @return a set of all fields
|
||||
*/
|
||||
public static Set<VCFHeaderLine> getHeaderFields(GenomeAnalysisEngine toolkit) {
|
||||
return getHeaderFields(toolkit, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the header fields from the VCF rods input by the user, restricted to the given rod names when provided
|
||||
*
|
||||
* @param toolkit GATK engine
|
||||
* @param rodNames names of rods to use, or null if we should use all possible ones
|
||||
*
|
||||
* @return a set of all fields
|
||||
*/
|
||||
public static Set<VCFHeaderLine> getHeaderFields(GenomeAnalysisEngine toolkit, Collection<String> rodNames) {
|
||||
|
||||
// collect the header lines in a sorted set so that duplicates are dropped
|
||||
TreeSet<VCFHeaderLine> fields = new TreeSet<VCFHeaderLine>();
|
||||
|
|
@ -58,6 +88,10 @@ public class VCFUtils {
|
|||
// iterate over the rod data sources to collect their header lines
|
||||
List<ReferenceOrderedDataSource> dataSources = toolkit.getRodDataSources();
|
||||
for ( ReferenceOrderedDataSource source : dataSources ) {
|
||||
// ignore the rod if it's not in our list
|
||||
if ( rodNames != null && !rodNames.contains(source.getName()) )
|
||||
continue;
|
||||
|
||||
RMDTrack rod = source.getReferenceOrderedData();
|
||||
if ( rod.getRecordType().equals(VariantContext.class)) {
|
||||
fields.addAll(((VCFHeader)rod.getHeader()).getMetaData());
|
||||
|
|
|
|||
Loading…
Reference in New Issue