From f809f24afbabddf1e2558e5532f84ec6b2d3a209 Mon Sep 17 00:00:00 2001 From: Khalid Shakir Date: Mon, 13 Aug 2012 16:49:27 -0400 Subject: [PATCH] Removed SelectHeaders' --include_reference_name option since the reference is always included. In SelectHeaders, instead of including the path to the file, only include the name of the reference since dbGaP does not like paths in headers. --- .../walkers/variantutils/SelectHeaders.java | 12 +----- .../sting/utils/codecs/vcf/VCFUtils.java | 43 ++++++++++++------- 2 files changed, 29 insertions(+), 26 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java index f14f6c2a6..46a3a8cd1 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java @@ -120,12 +120,6 @@ public class SelectHeaders extends RodWalker implements TreeRe @Argument(fullName = "exclude_header_name", shortName = "xl_hn", doc = "Exclude header. Can be specified multiple times", required = false) public Set XLheaderNames; - /** - * Note that reference inclusion takes precedence over other header matching. If set other reference lines may be excluded but the file name will still be added. - */ - @Argument(fullName = "include_reference_name", shortName = "irn", doc = "If set the reference file name minus the file extension will be added to the headers", required = false) - public boolean includeReference; - /** * Note that interval name inclusion takes precedence over other header matching. If set other interval lines may be excluded but the intervals will still be added. */ @@ -162,10 +156,6 @@ public class SelectHeaders extends RodWalker implements TreeRe // Select only the headers requested by name or expression. 
headerLines = new LinkedHashSet(getSelectedHeaders(headerLines)); - // Optionally add in the reference. - if (includeReference && getToolkit().getArguments().referenceFile != null) - headerLines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, FilenameUtils.getBaseName(getToolkit().getArguments().referenceFile.getName()))); - // Optionally add in the intervals. if (includeIntervals && getToolkit().getArguments().intervals != null) { for (IntervalBinding intervalBinding : getToolkit().getArguments().intervals) { @@ -205,7 +195,7 @@ public class SelectHeaders extends RodWalker implements TreeRe selectedHeaders = ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true); // always include the contig lines - selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary()); + selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary(), true); return selectedHeaders; } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java index f80b0eae4..561e8e78d 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.codecs.vcf; import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceRecord; +import org.apache.commons.io.FilenameUtils; import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broadinstitute.sting.commandline.RodBinding; @@ -165,12 +166,13 @@ public class VCFUtils { if ( map.containsKey(key) ) { VCFHeaderLine other = map.get(key); - if ( line.equals(other) ) - continue; - else if ( ! 
line.getClass().equals(other.getClass()) ) + if ( line.equals(other) ) { + // continue; + } else if ( ! line.getClass().equals(other.getClass()) ) { throw new IllegalStateException("Incompatible header types: " + line + " " + other ); - else if ( line instanceof VCFFilterHeaderLine ) { - String lineName = ((VCFFilterHeaderLine) line).getID(); String otherName = ((VCFFilterHeaderLine) other).getID(); + } else if ( line instanceof VCFFilterHeaderLine ) { + String lineName = ((VCFFilterHeaderLine) line).getID(); + String otherName = ((VCFFilterHeaderLine) other).getID(); if ( ! lineName.equals(otherName) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); } else if ( line instanceof VCFCompoundHeaderLine ) { @@ -198,7 +200,7 @@ public class VCFUtils { throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other ); } } - if ( ! compLine.getDescription().equals(compOther) ) + if ( ! compLine.getDescription().equals(compOther.getDescription()) ) conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine); } else { // we are not equal, but we're not anything special either @@ -235,7 +237,7 @@ public class VCFUtils { * @param header the header to update * @param engine the GATK engine containing command line arguments and the master sequence dictionary */ - public final static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) { + public static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) { return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary()); } @@ -246,11 +248,15 @@ public class VCFUtils { * @param referenceFile the file path to the reference sequence used to generate this vcf * @param refDict the SAM formatted reference sequence dictionary */ - public final static VCFHeader 
withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) { + public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) { return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict), oldHeader.getGenotypeSamples()); } - public final static Set withUpdatedContigsAsLines(final Set oldLines, final File referenceFile, final SAMSequenceDictionary refDict) { + public static Set withUpdatedContigsAsLines(final Set oldLines, final File referenceFile, final SAMSequenceDictionary refDict) { + return withUpdatedContigsAsLines(oldLines, referenceFile, refDict, false); + } + + public static Set withUpdatedContigsAsLines(final Set oldLines, final File referenceFile, final SAMSequenceDictionary refDict, boolean referenceNameOnly) { final Set lines = new LinkedHashSet(oldLines.size()); for ( final VCFHeaderLine line : oldLines ) { @@ -264,17 +270,24 @@ public class VCFUtils { for ( final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile) ) lines.add(contigLine); - lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + referenceFile.getAbsolutePath())); + String referenceValue; + if (referenceFile != null) { + if (referenceNameOnly) + referenceValue = FilenameUtils.getBaseName(referenceFile.getName()); + else + referenceValue = "file://" + referenceFile.getAbsolutePath(); + lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue)); + } return lines; } /** * Create VCFHeaderLines for each refDict entry, and optionally the assembly if referenceFile != null - * @param refDict + * @param refDict reference dictionary * @param referenceFile for assembly name. 
May be null - * @return + * @return list of vcf contig header lines */ - public final static List makeContigHeaderLines(final SAMSequenceDictionary refDict, + public static List makeContigHeaderLines(final SAMSequenceDictionary refDict, final File referenceFile) { final List lines = new ArrayList(); final String assembly = referenceFile != null ? getReferenceAssembly(referenceFile.getName()) : null; @@ -283,7 +296,7 @@ public class VCFUtils { return lines; } - private final static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) { + private static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) { final Map map = new LinkedHashMap(3); map.put("ID", contig.getSequenceName()); map.put("length", String.valueOf(contig.getSequenceLength())); @@ -291,7 +304,7 @@ public class VCFUtils { return new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, map, contig.getSequenceIndex()); } - private final static String getReferenceAssembly(final String refPath) { + private static String getReferenceAssembly(final String refPath) { // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot String assembly = null; if (refPath.contains("b37") || refPath.contains("v37"))