Removed the --include_reference_name option from SelectHeaders, since the reference is always included.

In SelectHeaders, instead of including the path to the reference file, only include the name of the reference, since dbGaP does not like paths in headers.
This commit is contained in:
Khalid Shakir 2012-08-13 16:49:27 -04:00
parent 22b4466cf5
commit f809f24afb
2 changed files with 29 additions and 26 deletions

View File

@ -120,12 +120,6 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
@Argument(fullName = "exclude_header_name", shortName = "xl_hn", doc = "Exclude header. Can be specified multiple times", required = false)
public Set<String> XLheaderNames;
/**
* Note that reference inclusion takes precedence over other header matching. If set other reference lines may be excluded but the file name will still be added.
*/
@Argument(fullName = "include_reference_name", shortName = "irn", doc = "If set the reference file name minus the file extension will be added to the headers", required = false)
public boolean includeReference;
/**
* Note that interval name inclusion takes precedence over other header matching. If set other interval lines may be excluded but the intervals will still be added.
*/
@ -162,10 +156,6 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
// Select only the headers requested by name or expression.
headerLines = new LinkedHashSet<VCFHeaderLine>(getSelectedHeaders(headerLines));
// Optionally add in the reference.
if (includeReference && getToolkit().getArguments().referenceFile != null)
headerLines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, FilenameUtils.getBaseName(getToolkit().getArguments().referenceFile.getName())));
// Optionally add in the intervals.
if (includeIntervals && getToolkit().getArguments().intervals != null) {
for (IntervalBinding<Feature> intervalBinding : getToolkit().getArguments().intervals) {
@ -205,7 +195,7 @@ public class SelectHeaders extends RodWalker<Integer, Integer> implements TreeRe
selectedHeaders = ListFileUtils.excludeMatching(selectedHeaders, headerKey, XLheaderNames, true);
// always include the contig lines
selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary());
selectedHeaders = VCFUtils.withUpdatedContigsAsLines(selectedHeaders, getToolkit().getArguments().referenceFile, getToolkit().getMasterSequenceDictionary(), true);
return selectedHeaders;
}

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.codecs.vcf;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.commons.io.FilenameUtils;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
@ -165,12 +166,13 @@ public class VCFUtils {
if ( map.containsKey(key) ) {
VCFHeaderLine other = map.get(key);
if ( line.equals(other) )
continue;
else if ( ! line.getClass().equals(other.getClass()) )
if ( line.equals(other) ) {
// continue;
} else if ( ! line.getClass().equals(other.getClass()) ) {
throw new IllegalStateException("Incompatible header types: " + line + " " + other );
else if ( line instanceof VCFFilterHeaderLine ) {
String lineName = ((VCFFilterHeaderLine) line).getID(); String otherName = ((VCFFilterHeaderLine) other).getID();
} else if ( line instanceof VCFFilterHeaderLine ) {
String lineName = ((VCFFilterHeaderLine) line).getID();
String otherName = ((VCFFilterHeaderLine) other).getID();
if ( ! lineName.equals(otherName) )
throw new IllegalStateException("Incompatible header types: " + line + " " + other );
} else if ( line instanceof VCFCompoundHeaderLine ) {
@ -198,7 +200,7 @@ public class VCFUtils {
throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other );
}
}
if ( ! compLine.getDescription().equals(compOther) )
if ( ! compLine.getDescription().equals(compOther.getDescription()) )
conflictWarner.warn(line, "Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine);
} else {
// we are not equal, but we're not anything special either
@ -235,7 +237,7 @@ public class VCFUtils {
* @param header the header to update
* @param engine the GATK engine containing command line arguments and the master sequence dictionary
*/
public final static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) {
public static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) {
// Convenience overload: reads the reference file and the master sequence dictionary
// from the engine and delegates to the (header, referenceFile, refDict) overload.
return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
}
@ -246,11 +248,15 @@ public class VCFUtils {
* @param referenceFile the file path to the reference sequence used to generate this vcf
* @param refDict the SAM formatted reference sequence dictionary
*/
public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
public static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
// Builds a new VCFHeader: the old header's metadata lines (in input order) are passed through
// withUpdatedContigsAsLines, and the old header's genotype samples are carried over unchanged.
return new VCFHeader(withUpdatedContigsAsLines(oldHeader.getMetaDataInInputOrder(), referenceFile, refDict), oldHeader.getGenotypeSamples());
}
public final static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict) {
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict) {
// Delegates to the four-argument overload with referenceNameOnly = false, i.e. the
// reference header value is written as "file://" plus the reference file's absolute path.
return withUpdatedContigsAsLines(oldLines, referenceFile, refDict, false);
}
public static Set<VCFHeaderLine> withUpdatedContigsAsLines(final Set<VCFHeaderLine> oldLines, final File referenceFile, final SAMSequenceDictionary refDict, boolean referenceNameOnly) {
final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldLines.size());
for ( final VCFHeaderLine line : oldLines ) {
@ -264,17 +270,24 @@ public class VCFUtils {
for ( final VCFHeaderLine contigLine : makeContigHeaderLines(refDict, referenceFile) )
lines.add(contigLine);
lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + referenceFile.getAbsolutePath()));
String referenceValue;
if (referenceFile != null) {
if (referenceNameOnly)
referenceValue = FilenameUtils.getBaseName(referenceFile.getName());
else
referenceValue = "file://" + referenceFile.getAbsolutePath();
lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, referenceValue));
}
return lines;
}
/**
* Create VCFHeaderLines for each refDict entry, and optionally the assembly if referenceFile != null
* @param refDict
* @param refDict reference dictionary
* @param referenceFile for assembly name. May be null
* @return
* @return list of vcf contig header lines
*/
public final static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSequenceDictionary refDict,
public static List<VCFContigHeaderLine> makeContigHeaderLines(final SAMSequenceDictionary refDict,
final File referenceFile) {
final List<VCFContigHeaderLine> lines = new ArrayList<VCFContigHeaderLine>();
final String assembly = referenceFile != null ? getReferenceAssembly(referenceFile.getName()) : null;
@ -283,7 +296,7 @@ public class VCFUtils {
return lines;
}
private final static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) {
private static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) {
final Map<String, String> map = new LinkedHashMap<String, String>(3);
map.put("ID", contig.getSequenceName());
map.put("length", String.valueOf(contig.getSequenceLength()));
@ -291,7 +304,7 @@ public class VCFUtils {
return new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, map, contig.getSequenceIndex());
}
private final static String getReferenceAssembly(final String refPath) {
private static String getReferenceAssembly(final String refPath) {
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
String assembly = null;
if (refPath.contains("b37") || refPath.contains("v37"))