package org.broad.tribble.vcf; import java.util.*; /** * @author aaron *

* Class VCFHeader *

* A class representing the VCF header */ public class VCFHeader { // the mandatory header fields public enum HEADER_FIELDS { CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO } // the associated meta data private final Set mMetaData; // the list of auxillary tags private final Set mGenotypeSampleNames = new LinkedHashSet(); // the character string that indicates meta data public static final String METADATA_INDICATOR = "##"; // the header string indicator public static final String HEADER_INDICATOR = "#"; // our header version private VCFHeaderVersion version; /** do we have genotying data? */ private boolean hasGenotypingData = false; /** * create a VCF header, given a list of meta data and auxillary tags * * @param metaData the meta data associated with this header */ public VCFHeader(Set metaData) { mMetaData = new TreeSet(metaData); loadVCFVersion(); } /** * create a VCF header, given a list of meta data and auxillary tags * * @param metaData the meta data associated with this header * @param genotypeSampleNames the genotype format field, and the sample names */ public VCFHeader(Set metaData, Set genotypeSampleNames) { mMetaData = new TreeSet(metaData); for (String col : genotypeSampleNames) { if (!col.equals("FORMAT")) mGenotypeSampleNames.add(col); } if (genotypeSampleNames.size() > 0) hasGenotypingData = true; loadVCFVersion(); } /** * check our metadata for a VCF version tag, and throw an exception if the version is out of date * or the version is not present */ public void loadVCFVersion() { List toRemove = new ArrayList(); for ( VCFHeaderLine line : mMetaData ) if ( VCFHeaderVersion.isFormatString(line.getKey())) { version = VCFHeaderVersion.toHeaderVersion(line.getValue(),line.getKey()); toRemove.add(line); } // remove old header lines for now, mMetaData.removeAll(toRemove); } /** * get the header fields in order they're presented in the input file (which is now required to be * the order presented in the spec). * * @return a set of the header fields, in order */ public Set getHeaderFields() { Set fields = new LinkedHashSet(); for (HEADER_FIELDS field : HEADER_FIELDS.values()) fields.add(field); return fields; } /** * get the meta data, associated with this header * * @return a set of the meta data */ public Set getMetaData() { Set lines = new LinkedHashSet(); if (version == null) lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString())); else lines.add(new VCFHeaderLine(version.getFormatString(), version.getVersionString())); lines.addAll(mMetaData); return lines; } /** * get the genotyping sample names * * @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false */ public Set getGenotypeSamples() { return mGenotypeSampleNames; } /** * do we have genotyping data? * * @return true if we have genotyping columns, false otherwise */ public boolean hasGenotypingData() { return hasGenotypingData; } /** @return the column count, */ public int getColumnCount() { return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0); } /** * convert the header to a new VCF version * @param version the version to convert to */ public void setVersion(VCFHeaderVersion version) { if (version.equals(this.version)) return; // we're all set, do nothing // store the new version, and update each of the header lines this.version = version; for (VCFHeaderLine line : mMetaData) line.setVersion(version); } }