package org.broad.tribble.vcf;
import java.util.*;
/**
* @author aaron
*
* Class VCFHeader
*
* A class representing the VCF header
*/
public class VCFHeader {

    /** The mandatory header fields, declared in the order they are reported by {@link #getHeaderFields()}. */
    public enum HEADER_FIELDS {
        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
    }

    /** The character string that indicates meta data. */
    public static final String METADATA_INDICATOR = "##";

    /** The header string indicator. */
    public static final String HEADER_INDICATOR = "#";

    // the associated meta data; version lines are stripped out of this set by loadVCFVersion()
    private final Set<VCFHeaderLine> mMetaData;

    // the genotype sample names, kept in insertion order (the "FORMAT" entry is never stored here)
    private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();

    // our header version; stays null when the meta data carried no version line
    private VCFHeaderVersion version;

    /** do we have genotyping data? */
    private boolean hasGenotypingData = false;

    /**
     * Create a VCF header from a set of meta data, with no genotype columns.
     *
     * @param metaData the meta data associated with this header; copied, so the caller's set is not modified
     */
    public VCFHeader(Set<VCFHeaderLine> metaData) {
        // same resulting state as the two-argument form with no genotype columns
        this(metaData, Collections.<String>emptySet());
    }

    /**
     * Create a VCF header from a set of meta data and the genotype column names.
     *
     * @param metaData            the meta data associated with this header; copied into a TreeSet
     * @param genotypeSampleNames the genotype format field, and the sample names; an entry equal to
     *                            "FORMAT" is skipped, every other entry is recorded as a sample name
     */
    public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
        mMetaData = new TreeSet<VCFHeaderLine>(metaData);
        for (String col : genotypeSampleNames) {
            if (!col.equals("FORMAT")) {
                mGenotypeSampleNames.add(col);
            }
        }
        // judged on the incoming set, so a set holding only "FORMAT" still counts as genotyping data
        hasGenotypingData = !genotypeSampleNames.isEmpty();
        loadVCFVersion();
    }

    /**
     * Scan the meta data for VCF version lines. Every line whose key is recognised by
     * {@link VCFHeaderVersion#isFormatString} sets this header's version (the last such line in
     * iteration order wins) and is then removed from the stored meta data.
     *
     * If no version line is present the version is left untouched; {@link #getMetaData()} then
     * reports VCF3_3. Any rejection of an unsupported version string would have to come from
     * VCFHeaderVersion.toHeaderVersion, which is not visible here.
     */
    public void loadVCFVersion() {
        List<VCFHeaderLine> toRemove = new ArrayList<VCFHeaderLine>();
        for (VCFHeaderLine line : mMetaData) {
            if (VCFHeaderVersion.isFormatString(line.getKey())) {
                version = VCFHeaderVersion.toHeaderVersion(line.getValue(), line.getKey());
                toRemove.add(line);
            }
        }
        // remove old header lines for now; done after the loop so the set is not modified while iterating
        mMetaData.removeAll(toRemove);
    }

    /**
     * Get the header fields in the order they're presented in the input file (which is now required
     * to be the order presented in the spec).
     *
     * @return a new set of the header fields, in declaration order
     */
    public Set<HEADER_FIELDS> getHeaderFields() {
        return new LinkedHashSet<HEADER_FIELDS>(Arrays.asList(HEADER_FIELDS.values()));
    }

    /**
     * Get the meta data associated with this header.
     *
     * @return a new set whose first element is the version line (VCF3_3 when no version has been
     *         loaded or set), followed by the stored meta data lines
     */
    public Set<VCFHeaderLine> getMetaData() {
        VCFHeaderVersion effectiveVersion = (version == null) ? VCFHeaderVersion.VCF3_3 : version;
        Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
        lines.add(new VCFHeaderLine(effectiveVersion.getFormatString(), effectiveVersion.getVersionString()));
        lines.addAll(mMetaData);
        return lines;
    }

    /**
     * Get the genotyping sample names.
     *
     * @return the genotype column names, which may be empty if hasGenotypingData() returns false;
     *         this is the header's own backing set, not a copy
     */
    public Set<String> getGenotypeSamples() {
        return mGenotypeSampleNames;
    }

    /**
     * Do we have genotyping data?
     *
     * @return true if we have genotyping columns, false otherwise
     */
    public boolean hasGenotypingData() {
        return hasGenotypingData;
    }

    /**
     * @return the column count: the mandatory fields, plus (only when genotyping data is present)
     *         one column per sample name and one extra column, presumably the FORMAT column that
     *         the constructor leaves out of the sample names
     */
    public int getColumnCount() {
        int genotypeColumns = hasGenotypingData ? mGenotypeSampleNames.size() + 1 : 0;
        return HEADER_FIELDS.values().length + genotypeColumns;
    }

    /**
     * Convert the header to a new VCF version.
     *
     * @param version the version to convert to; must not be null
     */
    public void setVersion(VCFHeaderVersion version) {
        if (version.equals(this.version)) {
            return; // we're all set, do nothing
        }
        // store the new version, and update each of the header lines
        this.version = version;
        for (VCFHeaderLine line : mMetaData) {
            line.setVersion(version);
        }
    }
}