package org.broad.tribble.vcf;
import java.util.*;
/**
 * A class representing the VCF header: the meta data lines, the mandatory
 * column names and, optionally, the genotype sample names.
 *
 * @author aaron
 */
public class VCFHeader {

    /** The mandatory header fields, declared in the order they are reported by {@link #getHeaderFields()}. */
    public enum HEADER_FIELDS {
        CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
    }

    /** The meta data lines associated with this header (a private, sorted copy of the constructor argument). */
    private final Set<VCFHeaderLine> mMetaData;

    /** The genotype sample names, in the order they were supplied; the "FORMAT" entry is never stored here. */
    private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();

    /** The character string that indicates a meta data line. */
    public static final String METADATA_INDICATOR = "##";

    /** The character string that indicates the header line. */
    public static final String HEADER_INDICATOR = "#";

    /** The VCF version of this header, as resolved by {@link #checkVCFVersion()}. */
    private VCFHeaderVersion versionVCF;

    /** Do we have genotyping data? */
    private boolean hasGenotypingData = false;

    /**
     * Create a VCF header from a set of meta data lines; the header has no genotyping data.
     *
     * @param metaData the meta data associated with this header; it is copied into a
     *                 {@link TreeSet}, so the caller's set is not modified
     */
    public VCFHeader(Set<VCFHeaderLine> metaData) {
        mMetaData = new TreeSet<VCFHeaderLine>(metaData);
        checkVCFVersion();
    }

    /**
     * Create a VCF header from a set of meta data lines and the genotype columns.
     *
     * @param metaData            the meta data associated with this header; it is copied into a
     *                            {@link TreeSet}, so the caller's set is not modified
     * @param genotypeSampleNames the genotype format field, and the sample names; every entry
     *                            other than "FORMAT" is kept as a sample name
     */
    public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
        mMetaData = new TreeSet<VCFHeaderLine>(metaData);
        for (String col : genotypeSampleNames) {
            if (!col.equals("FORMAT")) {
                mGenotypeSampleNames.add(col);
            }
        }
        // Any entry at all (even a lone "FORMAT") marks the header as carrying genotyping data.
        if (!genotypeSampleNames.isEmpty()) {
            hasGenotypingData = true;
        }
        checkVCFVersion();
    }

    /**
     * Check our meta data for a VCF version line.
     *
     * <p>The meta data is scanned in iteration order. As soon as a format line with a recognised
     * version is found the method returns, leaving every other line untouched. If no recognised
     * version is found, all format lines whose version was not recognised are removed and a
     * {@code VCFHeaderVersion.VCF3_3} version line is added instead.
     *
     * <p>Note that this method is invoked from both constructors.
     */
    // TODO: unsupported versions are currently replaced by a VCF3_3 line rather than rejected;
    // the original intent appears to have been to throw a RuntimeException for anything older
    // than VCFHeaderVersion.VCF3_2.
    public void checkVCFVersion() {
        List<VCFHeaderLine> toRemove = new ArrayList<VCFHeaderLine>();
        for (VCFHeaderLine line : mMetaData) {
            if (!VCFHeaderVersion.isFormatString(line.getKey())) {
                continue;
            }
            VCFHeaderVersion version = VCFHeaderVersion.toHeaderVersion(line.getValue(), line.getKey());
            if (version != null) {
                // a recognised version line: remember it and keep the meta data as it is
                versionVCF = version;
                return;
            }
            // collected here and removed after the loop, so the set is not modified while iterating
            toRemove.add(line);
        }
        // remove old header lines for now, and fall back to the default version line
        mMetaData.removeAll(toRemove);
        mMetaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(),
                VCFHeaderVersion.VCF3_3.getVersionString()));
        versionVCF = VCFHeaderVersion.VCF3_3;
    }

    /**
     * Get the header fields in the order they're presented in the input file (which is now
     * required to be the order presented in the spec).
     *
     * @return a new set of the header fields, in declaration order
     */
    public Set<HEADER_FIELDS> getHeaderFields() {
        return new LinkedHashSet<HEADER_FIELDS>(Arrays.asList(HEADER_FIELDS.values()));
    }

    /**
     * Get the meta data associated with this header.
     *
     * @return the set of meta data lines; this is the header's own set, not a copy
     */
    public Set<VCFHeaderLine> getMetaData() {
        return mMetaData;
    }

    /**
     * Get the genotyping sample names.
     *
     * @return the set of genotype column names (the header's own set, not a copy), which may be
     *         empty if hasGenotypingData() returns false
     */
    public Set<String> getGenotypeSamples() {
        return mGenotypeSampleNames;
    }

    /**
     * Do we have genotyping data?
     *
     * @return true if we have genotyping columns, false otherwise
     */
    public boolean hasGenotypingData() {
        return hasGenotypingData;
    }

    /**
     * Get the number of columns described by this header.
     *
     * @return the number of mandatory header fields, plus, when genotyping data is present, the
     *         number of sample names and one further column (presumably the FORMAT column, which
     *         the constructor leaves out of the sample names)
     */
    public int getColumnCount() {
        int columns = HEADER_FIELDS.values().length;
        if (hasGenotypingData) {
            columns += mGenotypeSampleNames.size() + 1;
        }
        return columns;
    }
}