gatk-3.8/java/src/org/broad/tribble/vcf/VCFHeader.java

149 lines
4.5 KiB
Java

package org.broad.tribble.vcf;
import java.util.*;
/**
* @author aaron
* <p/>
* Class VCFHeader
* <p/>
* A class representing the VCF header
*/
public class VCFHeader {
// the mandatory header fields
public enum HEADER_FIELDS {
CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO
}
// the associated meta data
private final Set<VCFHeaderLine> mMetaData;
// the list of auxillary tags
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
// the character string that indicates meta data
public static final String METADATA_INDICATOR = "##";
// the header string indicator
public static final String HEADER_INDICATOR = "#";
// our header version
private VCFHeaderVersion version;
/** do we have genotying data? */
private boolean hasGenotypingData = false;
/**
* create a VCF header, given a list of meta data and auxillary tags
*
* @param metaData the meta data associated with this header
*/
public VCFHeader(Set<VCFHeaderLine> metaData) {
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
loadVCFVersion();
}
/**
* create a VCF header, given a list of meta data and auxillary tags
*
* @param metaData the meta data associated with this header
* @param genotypeSampleNames the genotype format field, and the sample names
*/
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
for (String col : genotypeSampleNames) {
if (!col.equals("FORMAT"))
mGenotypeSampleNames.add(col);
}
if (genotypeSampleNames.size() > 0) hasGenotypingData = true;
loadVCFVersion();
}
/**
* check our metadata for a VCF version tag, and throw an exception if the version is out of date
* or the version is not present
*/
public void loadVCFVersion() {
List<VCFHeaderLine> toRemove = new ArrayList<VCFHeaderLine>();
for ( VCFHeaderLine line : mMetaData )
if ( VCFHeaderVersion.isFormatString(line.getKey())) {
version = VCFHeaderVersion.toHeaderVersion(line.getValue(),line.getKey());
toRemove.add(line);
}
// remove old header lines for now,
mMetaData.removeAll(toRemove);
}
/**
* get the header fields in order they're presented in the input file (which is now required to be
* the order presented in the spec).
*
* @return a set of the header fields, in order
*/
public Set<HEADER_FIELDS> getHeaderFields() {
Set<HEADER_FIELDS> fields = new LinkedHashSet<HEADER_FIELDS>();
for (HEADER_FIELDS field : HEADER_FIELDS.values())
fields.add(field);
return fields;
}
/**
* get the meta data, associated with this header
*
* @return a set of the meta data
*/
public Set<VCFHeaderLine> getMetaData() {
Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>();
if (version == null)
lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString()));
else
lines.add(new VCFHeaderLine(version.getFormatString(), version.getVersionString()));
lines.addAll(mMetaData);
return lines;
}
/**
* get the genotyping sample names
*
* @return a list of the genotype column names, which may be empty if hasGenotypingData() returns false
*/
public Set<String> getGenotypeSamples() {
return mGenotypeSampleNames;
}
/**
* do we have genotyping data?
*
* @return true if we have genotyping columns, false otherwise
*/
public boolean hasGenotypingData() {
return hasGenotypingData;
}
/** @return the column count, */
public int getColumnCount() {
return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0);
}
/**
* convert the header to a new VCF version
* @param version the version to convert to
*/
public void setVersion(VCFHeaderVersion version) {
if (version.equals(this.version))
return; // we're all set, do nothing
// store the new version, and update each of the header lines
this.version = version;
for (VCFHeaderLine line : mMetaData)
line.setVersion(version);
}
}