GATK binary VCF (gvcf) prototype format for efficiency testing

-- Very minimal working version that can read / write binary VCFs with genotypes
-- Already 10x faster for sites, 5x for fully parsed genotypes, and 1000x for skipping genotypes when reading
This commit is contained in:
Mark DePristo 2011-09-02 21:15:19 -04:00
parent 048202d18e
commit d471617c65
4 changed files with 659 additions and 0 deletions

View File

@ -0,0 +1,252 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gvcf;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
import java.util.*;
/**
* GATK binary VCF record
*
* @author Your Name
* @since Date created
*/
/**
 * GATK binary VCF (GVCF) record.
 *
 * Prototype binary encoding of a single VCF site plus its genotypes.
 * Contig names, alleles, filter strings, and INFO keys are interned in a
 * GVCFHeader / GVCFHeaderBuilder and referenced here by integer offsets to
 * keep records compact.  On-disk record layout (all via DataOutputStream):
 *
 *   chromOffset:int  start:int  stop:int  id:UTF  refPad:byte
 *   alleleOffsets:int[] (length-prefixed)  qual:float  info:UTF
 *   filterOffset:int  nGenotypes:int  sizeOfGenotypesInBytes:int
 *   genotypes...  RECORD_TERMINATOR:int
 */
public class GVCF {
    /** Sentinel written after every record so stream desync is detected early. */
    private final static int RECORD_TERMINATOR = 123456789;

    private int chromOffset;        // offset of the contig name in the header string table
    private int start, stop;
    private String id;
    private List<Allele> alleleMap; // ref allele first, then alts; resolved lazily in decode()
    private int alleleOffsets[];    // offsets into the header allele table, parallel to alleleMap
    private float qual;             // stored as -log10(P(error)), NOT phred-scaled
    private byte refPad;            // reference padding base for indels, 0 when absent
    private String info;            // pre-rendered INFO string with keys replaced by string offsets
    private int filterOffset;       // offset of the filter string in the header string table
    private List<GVCFGenotype> genotypes = Collections.emptyList();

    /**
     * Encode a VariantContext into a GVCF record, interning shared strings and
     * alleles into the supplied header builder.
     *
     * @param gvcfHeaderBuilder accumulates the shared string/allele/sample tables
     * @param vc                the variant to encode
     * @param skipGenotypes     if true no genotype data is encoded
     */
    public GVCF(final GVCFHeaderBuilder gvcfHeaderBuilder, final VariantContext vc, boolean skipGenotypes) {
        chromOffset = gvcfHeaderBuilder.encodeString(vc.getChr());
        start = vc.getStart();
        stop = vc.getEnd();
        refPad = vc.hasReferenceBaseForIndel() ? vc.getReferenceBaseForIndel() : 0;
        id = vc.getID();

        // encode alleles: reference first, then each alternate in order, so that
        // allele index i here matches the packed indices used by GVCFGenotype
        alleleMap = new ArrayList<Allele>(vc.getNAlleles());
        alleleOffsets = new int[vc.getNAlleles()];
        alleleMap.add(vc.getReference());
        alleleOffsets[0] = gvcfHeaderBuilder.encodeAllele(vc.getReference());
        for ( int i = 0; i < vc.getAlternateAlleles().size(); i++ ) {
            alleleMap.add(vc.getAlternateAllele(i));
            alleleOffsets[i+1] = gvcfHeaderBuilder.encodeAllele(vc.getAlternateAllele(i));
        }

        qual = (float)vc.getNegLog10PError(); //qualToByte(vc.getPhredScaledQual());
        info = infoFieldString(vc, gvcfHeaderBuilder);
        filterOffset = gvcfHeaderBuilder.encodeString(StandardVCFWriter.getFilterString(vc));

        if ( ! skipGenotypes ) {
            genotypes = encodeGenotypes(gvcfHeaderBuilder, vc);
        }
    }

    /**
     * Read one record from a binary GVCF stream.  Field order must mirror
     * write() exactly.
     *
     * @param inputStream   the stream, positioned at the start of a record
     * @param skipGenotypes if true the genotype block is skipped wholesale,
     *                      which is the fast path for sites-only traversals
     * @throws IOException on any read failure or premature end of stream
     */
    public GVCF(DataInputStream inputStream, boolean skipGenotypes) throws IOException {
        chromOffset = inputStream.readInt();
        start = inputStream.readInt();
        stop = inputStream.readInt();
        id = inputStream.readUTF();
        refPad = inputStream.readByte();
        alleleOffsets = readIntArray(inputStream);
        qual = inputStream.readFloat();
        info = inputStream.readUTF();
        filterOffset = inputStream.readInt();

        int nGenotypes = inputStream.readInt();
        int sizeOfGenotypes = inputStream.readInt();
        if ( skipGenotypes ) {
            genotypes = Collections.emptyList();
            // skipBytes() may legally skip fewer bytes than requested, so loop
            // until the entire genotype blob is consumed; otherwise the
            // terminator check below would read from the middle of the record
            int remaining = sizeOfGenotypes;
            while ( remaining > 0 ) {
                final int skipped = inputStream.skipBytes(remaining);
                if ( skipped <= 0 )
                    throw new EOFException("Unexpected end of stream while skipping genotypes");
                remaining -= skipped;
            }
        } else {
            genotypes = new ArrayList<GVCFGenotype>(nGenotypes);
            for ( int i = 0; i < nGenotypes; i++ )
                genotypes.add(new GVCFGenotype(this, inputStream));
        }

        int recordDone = inputStream.readInt();
        if ( recordDone != RECORD_TERMINATOR )
            throw new UserException.MalformedFile("Record not terminated by RECORD_TERMINATOR key");
    }

    /**
     * Expand this binary record back into a full VariantContext, resolving all
     * interned offsets through the header.
     *
     * @param source label for the resulting VariantContext
     * @param header the header whose tables this record's offsets index into
     * @return the decoded VariantContext
     */
    public VariantContext decode(final String source, final GVCFHeader header) {
        final String contig = header.getString(chromOffset);
        alleleMap = header.getAlleles(alleleOffsets);
        double negLog10PError = qual; // QualityUtils.qualToErrorProb(qual);
        Set<String> filters = header.getFilters(filterOffset);
        Map<String, Object> attributes = new HashMap<String, Object>();
        // NOTE(review): the INFO field is kept as one opaque pre-rendered string
        // rather than re-expanded into individual key/value attributes
        attributes.put("INFO", info);
        Byte refPadByte = refPad == 0 ? null : refPad;
        Map<String, Genotype> decodedGenotypes = decodeGenotypes(header);
        return new VariantContext(source, contig, start, stop, alleleMap, decodedGenotypes, negLog10PError, filters, attributes, refPadByte);
    }

    /** Decode all genotypes, mapping sample offsets back to sample names via the header. */
    private Map<String, Genotype> decodeGenotypes(final GVCFHeader header) {
        if ( genotypes.isEmpty() )
            return VariantContext.NO_GENOTYPES;
        else {
            Map<String, Genotype> map = new TreeMap<String, Genotype>();
            for ( int i = 0; i < genotypes.size(); i++ ) {
                final String sampleName = header.getSample(i);
                final Genotype g = genotypes.get(i).decode(sampleName, header, this, alleleMap);
                map.put(sampleName, g);
            }
            return map;
        }
    }

    /**
     * Encode every genotype of vc, placing each at the list index assigned to
     * its sample by the header builder so that decode can recover sample names
     * positionally.
     */
    private List<GVCFGenotype> encodeGenotypes(final GVCFHeaderBuilder gvcfHeaderBuilder, final VariantContext vc) {
        int nGenotypes = vc.getNSamples();
        if ( nGenotypes > 0 ) {
            List<GVCFGenotype> genotypes = new ArrayList<GVCFGenotype>(nGenotypes);
            for ( int i = 0; i < nGenotypes; i++ ) genotypes.add(null);
            for ( Genotype g : vc.getGenotypes().values() ) {
                int i = gvcfHeaderBuilder.encodeSample(g.getSampleName());
                genotypes.set(i, new GVCFGenotype(gvcfHeaderBuilder, alleleMap, g));
            }
            return genotypes;
        } else {
            return Collections.emptyList();
        }
    }

    /** @return the number of alleles at this site (reference included) */
    public int getNAlleles() { return alleleOffsets.length; }

    /**
     * Write this record to the stream in the layout documented on the class.
     *
     * @return the number of bytes written
     * @throws IOException on write failure
     */
    public int write(DataOutputStream outputStream) throws IOException {
        int startSize = outputStream.size();
        outputStream.writeInt(chromOffset);
        outputStream.writeInt(start);
        outputStream.writeInt(stop);
        outputStream.writeUTF(id);
        outputStream.writeByte(refPad);
        writeIntArray(alleleOffsets, outputStream, true);
        outputStream.writeFloat(qual);
        outputStream.writeUTF(info);
        outputStream.writeInt(filterOffset);

        int nGenotypes = genotypes.size();
        // all genotypes at a site have the same fixed size, so the total can be
        // predicted from the first one; verified against actual bytes below
        int expectedSizeOfGenotypes = nGenotypes == 0 ? 0 : genotypes.get(0).sizeInBytes() * nGenotypes;
        outputStream.writeInt(nGenotypes);
        outputStream.writeInt(expectedSizeOfGenotypes);

        int obsSizeOfGenotypes = 0;
        for ( GVCFGenotype g : genotypes )
            obsSizeOfGenotypes += g.write(outputStream);
        if ( obsSizeOfGenotypes != expectedSizeOfGenotypes )
            throw new RuntimeException("Expected and observed genotype sizes disagree! expected = " + expectedSizeOfGenotypes + " observed = " + obsSizeOfGenotypes);

        outputStream.writeInt(RECORD_TERMINATOR);
        return outputStream.size() - startSize;
    }

    /**
     * Render vc's attributes as a single "offset=value;offset=value" string,
     * interning each key in the header builder.  Internal bookkeeping keys are
     * excluded.
     */
    private final String infoFieldString(VariantContext vc, final GVCFHeaderBuilder gvcfHeaderBuilder) {
        StringBuilder s = new StringBuilder();
        boolean first = true;
        for ( Map.Entry<String, Object> field : vc.getAttributes().entrySet() ) {
            String key = field.getKey();
            if ( key.equals(VariantContext.ID_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_MAP_KEY) || key.equals(VariantContext.UNPARSED_GENOTYPE_PARSER_KEY) )
                continue;
            int stringIndex = gvcfHeaderBuilder.encodeString(key);
            String outputValue = StandardVCFWriter.formatVCFField(field.getValue());
            if ( outputValue != null ) {
                if ( ! first ) s.append(";");
                s.append(stringIndex).append("=").append(outputValue);
                first = false;
            }
        }
        return s.toString();
    }

    private final static int BUFFER_SIZE = 1048576; // 2**20

    /** Open a buffered binary output stream suitable for writing GVCF records. */
    public static DataOutputStream createOutputStream(final File file) throws FileNotFoundException {
        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file), BUFFER_SIZE));
    }

    /** Open a buffered binary input stream suitable for reading GVCF records. */
    public static DataInputStream createInputStream(final File file) throws FileNotFoundException {
        return new DataInputStream(new BufferedInputStream(new FileInputStream(file), BUFFER_SIZE));
    }

    /** Read a length-prefixed int array. */
    protected final static int[] readIntArray(final DataInputStream inputStream) throws IOException {
        return readIntArray(inputStream, inputStream.readInt());
    }

    /** Read exactly size ints. */
    protected final static int[] readIntArray(final DataInputStream inputStream, int size) throws IOException {
        int[] array = new int[size];
        for ( int i = 0; i < array.length; i++ )
            array[i] = inputStream.readInt();
        return array;
    }

    /** Write an int array, optionally length-prefixed. */
    protected final static void writeIntArray(int[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException {
        if ( writeSize ) outputStream.writeInt(array.length);
        for ( int i : array )
            outputStream.writeInt(i);
    }

    /** Read a length-prefixed byte array. */
    protected final static byte[] readByteArray(final DataInputStream inputStream) throws IOException {
        return readByteArray(inputStream, inputStream.readInt());
    }

    /** Read exactly size bytes. */
    protected final static byte[] readByteArray(final DataInputStream inputStream, int size) throws IOException {
        byte[] array = new byte[size];
        for ( int i = 0; i < array.length; i++ )
            array[i] = inputStream.readByte();
        return array;
    }

    /** Write a byte array, optionally length-prefixed. */
    protected final static void writeByteArray(byte[] array, final DataOutputStream outputStream, boolean writeSize) throws IOException {
        if ( writeSize ) outputStream.writeInt(array.length);
        for ( byte i : array )
            outputStream.writeByte(i);
    }

    /** Clamp a phred-scaled quality to [0, 255] and store it in a single byte. */
    protected final static byte qualToByte(double phredScaledQual) {
        return (byte)Math.round(Math.min(phredScaledQual, 255));
    }
}

View File

@ -0,0 +1,147 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gvcf;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.*;
/**
* GATK binary VCF record
*
* @author Your Name
* @since Date created
*/
/**
 * GATK binary VCF genotype record.
 *
 * Fixed-size per-sample block: the genotype call is packed into a single int
 * (one allele index per byte, up to 4 alleles), quality into one unsigned
 * byte, and AD/PL into arrays whose lengths are determined by the site's
 * allele count, so every genotype at a site occupies the same number of
 * bytes (see sizeInBytes()).
 */
public class GVCFGenotype {
    private byte gq;    // genotype quality, unsigned on disk, capped at 255
    private int gt;     // packed allele calls: byte i holds (alleleIndex + 1); 0 = unused slot, 255 = no-call
    private int dp;     // read depth
    private int ad[];   // per-allele depths, one entry per site allele
    private byte[] pl;  // phred-scaled likelihoods, one per diploid genotype: nAlleles*(nAlleles+1)/2

    // todo -- what to do about phasing? Perhaps we shouldn't support it
    // todo -- is the FL field generic or just a flag? Should we even support per sample filtering?

    /**
     * Encode a Genotype against the site's allele list.
     *
     * @param gvcfHeaderBuilder shared interning tables (currently unused here)
     * @param allAlleles        site alleles, reference first; packed indices refer to this list
     * @param genotype          the genotype to encode
     */
    public GVCFGenotype(final GVCFHeaderBuilder gvcfHeaderBuilder, final List<Allele> allAlleles, Genotype genotype) {
        gq = GVCF.qualToByte(genotype.getPhredScaledQual());
        gt = encodeAlleles(genotype.getAlleles(), allAlleles);
        dp = genotype.getAttributeAsInt("DP", 0);
        int nAlleles = allAlleles.size();
        // NOTE(review): ad and pl are allocated at the correct size but never
        // populated from the genotype's AD/PL attributes -- they are written out
        // as zeros. Prototype limitation; confirm before relying on round-trips.
        ad = new int[nAlleles];
        int npls = nAllelesToNPls(nAlleles);
        pl = new byte[npls];
    }

    /** Number of diploid genotype likelihoods for a site with nAlleles alleles. */
    private int nAllelesToNPls( int nAlleles ) {
        return nAlleles*(nAlleles+1) / 2;
    }

    /**
     * Read one genotype block from the stream; array sizes come from the
     * enclosing record's allele count.
     */
    public GVCFGenotype(GVCF gvcf, DataInputStream inputStream) throws IOException {
        int gqInt = inputStream.readUnsignedByte();
        gq = (byte)gqInt;
        gt = inputStream.readInt();
        dp = inputStream.readInt();
        ad = GVCF.readIntArray(inputStream, gvcf.getNAlleles());
        pl = GVCF.readByteArray(inputStream, nAllelesToNPls(gvcf.getNAlleles()));
    }

    // 2 alleles => 1 + 8 + 8 + 3 => 20
    /** @return the exact number of bytes write() emits for this genotype */
    protected int sizeInBytes() {
        return 1 // gq
                + 4 * 2 // gt + dp
                + 4 * ad.length // ad
                + 1 * pl.length; // pl
    }

    /**
     * Expand this block back into a Genotype.
     *
     * @param sampleName  name of the sample this genotype belongs to
     * @param header      shared header (currently unused here)
     * @param gvcf        enclosing record (currently unused here)
     * @param alleleIndex site alleles, reference first, for resolving packed indices
     */
    public Genotype decode(final String sampleName, final GVCFHeader header, GVCF gvcf, List<Allele> alleleIndex) {
        final List<Allele> alleles = decodeAlleles(gt, alleleIndex);
        // gq is stored unsigned; mask before converting, otherwise GQ > 127
        // would yield a negative (nonsensical) -log10 error
        final double negLog10PError = (gq & 0xFF) / 10.0;
        final Set<String> filters = Collections.emptySet();
        final Map<String, Object> attributes = new HashMap<String, Object>();
        attributes.put("DP", dp);
        attributes.put("AD", ad);
        attributes.put("PL", pl);
        return new Genotype(sampleName, alleles, negLog10PError, filters, attributes, false);
    }

    /**
     * Pack up to 4 allele calls into one int, one per byte, storing
     * (alleleIndex + 1) so that 0 marks an unused slot and no-call (index 254)
     * becomes 255.
     */
    private static int encodeAlleles(List<Allele> gtList, List<Allele> allAlleles) {
        final int nAlleles = gtList.size();
        if ( nAlleles > 4 )
            throw new IllegalArgumentException("encodeAlleles doesn't support more than 4 alt alleles, but I saw " + gtList);

        int gtInt = 0;
        for ( int i = 0; i < nAlleles ; i++ ) {
            final int bitOffset = i * 8;
            final int allelei = getAlleleIndex(gtList.get(i), allAlleles);
            final int gti = (allelei + 1) << bitOffset;
            gtInt = gtInt | gti;
        }

        return gtInt;
    }

    /** Index of q in allAlleles, or the 254 no-call sentinel. */
    private static int getAlleleIndex(Allele q, List<Allele> allAlleles) {
        if ( q.isNoCall() )
            return 254;
        for ( int i = 0; i < allAlleles.size(); i++ )
            if ( q.equals(allAlleles.get(i)) )
                return i;
        throw new IllegalStateException("getAlleleIndex passed allele not in map! allele " + q + " allAlleles " + allAlleles);
    }

    /** Unpack the allele calls encoded by encodeAlleles, stopping at the first empty slot. */
    private static List<Allele> decodeAlleles(int gtInt, List<Allele> alleleIndex) {
        List<Allele> alleles = new ArrayList<Allele>(4);
        for ( int i = 0; i < 32; i += 8 ) {
            // use an unsigned shift: with the old (gtInt & (0xFF << i)) >> i the
            // top byte (i == 24) sign-extended, so any stored value >= 0x80 there
            // (notably the 255 no-call encoding) decoded to a negative index
            final int gi = (gtInt >>> i) & 0xFF;
            if ( gi != 0 ) {
                final int allelei = gi - 1;
                alleles.add( allelei == 254 ? Allele.NO_CALL : alleleIndex.get(allelei) );
            } else {
                break;
            }
        }
        return alleles;
    }

    /**
     * Write this genotype block; must emit exactly sizeInBytes() bytes.
     *
     * @return the number of bytes written
     */
    public int write(DataOutputStream outputStream) throws IOException {
        int startSize = outputStream.size();
        outputStream.writeByte(gq);
        outputStream.writeInt(gt);
        outputStream.writeInt(dp);
        GVCF.writeIntArray(ad, outputStream, false);
        GVCF.writeByteArray(pl, outputStream, false);
        return outputStream.size() - startSize;
    }
}

View File

@ -0,0 +1,180 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gvcf;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.*;
/**
* [Short one sentence description of this walker]
* <p/>
* <p>
* [Functionality of this walker]
* </p>
* <p/>
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author Your Name
* @since Date created
*/
/**
 * GATK binary VCF header.
 *
 * Holds the interned tables (alleles, general strings, sample names) that
 * GVCF records reference by integer offset, and knows how to serialize
 * itself to / from the binary stream.  The on-disk form is a magic marker
 * followed by three length-prefixed UTF string lists.
 */
public class GVCFHeader {
    final protected static Logger logger = Logger.getLogger(GVCFHeader.class);

    /** File magic: format name plus a version byte. */
    private final static byte[] MAGIC_HEADER = "GVCF0.1\1".getBytes();

    final List<Allele> alleles;
    final List<String> strings;
    final List<String> samples;
    final List<Set<String>> filters;    // lazy per-offset cache of parsed filter sets

    /**
     * Build a header from the offset maps accumulated by GVCFHeaderBuilder.
     * The filter cache is not used on the write path.
     */
    public GVCFHeader(final Map<Allele, Integer> allelesIn, final Map<String, Integer> stringIn, final Map<String, Integer> samplesIn) {
        this.alleles = linearize(allelesIn);
        this.strings = linearize(stringIn);
        this.samples = linearize(samplesIn);
        this.filters = null; // not used with this constructor
    }

    /**
     * Read a header from a binary GVCF stream.
     *
     * @throws UserException if the stream does not start with the GVCF magic
     * @throws IOException   on any read failure
     */
    public GVCFHeader(DataInputStream inputStream) throws IOException {
        byte[] headerTest = new byte[MAGIC_HEADER.length];
        try {
            // readFully guarantees the whole marker is read; plain read() may
            // return fewer bytes and would let garbage through the check below
            inputStream.readFully(headerTest);
        } catch ( EOFException e ) {
            throw new UserException("Could not read GVCF file. MAGIC_HEADER missing (stream too short)");
        }
        if ( ! Arrays.equals(headerTest, MAGIC_HEADER) ) {
            // render the bytes as text; concatenating the array itself would
            // only print its identity hash
            throw new UserException("Could not read GVCF file. MAGIC_HEADER missing. Saw " + new String(headerTest));
        } else {
            alleles = stringsToAlleles(readStrings(inputStream));
            strings = readStrings(inputStream);
            samples = readStrings(inputStream);
            logger.info(String.format("Allele map of %d elements", alleles.size()));
            logger.info(String.format("String map of %d elements", strings.size()));
            logger.info(String.format("Sample map of %d elements", samples.size()));
            filters = initializeFilterCache();
        }
    }

    /**
     * Write the header: magic marker, then the allele, string, and sample tables.
     *
     * @return the number of bytes written
     */
    public int write(final DataOutputStream outputStream) throws IOException {
        int startBytes = outputStream.size();
        outputStream.write(MAGIC_HEADER);
        write(outputStream, allelesToStrings(alleles));
        write(outputStream, strings);
        write(outputStream, samples);
        return outputStream.size() - startBytes;
    }

    /** Write one length-prefixed list of UTF strings. */
    public void write(DataOutputStream outputStream, List<String> l) throws IOException {
        outputStream.writeInt(l.size());
        for ( String elt : l ) outputStream.writeUTF(elt);
    }

    /** Serialize alleles via Allele.toString(), the inverse of stringsToAlleles(). */
    private List<String> allelesToStrings(List<Allele> alleles) {
        List<String> strings = new ArrayList<String>(alleles.size());
        for ( Allele allele : alleles ) strings.add(allele.toString());
        return strings;
    }

    private List<Set<String>> initializeFilterCache() {
        // required to allow offset -> set lookup
        List<Set<String>> l = new ArrayList<Set<String>>(strings.size());
        for ( int i = 0; i < strings.size(); i++ ) l.add(null);
        return l;
    }

    /** Parse allele strings; a trailing '*' marks the reference allele. */
    private static List<Allele> stringsToAlleles(final List<String> strings) {
        final List<Allele> alleles = new ArrayList<Allele>(strings.size());
        for ( String string : strings ) {
            boolean isRef = string.endsWith("*");
            if ( isRef ) string = string.substring(0, string.length() - 1);
            alleles.add(Allele.create(string, isRef));
        }
        return alleles;
    }

    /** Read one length-prefixed list of UTF strings. */
    private static List<String> readStrings(final DataInputStream inputStream) throws IOException {
        final int nStrings = inputStream.readInt();
        final List<String> strings = new ArrayList<String>(nStrings);
        for ( int i = 0; i < nStrings; i++ ) {
            strings.add(inputStream.readUTF());
        }
        return strings;
    }

    /** Invert an object -> offset map into a list indexed by offset. */
    private static <T> List<T> linearize(final Map<T, Integer> map) {
        final ArrayList<T> l = new ArrayList<T>(map.size());
        for ( int i = 0; i < map.size(); i++ ) l.add(null);
        for ( final Map.Entry<T, Integer> elt : map.entrySet() )
            l.set(elt.getValue(), elt.getKey());
        return l;
    }

    public String getSample(final int offset) { return samples.get(offset); }
    public String getString(final int offset) { return strings.get(offset); }
    public Allele getAllele(final int offset) { return alleles.get(offset); }

    /** Resolve a set of allele offsets against the allele table, preserving order. */
    public List<Allele> getAlleles(final int[] offsets) {
        final List<Allele> alleles = new ArrayList<Allele>(offsets.length);
        for ( int i : offsets ) alleles.add(getAllele(i));
        return alleles;
    }

    /**
     * Resolve a filter-string offset to a parsed filter set, caching parse
     * results per offset.  UNFILTERED records decode to null (not cached,
     * since null is also the cache-miss marker).
     */
    public Set<String> getFilters(final int offset) {
        Set<String> cached = filters.get(offset);
        if ( cached != null )
            return cached;
        else {
            final String filterString = getString(offset);
            if ( filterString.equals(VCFConstants.UNFILTERED) )
                return null; // UNFILTERED records are represented by null
            else {
                Set<String> set = VCFCodec.parseFilters(null, -1, filterString);
                filters.set(offset, set); // remember the result
                return set;
            }
        }
    }
}

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.gvcf;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.util.HashMap;
import java.util.Map;
/**
* [Short one sentence description of this walker]
* <p/>
* <p>
* [Functionality of this walker]
* </p>
* <p/>
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author Your Name
* @since Date created
*/
/**
 * Accumulates the interned allele, string, and sample tables used by the
 * GVCF binary encoding.  Each distinct value is assigned a stable integer
 * offset in first-seen order; createHeader() freezes the tables into a
 * GVCFHeader for writing.
 */
public class GVCFHeaderBuilder {
    Map<Allele, Integer> alleles = new HashMap<Allele, Integer>();
    Map<String, Integer> strings = new HashMap<String, Integer>();
    Map<String, Integer> samples = new HashMap<String, Integer>();

    /** Snapshot the accumulated tables into a GVCFHeader. */
    public GVCFHeader createHeader() {
        return new GVCFHeader(alleles, strings, samples);
    }

    /** Intern a general-purpose string (contig, filter, INFO key) and return its offset. */
    public int encodeString(final String chr) { return encode(strings, chr); }

    /** Intern an allele and return its offset in the allele table. */
    public int encodeAllele(final Allele allele) { return encode(alleles, allele); }

    /** Intern a sample name and return its offset in the sample table. */
    public int encodeSample(final String sampleName) { return encode(samples, sampleName); }

    // Look up key's offset, assigning the next free slot (current map size)
    // the first time the key is seen.
    private <T> int encode(final Map<T, Integer> map, final T key) {
        final Integer existing = map.get(key);
        if ( existing != null )
            return existing;
        final int next = map.size();
        map.put(key, next);
        return next;
    }
}