gatk-3.8/java/src/org/broad/tribble/vcf/VCFReaderUtils.java

package org.broad.tribble.vcf;


import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** The VCFReaderUtils class, which contains a collection of utilities for working with VCF files */
public class VCFReaderUtils {

    // our pattern matching for the genotype mFields
    private static final Pattern gtPattern = Pattern.compile("([0-9\\.]+)([\\\\|\\/])([0-9\\.]*)");

    /**
     * create a VCF header, given an array of strings that all start with at least the # character.  This function is
     * package protected so that the VCFReaderUtils can access this function
     *
     * @param headerStrings a list of header strings
     * @param version Header version to parse
     * @return a VCF Header created from the list of stinrgs
     */
    public static VCFHeader createHeader(List<String> headerStrings, VCFHeaderVersion version) {
        Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
        Set<String> auxTags = new LinkedHashSet<String>();
        // iterate over all the passed in strings
        for ( String str : headerStrings ) {
            if ( !str.startsWith("##") ) {
                String[] strings = str.substring(1).split("\\t");
                // the columns should be in order according to Richard Durbin
                int arrayIndex = 0;
                for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {
                    try {
                        if (field != VCFHeader.HEADER_FIELDS.valueOf(strings[arrayIndex]))
                            throw new RuntimeException("VCFReaderUtils: we were expecting column name " + field + " but we saw " + strings[arrayIndex]);
                    } catch (IllegalArgumentException e) {
                        throw new RuntimeException("VCFReaderUtils: Unknown column name \"" + strings[arrayIndex] + "\", it does not match a known column header name.");
                    }
                    arrayIndex++;
                }
                while (arrayIndex < strings.length) {
                    if (!strings[arrayIndex].equals("FORMAT"))
                        auxTags.add(strings[arrayIndex]);
                    arrayIndex++;
                }
            } else {
                if ( str.startsWith("##INFO=") )
                    metaData.add(new VCFInfoHeaderLine(str.substring(7),version));
                else if ( str.startsWith("##FILTER=") )
                    metaData.add(new VCFFilterHeaderLine(str.substring(9),version));
                else if ( str.startsWith("##FORMAT=") )
                    metaData.add(new VCFFormatHeaderLine(str.substring(9),version));
                else {
                    int equals = str.indexOf("=");
                    if ( equals != -1 )
                        metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
                }
            }
        }

        return new VCFHeader(metaData, auxTags);
    }

    /**
     * create the next VCFRecord, given the input line
     *
     * @param line    the line from the file
     * @param mHeader the VCF header
     *
     * @return the VCFRecord
     */
    public static VCFRecord createRecord(String line, VCFHeader mHeader) {
        return createRecord(line, mHeader, false);
    }

    public static VCFRecord createRecord(String line, VCFHeader mHeader, boolean ignoreGenotypes) {
        // things we need to make a VCF record
        Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();
        String tokens[] = line.split("\\t");

        // check to ensure that the column count of tokens is right
        if (tokens.length != mHeader.getColumnCount()) {
            throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);
        }

        int index = 0;
        for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())
            values.put(field, tokens[index++]);
        // if we have genotyping data, we try and extract the genotype fields
        if ( ! ignoreGenotypes && mHeader.hasGenotypingData()) {
            String mFormatString = tokens[index];
            String keyStrings[] = mFormatString.split(":");
            List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();
            index++;
			String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");
            for (String str : mHeader.getGenotypeSamples()) {
                genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));
                index++;
            }
            VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);
            // associate the genotypes with this new record
            for ( VCFGenotypeRecord gr : genotypeRecords )
                gr.setVCFRecord(vrec);
            return vrec;

        }
        return new VCFRecord(values);
    }

    /**
     * generate a VCF genotype record, given it's format string, the genotype string, and allele info
     *
     * @param sampleName     the sample name
     * @param formatString   the format string for this record, which contains the keys for the genotype parameters
     * @param genotypeString contains the phasing information, allele information, and values for genotype parameters
     * @param altAlleles     the alternate allele string array, which we index into based on the field parameters
     * @param referenceBase  the reference base
     *
     * @return a VCFGenotypeRecord
     */
    public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) {
        return getVCFGenotype(sampleName, formatString.split(":"), genotypeString, altAlleles, referenceBase);
    }

    /**
     * generate a VCF genotype record, given it's format string, the genotype string, and allele info
     *
     * @param sampleName     the sample name
     * @param keyStrings     the split format string for this record, which contains the keys for the genotype parameters
     * @param genotypeString contains the phasing information, allele information, and values for genotype parameters
     * @param altAlleles     the alternate allele string array, which we index into based on the field parameters
     * @param referenceBase  the reference base
     *
     * @return a VCFGenotypeRecord
     */
    public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) {
        // parameters to create the VCF genotype record
        HashMap<String, String> tagToValue = new HashMap<String, String>();
        VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNPHASED;
        List<VCFGenotypeEncoding> bases = new ArrayList<VCFGenotypeEncoding>();

        for (String key : keyStrings) {
            String parse;
            int nextDivider;
            if (!genotypeString.contains(":")) {
                nextDivider = genotypeString.length();
                parse = genotypeString;
            } else {
                nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":");
                parse = genotypeString.substring(0, nextDivider);
            }
            if (key.equals(VCFConstants.GENOTYPE_KEY)) {
                Matcher m = gtPattern.matcher(parse);
                if (!m.matches())
                    throw new RuntimeException("VCFReaderUtils: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse);
                phase = VCFGenotypeRecord.determinePhase(m.group(2));
                addAllele(m.group(1), altAlleles, referenceBase, bases);
                if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases);
            } else {
                if ( parse.length() == 0 )
                    parse = VCFGenotypeRecord.getMissingFieldValue(key);
                tagToValue.put(key, parse);
            }
            if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1;
            genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length());
        }
        if ( bases.size() > 0 && bases.get(0).equals(VCFConstants.EMPTY_ALLELE) )
            tagToValue.clear();
        // catch some common errors, either there are too many field keys or there are two many field values
        else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0))
            throw new RuntimeException("VCFReaderUtils: genotype value count doesn't match the key count (expected "
                    + keyStrings.length + " but saw " + tagToValue.size() + ")");
        else if ( genotypeString.length() > 0 )
            throw new RuntimeException("VCFReaderUtils: genotype string contained additional unprocessed fields: " + genotypeString
                    + ".  This most likely means that the format string is shorter then the value fields.");

        VCFGenotypeRecord rec = new VCFGenotypeRecord(sampleName, bases, phase);
        for ( Map.Entry<String, String> entry : tagToValue.entrySet() )
            rec.setField(entry.getKey(), entry.getValue());
        return rec;
    }


    /**
     * add an alternate allele to the list of alleles we have for a VCF genotype record
     *
     * @param alleleNumber  the allele number, as a string
     * @param altAlleles    the list of alternate alleles
     * @param referenceBase the reference base
     * @param bases         the list of bases for this genotype call
     */
    private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<VCFGenotypeEncoding> bases) {
        if (alleleNumber.equals(VCFConstants.EMPTY_ALLELE)) {
            bases.add(new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE));
        } else {
            int alleleValue = Integer.valueOf(alleleNumber);
            // check to make sure the allele value is within bounds
            if (alleleValue < 0 || alleleValue > altAlleles.length)
                throw new IllegalArgumentException("VCFReaderUtils: the allele value of " + alleleValue + " is out of bounds given the alternate allele list.");
            if (alleleValue == 0)
                bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase)));
            else
                bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1]));
        }
    }
}
moving VCF 3.3 back into the GATK so Guillermo can make changes for VCF 4 output git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3639 348d0f76-0448-11de-a6fe-93d51630548a 2010-06-26 02:20:06 +08:00			`package org.broad.tribble.vcf;`



			`import java.util.*;`
			`import java.util.regex.Matcher;`
			`import java.util.regex.Pattern;`

			`/** The VCFReaderUtils class, which contains a collection of utilities for working with VCF files */`
			`public class VCFReaderUtils {`

			`// our pattern matching for the genotype mFields`
			`private static final Pattern gtPattern = Pattern.compile("([0-9\\.]+)([\\\\\|\\/])([0-9\\.]*)");`

			`/**`
			`* create a VCF header, given an array of strings that all start with at least the # character. This function is`
			`* package protected so that the VCFReaderUtils can access this function`
			`*`
			`* @param headerStrings a list of header strings`
First step in major cleanup/redo of VCF functionality. Specifically, now: a) VCF track name can work again with 3.3 or 4.0 VCF's when specifying -B name,VCF,file. Code will read header and parse automatically the version. b) Old VCF codec is deprecated. Reader goes now direct from parsing VCF lines into producing VariantContext objects, with no intermediate VCF records. If anyone can't resist the urge to still input files using the old method, a new VCF3Codec is in place with the old code, but it will be eventually deleted. c) VCF headers and VCF info fields no longer keep track of the version. They are parsed into an internal representation and will be output only in VCF4.0 format. d) As a consequence, the existing GATK bug where files are produced with VCF4 body but VCF3.3 headers is solved. e) Several VCF 4.0 writer bugs are now solved. f) Integration test MD5's are changed, mostly because of corrected VCF4.0 headers and because validation data mostly uses now VCF4.0. g) Several VCF files in the ValidationData/ directory have been converted to VCF 4.0 format. I kept the old versions, and the new versions have a .vcf4 extension. Pending issues: a) We are still not dealing with indels consistently or correctly when representing them. This will be a second part of the changes. b) The VCF writer doesn't use VCFRecord but it does still use a lot of leftovers like VCFGenotypeEncoding, VCFGenotypeRecord, etc. This needs to be simplified and cleaned. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3813 348d0f76-0448-11de-a6fe-93d51630548a 2010-07-17 06:49:16 +08:00			`* @param version Header version to parse`
moving VCF 3.3 back into the GATK so Guillermo can make changes for VCF 4 output git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3639 348d0f76-0448-11de-a6fe-93d51630548a 2010-06-26 02:20:06 +08:00			`* @return a VCF Header created from the list of stinrgs`
			`*/`
			`public static VCFHeader createHeader(List<String> headerStrings, VCFHeaderVersion version) {`
			`Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();`
			`Set<String> auxTags = new LinkedHashSet<String>();`
			`// iterate over all the passed in strings`
			`for ( String str : headerStrings ) {`
			`if ( !str.startsWith("##") ) {`
			`String[] strings = str.substring(1).split("\\t");`
			`// the columns should be in order according to Richard Durbin`
			`int arrayIndex = 0;`
			`for (VCFHeader.HEADER_FIELDS field : VCFHeader.HEADER_FIELDS.values()) {`
			`try {`
			`if (field != VCFHeader.HEADER_FIELDS.valueOf(strings[arrayIndex]))`
			`throw new RuntimeException("VCFReaderUtils: we were expecting column name " + field + " but we saw " + strings[arrayIndex]);`
			`} catch (IllegalArgumentException e) {`
			`throw new RuntimeException("VCFReaderUtils: Unknown column name \"" + strings[arrayIndex] + "\", it does not match a known column header name.");`
			`}`
			`arrayIndex++;`
			`}`
			`while (arrayIndex < strings.length) {`
			`if (!strings[arrayIndex].equals("FORMAT"))`
			`auxTags.add(strings[arrayIndex]);`
			`arrayIndex++;`
			`}`
			`} else {`
			`if ( str.startsWith("##INFO=") )`
			`metaData.add(new VCFInfoHeaderLine(str.substring(7),version));`
			`else if ( str.startsWith("##FILTER=") )`
			`metaData.add(new VCFFilterHeaderLine(str.substring(9),version));`
			`else if ( str.startsWith("##FORMAT=") )`
			`metaData.add(new VCFFormatHeaderLine(str.substring(9),version));`
			`else {`
			`int equals = str.indexOf("=");`
			`if ( equals != -1 )`
First step in major cleanup/redo of VCF functionality. Specifically, now: a) VCF track name can work again with 3.3 or 4.0 VCF's when specifying -B name,VCF,file. Code will read header and parse automatically the version. b) Old VCF codec is deprecated. Reader goes now direct from parsing VCF lines into producing VariantContext objects, with no intermediate VCF records. If anyone can't resist the urge to still input files using the old method, a new VCF3Codec is in place with the old code, but it will be eventually deleted. c) VCF headers and VCF info fields no longer keep track of the version. They are parsed into an internal representation and will be output only in VCF4.0 format. d) As a consequence, the existing GATK bug where files are produced with VCF4 body but VCF3.3 headers is solved. e) Several VCF 4.0 writer bugs are now solved. f) Integration test MD5's are changed, mostly because of corrected VCF4.0 headers and because validation data mostly uses now VCF4.0. g) Several VCF files in the ValidationData/ directory have been converted to VCF 4.0 format. I kept the old versions, and the new versions have a .vcf4 extension. Pending issues: a) We are still not dealing with indels consistently or correctly when representing them. This will be a second part of the changes. b) The VCF writer doesn't use VCFRecord but it does still use a lot of leftovers like VCFGenotypeEncoding, VCFGenotypeRecord, etc. This needs to be simplified and cleaned. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3813 348d0f76-0448-11de-a6fe-93d51630548a 2010-07-17 06:49:16 +08:00			`metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));`
moving VCF 3.3 back into the GATK so Guillermo can make changes for VCF 4 output git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3639 348d0f76-0448-11de-a6fe-93d51630548a 2010-06-26 02:20:06 +08:00			`}`
			`}`
			`}`

			`return new VCFHeader(metaData, auxTags);`
			`}`

			`/**`
			`* create the next VCFRecord, given the input line`
			`*`
			`* @param line the line from the file`
			`* @param mHeader the VCF header`
			`*`
			`* @return the VCFRecord`
			`*/`
			`public static VCFRecord createRecord(String line, VCFHeader mHeader) {`
			`return createRecord(line, mHeader, false);`
			`}`

			`public static VCFRecord createRecord(String line, VCFHeader mHeader, boolean ignoreGenotypes) {`
			`// things we need to make a VCF record`
			`Map<VCFHeader.HEADER_FIELDS, String> values = new HashMap<VCFHeader.HEADER_FIELDS, String>();`
			`String tokens[] = line.split("\\t");`

			`// check to ensure that the column count of tokens is right`
			`if (tokens.length != mHeader.getColumnCount()) {`
			`throw new RuntimeException("The input file line doesn't contain enough fields, it should have " + mHeader.getColumnCount() + " fields, it has " + tokens.length + ". Line = " + line);`
			`}`

			`int index = 0;`
			`for (VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields())`
			`values.put(field, tokens[index++]);`
			`// if we have genotyping data, we try and extract the genotype fields`
			`if ( ! ignoreGenotypes && mHeader.hasGenotypingData()) {`
			`String mFormatString = tokens[index];`
			`String keyStrings[] = mFormatString.split(":");`
			`List<VCFGenotypeRecord> genotypeRecords = new ArrayList<VCFGenotypeRecord>();`
			`index++;`
			`String[] alt_alleles = values.get(VCFHeader.HEADER_FIELDS.ALT).split(",");`
			`for (String str : mHeader.getGenotypeSamples()) {`
			`genotypeRecords.add(getVCFGenotype(str, keyStrings, tokens[index], alt_alleles, values.get(VCFHeader.HEADER_FIELDS.REF).charAt(0)));`
			`index++;`
			`}`
			`VCFRecord vrec = new VCFRecord(values, mFormatString, genotypeRecords);`
			`// associate the genotypes with this new record`
			`for ( VCFGenotypeRecord gr : genotypeRecords )`
			`gr.setVCFRecord(vrec);`
			`return vrec;`

			`}`
			`return new VCFRecord(values);`
			`}`

			`/**`
			`* generate a VCF genotype record, given it's format string, the genotype string, and allele info`
			`*`
			`* @param sampleName the sample name`
			`* @param formatString the format string for this record, which contains the keys for the genotype parameters`
			`* @param genotypeString contains the phasing information, allele information, and values for genotype parameters`
			`* @param altAlleles the alternate allele string array, which we index into based on the field parameters`
			`* @param referenceBase the reference base`
			`*`
			`* @return a VCFGenotypeRecord`
			`*/`
			`public static VCFGenotypeRecord getVCFGenotype(String sampleName, String formatString, String genotypeString, String altAlleles[], char referenceBase) {`
			`return getVCFGenotype(sampleName, formatString.split(":"), genotypeString, altAlleles, referenceBase);`
			`}`

			`/**`
			`* generate a VCF genotype record, given it's format string, the genotype string, and allele info`
			`*`
			`* @param sampleName the sample name`
			`* @param keyStrings the split format string for this record, which contains the keys for the genotype parameters`
			`* @param genotypeString contains the phasing information, allele information, and values for genotype parameters`
			`* @param altAlleles the alternate allele string array, which we index into based on the field parameters`
			`* @param referenceBase the reference base`
			`*`
			`* @return a VCFGenotypeRecord`
			`*/`
			`public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) {`
			`// parameters to create the VCF genotype record`
			`HashMap<String, String> tagToValue = new HashMap<String, String>();`
Updates to fix some bugs in merger. Now able to merge into project wide indel VCF files. Integration teests coming tomorrow git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3727 348d0f76-0448-11de-a6fe-93d51630548a 2010-07-07 11:16:33 +08:00			`VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNPHASED;`
moving VCF 3.3 back into the GATK so Guillermo can make changes for VCF 4 output git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3639 348d0f76-0448-11de-a6fe-93d51630548a 2010-06-26 02:20:06 +08:00			`List<VCFGenotypeEncoding> bases = new ArrayList<VCFGenotypeEncoding>();`

			`for (String key : keyStrings) {`
			`String parse;`
			`int nextDivider;`
			`if (!genotypeString.contains(":")) {`
			`nextDivider = genotypeString.length();`
			`parse = genotypeString;`
			`} else {`
			`nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":");`
			`parse = genotypeString.substring(0, nextDivider);`
			`}`
First pass needed to remove old VCF code: moving all VCF-related constants into a single unified class git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3759 348d0f76-0448-11de-a6fe-93d51630548a 2010-07-11 15:19:16 +08:00			`if (key.equals(VCFConstants.GENOTYPE_KEY)) {`
moving VCF 3.3 back into the GATK so Guillermo can make changes for VCF 4 output git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3639 348d0f76-0448-11de-a6fe-93d51630548a 2010-06-26 02:20:06 +08:00			`Matcher m = gtPattern.matcher(parse);`
			`if (!m.matches())`
			`throw new RuntimeException("VCFReaderUtils: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse);`
			`phase = VCFGenotypeRecord.determinePhase(m.group(2));`
			`addAllele(m.group(1), altAlleles, referenceBase, bases);`
			`if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases);`
			`} else {`
			`if ( parse.length() == 0 )`
			`parse = VCFGenotypeRecord.getMissingFieldValue(key);`
			`tagToValue.put(key, parse);`
			`}`
			`if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1;`
			`genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length());`
			`}`
First pass needed to remove old VCF code: moving all VCF-related constants into a single unified class git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3759 348d0f76-0448-11de-a6fe-93d51630548a 2010-07-11 15:19:16 +08:00			`if ( bases.size() > 0 && bases.get(0).equals(VCFConstants.EMPTY_ALLELE) )`
moving VCF 3.3 back into the GATK so Guillermo can make changes for VCF 4 output git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3639 348d0f76-0448-11de-a6fe-93d51630548a 2010-06-26 02:20:06 +08:00			`tagToValue.clear();`
			`// catch some common errors, either there are too many field keys or there are two many field values`
			`else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0))`
			`throw new RuntimeException("VCFReaderUtils: genotype value count doesn't match the key count (expected "`
			`+ keyStrings.length + " but saw " + tagToValue.size() + ")");`
			`else if ( genotypeString.length() > 0 )`
			`throw new RuntimeException("VCFReaderUtils: genotype string contained additional unprocessed fields: " + genotypeString`
			`+ ". This most likely means that the format string is shorter then the value fields.");`

			`VCFGenotypeRecord rec = new VCFGenotypeRecord(sampleName, bases, phase);`
			`for ( Map.Entry<String, String> entry : tagToValue.entrySet() )`
			`rec.setField(entry.getKey(), entry.getValue());`
			`return rec;`
			`}`


			`/**`
			`* add an alternate allele to the list of alleles we have for a VCF genotype record`
			`*`
			`* @param alleleNumber the allele number, as a string`
			`* @param altAlleles the list of alternate alleles`
			`* @param referenceBase the reference base`
			`* @param bases the list of bases for this genotype call`
			`*/`
			`private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List<VCFGenotypeEncoding> bases) {`
First pass needed to remove old VCF code: moving all VCF-related constants into a single unified class git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3759 348d0f76-0448-11de-a6fe-93d51630548a 2010-07-11 15:19:16 +08:00			`if (alleleNumber.equals(VCFConstants.EMPTY_ALLELE)) {`
			`bases.add(new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE));`
moving VCF 3.3 back into the GATK so Guillermo can make changes for VCF 4 output git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3639 348d0f76-0448-11de-a6fe-93d51630548a 2010-06-26 02:20:06 +08:00			`} else {`
			`int alleleValue = Integer.valueOf(alleleNumber);`
			`// check to make sure the allele value is within bounds`
			`if (alleleValue < 0 \|\| alleleValue > altAlleles.length)`
			`throw new IllegalArgumentException("VCFReaderUtils: the allele value of " + alleleValue + " is out of bounds given the alternate allele list.");`
			`if (alleleValue == 0)`
			`bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase)));`
			`else`
			`bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1]));`
			`}`
			`}`
			`}`