diff --git a/java/src/org/broad/tribble/vcf/VCFConstants.java b/java/src/org/broad/tribble/vcf/VCFConstants.java new file mode 100755 index 000000000..9140b7fcd --- /dev/null +++ b/java/src/org/broad/tribble/vcf/VCFConstants.java @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broad.tribble.vcf; + +public final class VCFConstants { + + // standard INFO/FORMAT field keys + public static final String ANCESTRAL_ALLELE_KEY = "AA"; + public static final String ALLELE_COUNT_KEY = "AC"; + public static final String ALLELE_FREQUENCY_KEY = "AF"; + public static final String ALLELE_NUMBER_KEY = "AN"; + public static final String RMS_BASE_QUALITY_KEY = "BQ"; + public static final String CIGAR_KEY = "CIGAR"; + public static final String DBSNP_KEY = "DB"; + public static final String DEPTH_KEY = "DP"; + public static final String END_KEY = "END"; + public static final String GENOTYPE_FILTER_KEY = "FT"; + public static final String GENOTYPE_KEY = "GT"; + public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; + public static final String GENOTYPE_QUALITY_KEY = "GQ"; + public static final String HAPMAP2_KEY = "H2"; + public static final String HAPMAP3_KEY = "H3"; + public static final String HAPLOTYPE_QUALITY_KEY = "HQ"; + public static final String RMS_MAPPING_QUALITY_KEY = "MQ"; + public static final String MAPPING_QUALITY_ZERO_KEY = "MQ0"; + public static final String SAMPLE_NUMBER_KEY = "NS"; + public static final String OLD_DEPTH_KEY = "RD"; + public static final String STRAND_BIAS_KEY = "SB"; + public static final String SOMATIC_KEY = "SOMATIC"; + public static final String VALIDATED_KEY = "VALIDATED"; + + // separators + public static final String FORMAT_FIELD_SEPARATOR = ":"; + public static final String GENOTYPE_FIELD_SEPARATOR = ":"; + public static final String FIELD_SEPARATOR = "\t"; + public static final String FILTER_CODE_SEPARATOR = ";"; + public static final String INFO_FIELD_SEPARATOR = ";"; + + // missing/default values + public static final String UNFILTERED = "."; + public static final String PASSES_FILTERS_v3 = "0"; + public static final String PASSES_FILTERS_v4 = "PASS"; + public static final String EMPTY_ID_FIELD = "."; + public static final String EMPTY_INFO_FIELD = "."; + public static final String EMPTY_ALTERNATE_ALLELE_FIELD = "."; + public static final String MISSING_VALUE_v4 = "."; + public static final String MISSING_QUALITY_v3 = "-1"; + public static final String MISSING_GENOTYPE_QUALITY_v3 = "-1"; + public static final String MISSING_HAPLOTYPE_QUALITY_v3 = "-1"; + public static final String MISSING_DEPTH_v3 = "-1"; + public static final String EMPTY_ALLELE = "."; + public static final String EMPTY_GENOTYPE = "./."; + public static final double MAX_GENOTYPE_QUAL = 99.0; + + public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; +} \ No newline at end of file diff --git a/java/src/org/broad/tribble/vcf/VCFGenotypeEncoding.java b/java/src/org/broad/tribble/vcf/VCFGenotypeEncoding.java index 1763d0865..d5c4e30f4 100644 --- a/java/src/org/broad/tribble/vcf/VCFGenotypeEncoding.java +++ b/java/src/org/broad/tribble/vcf/VCFGenotypeEncoding.java @@ -30,8 +30,8 @@ public class VCFGenotypeEncoding { public VCFGenotypeEncoding(String baseString, boolean allowMultipleBaseReference) { if ((baseString.length() == 1)) { // are we an empty (no-call) genotype? - if (baseString.equals(VCFGenotypeRecord.EMPTY_ALLELE)) { - mBases = VCFGenotypeRecord.EMPTY_ALLELE; + if (baseString.equals(VCFConstants.EMPTY_ALLELE)) { + mBases = VCFConstants.EMPTY_ALLELE; mLength = 0; mType = TYPE.UNCALLED; } else if (!validBases(baseString)) { @@ -82,7 +82,7 @@ public class VCFGenotypeEncoding { VCFGenotypeEncoding d = (VCFGenotypeEncoding) obj; return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength); } - if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) ) + if ( mType == TYPE.UNCALLED && obj.toString().equals(VCFConstants.EMPTY_ALLELE) ) return true; return false; } diff --git a/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java b/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java index e9739132d..2d8b51cbe 100644 --- a/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java @@ -15,24 +15,6 @@ import java.util.*; */ public class VCFGenotypeRecord { - // key names - public static final String GENOTYPE_KEY = "GT"; - public static final String GENOTYPE_QUALITY_KEY = "GQ"; - public static final String DEPTH_KEY = "DP"; - public static final String HAPLOTYPE_QUALITY_KEY = "HQ"; - public static final String GENOTYPE_FILTER_KEY = "FT"; - public static final String GENOTYPE_LIKELIHOODS_KEY = "GL"; - public static final String OLD_DEPTH_KEY = "RD"; - - // the values for empty fields - public static final String EMPTY_GENOTYPE = "./."; - public static final String EMPTY_ALLELE = "."; - public static final int MISSING_GENOTYPE_QUALITY = -1; - public static final int MISSING_DEPTH = -1; - public static final int MISSING_HAPLOTYPE_QUALITY = -1; - public static final String PASSES_FILTERS = "0"; - public static final String UNFILTERED = "."; - public static final double MAX_QUAL_VALUE = 99.0; // what kind of phasing this genotype has @@ -89,7 +71,7 @@ public class VCFGenotypeRecord { */ public void setField(String key, String value) { // make sure the GT field isn't being set - if ( key.equals(GENOTYPE_KEY) ) + if ( key.equals(VCFConstants.GENOTYPE_KEY) ) throw new IllegalArgumentException("Setting the GT field is not allowed as that's done internally"); mFields.put(key, value); } @@ -132,20 +114,19 @@ public class VCFGenotypeRecord { * @return the phred-scaled quality score */ public double getQual() { - return ( mFields.containsKey(GENOTYPE_QUALITY_KEY) ? Double.valueOf(mFields.get(GENOTYPE_QUALITY_KEY)) : MISSING_GENOTYPE_QUALITY); + return ( mFields.containsKey(VCFConstants.GENOTYPE_QUALITY_KEY) ? Double.valueOf(mFields.get(VCFConstants.GENOTYPE_QUALITY_KEY)) : Double.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3)); } public boolean isMissingQual() { - return (int)getQual() == MISSING_GENOTYPE_QUALITY; + return VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)getQual())); } public double getNegLog10PError() { - double qual = getQual(); - return (qual == MISSING_GENOTYPE_QUALITY ? MISSING_GENOTYPE_QUALITY : qual / 10.0); + return (isMissingQual() ? Double.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3) : getQual() / 10.0); } public int getReadCount() { - return ( mFields.containsKey(DEPTH_KEY) ? Integer.valueOf(mFields.get(DEPTH_KEY)) : MISSING_DEPTH); + return ( mFields.containsKey(VCFConstants.DEPTH_KEY) ? Integer.valueOf(mFields.get(VCFConstants.DEPTH_KEY)) : Integer.valueOf(VCFConstants.MISSING_DEPTH_v3)); } public String getLocation() { @@ -203,9 +184,9 @@ public class VCFGenotypeRecord { } public boolean isFiltered() { - return ( mFields.get(GENOTYPE_FILTER_KEY) != null && - !mFields.get(GENOTYPE_FILTER_KEY).equals(UNFILTERED) && - !mFields.get(GENOTYPE_FILTER_KEY).equals(PASSES_FILTERS)); + return ( mFields.get(VCFConstants.GENOTYPE_FILTER_KEY) != null && + !mFields.get(VCFConstants.GENOTYPE_FILTER_KEY).equals(VCFConstants.UNFILTERED) && + !mFields.get(VCFConstants.GENOTYPE_FILTER_KEY).equals(VCFConstants.PASSES_FILTERS_v3)); } public int getPloidy() { @@ -220,7 +201,7 @@ public class VCFGenotypeRecord { List alleleStrings = new ArrayList(altAlleles.size()); for (VCFGenotypeEncoding allele : mGenotypeAlleles) { if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED) - alleleStrings.add(VCFGenotypeRecord.EMPTY_ALLELE); + alleleStrings.add(VCFConstants.EMPTY_ALLELE); else alleleStrings.add(String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0)); } @@ -269,14 +250,14 @@ public class VCFGenotypeRecord { builder.append(toGenotypeString(altAlleles)); for ( String field : genotypeFormatStrings ) { - if ( field.equals(GENOTYPE_KEY) ) + if ( field.equals(VCFConstants.GENOTYPE_KEY) ) continue; String value = mFields.get(field); - if ( value == null && field.equals(OLD_DEPTH_KEY) ) - value = mFields.get(DEPTH_KEY); + if ( value == null && field.equals(VCFConstants.OLD_DEPTH_KEY) ) + value = mFields.get(VCFConstants.DEPTH_KEY); - builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); + builder.append(VCFConstants.GENOTYPE_FIELD_SEPARATOR); if ( value == null || value.equals("") ) builder.append(getMissingFieldValue(field, doVCF40)); else @@ -299,15 +280,15 @@ public class VCFGenotypeRecord { } public static String stringEncodingForEmptyGenotype(String[] genotypeFormatStrings, boolean doVCF40) { StringBuilder builder = new StringBuilder(); - builder.append(EMPTY_GENOTYPE); + builder.append(VCFConstants.EMPTY_GENOTYPE); for ( String field : genotypeFormatStrings ) { - if ( field.equals(GENOTYPE_KEY) ) + if ( field.equals(VCFConstants.GENOTYPE_KEY) ) continue; // in VCF4.0, if a genotype is empty only the ./. key can be included if (!doVCF40) { - builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); + builder.append(VCFConstants.GENOTYPE_FIELD_SEPARATOR); builder.append(getMissingFieldValue(field)); } } @@ -324,9 +305,9 @@ public class VCFGenotypeRecord { if (doVCF40) { result = "."; // default missing value // TODO - take number of elements in field as input and output corresponding .'s - if ( field.equals(GENOTYPE_LIKELIHOODS_KEY) ) + if ( field.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) ) result = ".,.,."; - else if ( field.equals(HAPLOTYPE_QUALITY_KEY) ) + else if ( field.equals(VCFConstants.HAPLOTYPE_QUALITY_KEY) ) result = ".,."; } @@ -334,13 +315,13 @@ public class VCFGenotypeRecord { result = ""; - if ( field.equals(GENOTYPE_QUALITY_KEY) ) - result = String.valueOf(MISSING_GENOTYPE_QUALITY); - else if ( field.equals(DEPTH_KEY) || field.equals(OLD_DEPTH_KEY) ) - result = String.valueOf(MISSING_DEPTH); - else if ( field.equals(GENOTYPE_FILTER_KEY) ) - result = UNFILTERED; - else if ( field.equals(GENOTYPE_LIKELIHOODS_KEY) ) + if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) + result = String.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3); + else if ( field.equals(VCFConstants.DEPTH_KEY) || field.equals(VCFConstants.OLD_DEPTH_KEY) ) + result = String.valueOf(VCFConstants.MISSING_DEPTH_v3); + else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) + result = VCFConstants.UNFILTERED; + else if ( field.equals(VCFConstants.GENOTYPE_LIKELIHOODS_KEY) ) result = "0,0,0"; // TODO -- support haplotype quality //else if ( field.equals(HAPLOTYPE_QUALITY_KEY) ) @@ -351,10 +332,10 @@ public class VCFGenotypeRecord { public static Set getSupportedHeaderStrings(VCFHeaderVersion version) { Set result = new HashSet(); - result.add(new VCFFormatHeaderLine(GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); - result.add(new VCFFormatHeaderLine(GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); - result.add(new VCFFormatHeaderLine(DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); - result.add(new VCFFormatHeaderLine(GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype")); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality")); + result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)")); + result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, 3, VCFHeaderLineType.Float, "Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic")); //result.add(new VCFFormatHeaderLine(HAPLOTYPE_QUALITY_KEY, 1, VCFFormatHeaderLine.INFO_TYPE.Integer, "Haplotype Quality")); return result; } diff --git a/java/src/org/broad/tribble/vcf/VCFReaderUtils.java b/java/src/org/broad/tribble/vcf/VCFReaderUtils.java index 6ed7d9b13..9efe455c8 100644 --- a/java/src/org/broad/tribble/vcf/VCFReaderUtils.java +++ b/java/src/org/broad/tribble/vcf/VCFReaderUtils.java @@ -149,7 +149,7 @@ public class VCFReaderUtils { nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":"); parse = genotypeString.substring(0, nextDivider); } - if (key.equals(VCFGenotypeRecord.GENOTYPE_KEY)) { + if (key.equals(VCFConstants.GENOTYPE_KEY)) { Matcher m = gtPattern.matcher(parse); if (!m.matches()) throw new RuntimeException("VCFReaderUtils: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse); @@ -164,7 +164,7 @@ public class VCFReaderUtils { if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1; genotypeString = genotypeString.substring(nextDivider + 1, genotypeString.length()); } - if ( bases.size() > 0 && bases.get(0).equals(VCFGenotypeRecord.EMPTY_ALLELE) ) + if ( bases.size() > 0 && bases.get(0).equals(VCFConstants.EMPTY_ALLELE) ) tagToValue.clear(); // catch some common errors, either there are too many field keys or there are two many field values else if ( keyStrings.length != tagToValue.size() + ((bases.size() > 0) ? 1 : 0)) @@ -190,8 +190,8 @@ public class VCFReaderUtils { * @param bases the list of bases for this genotype call */ private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { - if (alleleNumber.equals(VCFGenotypeRecord.EMPTY_ALLELE)) { - bases.add(new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); + if (alleleNumber.equals(VCFConstants.EMPTY_ALLELE)) { + bases.add(new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); } else { int alleleValue = Integer.valueOf(alleleNumber); // check to make sure the allele value is within bounds diff --git a/java/src/org/broad/tribble/vcf/VCFRecord.java b/java/src/org/broad/tribble/vcf/VCFRecord.java index 5b9bff8f8..50a673b69 100644 --- a/java/src/org/broad/tribble/vcf/VCFRecord.java +++ b/java/src/org/broad/tribble/vcf/VCFRecord.java @@ -9,36 +9,6 @@ import java.util.*; /** the basic VCF record type */ public class VCFRecord implements Feature { - // standard info field keys - public static final String ANCESTRAL_ALLELE_KEY = "AA"; - public static final String ALLELE_COUNT_KEY = "AC"; - public static final String ALLELE_FREQUENCY_KEY = "AF"; - public static final String ALLELE_NUMBER_KEY = "AN"; - public static final String RMS_BASE_QUALITY_KEY = "BQ"; - public static final String DBSNP_KEY = "DB"; - public static final String DEPTH_KEY = "DP"; - public static final String HAPMAP2_KEY = "H2"; - public static final String HAPMAP3_KEY = "H3"; - public static final String RMS_MAPPING_QUALITY_KEY = "MQ"; - public static final String SAMPLE_NUMBER_KEY = "NS"; - public static final String STRAND_BIAS_KEY = "SB"; - - // commonly used strings that are in the standard - public static final String FORMAT_FIELD_SEPERATOR = ":"; - public static final String GENOTYPE_FIELD_SEPERATOR = ":"; - public static final String FIELD_SEPERATOR = "\t"; - public static final String FILTER_CODE_SEPERATOR = ";"; - public static final String INFO_FIELD_SEPERATOR = ";"; - - // default values - public static final String UNFILTERED = "."; - public static final String PASSES_FILTERS = "0"; - public static final String EMPTY_INFO_FIELD = "."; - public static final String EMPTY_ID_FIELD = "."; - public static final String EMPTY_ALLELE_FIELD = "."; - public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; - public static final int MISSING_GENOTYPE_QUALITY = -1; - // the reference base private String mReferenceBases; // our location @@ -176,7 +146,7 @@ public class VCFRecord implements Feature { case INFO: String vals[] = columnValues.get(val).split(";"); for (String alt : vals) { - if ( alt.equals(EMPTY_INFO_FIELD) ) + if ( alt.equals(VCFConstants.EMPTY_INFO_FIELD) ) continue; String keyVal[] = alt.split("="); if ( keyVal.length == 1 ) @@ -206,7 +176,7 @@ public class VCFRecord implements Feature { * @return the ID value for this record */ public String getID() { - return mID == null ? EMPTY_ID_FIELD : mID; + return mID == null ? VCFConstants.EMPTY_ID_FIELD : mID; } /** @@ -259,14 +229,14 @@ public class VCFRecord implements Feature { } public double getNonRefAlleleFrequency() { - if ( mInfoFields.containsKey(ALLELE_FREQUENCY_KEY) ) { - return Double.valueOf(mInfoFields.get(ALLELE_FREQUENCY_KEY)); + if ( mInfoFields.containsKey(VCFConstants.ALLELE_FREQUENCY_KEY) ) { + return Double.valueOf(mInfoFields.get(VCFConstants.ALLELE_FREQUENCY_KEY)); } else { // this is the poor man's AF - if ( mInfoFields.containsKey(ALLELE_COUNT_KEY) && mInfoFields.containsKey(ALLELE_NUMBER_KEY)) { - String splt[] = mInfoFields.get(ALLELE_COUNT_KEY).split(","); + if ( mInfoFields.containsKey(VCFConstants.ALLELE_COUNT_KEY) && mInfoFields.containsKey(VCFConstants.ALLELE_NUMBER_KEY)) { + String splt[] = mInfoFields.get(VCFConstants.ALLELE_COUNT_KEY).split(","); if ( splt.length > 0 ) { - return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get(ALLELE_NUMBER_KEY))); + return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get(VCFConstants.ALLELE_NUMBER_KEY))); } } } @@ -304,14 +274,14 @@ public class VCFRecord implements Feature { } public boolean isInDBSNP() { - return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(DBSNP_KEY) != null && mInfoFields.get(DBSNP_KEY).equals("1") ) ); + return ( ( mID != null && ! mID.equals(".") ) || ( mInfoFields.get(VCFConstants.DBSNP_KEY) != null && mInfoFields.get(VCFConstants.DBSNP_KEY).equals("1") ) ); } public boolean isInHapmap() { - if ( mInfoFields.get(HAPMAP2_KEY) != null && mInfoFields.get(HAPMAP2_KEY).equals("1") ) { + if ( mInfoFields.get(VCFConstants.HAPMAP2_KEY) != null && mInfoFields.get(VCFConstants.HAPMAP2_KEY).equals("1") ) { return true; } else { - return ( mInfoFields.get(HAPMAP3_KEY) != null && mInfoFields.get(HAPMAP3_KEY).equals("1") ); + return ( mInfoFields.get(VCFConstants.HAPMAP3_KEY) != null && mInfoFields.get(VCFConstants.HAPMAP3_KEY).equals("1") ); } } @@ -339,7 +309,7 @@ public class VCFRecord implements Feature { } public boolean isMissingQual() { - return (int)mQual == MISSING_GENOTYPE_QUALITY; + return VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)mQual)); } /** @@ -355,13 +325,13 @@ public class VCFRecord implements Feature { * @return an array of strings representing the filtering criteria, or UNFILTERED if none are applied */ public String[] getFilteringCodes() { - if (mFilterString == null) return new String[]{UNFILTERED}; - return mFilterString.split(FILTER_CODE_SEPERATOR); + if (mFilterString == null) return new String[]{VCFConstants.UNFILTERED}; + return mFilterString.split(VCFConstants.FILTER_CODE_SEPARATOR); } public boolean isFiltered() { String[] codes = getFilteringCodes(); - return !codes[0].equals(UNFILTERED) && !codes[0].equals(PASSES_FILTERS); + return !codes[0].equals(VCFConstants.UNFILTERED) && !codes[0].equals(VCFConstants.PASSES_FILTERS_v3); } // public boolean hasFilteringCodes() { @@ -432,8 +402,8 @@ public class VCFRecord implements Feature { } public void setQual(double qual) { - if ( qual < 0 && (int)qual != MISSING_GENOTYPE_QUALITY ) - throw new IllegalArgumentException("Qual values cannot be negative unless they are " + MISSING_GENOTYPE_QUALITY + " ('unknown')"); + if ( qual < 0 && !VCFConstants.MISSING_GENOTYPE_QUALITY_v3.equals(String.valueOf((int)qual)) ) + throw new IllegalArgumentException("Qual values cannot be negative unless they are " + VCFConstants.MISSING_GENOTYPE_QUALITY_v3 + " ('unknown')"); mQual = qual; } @@ -507,13 +477,13 @@ public class VCFRecord implements Feature { // CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO builder.append(mContig); - builder.append(FIELD_SEPERATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(mPosition); - builder.append(FIELD_SEPERATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(getID()); - builder.append(FIELD_SEPERATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(getReference()); - builder.append(FIELD_SEPERATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); List alts = getAlternateAlleles(); if ( alts.size() > 0 ) { builder.append(alts.get(0)); @@ -522,16 +492,16 @@ public class VCFRecord implements Feature { builder.append(alts.get(i)); } } else { - builder.append(EMPTY_ALLELE_FIELD); + builder.append(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD); } - builder.append(FIELD_SEPERATOR); - if ( (int)mQual == MISSING_GENOTYPE_QUALITY ) - builder.append(MISSING_GENOTYPE_QUALITY); + builder.append(VCFConstants.FIELD_SEPARATOR); + if ( isMissingQual() ) + builder.append(VCFConstants.MISSING_GENOTYPE_QUALITY_v3); else - builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING, mQual)); - builder.append(FIELD_SEPERATOR); - builder.append(ParsingUtils.join(FILTER_CODE_SEPERATOR, getFilteringCodes())); - builder.append(FIELD_SEPERATOR); + builder.append(String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, mQual)); + builder.append(VCFConstants.FIELD_SEPARATOR); + builder.append(ParsingUtils.join(VCFConstants.FILTER_CODE_SEPARATOR, getFilteringCodes())); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(createInfoString()); if ( mGenotypeFormatString != null && mGenotypeFormatString.length() > 0 ) { @@ -558,14 +528,14 @@ public class VCFRecord implements Feature { if ( isFirst ) isFirst = false; else - info.append(INFO_FIELD_SEPERATOR); + info.append(VCFConstants.INFO_FIELD_SEPARATOR); info.append(entry.getKey()); if ( entry.getValue() != null && !entry.getValue().equals("") ) { info.append("="); info.append(entry.getValue()); } } - return info.length() == 0 ? EMPTY_INFO_FIELD : info.toString(); + return info.length() == 0 ? VCFConstants.EMPTY_INFO_FIELD : info.toString(); } /** @@ -587,12 +557,12 @@ public class VCFRecord implements Feature { } throw new IllegalStateException("We have more genotype samples than the header specified; please check that samples aren't duplicated"); } - tempStr.append(FIELD_SEPERATOR + mGenotypeFormatString); + tempStr.append(VCFConstants.FIELD_SEPARATOR + mGenotypeFormatString); String[] genotypeFormatStrings = mGenotypeFormatString.split(":"); for ( String genotype : header.getGenotypeSamples() ) { - tempStr.append(FIELD_SEPERATOR); + tempStr.append(VCFConstants.FIELD_SEPARATOR); if ( gMap.containsKey(genotype) ) { VCFGenotypeRecord rec = gMap.get(genotype); tempStr.append(rec.toStringEncoding(mAlts, genotypeFormatStrings)); diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java index ab51b0918..993288c4a 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextUtils.java @@ -28,9 +28,8 @@ import org.apache.commons.jexl2.*; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.collections.ExpandingArrayList; import org.broadinstitute.sting.utils.genotype.HardyWeinbergCalculation; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFConstants; public class VariantContextUtils { public static JexlEngine engine = new JexlEngine(); @@ -240,8 +239,8 @@ public class VariantContextUtils { negLog10PError = Math.max(negLog10PError, vc.isVariant() ? vc.getNegLog10PError() : -1); filters.addAll(vc.getFilters()); - if ( vc.hasAttribute(VCFRecord.DEPTH_KEY) ) - depth += Integer.valueOf(vc.getAttributeAsString(VCFRecord.DEPTH_KEY)); + if ( vc.hasAttribute(VCFConstants.DEPTH_KEY) ) + depth += Integer.valueOf(vc.getAttributeAsString(VCFConstants.DEPTH_KEY)); if ( rsID == null && vc.hasAttribute("ID") ) rsID = vc.getAttributeAsString("ID"); @@ -259,7 +258,7 @@ public class VariantContextUtils { // we care about where the call came from if ( annotateOrigin ) { - String setValue = ""; + String setValue; if ( nFiltered == 0 && VCs.size() == priorityListOfVCs.size() ) // nothing was unfiltered setValue = "Intersection"; else if ( nFiltered == VCs.size() ) // everything was filtered out @@ -275,7 +274,7 @@ public class VariantContextUtils { } if ( depth > 0 ) - attributes.put(VCFRecord.DEPTH_KEY, String.valueOf(depth)); + attributes.put(VCFConstants.DEPTH_KEY, String.valueOf(depth)); if ( rsID != null ) attributes.put("ID", rsID); diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java index 667804194..0a59ae593 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptors.java @@ -178,13 +178,13 @@ public class VariantContextAdaptors { Map fields = new HashMap(); for ( Map.Entry e : vcfG.getFields().entrySet() ) { // todo -- fixme if we put GQ and GF into key itself - if ( ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) && ! e.getKey().equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) + if ( ! e.getKey().equals(VCFConstants.GENOTYPE_QUALITY_KEY) && ! e.getKey().equals(VCFConstants.GENOTYPE_FILTER_KEY) ) fields.put(e.getKey(), e.getValue()); } Set genotypeFilters = new HashSet(); if ( vcfG.isFiltered() ) // setup the FL genotype filter fields - genotypeFilters.addAll(Arrays.asList(vcfG.getFields().get(VCFGenotypeRecord.GENOTYPE_FILTER_KEY).split(";"))); + genotypeFilters.addAll(Arrays.asList(vcfG.getFields().get(VCFConstants.GENOTYPE_FILTER_KEY).split(";"))); double qual = vcfG.isMissingQual() ? VariantContext.NO_NEG_LOG_10PERROR : vcfG.getNegLog10PError(); Genotype g = new Genotype(vcfG.getSampleName(), genotypeAlleles, qual, genotypeFilters, fields, vcfG.getPhaseType() == VCFGenotypeRecord.PHASE.PHASED); @@ -260,13 +260,13 @@ public class VariantContextAdaptors { String contig = vc.getLocation().getContig(); long position = vc.getLocation().getStart(); - String ID = vc.hasAttribute("ID") ? vc.getAttributeAsString("ID") : VCFRecord.EMPTY_ID_FIELD; + String ID = vc.hasAttribute("ID") ? vc.getAttributeAsString("ID") : VCFConstants.EMPTY_ID_FIELD; double qual = vc.hasNegLog10PError() ? vc.getPhredScaledQual() : -1; - String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? VCFRecord.PASSES_FILTERS : VCFRecord.UNFILTERED); + String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? VCFConstants.PASSES_FILTERS_v3 : VCFConstants.UNFILTERED); Map alleleMap = new HashMap(); - alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); // convenience for lookup + alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); // convenience for lookup List vcfAltAlleles = new ArrayList(); for ( Allele a : vc.getAlleles() ) { @@ -313,15 +313,15 @@ public class VariantContextAdaptors { List vcfGenotypeAttributeKeys = new ArrayList(); if ( vc.hasGenotypes() ) { - vcfGenotypeAttributeKeys.add(VCFGenotypeRecord.GENOTYPE_KEY); + vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); for ( String key : calcVCFGenotypeKeys(vc) ) { if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) ) vcfGenotypeAttributeKeys.add(key); } if ( filtersWereAppliedToGenotypes ) - vcfGenotypeAttributeKeys.add(VCFGenotypeRecord.GENOTYPE_FILTER_KEY); + vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_FILTER_KEY); } - String genotypeFormatString = Utils.join(VCFRecord.GENOTYPE_FIELD_SEPERATOR, vcfGenotypeAttributeKeys); + String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys); List genotypeObjects = new ArrayList(vc.getGenotypes().size()); for ( Genotype g : vc.getGenotypesSortedByName() ) { @@ -335,22 +335,22 @@ public class VariantContextAdaptors { VCFGenotypeRecord vcfG = new VCFGenotypeRecord(g.getSampleName(), encodings, phasing); for ( String key : vcfGenotypeAttributeKeys ) { - if ( key.equals(VCFGenotypeRecord.GENOTYPE_KEY) ) + if ( key.equals(VCFConstants.GENOTYPE_KEY) ) continue; Object val = g.getAttribute(key); // some exceptions - if ( key.equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) ) { + if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) { if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 ) - val = VCFGenotypeRecord.MISSING_GENOTYPE_QUALITY; + val = VCFConstants.MISSING_GENOTYPE_QUALITY_v3; else val = Math.min(g.getPhredScaledQual(), VCFGenotypeRecord.MAX_QUAL_VALUE); - } else if ( key.equals(VCFGenotypeRecord.DEPTH_KEY) && val == null ) { + } else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) { ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY); if ( pileup != null ) val = pileup.size(); - } else if ( key.equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) { - val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFRecord.PASSES_FILTERS; + } else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) { + val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : VCFConstants.PASSES_FILTERS_v3; } String outputValue = formatVCFField(key, val); @@ -410,7 +410,7 @@ public class VariantContextAdaptors { } if ( sawGoodQual ) - keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY); + keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); return Utils.sorted(new ArrayList(keys)); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java index f3a977e6b..f2e04561a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/VariantsToVCF.java @@ -26,10 +26,7 @@ package org.broadinstitute.sting.gatk.walkers; import org.broad.tribble.dbsnp.DbSNPFeature; -import org.broad.tribble.vcf.VCFGenotypeRecord; -import org.broad.tribble.vcf.VCFHeader; -import org.broad.tribble.vcf.VCFHeaderLine; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -58,7 +55,7 @@ public class VariantsToVCF extends RodWalker { // Don't allow mixed types for now private EnumSet ALLOWED_VARIANT_CONTEXT_TYPES = EnumSet.of(VariantContext.Type.SNP, VariantContext.Type.NO_VARIATION, VariantContext.Type.INDEL); - private String[] ALLOWED_FORMAT_FIELDS = {VCFGenotypeRecord.GENOTYPE_KEY, VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, VCFGenotypeRecord.DEPTH_KEY, VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY }; + private String[] ALLOWED_FORMAT_FIELDS = {VCFConstants.GENOTYPE_KEY, VCFConstants.GENOTYPE_QUALITY_KEY, VCFConstants.DEPTH_KEY, VCFConstants.GENOTYPE_LIKELIHOODS_KEY }; public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { if ( tracker == null || !BaseUtils.isRegularBase(ref.getBase()) ) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java index 347f7e2aa..f0f3ec715 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java @@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.*; @@ -39,10 +39,10 @@ import java.util.*; public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation { - private String[] keyNames = { VCFRecord.ALLELE_NUMBER_KEY, VCFRecord.ALLELE_COUNT_KEY, VCFRecord.ALLELE_FREQUENCY_KEY }; - private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFRecord.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency"), - new VCFInfoHeaderLine(VCFRecord.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), - new VCFInfoHeaderLine(VCFRecord.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; + private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; + private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, -1, VCFHeaderLineType.Float, "Allele Frequency"), + new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, -1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), + new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; public Map annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc) { @@ -50,7 +50,7 @@ public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation return null; Map map = new HashMap(); - map.put(VCFRecord.ALLELE_NUMBER_KEY, vc.getChromosomeCount()); + map.put(VCFConstants.ALLELE_NUMBER_KEY, vc.getChromosomeCount()); if ( vc.getAlternateAlleles().size() > 0 ) { ArrayList alleleFreqs = new ArrayList(); @@ -60,8 +60,8 @@ public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation alleleFreqs.add((double)vc.getChromosomeCount(allele) / (double)vc.getChromosomeCount()); } - map.put(VCFRecord.ALLELE_COUNT_KEY, alleleCounts); - map.put(VCFRecord.ALLELE_FREQUENCY_KEY, alleleFreqs); + map.put(VCFConstants.ALLELE_COUNT_KEY, alleleCounts); + map.put(VCFConstants.ALLELE_FREQUENCY_KEY, alleleFreqs); } return map; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 43afe2db3..d7d9be1b0 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -29,7 +29,7 @@ public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation return map; } - public List getKeyNames() { return Arrays.asList(VCFRecord.DEPTH_KEY); } + public List getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); } public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Total Depth")); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java index 555d8823b..a5eaa5289 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java @@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -36,7 +37,7 @@ public class MappingQualityZero implements InfoFieldAnnotation, StandardAnnotati return map; } - public List getKeyNames() { return Arrays.asList("MQ0"); } + public List getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); } public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Total Mapping Quality Zero Reads")); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 433713faa..47225e11a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broad.tribble.vcf.VCFHeaderLineType; import org.broad.tribble.vcf.VCFInfoHeaderLine; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; @@ -38,7 +38,7 @@ public class RMSMappingQuality implements InfoFieldAnnotation, StandardAnnotatio return map; } - public List getKeyNames() { return Arrays.asList(VCFRecord.RMS_MAPPING_QUALITY_KEY); } + public List getKeyNames() { return Arrays.asList(VCFConstants.RMS_MAPPING_QUALITY_KEY); } public List getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Float, "RMS Mapping Quality")); } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 55f4cef71..6114b544f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -159,7 +159,7 @@ public class VariantAnnotatorEngine { List dataSources = engine.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { if ( source.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { - dbAnnotations.put(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, VCFRecord.DBSNP_KEY); + dbAnnotations.put(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, VCFConstants.DBSNP_KEY); } else if ( source.getName().startsWith(dbPrefix) ) { dbAnnotations.put(source.getName(), source.getName().substring(dbPrefix.length())); @@ -189,9 +189,9 @@ public class VariantAnnotatorEngine { for ( Map.Entry dbSet : dbAnnotations.entrySet() ) { if ( dbSet.getKey().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { DbSNPFeature dbsnp = DbSNPHelper.getFirstRealSNP(tracker.getReferenceMetaData(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME)); - infoAnnotations.put(VCFRecord.DBSNP_KEY, dbsnp == null ? false : true); + infoAnnotations.put(VCFConstants.DBSNP_KEY, dbsnp == null ? false : true); // annotate dbsnp id if available and not already there - if ( dbsnp != null && (!vc.hasAttribute("ID") || vc.getAttribute("ID").equals(VCFRecord.EMPTY_ID_FIELD)) ) + if ( dbsnp != null && (!vc.hasAttribute("ID") || vc.getAttribute("ID").equals(VCFConstants.EMPTY_ID_FIELD)) ) infoAnnotations.put("ID", dbsnp.getRsID()); } else { List dbRod = tracker.getReferenceMetaData(dbSet.getKey()); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index f6cfaa37f..7a4f452a5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -100,7 +100,7 @@ public class VariantFiltrationWalker extends RodWalker { hInfo.add(new VCFFilterHeaderLine(exp.name, exp.exp.toString())); if ( genotypeFilterExps.size() > 0 ) - hInfo.add(new VCFFormatHeaderLine(VCFGenotypeRecord.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter")); + hInfo.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter")); List dataSources = getToolkit().getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java index 53d3e6607..300ff8a01 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotypeCalculationModel.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper; -import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.genotype.DiploidGenotype; @@ -153,7 +153,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul CalledGenotype cg = new CalledGenotype(sample, myAlleles, AFbasedGenotype.second); cg.setLikelihoods(GLs.get(sample).getLikelihoods()); cg.setReadBackedPileup(contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).getBasePileup()); - cg.putAttribute(VCFGenotypeRecord.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()); + cg.putAttribute(VCFConstants.DEPTH_KEY, contexts.get(sample).getContext(StratifiedAlignmentContext.StratifiedContextType.COMPLETE).size()); cg.setPosteriors(GLs.get(sample).getPosteriors()); double[] likelihoods = GLs.get(sample).getLikelihoods(); @@ -161,7 +161,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul likelihoods[refGenotype.ordinal()], likelihoods[hetGenotype.ordinal()], likelihoods[homGenotype.ordinal()]); - cg.putAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY, GL); + cg.putAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY, GL); calls.put(sample, cg); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 85e82bf5c..e9f399766 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -144,7 +144,7 @@ public class UnifiedGenotyper extends LocusWalker dbSet : UG_engine.dbAnnotations.entrySet() ) headerInfo.add(new VCFInfoHeaderLine(dbSet.getValue(), 0, VCFHeaderLineType.Flag, (dbSet.getKey().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ? "dbSNP" : dbSet.getValue()) + " Membership")); if ( !UAC.NO_SLOD ) - headerInfo.add(new VCFInfoHeaderLine(VCFRecord.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias")); + headerInfo.add(new VCFInfoHeaderLine(VCFConstants.STRAND_BIAS_KEY, 1, VCFHeaderLineType.Float, "Strand Bias")); // FORMAT and INFO fields headerInfo.addAll(VCFGenotypeRecord.getSupportedHeaderStrings(VCFHeaderVersion.VCF3_3)); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 2f1c84194..96dab1593 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -46,7 +46,7 @@ import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; import org.broadinstitute.sting.utils.genotype.glf.GLFGenotypeWriter; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; import org.broadinstitute.sting.utils.pileup.*; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFConstants; import java.io.PrintStream; import java.util.*; @@ -113,7 +113,7 @@ public class UnifiedGenotyperEngine { List dataSources = toolkit.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { if ( source.getName().equals(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME) ) { - dbAnnotations.put(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, VCFRecord.DBSNP_KEY); + dbAnnotations.put(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, VCFConstants.DBSNP_KEY); } else if ( source.getName().startsWith(VariantAnnotatorEngine.dbPrefix) ) { dbAnnotations.put(source.getName(), source.getName().substring(VariantAnnotatorEngine.dbPrefix.length())); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java index 24363debe..3b1c9ad61 100644 --- a/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverter.java @@ -128,8 +128,8 @@ public class SequenomValidationConverter extends RodWalker { hInfo.add(new VCFInfoHeaderLine("HetPct", 1, VCFHeaderLineType.Float, "Percent of heterozygous genotypes")); hInfo.add(new VCFInfoHeaderLine("HomVarPct", 1, VCFHeaderLineType.Float, "Percent homozygous variant genotypes")); hInfo.add(new VCFInfoHeaderLine("HW", 1, VCFHeaderLineType.Float, "Phred-scaled Hardy-Weinberg violation p-value")); - hInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed")); - hInfo.add(new VCFInfoHeaderLine(VCFRecord.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes")); + hInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, 1, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed")); + hInfo.add(new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes")); hInfo.add(new VCFFilterHeaderLine("HardyWeinbergViolation", "The validation is in Hardy-Weinberg violation")); hInfo.add(new VCFFilterHeaderLine("HighNoCallRate", "The validation no-call rate is too high")); hInfo.add(new VCFFilterHeaderLine("TooManyHomVars", "The validation homozygous variant rate is too high")); @@ -202,8 +202,8 @@ public class SequenomValidationConverter extends RodWalker { int altAlleleCount = altAlleles.size() == 0 ? 0 : vContext.getChromosomeCount(altAlleles.iterator().next()); if ( !isViolation && altAlleleCount > 0 ) numTrueVariants++; - infoMap.put(VCFRecord.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount)); - infoMap.put(VCFRecord.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount())); + infoMap.put(VCFConstants.ALLELE_COUNT_KEY, String.format("%d", altAlleleCount)); + infoMap.put(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", vContext.getChromosomeCount())); record.addInfoFields(infoMap); // set the id if it's a plink rod diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java index 8f1ba6c2d..18a60bc19 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyVariantCuts.java @@ -101,10 +101,10 @@ public class ApplyVariantCuts extends RodWalker { throw new StingException("Can not find input file: " + TRANCHE_FILENAME); } - ALLOWED_FORMAT_FIELDS.add(VCFGenotypeRecord.GENOTYPE_KEY); // copied from VariantsToVCF - ALLOWED_FORMAT_FIELDS.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY); - ALLOWED_FORMAT_FIELDS.add(VCFGenotypeRecord.DEPTH_KEY); - ALLOWED_FORMAT_FIELDS.add(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY); + ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_KEY); // copied from VariantsToVCF + ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_QUALITY_KEY); + ALLOWED_FORMAT_FIELDS.add(VCFConstants.DEPTH_KEY); + ALLOWED_FORMAT_FIELDS.add(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); // setup the header fields final Set hInfo = new HashSet(); @@ -154,7 +154,7 @@ public class ApplyVariantCuts extends RodWalker { for( int tranche = qCuts.size() - 1; tranche >= 0; tranche-- ) { if( qual >= qCuts.get(tranche) ) { if(tranche == qCuts.size() - 1) { - vcf.setFilterString(VCFRecord.PASSES_FILTERS); + vcf.setFilterString(VCFConstants.PASSES_FILTERS_v3); setFilter = true; } else { vcf.setFilterString(filterName.get(tranche)); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 652c0005c..60ce01a08 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -123,10 +123,10 @@ public class VariantRecalibrator extends RodWalker hInfo = new HashSet(); @@ -198,7 +198,7 @@ public class VariantRecalibrator extends RodWalker { VCFRecord vcf = VariantContextAdaptors.toVCF(filteredVC, ref.getBase()); if ( filteredVC.getChromosomeCount() > 0 ) { - vcf.addInfoField(VCFRecord.ALLELE_NUMBER_KEY, String.format("%d", filteredVC.getChromosomeCount())); + vcf.addInfoField(VCFConstants.ALLELE_NUMBER_KEY, String.format("%d", filteredVC.getChromosomeCount())); if ( altAlleleCountString.length() > 0 ) { - vcf.addInfoField(VCFRecord.ALLELE_COUNT_KEY, altAlleleCountString.toString()); - vcf.addInfoField(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%4.2f", + vcf.addInfoField(VCFConstants.ALLELE_COUNT_KEY, altAlleleCountString.toString()); + vcf.addInfoField(VCFConstants.ALLELE_FREQUENCY_KEY, String.format("%4.2f", Double.valueOf(altAlleleCountString.toString())/(filteredVC.getChromosomeCount()))); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java index 3e2b79cf4..39a3fbefa 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java @@ -26,7 +26,7 @@ package org.broadinstitute.sting.playground.gatk.walkers; import org.broad.tribble.vcf.VCFRecord; -import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -119,8 +119,8 @@ public class ProduceBeagleInputWalker extends RodWalker { for ( String sample : samples ) { // use sample as key into genotypes structure Genotype genotype = genotypes.get(sample); - if (genotype.isCalled() && genotype.hasAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY)) { - String[] glArray = genotype.getAttributeAsString(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY).split(","); + if (genotype.isCalled() && genotype.hasAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY)) { + String[] glArray = genotype.getAttributeAsString(VCFConstants.GENOTYPE_LIKELIHOODS_KEY).split(","); Double maxLikelihood = -100.0; ArrayList likeArray = new ArrayList(); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java index 13a55d176..4d784dfbe 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/TrioGenotyperWalker.java @@ -25,7 +25,7 @@ package org.broadinstitute.sting.playground.gatk.walkers; -import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; @@ -159,9 +159,9 @@ public class TrioGenotyperWalker extends RefWalker{ * @return */ private double genotypeL( List alleles, Genotype genotypeCall ) { - String postTriplet = (String)genotypeCall.getAttribute(VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY); + String postTriplet = (String)genotypeCall.getAttribute(VCFConstants.GENOTYPE_LIKELIHOODS_KEY); if ( postTriplet == null ) - throw new StingException("BUG: TrioGenotyperWalker expected genotype likelihood triplets " + VCFGenotypeRecord.GENOTYPE_LIKELIHOODS_KEY); + throw new StingException("BUG: TrioGenotyperWalker expected genotype likelihood triplets " + VCFConstants.GENOTYPE_LIKELIHOODS_KEY); // calculate the offset -- AA => 0, AB => 1, BB => 2 int i = 0; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index ad16f6275..c5b26bb32 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -3,7 +3,7 @@ package org.broadinstitute.sting.utils.genotype.glf; import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.util.BinaryCodec; import net.sf.samtools.util.BlockCompressedOutputStream; -import org.broad.tribble.vcf.VCFGenotypeRecord; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.gatk.contexts.variantcontext.MutableGenotype; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -191,8 +191,8 @@ public class GLFWriter implements GLFGenotypeWriter { rms = (Double)((MutableGenotype)genotype).getAttribute(RMS_MAPPING_QUAL); // if we can't get the depth from the read pile-up (preferred), check the tags, the VC might have it - if (genotype.hasAttribute(VCFGenotypeRecord.DEPTH_KEY) && 0 == readCount) - readCount = (Integer)((MutableGenotype)genotype).getAttribute(VCFGenotypeRecord.DEPTH_KEY); + if (genotype.hasAttribute(VCFConstants.DEPTH_KEY) && 0 == readCount) + readCount = (Integer)((MutableGenotype)genotype).getAttribute(VCFConstants.DEPTH_KEY); addCall(GenomeLocParser.getContigInfo(vc.getLocation().getContig()), (int)vc.getLocation().getStart(), (float) rms, ref, readCount, obj); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java index 9e8d7c7b5..f4be429c9 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java @@ -2,7 +2,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broad.tribble.vcf.VCFGenotypeEncoding; import org.broad.tribble.vcf.VCFGenotypeRecord; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFConstants; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.Utils; @@ -76,7 +76,7 @@ class VCFParameters { public void addAlternateBase(VCFGenotypeEncoding base) { if ( !alternateBases.contains(base) && !base.toString().equals(String.valueOf(getReferenceBases()).toUpperCase()) && - !base.toString().equals(VCFGenotypeRecord.EMPTY_ALLELE) ) { + !base.toString().equals(VCFConstants.EMPTY_ALLELE) ) { alternateBases.add(base); alleleCounts.add(0); } @@ -92,7 +92,7 @@ class VCFParameters { } public String getFormatString() { - return Utils.join(VCFRecord.FORMAT_FIELD_SEPERATOR, formatList); + return Utils.join(VCFConstants.FORMAT_FIELD_SEPARATOR, formatList); } public List getGenotypeRecords() { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index 9064de4e5..02095a474 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -91,7 +91,7 @@ public class VCFUtils { public static VCFRecord mergeRecords(Map rods, Map, String> rodNamesToSampleNames) { VCFParameters params = new VCFParameters(); - params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY); + params.addFormatItem(VCFConstants.GENOTYPE_KEY); // keep track of the data so we can merge them intelligently double maxConfidence = 0.0; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index f0042a94d..8700f436d 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -15,7 +15,7 @@ import java.io.*; import java.util.*; /** - * this class writers VCF files + * this class writes VCF files */ public class VCFWriter { @@ -38,23 +38,6 @@ public class VCFWriter { Map numberUsedForInfoFields = new HashMap(); Map numberUsedForFormatFields = new HashMap(); - // commonly used strings that are in the standard - private final String FORMAT_FIELD_SEPARATOR = ":"; - private static final String GENOTYPE_FIELD_SEPARATOR = ":"; - private static final String FIELD_SEPARATOR = "\t"; - private static final String FILTER_CODE_SEPARATOR = ";"; - private static final String INFO_FIELD_SEPARATOR = ";"; - - // default values - private static final String UNFILTERED = "."; - private static final String PASSES_FILTERS_VCF3 = "0"; - private static final String PASSES_FILTERS_VCF4 = "PASS"; - private static final String EMPTY_INFO_FIELD = "."; - private static final String EMPTY_ID_FIELD = "."; - private static final String EMPTY_ALLELE_FIELD = "."; - private static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; - private static final String MISSING_GENOTYPE_FIELD = "."; - /** * create a VCF writer, given a file to write to * @@ -66,7 +49,7 @@ public class VCFWriter { public VCFWriter(File location, boolean useVCF4Format) { this.writingVCF40Format = useVCF4Format; - this.PASSES_FILTERS_STRING = useVCF4Format ? PASSES_FILTERS_VCF4 : PASSES_FILTERS_VCF3; + this.PASSES_FILTERS_STRING = useVCF4Format ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.PASSES_FILTERS_v3; FileOutputStream output; try { @@ -90,7 +73,7 @@ public class VCFWriter { } public VCFWriter(OutputStream output, boolean useVCF4Format) { this.writingVCF40Format = useVCF4Format; - this.PASSES_FILTERS_STRING = useVCF4Format ? PASSES_FILTERS_VCF4 : PASSES_FILTERS_VCF3; + this.PASSES_FILTERS_STRING = useVCF4Format ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.PASSES_FILTERS_v3; mWriter = new BufferedWriter(new OutputStreamWriter(output)); } @@ -148,12 +131,12 @@ public class VCFWriter { StringBuilder b = new StringBuilder(); b.append(VCFHeader.HEADER_INDICATOR); for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) - b.append(field + FIELD_SEPARATOR); + b.append(field + VCFConstants.FIELD_SEPARATOR); if (header.hasGenotypingData()) { - b.append("FORMAT" + FIELD_SEPARATOR); + b.append("FORMAT" + VCFConstants.FIELD_SEPARATOR); for (String field : header.getGenotypeSamples()) - b.append(field + FIELD_SEPARATOR); + b.append(field + VCFConstants.FIELD_SEPARATOR); } mWriter.write(b.toString() + "\n"); mWriter.flush(); // necessary so that writing to an output stream will work @@ -232,7 +215,7 @@ public class VCFWriter { String contig = loc.getContig(); long position = loc.getStart(); - String ID = vc.hasAttribute("ID") ? vc.getAttributeAsString("ID") : EMPTY_ID_FIELD; + String ID = vc.hasAttribute("ID") ? vc.getAttributeAsString("ID") : VCFConstants.EMPTY_ID_FIELD; // deal with the reference @@ -243,10 +226,10 @@ public class VCFWriter { boolean filtersWereAppliedToContext = true; List allowedGenotypeAttributeKeys = null; - String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? PASSES_FILTERS_STRING : UNFILTERED); + String filters = vc.isFiltered() ? Utils.join(";", Utils.sorted(vc.getFilters())) : (filtersWereAppliedToContext ? PASSES_FILTERS_STRING : VCFConstants.UNFILTERED); Map alleleMap = new HashMap(); - alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFGenotypeRecord.EMPTY_ALLELE)); // convenience for lookup + alleleMap.put(Allele.NO_CALL, new VCFGenotypeEncoding(VCFConstants.EMPTY_ALLELE)); // convenience for lookup List vcfAltAlleles = new ArrayList(); int numTrailingBases = 0, numPaddingBases = 0; @@ -339,16 +322,16 @@ public class VCFWriter { List vcfGenotypeAttributeKeys = new ArrayList(); if ( vc.hasGenotypes() ) { - vcfGenotypeAttributeKeys.add(VCFGenotypeRecord.GENOTYPE_KEY); + vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); for ( String key : calcVCFGenotypeKeys(vc) ) { if ( allowedGenotypeAttributeKeys == null || allowedGenotypeAttributeKeys.contains(key) ) vcfGenotypeAttributeKeys.add(key); } } else if ( header.hasGenotypingData() ) { // this needs to be done in case all samples are no-calls - vcfGenotypeAttributeKeys.add(VCFGenotypeRecord.GENOTYPE_KEY); + vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); } - String genotypeFormatString = Utils.join(GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys); + String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys); List genotypeObjects = new ArrayList(vc.getGenotypes().size()); for ( Genotype g : vc.getGenotypesSortedByName() ) { @@ -362,27 +345,25 @@ public class VCFWriter { VCFGenotypeRecord vcfG = new VCFGenotypeRecord(g.getSampleName(), encodings, phasing); for ( String key : vcfGenotypeAttributeKeys ) { - if ( key.equals(VCFGenotypeRecord.GENOTYPE_KEY) ) + if ( key.equals(VCFConstants.GENOTYPE_KEY) ) continue; - Object val = g.hasAttribute(key) ? g.getAttribute(key) : MISSING_GENOTYPE_FIELD; + Object val = g.hasAttribute(key) ? g.getAttribute(key) : VCFConstants.MISSING_VALUE_v4; // some exceptions - if ( key.equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) ) { + if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) { if ( MathUtils.compareDoubles(g.getNegLog10PError(), Genotype.NO_NEG_LOG_10PERROR) == 0 ) - val = MISSING_GENOTYPE_FIELD; + val = VCFConstants.MISSING_VALUE_v4; else { - // TODO - check whether we need to saturate quality to 99 as in VCF3.3 coder. For now allow unbounded values - // val = Math.min(g.getPhredScaledQual(), VCFGenotypeRecord.MAX_QUAL_VALUE); - val = g.getPhredScaledQual(); + val = Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL); } - } else if ( key.equals(VCFGenotypeRecord.DEPTH_KEY) && val == null ) { + } else if ( key.equals(VCFConstants.DEPTH_KEY) && val == null ) { ReadBackedPileup pileup = (ReadBackedPileup)g.getAttribute(CalledGenotype.READBACKEDPILEUP_ATTRIBUTE_KEY); if ( pileup != null ) val = pileup.size(); - } else if ( key.equals(VCFGenotypeRecord.GENOTYPE_FILTER_KEY) ) { + } else if ( key.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) { // VCF 4.0 key for no filters is "." val = g.isFiltered() ? Utils.join(";", Utils.sorted(g.getFilters())) : PASSES_FILTERS_STRING; } @@ -403,13 +384,13 @@ public class VCFWriter { if (numberUsedForFormatFields.containsKey(key)){ int numInFormatField = numberUsedForFormatFields.get(key); - if (numInFormatField>1 && val.equals(MISSING_GENOTYPE_FIELD)) { + if (numInFormatField>1 && val.equals(VCFConstants.MISSING_VALUE_v4)) { // If we have a missing field but multiple values are expected, we need to construct new string with all fields. // for example for Number =2, string has to be ".,." - StringBuilder v = new StringBuilder(MISSING_GENOTYPE_FIELD); + StringBuilder v = new StringBuilder(VCFConstants.MISSING_VALUE_v4); for ( int i = 1; i < numInFormatField; i++ ) { v.append(","); - v.append(MISSING_GENOTYPE_FIELD); + v.append(VCFConstants.MISSING_VALUE_v4); } newVal = v.toString(); } @@ -446,13 +427,13 @@ public class VCFWriter { builder.append(contig); - builder.append(FIELD_SEPARATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(position); - builder.append(FIELD_SEPARATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(ID); - builder.append(FIELD_SEPARATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(referenceFromVC); - builder.append(FIELD_SEPARATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); if ( vcfAltAlleles.size() > 0 ) { builder.append(vcfAltAlleles.get(0)); @@ -461,21 +442,21 @@ public class VCFWriter { builder.append(vcfAltAlleles.get(i)); } } else { - builder.append(EMPTY_ALLELE_FIELD); + builder.append(VCFConstants.EMPTY_ALLELE); } - builder.append(FIELD_SEPARATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); if ( qual == -1 ) - builder.append(MISSING_GENOTYPE_FIELD); + builder.append(VCFConstants.MISSING_VALUE_v4); else - builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING, qual)); + builder.append(String.format(VCFConstants.DOUBLE_PRECISION_FORMAT_STRING, qual)); - builder.append(FIELD_SEPARATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(filters); - builder.append(FIELD_SEPARATOR); + builder.append(VCFConstants.FIELD_SEPARATOR); builder.append(createInfoString(infoFields)); if ( genotypeFormatString != null && genotypeFormatString.length() > 0 ) { @@ -509,12 +490,12 @@ public class VCFWriter { } throw new IllegalStateException("We have more genotype samples than the header specified; please check that samples aren't duplicated"); } - tempStr.append(FIELD_SEPARATOR + genotypeFormatString); + tempStr.append(VCFConstants.FIELD_SEPARATOR + genotypeFormatString); String[] genotypeFormatStrings = genotypeFormatString.split(":"); for ( String genotype : header.getGenotypeSamples() ) { - tempStr.append(FIELD_SEPARATOR); + tempStr.append(VCFConstants.FIELD_SEPARATOR); if ( gMap.containsKey(genotype) ) { VCFGenotypeRecord rec = gMap.get(genotype); String genotypeString = rec.toStringEncoding(vcfAltAlleles, genotypeFormatStrings, true); @@ -588,7 +569,7 @@ public class VCFWriter { if ( isFirst ) isFirst = false; else - info.append(INFO_FIELD_SEPARATOR); + info.append(VCFConstants.INFO_FIELD_SEPARATOR); info.append(entry.getKey()); @@ -612,7 +593,7 @@ public class VCFWriter { } } } - return info.length() == 0 ? EMPTY_INFO_FIELD : info.toString(); + return info.length() == 0 ? VCFConstants.EMPTY_INFO_FIELD : info.toString(); } private static String formatVCFField(String key, Object val) { @@ -651,7 +632,7 @@ public class VCFWriter { } if ( sawGoodQual ) - keys.add(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY); + keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); return Utils.sorted(new ArrayList(keys)); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java index f375e6616..61744e0bc 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/recalibration/RecalibrationWalkersIntegrationTest.java @@ -17,7 +17,7 @@ public class RecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testCountCovariates1() { HashMap e = new HashMap(); - e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "" ); + e.put( validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam", "e2ea7507feb66651f52320c5a46433c2" ); e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SOLID.bam", "b2191ea11f528b9605b727d8a73dd1e1"); e.put( validationDataLocation + "NA12873.454.SRP000031.2009_06.chr1.10_20mb.bam", "596a9ec9cbc1da70481e45a5a588a41d" ); e.put( validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam", "507dbd3ba6f54e066d04c4d24f59c3ab" );