diff --git a/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java index e7fab8527..c9a4030b5 100644 --- a/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java @@ -1,3 +1,27 @@ +/* + * Copyright (c) 2010, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broad.tribble.vcf; import java.util.Arrays; @@ -56,6 +80,15 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF this.lineType = lineType; } + protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType, VCFHeaderVersion version) { + super(lineType.toString(), "", version); + this.name = name; + this.count = count; + this.type = type; + this.description = description; + this.lineType = lineType; + } + /** * create a VCF format header line * @@ -111,6 +144,12 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF type == other.type; } + public boolean equalsExcludingDescription(VCFCompoundHeaderLine other) { + return count == other.count && + type == other.type && + name.equals(other.name); + } + /** * do we allow flag (boolean) values? (i.e. booleans where you don't have specify the value, AQ means AQ=true) * @return true if we do, false otherwise diff --git a/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java b/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java index 98b423cfb..e9739132d 100644 --- a/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broad/tribble/vcf/VCFGenotypeRecord.java @@ -1,5 +1,7 @@ package org.broad.tribble.vcf; +import org.broadinstitute.sting.utils.Utils; + import java.util.*; @@ -35,7 +37,10 @@ public class VCFGenotypeRecord { // what kind of phasing this genotype has public enum PHASE { - UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN + UNPHASED("/"), PHASED("|"), PHASED_SWITCH_PROB("\\"); // , UNKNOWN + + String genotypeSeparator; + PHASE(String sep) { this.genotypeSeparator = sep; } } // our record @@ -98,14 +103,12 @@ public class VCFGenotypeRecord { */ static PHASE determinePhase(String phase) { // find the phasing information - if (phase.equals("/")) - return PHASE.UNPHASED; - else if (phase.equals("|")) - return PHASE.PHASED; - else if (phase.equals("\\")) - return PHASE.PHASED_SWITCH_PROB; - else - throw new IllegalArgumentException("Unknown genotype phasing parameter"); + for ( PHASE p : PHASE.values() ) { + if (phase.equals(p.genotypeSeparator)) + return p; + } + + throw new IllegalArgumentException("Unknown genotype phasing parameter: " + phase); } @@ -206,7 +209,7 @@ public class VCFGenotypeRecord { } public int getPloidy() { - return 2; + return mGenotypeAlleles.size(); } public VCFRecord getRecord() { @@ -214,32 +217,15 @@ public class VCFGenotypeRecord { } private String toGenotypeString(List altAlleles) { - String str = ""; - boolean first = true; + List alleleStrings = new ArrayList(altAlleles.size()); for (VCFGenotypeEncoding allele : mGenotypeAlleles) { if (allele.getType() == VCFGenotypeEncoding.TYPE.UNCALLED) - str += VCFGenotypeRecord.EMPTY_ALLELE; + alleleStrings.add(VCFGenotypeRecord.EMPTY_ALLELE); else - str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0); - if (first) { - switch (mPhaseType) { - case UNPHASED: - str += "/"; - break; - case PHASED: - str += "|"; - break; - case PHASED_SWITCH_PROB: - str += "\\"; - break; - case UNKNOWN: - throw new UnsupportedOperationException("Unknown phase type"); - } - first = false; - } + alleleStrings.add(String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0)); } - return str; + return Utils.join(mPhaseType.genotypeSeparator, alleleStrings); } @Override diff --git a/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java index 7b8b69cbf..deeaea001 100755 --- a/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFInfoHeaderLine.java @@ -10,6 +10,10 @@ package org.broad.tribble.vcf; */ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine { + public VCFInfoHeaderLine(String name, int count, VCFHeaderLineType type, String description, VCFHeaderVersion version) { + super(name, count, type, description, SupportedHeaderLineType.INFO, version); + } + public VCFInfoHeaderLine(String name, int count, VCFHeaderLineType type, String description) { super(name, count, type, description, SupportedHeaderLineType.INFO); } @@ -23,4 +27,4 @@ public class VCFInfoHeaderLine extends VCFCompoundHeaderLine { boolean allowFlagValues() { return true; } -} \ No newline at end of file +} diff --git a/java/src/org/broad/tribble/vcf/VCFReaderUtils.java b/java/src/org/broad/tribble/vcf/VCFReaderUtils.java index 3466ce0b3..6ed7d9b13 100644 --- a/java/src/org/broad/tribble/vcf/VCFReaderUtils.java +++ b/java/src/org/broad/tribble/vcf/VCFReaderUtils.java @@ -136,7 +136,7 @@ public class VCFReaderUtils { public static VCFGenotypeRecord getVCFGenotype(String sampleName, String[] keyStrings, String genotypeString, String altAlleles[], char referenceBase) { // parameters to create the VCF genotype record HashMap tagToValue = new HashMap(); - VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNKNOWN; + VCFGenotypeRecord.PHASE phase = VCFGenotypeRecord.PHASE.UNPHASED; List bases = new ArrayList(); for (String key : keyStrings) { diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java index 75eda244f..11d5908f5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java @@ -183,10 +183,10 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { * @param cache * @return */ - private static List parseGenotypeAlleles(String GT, List alleles, Map> cache) { + private List parseGenotypeAlleles(String GT, List alleles, Map> cache) { // this should cache results [since they are immutable] and return a single object for each genotype if ( GT.length() != 3 && GT.length() != 1 ) - throw new StingException("Unreasonable number of alleles: " + "GT=" + GT + " length=" + GT.length()); // 0/1 => barf on 10/0 + throw new VCFParserException("Unreasonable number of alleles: " + "GT=" + GT + " length=" + GT.length()); // 0/1 => barf on 10/0 List GTAlleles = cache.get(GT); @@ -258,7 +258,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { int count = 0; for (String attr : attributes) if (Collections.binarySearch(fields,attr) < 0) - throw new StingException("Unable to find field describing attribute " + attr); + throw new VCFParserException("Unable to find field describing attribute " + attr); } } @@ -282,7 +282,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { List alleles = new ArrayList(2); // we are almost always biallelic // ref if (!checkAllele(ref, true)) - throw new StingException("Unable to parse out correct reference allele, we saw = " + ref); + throw new VCFParserException("Unable to parse out correct reference allele, we saw = " + ref); Allele refAllele = Allele.create(ref, true); alleles.add(refAllele); @@ -301,13 +301,13 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { * @param isRef are we the reference allele? * @return true if the allele is fine, false otherwise */ - private static boolean checkAllele(String allele,boolean isRef) { + private boolean checkAllele(String allele,boolean isRef) { if (allele.contains("<")) { Utils.warnUser("We are currently unable to parse out CNV encodings in VCF, we saw the following allele = " + allele); return false; } else if ( ! Allele.acceptableAlleleBases(allele,isRef) ) { - throw new StingException("Unparsable vcf record with allele " + allele); + throw new VCFParserException("Unparsable vcf record with allele " + allele); } return true; } @@ -320,7 +320,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { */ private void parseSingleAllele(List alleles, String alt, boolean isRef) { if (!checkAllele(alt,isRef)) - throw new StingException("Unable to parse out correct alt allele, we saw = " + alt); + throw new VCFParserException("Unable to parse out correct alt allele, we saw = " + alt); Allele allele = Allele.create(alt, false); if ( ! allele.isNoCall() ) @@ -403,10 +403,16 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { } return new VariantContext(name, locAndAlleles.first, locAndAlleles.second, genotypes, qual, filters, attributes); -// } catch ( Exception e ) { -// // todo -- we need a local exception so that we can remember the location of the throw but also see the line -// throw new RuntimeException("Line " + lineNo + " generated parser exception " + e.getMessage() + "\nLine: " + Utils.join("\t", parts), e); -// } + } + + class VCFParserException extends StingException { + public VCFParserException(String msg) { + super("Line " + lineNo + " generated parser exception " + msg); + } + + public VCFParserException(String msg, Throwable throwable) { + super("Line " + lineNo + " generated parser exception " + msg, throwable); + } } /** @@ -439,7 +445,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { // check to see if the value list is longer than the key list, which is a problem if (nGTKeys < GTValueSplitSize) - throw new StingException("Too few keys for compared to the value string " + sampleName + ", keys = " + parts[8] + " values = " + parts[genotypeOffset]); + throw new VCFParserException("Too few keys for compared to the value string " + sampleName + ", keys = " + parts[8] + " values = " + parts[genotypeOffset]); int genotypeAlleleLocation = -1; if (nGTKeys > 1) { @@ -449,7 +455,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { gtAttributes.put(genotypeKeyArray[i],"."); else if (genotypeKeyArray[i].equals("GT")) if (i != 0) - throw new StingException("Saw GT at position " + i + ", it must be at the first position for genotypes. At location = " + locAndAlleles.first); + throw new VCFParserException("Saw GT at position " + i + ", it must be at the first position for genotypes. At location = " + locAndAlleles.first); else genotypeAlleleLocation = i; else if (genotypeKeyArray[i].equals("GQ")) @@ -464,7 +470,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { validateFields(gtAttributes.keySet(), new ArrayList(formatFields.keySet())); } // check to make sure we found a gentoype field - if (genotypeAlleleLocation < 0) throw new StingException("Unable to find required field GT for record " + locAndAlleles.first); + if (genotypeAlleleLocation < 0) throw new VCFParserException("Unable to find required field GT for record " + locAndAlleles.first); // assuming allele list length in the single digits, could be bad. Check for > 1 for haploid genotypes boolean phased = GTValueArray[genotypeAlleleLocation].length() > 1 && GTValueArray[genotypeAlleleLocation].charAt(1) == '|'; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java index 7a37fee76..fcb251e39 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java @@ -84,6 +84,7 @@ public class CombineVariants extends RodWalker { Set headerLines = smartMergeHeaders(vcfRods.values()); headerLines.add(new VCFHeaderLine("source", "CombineVariants")); + headerLines.add(new VCFInfoHeaderLine("set", 1, VCFHeaderLineType.String, "Source VCF for the merged record in CombineVariants", VCFHeaderVersion.VCF4_0)); vcfWriter.writeHeader(new VCFHeader(headerLines, samples)); } @@ -123,13 +124,18 @@ public class CombineVariants extends RodWalker { String otherName = ((VCFFilterHeaderLine) other).getName(); if ( ! lineName.equals(otherName) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); - } else { - String lineName = ((VCFCompoundHeaderLine) line).getName(); - String otherName = ((VCFCompoundHeaderLine) other).getName(); + } else if ( line instanceof VCFCompoundHeaderLine ) { + VCFCompoundHeaderLine compLine = (VCFCompoundHeaderLine)line; + VCFCompoundHeaderLine compOther = (VCFCompoundHeaderLine)other; // if the names are the same, but the values are different, we need to quit - if (lineName.equals(otherName) && !line.equals(other)) + if (! (compLine).equalsExcludingDescription(compOther) ) throw new IllegalStateException("Incompatible header types, collision between these two types: " + line + " " + other ); + if ( ! compLine.getDescription().equals(compOther) ) + logger.warn(String.format("Allowing unequal description fields through: keeping " + compOther + " excluding " + compLine)); + } else { + // we are not equal, but we're not anything special either + logger.warn(String.format("Ignoring header line already in map: this header line = " + line + " already present header = " + other)); } } else { map.put(key, line); @@ -179,4 +185,4 @@ public class CombineVariants extends RodWalker { if ( vcfWriter != null ) vcfWriter.close(); } -} \ No newline at end of file +} diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index f0fff117e..f815608a4 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -364,11 +364,7 @@ public class VCFWriter { continue; - Object val; - if (g.hasAttribute(key)) - val = g.getAttribute(key); - else - val = new String(MISSING_GENOTYPE_FIELD); + Object val = g.hasAttribute(key) ? g.getAttribute(key) : MISSING_GENOTYPE_FIELD; // some exceptions if ( key.equals(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY) ) {