From a69ea9b57ca64f917122941b9598abfc353c3dd9 Mon Sep 17 00:00:00 2001 From: aaron Date: Fri, 16 Oct 2009 04:11:34 +0000 Subject: [PATCH] Cleaning up the VCF code, adding lots of tests for a variety of edge cases. Two issues are still outstanding: updating the no call string with the standard 1000g decided on today, and fixing Eric's issue where not all the VCF sample names are present initially. also: their, I hope your happy Eric, from now on I'll try not to flout my awesomest grammer in the future accept when I need to illicit a strong response :-) git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1858 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/RodVCF.java | 27 +-- .../walkers/variantstovcf/VariantsToVCF.java | 19 +-- .../genotype/vcf/VCFGenotypeEncoding.java | 122 ++++++++++++++ .../utils/genotype/vcf/VCFGenotypeRecord.java | 10 +- .../vcf/VCFGenotypeWriterAdapter.java | 10 +- .../utils/genotype/vcf/VCFParameters.java | 8 +- .../sting/utils/genotype/vcf/VCFReader.java | 8 +- .../sting/utils/genotype/vcf/VCFRecord.java | 120 +++++++------ .../sting/gatk/refdata/RodVCFTest.java | 2 +- .../genotype/vcf/VCFGenotypeEncodingTest.java | 151 +++++++++++++++++ .../utils/genotype/vcf/VCFReaderTest.java | 24 +-- .../utils/genotype/vcf/VCFRecordTest.java | 157 +++++++++++++++--- .../utils/genotype/vcf/VCFWriterTest.java | 16 +- 13 files changed, 544 insertions(+), 130 deletions(-) create mode 100644 java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java create mode 100644 java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncodingTest.java diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 3b6d1d76c..46322d3e0 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -4,8 +4,11 @@ import org.broadinstitute.sting.utils.GenomeLoc; 
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.genotype.*; +import org.broadinstitute.sting.utils.genotype.BasicGenotype; +import org.broadinstitute.sting.utils.genotype.DiploidGenotype; import org.broadinstitute.sting.utils.genotype.Genotype; +import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; +import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeEncoding; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; @@ -115,8 +118,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, this.assertNotNull(); if (!mCurrentRecord.hasAlternateAllele()) return false; - for (String alt : this.mCurrentRecord.getAlternateAlleles()) { - if (alt.length() != 1) + for (VCFGenotypeEncoding alt : this.mCurrentRecord.getAlternateAlleles()) { + if (alt.getType() != VCFGenotypeEncoding.TYPE.SINGLE_BASE) return false; } return true; @@ -132,8 +135,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, this.assertNotNull(); if (!mCurrentRecord.hasAlternateAllele()) return false; - for (String alt : this.mCurrentRecord.getAlternateAlleles()) { - if (alt.startsWith("I")) + for (VCFGenotypeEncoding alt : this.mCurrentRecord.getAlternateAlleles()) { + if (alt.getType() == VCFGenotypeEncoding.TYPE.INSERTION) return true; } return false; @@ -149,8 +152,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, this.assertNotNull(); if (!mCurrentRecord.hasAlternateAllele()) return false; - for (String alt : this.mCurrentRecord.getAlternateAlleles()) { - if (alt.startsWith("D")) + for (VCFGenotypeEncoding alt : this.mCurrentRecord.getAlternateAlleles()) { + if (alt.getType() == 
VCFGenotypeEncoding.TYPE.DELETION) return true; } return false; @@ -208,7 +211,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, public String getAlternateBases() { if (!this.isBiallelic()) throw new UnsupportedOperationException("We're not biallelic, so please call getAlternateBaseList instead"); - return this.mCurrentRecord.getAlternateAlleles().get(0); + return this.mCurrentRecord.getAlternateAlleles().get(0).toString(); } /** @@ -218,7 +221,10 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, */ @Override public List getAlternateBaseList() { - return this.mCurrentRecord.getAlternateAlleles(); + List list = new ArrayList(); + for (VCFGenotypeEncoding enc : mCurrentRecord.getAlternateAlleles()) + list.add(enc.toString()); + return list; } /** @@ -240,7 +246,8 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, @Override public char getAlternativeBaseForSNP() { if (!isSNP()) throw new IllegalStateException("we're not a SNP"); - return mCurrentRecord.getAlternateAlleles().get(0).charAt(0); + if (mCurrentRecord.getAlternateAlleles().size() != 1) throw new UnsupportedOperationException("We're not a biallelic VCF site"); + return (mCurrentRecord.getAlternateAlleles().get(0).toString()).charAt(0); } /** diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 4157878af..229cbdc5d 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -14,10 +14,7 @@ import org.broadinstitute.sting.utils.cmdLine.Argument; import org.broadinstitute.sting.utils.genotype.Genotype; import org.broadinstitute.sting.utils.genotype.VariantBackedByGenotype; import 
org.broadinstitute.sting.utils.genotype.Variation; -import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFHeader; -import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; -import org.broadinstitute.sting.utils.genotype.vcf.VCFWriter; +import org.broadinstitute.sting.utils.genotype.vcf.*; import java.io.File; import java.io.PrintStream; @@ -103,7 +100,7 @@ public class VariantsToVCF extends RefWalker { int[] alleleNames = {0, 1, 2, 3}; double snpQual = 0.0; int refbase = BaseUtils.simpleBaseToBaseIndex(ref.getBase()); - List alts = new ArrayList(); + List alts = new ArrayList(); for (String name : vcfheader.getGenotypeSamples()) { ReferenceOrderedDatum rod = tracker.lookup(sampleNamesToRods.get(name), null); if (rod != null) { @@ -118,10 +115,10 @@ public class VariantsToVCF extends RefWalker { if (!(rod instanceof VariantBackedByGenotype)) throw new IllegalArgumentException("The passed in variant type must be backed by genotype data"); Genotype genotype = ((VariantBackedByGenotype) rod).getCalledGenotype(); - List alleles = new ArrayList(); + List alleles = new ArrayList(); for (char base : genotype.getBases().toCharArray()) { - alleles.add(String.valueOf(base)); - if (base != ref.getBase() && !alts.contains(String.valueOf(base))) alts.add(String.valueOf(base)); + alleles.add(new VCFGenotypeEncoding(String.valueOf(base))); + if (base != ref.getBase() && !alts.contains(String.valueOf(base))) alts.add(new VCFGenotypeEncoding(String.valueOf(base))); } int allele1 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(0)); int allele2 = BaseUtils.simpleBaseToBaseIndex(genotype.getBases().charAt(1)); @@ -141,9 +138,9 @@ public class VariantsToVCF extends RefWalker { snpQual += av.getNegLog10PError(); } else { Map str = new HashMap(); - List alleles = new ArrayList(); - alleles.add(String.valueOf(ref.getBase())); - alleles.add(String.valueOf(ref.getBase())); + List alleles = new 
ArrayList(); + alleles.add(new VCFGenotypeEncoding(String.valueOf(ref.getBase()))); + alleles.add(new VCFGenotypeEncoding(String.valueOf(ref.getBase()))); gt.add(new VCFGenotypeRecord(name, alleles, VCFGenotypeRecord.PHASE.UNPHASED, str)); numRefs++; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java new file mode 100644 index 000000000..df300de35 --- /dev/null +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncoding.java @@ -0,0 +1,122 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + + +/** + * @author aaron + *

+ * Class VCFGenotypeEncoding + *

+ * basic encoding class for genotype fields in VCF + */ +public class VCFGenotypeEncoding { + public enum TYPE { + SINGLE_BASE, + INSERTION, + DELETION, + UNCALLED + } + + // our length (0 for SINGLE_BASE), our bases, and our type + private final int mLength; + private final String mBases; + private final TYPE mType; + + // public constructor, that parses out the base string + public VCFGenotypeEncoding(String baseString) { + if ((baseString.length() == 1)) { + // are we an empty (no-call) genotype? + if (baseString.equals(VCFGenotypeRecord.EMPTY_GENOTYPE)) { + mBases = VCFGenotypeRecord.EMPTY_GENOTYPE; + mLength = 0; + mType = TYPE.UNCALLED; + } else if (!validBases(baseString)) { + throw new IllegalArgumentException("Alleles of length 1 must be one of A,C,G,T, " + baseString + " was passed in"); + } else { // we're a valid base + mBases = baseString.toUpperCase(); + mLength = 0; + mType = TYPE.SINGLE_BASE; + } + } else { // deletion or insertion + if (baseString.length() < 1 || (baseString.toUpperCase().charAt(0) != 'D' && baseString.toUpperCase().charAt(0) != 'I')) { + throw new IllegalArgumentException("Genotype encoding of " + baseString + " was passed in, but is not a valid deletion, insertion, base, or no call (.)"); + } + if (baseString.toUpperCase().charAt(0) == 'D') { + mLength = Integer.valueOf(baseString.substring(1, baseString.length())); + mBases = ""; + mType = TYPE.DELETION; + } else { // we're an I + mBases = baseString.substring(1, baseString.length()).toUpperCase(); + if (!validBases(mBases)) + throw new IllegalArgumentException("The insertion base string contained invalid bases -> " + baseString); + mLength = mBases.length(); + mType = TYPE.INSERTION; + } + } + } + + public int getLength() { + return mLength; + } + + public String getBases() { + return mBases; + } + + public TYPE getType() { + return mType; + } + + public boolean equals(Object obj) { + if (obj != null && (obj.getClass().equals(this.getClass()))) { + VCFGenotypeEncoding d = 
(VCFGenotypeEncoding) obj; + return (mType == d.mType) && (mBases.equals(d.mBases)) && (mLength == d.mLength); + } + return false; + } + + public int hashCode() { + // our underlying data is immutable, so this is safe (we won't strand a value in a hashtable somewhere + // when the data changes underneath, altering this value). + String str = this.mBases + String.valueOf(this.mLength) + this.mType.toString(); + return str.hashCode(); + } + + /** + * dump the string representation of this genotype encoding + * + * @return + */ + public String toString() { + StringBuilder builder = new StringBuilder(); + switch (mType) { + case SINGLE_BASE: + case UNCALLED: + builder.append(mBases); + break; + case INSERTION: + builder.append("I"); + builder.append(mBases); + break; + case DELETION: + builder.append("D"); + builder.append(mLength); + break; + } + return builder.toString(); + } + + /** + * ensure that string contains valid bases + * + * @param bases the bases to check + * + * @return true if they're all either A,C,G,T; false otherwise + */ + private static boolean validBases(String bases) { + for (char c : bases.toUpperCase().toCharArray()) { + if (c != 'A' && c != 'C' && c != 'G' && c != 'T') + return false; + } + return true; + } +} \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index b1823531a..0b8ae0fd7 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -27,7 +27,7 @@ public class VCFGenotypeRecord { private PHASE mPhaseType; // our bases(s) - private final List mGenotypeAlleles = new ArrayList(); + private final List mGenotypeAlleles = new ArrayList(); // our mapping of the format mFields to values private final Map mFields = new HashMap(); @@ -43,7 +43,7 @@ public class VCFGenotypeRecord { * 
@param phasing * @param otherFlags */ - public VCFGenotypeRecord(String sampleName, List genotypes, PHASE phasing, Map otherFlags) { + public VCFGenotypeRecord(String sampleName, List genotypes, PHASE phasing, Map otherFlags) { this.mSampleName = sampleName; if (genotypes != null) this.mGenotypeAlleles.addAll(genotypes); this.mPhaseType = phasing; @@ -78,7 +78,7 @@ public class VCFGenotypeRecord { return mSampleName; } - public List getAlleles() { + public List getAlleles() { return mGenotypeAlleles; } @@ -86,10 +86,10 @@ public class VCFGenotypeRecord { return mFields; } - public String toGenotypeString(List altAlleles) { + public String toGenotypeString(List altAlleles) { String str = ""; boolean first = true; - for (String allele : mGenotypeAlleles) { + for (VCFGenotypeEncoding allele : mGenotypeAlleles) { str += String.valueOf((altAlleles.contains(allele)) ? altAlleles.indexOf(allele) + 1 : 0); if (first) { switch (mPhaseType) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 117f22b64..116f7e798 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -199,8 +199,8 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { map.put("GQ", String.format("%.2f", qual)); params.addFormatItem("GQ"); - List alleles = createAlleleArray(gtype); - for (String allele : alleles) { + List alleles = createAlleleArray(gtype); + for (VCFGenotypeEncoding allele : alleles) { params.addAlternateBase(allele); } @@ -218,10 +218,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { * * @return a list of string representing the string array of alleles */ - private List createAlleleArray(Genotype gtype) { - List alleles = new ArrayList(); + private List createAlleleArray(Genotype gtype) { + 
List alleles = new ArrayList(); for (char allele : gtype.getBases().toCharArray()) { - alleles.add(String.valueOf(allele)); + alleles.add(new VCFGenotypeEncoding(String.valueOf(allele))); } return alleles; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java index 1cd45dc4a..b4931fc6f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFParameters.java @@ -18,7 +18,7 @@ class VCFParameters { private boolean initialized = false; private List genotypesRecord = new ArrayList(); private List formatList = new ArrayList(); - private List alternateBases = new ArrayList(); + private List alternateBases = new ArrayList(); public void setLocations(GenomeLoc location, char refBase) { // if we haven't set it up, we initialize the object @@ -64,12 +64,12 @@ class VCFParameters { formatList.add(item); } - public void addAlternateBase(String base) { - if (!alternateBases.contains(String.valueOf(base)) && !base.equals(String.valueOf(this.getReferenceBase()))) + public void addAlternateBase(VCFGenotypeEncoding base) { + if (!alternateBases.contains(base) && !base.toString().equals(String.valueOf(this.getReferenceBase()))) alternateBases.add(base); } - public List getAlternateBases() { + public List getAlternateBases() { return alternateBases; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 33c56e05d..d84691339 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -216,7 +216,7 @@ public class VCFReader implements Iterator, Iterable { // parameters to create the VCF genotype record Map tagToValue = new HashMap(); VCFGenotypeRecord.PHASE phase = 
VCFGenotypeRecord.PHASE.UNKNOWN; - List bases = new ArrayList(); + List bases = new ArrayList(); String keyStrings[] = formatString.split(":"); for (String key : keyStrings) { @@ -262,15 +262,15 @@ public class VCFReader implements Iterator, Iterable { * @param referenceBase the reference base * @param bases the list of bases for this genotype call */ - private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { + private static void addAllele(String alleleNumber, String[] altAlleles, char referenceBase, List bases) { int alleleValue = Integer.valueOf(alleleNumber); // check to make sure the allele value is within bounds if (alleleValue < 0 || alleleValue > altAlleles.length) throw new IllegalArgumentException("VCFReader: the allele value of " + alleleValue + " is out of bounds given the alternate allele list."); if (alleleValue == 0) - bases.add(String.valueOf(referenceBase)); + bases.add(new VCFGenotypeEncoding(String.valueOf(referenceBase))); else - bases.add(altAlleles[alleleValue - 1]); + bases.add(new VCFGenotypeEncoding(altAlleles[alleleValue - 1])); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 78e8d99a1..dc0daca50 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -5,7 +5,9 @@ import org.broadinstitute.sting.utils.Utils; import java.util.*; -/** the basic VCF record type */ +/** + * the basic VCF record type + */ public class VCFRecord { // commonly used strings that are in the standard public static final String FORMAT_FIELD_SEPERATOR = ":"; @@ -24,7 +26,7 @@ public class VCFRecord { // our id; set to '.' 
if not available private String mID; // the alternate bases - private final List mAlts = new ArrayList(); + private final List mAlts = new ArrayList(); // our qual value private double mQual; // our filter string @@ -67,7 +69,7 @@ public class VCFRecord { String contig, int position, String ID, - List altBases, + List altBases, double qual, String filters, Map infoFields, @@ -77,7 +79,7 @@ public class VCFRecord { this.mChrome = contig; this.setPosition(position); this.mID = ID; - for (String alt : altBases) + for (VCFGenotypeEncoding alt : altBases) this.addAlternateBase(alt); this.setQual(qual); this.setFilterString(filters); @@ -121,7 +123,7 @@ public class VCFRecord { case ALT: String values[] = columnValues.get(val).split(","); for (String alt : values) - addAlternateBase(alt); + addAlternateBase(new VCFGenotypeEncoding(alt)); break; case QUAL: this.setQual(Double.valueOf(columnValues.get(val))); @@ -156,18 +158,24 @@ public class VCFRecord { return (mGenotypeFields.size() > 0); } - /** @return the string for the chromosome that this VCF record is associated with */ + /** + * @return the string for the chromosome that this VCF record is associated with + */ public String getChromosome() { return this.mChrome; } - /** @return this VCF records position on the specified chromosome */ + /** + * @return this VCF records position on the specified chromosome + */ public long getPosition() { return this.mPosition; } - /** @return the ID value for this record */ + /** + * @return the ID value for this record + */ public String getID() { return this.mID; } @@ -186,7 +194,7 @@ public class VCFRecord { * * @return an array of strings representing the alt alleles, or null if there are none */ - public List getAlternateAlleles() { + public List getAlternateAlleles() { return this.mAlts; } @@ -194,7 +202,9 @@ public class VCFRecord { return getAlternateAlleles().size() > 0; } - /** @return the phred-scaled quality score */ + /** + * @return the phred-scaled quality score + 
*/ public double getQual() { return this.mQual; } @@ -206,7 +216,7 @@ public class VCFRecord { */ public String[] getFilteringCodes() { if (mFilterString == null) return new String[]{"0"}; - return this.mFilterString.split(";"); + return this.mFilterString.split(FILTER_CODE_SEPERATOR); } public boolean hasFilteringCodes() { @@ -227,7 +237,9 @@ public class VCFRecord { return this.mInfoFields; } - /** @return the number of columnsof data we're storing */ + /** + * @return the number of columnsof data we're storing + */ public int getColumnCount() { if (this.hasGenotypeData()) return mGenotypeFields.size() + VCFHeader.HEADER_FIELDS.values().length; return VCFHeader.HEADER_FIELDS.values().length; @@ -242,7 +254,9 @@ public class VCFRecord { return this.mGenotypeFields; } - /** @return a List of the sample names */ + /** + * @return a List of the sample names + */ public String[] getSampleNames() { String names[] = new String[mGenotypeFields.size()]; int index = 0; @@ -287,24 +301,26 @@ public class VCFRecord { this.mFilterString = mFilterString; } - public void addGenotypeFields(VCFGenotypeRecord mGenotypeFields) { + public void addGenotypeField(VCFGenotypeRecord mGenotypeFields) { this.mGenotypeFields.add(mGenotypeFields); } - public void addAlternateBase(String base) { - if (base.length() == 1) { - char nuc = (char) ((base.charAt(0) > 96) ? base.charAt(0) - 32 : base.charAt(0)); - if (nuc != 'A' && nuc != 'C' && nuc != 'T' && nuc != 'G' && nuc != '.') - throw new IllegalArgumentException("Alternate base must be either A,C,T,G,. or if an indel it must contain length information: " + base); - } else { - // we must be an indel, check that the first character is I or D - char nuc = (char) ((base.charAt(0) > 96) ? 
base.charAt(0) - 32 : base.charAt(0)); - if (nuc != 'I' && nuc != 'D') - throw new IllegalArgumentException("Alternate bases of length greater then one must be an indel: " + base); - } - this.mAlts.add(base); + /** + * add an alternate base to our alternate base list. All bases are uppercased + * before being added to the list. + * + * @param base the base to add + */ + public void addAlternateBase(VCFGenotypeEncoding base) { + if (!mAlts.contains(base)) mAlts.add(base); } + /** + * add an info field to the record + * + * @param key the key, from the spec or a user created key + * @param value its value as a string + */ public void addInfoField(String key, String value) { this.mInfoFields.put(key, value); } @@ -312,31 +328,29 @@ public class VCFRecord { /** * the generation of a string representation, which is used by the VCF writer * + * @param header the VCF header for this VCF Record * @return a string */ public String toStringRepresentation(VCFHeader header) { StringBuilder builder = new StringBuilder(); // CHROM \t POS \t ID \t REF \t ALT \t QUAL \t FILTER \t INFO - builder.append(getChromosome() + FIELD_SEPERATOR); - builder.append(getPosition() + FIELD_SEPERATOR); - builder.append(getID() + FIELD_SEPERATOR); - builder.append(getReferenceBase() + FIELD_SEPERATOR); + builder.append(getChromosome()); + builder.append(FIELD_SEPERATOR); + builder.append(getPosition()); + builder.append(FIELD_SEPERATOR); + builder.append(getID()); + builder.append(FIELD_SEPERATOR); + builder.append(getReferenceBase()); + builder.append(FIELD_SEPERATOR); String alts = ""; - for (String str : this.getAlternateAlleles()) alts += str + ","; + for (VCFGenotypeEncoding str : this.getAlternateAlleles()) alts += str.toString() + ","; builder.append((alts.length() > 0) ? alts.substring(0, alts.length() - 1) + FIELD_SEPERATOR : "."
+ FIELD_SEPERATOR); - builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING,getQual()) + FIELD_SEPERATOR); - builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes()) + FIELD_SEPERATOR); - String info = ""; - for (String str : this.getInfoValues().keySet()) { - if (str.equals(EMPTY_INFO_FIELD)) - info = EMPTY_INFO_FIELD; - else - info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR; - } - - if (info.length() > 1) builder.append(info.substring(0, info.length() - 1)); - else builder.append(info); + builder.append(String.format(DOUBLE_PRECISION_FORMAT_STRING, getQual())); + builder.append(FIELD_SEPERATOR); + builder.append(Utils.join(FILTER_CODE_SEPERATOR, getFilteringCodes())); + builder.append(FIELD_SEPERATOR); + builder.append(createInfoString()); if (this.hasGenotypeData()) { addGenotypeData(builder, header); @@ -344,6 +358,22 @@ public class VCFRecord { return builder.toString(); } + /** + * create the info string + * + * @return a string representing the information fields + */ + protected String createInfoString() { + String info = ""; + for (String str : this.getInfoValues().keySet()) { + if (str.equals(EMPTY_INFO_FIELD)) + return EMPTY_INFO_FIELD; + else + info += str + "=" + getInfoValues().get(str) + INFO_FIELD_SEPERATOR; + } + return (info.contains(INFO_FIELD_SEPERATOR)) ?
info.substring(0, info.lastIndexOf(INFO_FIELD_SEPERATOR)) : info; + } + /** * add the genotype data * @@ -358,9 +388,7 @@ public class VCFRecord { Map gMap = genotypeListToMap(getVCFGenotypeRecords()); for (String genotype : header.getGenotypeSamples()) { - builder.append(FIELD_SEPERATOR); - if (gMap.containsKey(genotype)) { VCFGenotypeRecord rec = gMap.get(genotype); if (!rec.toGenotypeString(this.mAlts).equals("")) @@ -386,7 +414,6 @@ public class VCFRecord { * compare two VCF records * * @param other the other VCF record - * * @return true if they're equal */ public boolean equals(VCFRecord other) { @@ -406,7 +433,6 @@ public class VCFRecord { * create a genotype mapping from a list and their sample names * * @param list a list of genotype samples - * * @return a mapping of the sample name to VCF genotype record */ private static Map genotypeListToMap(List list) { diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java index 9a90fe51e..0c77a5aea 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java @@ -91,7 +91,7 @@ public class RodVCFTest extends BaseTest { @Test public void testToString() { // slightly altered line, due to map ordering - String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n"; + final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tDP=258;AF=0.786;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n"; RodVCF vcf = getVCFObject(); VCFReader reader = new VCFReader(vcfFile); Iterator iter = vcf.createIterator("VCF", vcfFile); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncodingTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncodingTest.java new file mode 100644 index 000000000..f33ba73f1 --- /dev/null +++ 
b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeEncodingTest.java @@ -0,0 +1,151 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import org.broadinstitute.sting.BaseTest; +import org.junit.Assert; +import org.junit.Test; + + +/** + * @author aaron + *

+ * Class VCFGenotypeEncodingTest + *

+ * test the VCFGenotypeEncoding class + */ +public class VCFGenotypeEncodingTest extends BaseTest { + @Test + public void testDecodingSingle() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A"); + Assert.assertTrue("A".equals(enc.toString())); + Assert.assertEquals(0, enc.getLength()); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); + + VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("C"); + Assert.assertTrue("C".equals(enc2.toString())); + Assert.assertEquals(0, enc.getLength()); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); + + VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("G"); + Assert.assertTrue("G".equals(enc3.toString())); + Assert.assertEquals(0, enc.getLength()); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); + + VCFGenotypeEncoding enc4 = new VCFGenotypeEncoding("T"); + Assert.assertTrue("T".equals(enc4.toString())); + Assert.assertEquals(0, enc.getLength()); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); + + VCFGenotypeEncoding enc5 = new VCFGenotypeEncoding("a"); + Assert.assertTrue("A".equals(enc5.toString())); + Assert.assertEquals(0, enc.getLength()); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); + + VCFGenotypeEncoding enc6 = new VCFGenotypeEncoding("c"); + Assert.assertTrue("C".equals(enc6.toString())); + Assert.assertEquals(0, enc.getLength()); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); + + VCFGenotypeEncoding enc7 = new VCFGenotypeEncoding("g"); + Assert.assertTrue("G".equals(enc7.toString())); + Assert.assertEquals(0, enc.getLength()); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); + + VCFGenotypeEncoding enc8 = new VCFGenotypeEncoding("t"); + Assert.assertTrue("T".equals(enc8.toString())); + Assert.assertEquals(0, enc.getLength()); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.SINGLE_BASE, enc.getType()); + } + + 
@Test(expected = IllegalArgumentException.class) + public void testDecodingSingleBadBase() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("E"); + } + + @Test(expected = IllegalArgumentException.class) + public void testDecodingSingleWrongBase() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("I"); + } + + @Test + public void testValidIndel() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGGC"); + Assert.assertEquals(4, enc.getLength()); + Assert.assertTrue(enc.getBases().equals("AGGC")); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.INSERTION, enc.getType()); + } + + @Test(expected = IllegalArgumentException.class) + public void testBadIndel() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("IAGRC"); + } + + @Test + public void testValidDel() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("D40"); + Assert.assertEquals(40, enc.getLength()); + Assert.assertTrue(enc.getBases().equals("")); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.DELETION, enc.getType()); + } + + @Test(expected = IllegalArgumentException.class) + public void testBadDel() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("DAGCT"); + } + + @Test + public void testValidNoCall() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("."); + Assert.assertEquals(0, enc.getLength()); + Assert.assertTrue(enc.getBases().equals(".")); + Assert.assertEquals(VCFGenotypeEncoding.TYPE.UNCALLED, enc.getType()); + } + + @Test(expected = IllegalArgumentException.class) + public void testBadNoCall() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding(".."); + } + + @Test + public void testEquals() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A"); + VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A"); + VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C"); + Assert.assertTrue(enc.equals(enc2)); + Assert.assertTrue(!enc.equals(enc3)); + enc = new VCFGenotypeEncoding("D40"); + enc2 = new VCFGenotypeEncoding("D40"); + enc3 = new VCFGenotypeEncoding("D41"); + 
Assert.assertTrue(enc.equals(enc2)); + Assert.assertTrue(!enc.equals(enc3)); + enc = new VCFGenotypeEncoding("IAAC"); + enc2 = new VCFGenotypeEncoding("IAAC"); + enc3 = new VCFGenotypeEncoding("IACG"); + Assert.assertTrue(enc.equals(enc2)); + Assert.assertTrue(!enc.equals(enc3)); + enc = new VCFGenotypeEncoding("."); + enc2 = new VCFGenotypeEncoding("."); + Assert.assertTrue(enc.equals(enc2)); + } + + @Test + public void testHashCode() { + VCFGenotypeEncoding enc = new VCFGenotypeEncoding("A"); + VCFGenotypeEncoding enc2 = new VCFGenotypeEncoding("A"); + VCFGenotypeEncoding enc3 = new VCFGenotypeEncoding("C"); + Assert.assertTrue(enc.hashCode() == enc2.hashCode()); + Assert.assertTrue(enc.hashCode() != enc3.hashCode()); + enc = new VCFGenotypeEncoding("D40"); + enc2 = new VCFGenotypeEncoding("D40"); + enc3 = new VCFGenotypeEncoding("D41"); + Assert.assertTrue(enc.hashCode() == enc2.hashCode()); + Assert.assertTrue(enc.hashCode() != enc3.hashCode()); + enc = new VCFGenotypeEncoding("IAAC"); + enc2 = new VCFGenotypeEncoding("IAAC"); + enc3 = new VCFGenotypeEncoding("IACG"); + Assert.assertTrue(enc.hashCode() == enc2.hashCode()); + Assert.assertTrue(enc.hashCode() != enc3.hashCode()); + enc = new VCFGenotypeEncoding("."); + enc2 = new VCFGenotypeEncoding("."); + Assert.assertTrue(enc.hashCode() == enc2.hashCode()); + } +} diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java index dc215b05a..2c9144cc2 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java @@ -37,12 +37,12 @@ public class VCFReaderTest extends BaseTest { public void testBasicParsing() { String formatString = "GT:B:C:D"; String genotypeString = "0|1:2:3:4"; - String altAlleles[] = {"A","C","G","T"}; - char referenceBase = 'N'; + String altAlleles[] = {"A","G","T"}; + char referenceBase = 
'C'; VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase); Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType()); - Assert.assertEquals("N",rec.getAlleles().get(0)); - Assert.assertEquals("A",rec.getAlleles().get(1)); + Assert.assertEquals("C",rec.getAlleles().get(0).toString()); + Assert.assertEquals("A",rec.getAlleles().get(1).toString()); Map values = rec.getFields(); Assert.assertEquals(3,values.size()); Assert.assertTrue(values.get("B").equals("2")); @@ -58,12 +58,12 @@ public class VCFReaderTest extends BaseTest { public void testMissingFieldParsing() { String formatString = "GT:B:C:D"; String genotypeString = "0|1:::4"; - String altAlleles[] = {"A","C","G","T"}; - char referenceBase = 'N'; + String altAlleles[] = {"A","G","T"}; + char referenceBase = 'C'; VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase); Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType()); - Assert.assertEquals("N",rec.getAlleles().get(0)); - Assert.assertEquals("A",rec.getAlleles().get(1)); + Assert.assertEquals("C",rec.getAlleles().get(0).toString()); + Assert.assertEquals("A",rec.getAlleles().get(1).toString()); Map values = rec.getFields(); Assert.assertEquals(3,values.size()); Assert.assertTrue(values.get("B").equals("")); @@ -78,12 +78,12 @@ public class VCFReaderTest extends BaseTest { public void testMissingAllFields() { String formatString = "GT:B:C:D"; String genotypeString = "0|1:::"; - String altAlleles[] = {"A","C","G","T"}; - char referenceBase = 'N'; + String altAlleles[] = {"A","G","T"}; + char referenceBase = 'C'; VCFGenotypeRecord rec = VCFReader.getVCFGenotype("test",formatString,genotypeString,altAlleles,referenceBase); Assert.assertEquals(VCFGenotypeRecord.PHASE.PHASED,rec.getPhaseType()); - Assert.assertEquals("N",rec.getAlleles().get(0)); - Assert.assertEquals("A",rec.getAlleles().get(1)); + 
Assert.assertEquals("C",rec.getAlleles().get(0).toString()); + Assert.assertEquals("A",rec.getAlleles().get(1).toString()); Map values = rec.getFields(); Assert.assertEquals(3,values.size()); Assert.assertTrue(values.get("B").equals("")); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java index 4a44d6e99..d49c7278c 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java @@ -11,36 +11,147 @@ import java.util.Map; /** - * - * @author aaron - * - * Class VCFRecordTest - * - * test the basic functionality of the vcf record + * @author aaron + *

+ * Class VCFRecordTest + *

+ * test the basic functionality of the vcf record */ public class VCFRecordTest extends BaseTest { - private VCFRecord makeFakeVCFRecord() { - List altBases = new ArrayList(); - altBases.add("C"); - altBases.add("D1"); - Map infoFields = new HashMap(); - infoFields.put("DP","50"); + /** + * create a fake VCF record + * + * @return a VCFRecord + */ + private static VCFRecord makeFakeVCFRecord(Map infoFields) { + List altBases = new ArrayList(); + altBases.add(new VCFGenotypeEncoding("C")); + altBases.add(new VCFGenotypeEncoding("D1")); List genotypeObjects = new ArrayList(); - Map keyValues = new HashMap(); - keyValues.put("AA","2"); - List Alleles = new ArrayList(); - Alleles.add("A"); - genotypeObjects.add(new VCFGenotypeRecord("SampleName", Alleles, VCFGenotypeRecord.PHASE.PHASED, keyValues)); - return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",genotypeObjects); + genotypeObjects.add(createGenotype("sample1", "A", "A")); + return new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", infoFields, "GT:AA", genotypeObjects); + } + + /** + * create a fake VCF genotype record + * + * @param name the name of the sample + * @param Allele1 the first allele + * @param Allele2 the second allele + * @return a VCFGenotypeRecord + */ + private static VCFGenotypeRecord createGenotype(String name, String Allele1, String Allele2) { + Map keyValues = new HashMap(); + keyValues.put("AA", "2"); + List Alleles = new ArrayList(); + Alleles.add(new VCFGenotypeEncoding(Allele1)); + Alleles.add(new VCFGenotypeEncoding(Allele2)); + return new VCFGenotypeRecord(name, Alleles, VCFGenotypeRecord.PHASE.PHASED, keyValues); + } + + @Test + public void testAddReduntantAlts() { + List altBases = new ArrayList(); + altBases.add(new VCFGenotypeEncoding("C")); + altBases.add(new VCFGenotypeEncoding("D1")); + altBases.add(new VCFGenotypeEncoding("D1")); + List genotypeObjects = new ArrayList(); + genotypeObjects.add(createGenotype("sample1", "A", "A")); + VCFRecord 
rec = new VCFRecord('A', "chr1", 1, "RANDOM", altBases, 0, ".", new HashMap(), "GT:AA", genotypeObjects); + Assert.assertEquals(2, rec.getAlternateAlleles().size()); + } + + @Test + public void testGetOneGenotype() { + Map infoFields = new HashMap(); + VCFRecord rec = makeFakeVCFRecord(infoFields); + List genotypeObjects = rec.getVCFGenotypeRecords(); + Assert.assertEquals(1, genotypeObjects.size()); + Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1")); + Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size()); + Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString()); + Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString()); + } + + @Test + public void testGetGenotypes() { + Map infoFields = new HashMap(); + VCFRecord rec = makeFakeVCFRecord(infoFields); + rec.addGenotypeField(createGenotype("sample2", "C", "A")); + List genotypeObjects = rec.getVCFGenotypeRecords(); + Assert.assertEquals(2, genotypeObjects.size()); + Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("sample1")); + Assert.assertEquals(2, genotypeObjects.get(0).getAlleles().size()); + Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(0).toString()); + Assert.assertEquals("A", genotypeObjects.get(0).getAlleles().get(1).toString()); + + // assert the second one + Assert.assertTrue(genotypeObjects.get(1).getSampleName().equals("sample2")); + Assert.assertEquals(2, genotypeObjects.get(1).getAlleles().size()); + Assert.assertEquals("C", genotypeObjects.get(1).getAlleles().get(0).toString()); + Assert.assertEquals("A", genotypeObjects.get(1).getAlleles().get(1).toString()); + + } + + @Test + public void testCreateInfoString() { + Map infoFields = new HashMap(); + VCFRecord rec = makeFakeVCFRecord(infoFields); + Assert.assertTrue(rec.createInfoString().equals(".")); + infoFields.put("DP", "50"); + VCFRecord rec2 = makeFakeVCFRecord(infoFields); + 
Assert.assertTrue(rec2.createInfoString().equals("DP=50")); + rec2.addInfoField("AB", "CD"); + Assert.assertTrue(rec2.createInfoString().equals("DP=50;AB=CD") || rec2.createInfoString().equals("AB=CD;DP=50")); } @Test - public void testGetGenotypes() { - VCFRecord rec = makeFakeVCFRecord(); - List genotypeObjects = rec.getVCFGenotypeRecords(); - Assert.assertEquals(1,genotypeObjects.size()); - Assert.assertTrue(genotypeObjects.get(0).getSampleName().equals("SampleName")); + public void testAddAlts() { + Map infoFields = new HashMap(); + VCFRecord rec = makeFakeVCFRecord(infoFields); + rec.addAlternateBase(new VCFGenotypeEncoding("T")); + rec.addAlternateBase(new VCFGenotypeEncoding("T")); + rec.addAlternateBase(new VCFGenotypeEncoding("T")); + rec.addAlternateBase(new VCFGenotypeEncoding("T")); + rec.addAlternateBase(new VCFGenotypeEncoding("T")); + Assert.assertEquals(3,rec.getAlternateAlleles().size()); } + + /** + * create a fake header of known quantity + * + * @return a fake VCF header + */ + public static VCFHeader createFakeHeader() { + Map metaData = new HashMap(); + List additionalColumns = new ArrayList(); + metaData.put("format", "VCRv3.2"); // required + metaData.put("two", "2"); + additionalColumns.add("FORMAT"); + additionalColumns.add("sample1"); + return new VCFHeader(metaData, additionalColumns); + } + + private static final String stringRep = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tDP=50\tGT:AA\t0|0:2"; + private static final String stringRep2 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2"; + //private static final String stringRep3 = "chr1\t1\tRANDOM\tA\tC,D1\t0.00\t.\tAB=CD;DP=50\tGT:AA\t0|0:2"; + + @Test + public void testStringRepresentation() { + Map infoFields = new HashMap(); + infoFields.put("DP", "50"); + VCFRecord rec = makeFakeVCFRecord(infoFields); + Map metaData = new HashMap(); + List additionalColumns = new ArrayList(); + String rep = rec.toStringRepresentation(createFakeHeader()); + 
Assert.assertTrue(stringRep.equals(rep)); + rec.addInfoField("AB", "CD"); + String rep2 = rec.toStringRepresentation(createFakeHeader()); + Assert.assertTrue(stringRep2.equals(rep2)); + //rec.addGenotypeField(createGenotype("sample3","A","D12")); + } + + } diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java index 7d1a62345..8e7f793f1 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java @@ -24,7 +24,7 @@ public class VCFWriterTest extends BaseTest { /** test, using the writer and reader, that we can output and input a VCF file without problems */ @Test public void testBasicWriteAndRead() { - VCFHeader header = createFakeHeader(); + VCFHeader header = createFakeHeader(metaData,additionalColumns); VCFWriter writer = new VCFWriter(header,fakeVCFFile); writer.addRecord(createVCFRecord(header)); writer.addRecord(createVCFRecord(header)); @@ -45,7 +45,7 @@ public class VCFWriterTest extends BaseTest { * create a fake header of known quantity * @return a fake VCF header */ - private VCFHeader createFakeHeader() { + public static VCFHeader createFakeHeader(Map metaData, List additionalColumns) { metaData.put("format", "VCRv3.2"); // required metaData.put("two", "2"); additionalColumns.add("FORMAT"); @@ -60,9 +60,9 @@ public class VCFWriterTest extends BaseTest { * @return a VCFRecord */ private VCFRecord createVCFRecord(VCFHeader header) { - List altBases = new ArrayList(); - altBases.add("C"); - altBases.add("D1"); + List altBases = new ArrayList(); + altBases.add(new VCFGenotypeEncoding("C")); + altBases.add(new VCFGenotypeEncoding("D1")); Map infoFields = new HashMap(); infoFields.put("DP","50"); @@ -71,9 +71,9 @@ public class VCFWriterTest extends BaseTest { Map str = new HashMap(); str.put("bb","0"); - List myAlleles = new ArrayList(); - 
myAlleles.add("C"); - myAlleles.add("D1"); + List myAlleles = new ArrayList(); + myAlleles.add(new VCFGenotypeEncoding("C")); + myAlleles.add(new VCFGenotypeEncoding("D1")); gt.add(new VCFGenotypeRecord(name, myAlleles, VCFGenotypeRecord.PHASE.PHASED, str)); } return new VCFRecord('A',"chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt);