diff --git a/java/src/org/broad/tribble/vcf/VCF3Codec.java b/java/src/org/broad/tribble/vcf/VCF3Codec.java new file mode 100755 index 000000000..711b25c8d --- /dev/null +++ b/java/src/org/broad/tribble/vcf/VCF3Codec.java @@ -0,0 +1,124 @@ +package org.broad.tribble.vcf; + +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broad.tribble.exception.CodecLineParsingException; +import org.broad.tribble.util.LineReader; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Created by IntelliJ IDEA. + * User: delangel + * Date: Jul 13, 2010 + * Time: 3:57:01 PM + * To change this template use File | Settings | File Templates. + */ +public class VCF3Codec implements FeatureCodec { + + // we have to store the list of strings that make up the header until they're needed + private List headerStrings = new ArrayList(); + private VCFHeader header = null; + private VCFHeaderVersion version = VCFHeaderVersion.VCF3_3; + + + // some classes need to transform the line before + private LineTransform transformer = null; + + /** + * Fast path to get the location of the Feature for indexing + * @param line the input line to decode + * @return + */ + public Feature decodeLoc(String line) { + return reallyDecode(line, true); + } + + /** + * Decode a line as a Feature. + * + * @param line + * + * @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is + * a comment) + */ + public Feature decode(String line) { + return reallyDecode(line, false); + } + + private Feature reallyDecode(String line, boolean justLocationPlease ) { + // transform the line, if we have a transform to do + if (transformer != null) line = transformer.lineTransform(line); + if (line.startsWith("#")) + return null; + + // make a VCFRecord of the line and return it + VCFRecord rec = VCFReaderUtils.createRecord(line, header, justLocationPlease); + if ( ! justLocationPlease ) rec.setHeader(header); + return rec; + } + + /** + * Return the # of header lines for this file. We use this to parse out the header + * + * @return 0 + */ + public int readHeader(LineReader reader) { + String line = ""; + try { + while ((line = reader.readLine()) != null) { + if (line.startsWith("##")) { + headerStrings.add(line); + } + else if (line.startsWith("#")) { + headerStrings.add(line); + header = VCFReaderUtils.createHeader(headerStrings,version); + return headerStrings.size(); + } + else { + throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file"); + } + + } + } catch (IOException e) { + throw new RuntimeException("IO Exception ", e); + } + throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file"); + } + + /** + * @return VCFRecord.class + */ + public Class getFeatureType() { + return VCFRecord.class; + } + + public VCFHeader getHeader(Class clazz) throws ClassCastException { + if (!clazz.equals(VCFHeader.class)) + throw new ClassCastException("Unable to cast to expected type " + clazz + " from type " + VCFHeader.class); + return header; + } + + public static interface LineTransform { + public String lineTransform(String line); + } + + public LineTransform getTransformer() { + return transformer; + } + + public void setTransformer(LineTransform transformer) { + this.transformer = transformer; + } + + public VCFHeaderVersion getVersion() { + return version; + } + + public void setVersion(VCFHeaderVersion version) { + this.version = version; + } +} + diff --git a/java/src/org/broad/tribble/vcf/VCFCodec.java b/java/src/org/broad/tribble/vcf/VCFCodec.java index 0f6a378c0..510d09a6c 100644 --- a/java/src/org/broad/tribble/vcf/VCFCodec.java +++ b/java/src/org/broad/tribble/vcf/VCFCodec.java @@ -4,6 +4,7 @@ import org.broad.tribble.Feature; import org.broad.tribble.FeatureCodec; import org.broad.tribble.exception.CodecLineParsingException; import org.broad.tribble.util.LineReader; +import org.broadinstitute.sting.gatk.refdata.features.vcf4.VCF4Codec; import java.io.IOException; import java.util.ArrayList; @@ -18,110 +19,4 @@ import java.util.List; * * The codec for VCF, which relies on VCFReaderUtils to do most of the processing */ -public class VCFCodec implements FeatureCodec { - - // we have to store the list of strings that make up the header until they're needed - private List headerStrings = new ArrayList(); - private VCFHeader header = null; - private VCFHeaderVersion version = VCFHeaderVersion.VCF3_3; - - - // some classes need to transform the line before - private LineTransform transformer = null; - - /** - * Fast path to get the location of the Feature for indexing - * @param line the input line to decode - * @return - */ - public Feature decodeLoc(String line) { - return reallyDecode(line, true); - } - - /** - * Decode a line as a Feature. - * - * @param line - * - * @return Return the Feature encoded by the line, or null if the line does not represent a feature (e.g. is - * a comment) - */ - public Feature decode(String line) { - return reallyDecode(line, false); - } - - private Feature reallyDecode(String line, boolean justLocationPlease ) { - // transform the line, if we have a transform to do - if (transformer != null) line = transformer.lineTransform(line); - if (line.startsWith("#")) - return null; - - // make a VCFRecord of the line and return it - VCFRecord rec = VCFReaderUtils.createRecord(line, header, justLocationPlease); - if ( ! justLocationPlease ) rec.setHeader(header); - return rec; - } - - /** - * Return the # of header lines for this file. We use this to parse out the header - * - * @return 0 - */ - public int readHeader(LineReader reader) { - String line = ""; - try { - while ((line = reader.readLine()) != null) { - if (line.startsWith("##")) { - if ( line.startsWith("##fileformat") && ! line.startsWith("##fileformat=VCFv3" ) ) - throw new CodecLineParsingException("VCF codec can only parse VCF3 formated files. Your version line is " + line + ". If you want to parse VCF4, use VCF4 use VCF as the rod type"); - headerStrings.add(line); - } - else if (line.startsWith("#")) { - headerStrings.add(line); - header = VCFReaderUtils.createHeader(headerStrings,version); - return headerStrings.size(); - } - else { - throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file"); - } - - } - } catch (IOException e) { - throw new RuntimeException("IO Exception ", e); - } - throw new CodecLineParsingException("We never saw the required header line (starting with one #) for the input VCF file"); - } - - /** - * @return VCFRecord.class - */ - public Class getFeatureType() { - return VCFRecord.class; - } - - public VCFHeader getHeader(Class clazz) throws ClassCastException { - if (!clazz.equals(VCFHeader.class)) - throw new ClassCastException("Unable to cast to expected type " + clazz + " from type " + VCFHeader.class); - return header; - } - - public static interface LineTransform { - public String lineTransform(String line); - } - - public LineTransform getTransformer() { - return transformer; - } - - public void setTransformer(LineTransform transformer) { - this.transformer = transformer; - } - - public VCFHeaderVersion getVersion() { - return version; - } - - public void setVersion(VCFHeaderVersion version) { - this.version = version; - } -} +public class VCFCodec extends VCF4Codec {} diff --git a/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java index bb8d245ac..c78218b91 100644 --- a/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFCompoundHeaderLine.java @@ -82,7 +82,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF } protected VCFCompoundHeaderLine(String name, int count, VCFHeaderLineType type, String description, SupportedHeaderLineType lineType, VCFHeaderVersion version) { - super(lineType.toString(), "", version); + super(lineType.toString(), ""); this.name = name; this.count = count; this.type = type; @@ -98,7 +98,7 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF * */ protected VCFCompoundHeaderLine(String line, VCFHeaderVersion version, SupportedHeaderLineType lineType) { - super(lineType.toString(), "", version); + super(lineType.toString(), ""); Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Number","Type","Description")); name = mapping.get("ID"); count = version == VCFHeaderVersion.VCF4_0 ? diff --git a/java/src/org/broad/tribble/vcf/VCFFilterHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFFilterHeaderLine.java index 9d9fd97eb..c3981d759 100755 --- a/java/src/org/broad/tribble/vcf/VCFFilterHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFFilterHeaderLine.java @@ -34,22 +34,17 @@ public class VCFFilterHeaderLine extends VCFHeaderLine implements VCFNamedHeader * @param version the vcf header version */ protected VCFFilterHeaderLine(String line, VCFHeaderVersion version) { - super("FILTER", "", version); + super("FILTER", ""); Map mapping = VCFHeaderLineTranslator.parseLine(version,line, Arrays.asList("ID","Description")); name = mapping.get("ID"); description = mapping.get("Description"); } protected String toStringEncoding() { - if (mVersion == VCFHeaderVersion.VCF3_3 || mVersion == VCFHeaderVersion.VCF3_2) - return String.format("FILTER=%s,\"%s\"", name, description); - else if (mVersion == VCFHeaderVersion.VCF4_0) { - Map map = new LinkedHashMap(); - map.put("ID", name); - map.put("Description", description); - return "FILTER=" + VCFHeaderLine.toStringEncoding(map); - } - else throw new RuntimeException("Unsupported VCFVersion " + mVersion); + Map map = new LinkedHashMap(); + map.put("ID", name); + map.put("Description", description); + return "FILTER=" + VCFHeaderLine.toStringEncoding(map); } public boolean equals(Object o) { diff --git a/java/src/org/broad/tribble/vcf/VCFHeader.java b/java/src/org/broad/tribble/vcf/VCFHeader.java index 2da95ea66..fc084062b 100644 --- a/java/src/org/broad/tribble/vcf/VCFHeader.java +++ b/java/src/org/broad/tribble/vcf/VCFHeader.java @@ -30,9 +30,6 @@ public class VCFHeader { // the header string indicator public static final String HEADER_INDICATOR = "#"; - // our header version - private VCFHeaderVersion version; - /** do we have genotying data? */ private boolean hasGenotypingData = false; @@ -70,7 +67,6 @@ public class VCFHeader { List toRemove = new ArrayList(); for ( VCFHeaderLine line : mMetaData ) if ( VCFHeaderVersion.isFormatString(line.getKey())) { - version = VCFHeaderVersion.toHeaderVersion(line.getValue(),line.getKey()); toRemove.add(line); } // remove old header lines for now, @@ -98,10 +94,7 @@ public class VCFHeader { */ public Set getMetaData() { Set lines = new LinkedHashSet(); - if (version == null) - lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString())); - else - lines.add(new VCFHeaderLine(version.getFormatString(), version.getVersionString())); + lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString())); lines.addAll(mMetaData); return lines; } @@ -129,19 +122,6 @@ public class VCFHeader { return HEADER_FIELDS.values().length + ((hasGenotypingData) ? mGenotypeSampleNames.size() + 1 : 0); } - /** - * convert the header to a new VCF version - * @param version the version to convert to - */ - public void setVersion(VCFHeaderVersion version) { - if (version.equals(this.version)) - return; // we're all set, do nothing - - // store the new version, and update each of the header lines - this.version = version; - for (VCFHeaderLine line : mMetaData) - line.setVersion(version); - } } diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFHeaderLine.java index f3dadccf5..b9ff7948f 100644 --- a/java/src/org/broad/tribble/vcf/VCFHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFHeaderLine.java @@ -43,19 +43,6 @@ public class VCFHeaderLine implements Comparable { private String mKey = null; private String mValue = null; - protected VCFHeaderVersion mVersion = null; - - /** - * create a VCF header line - * - * @param key the key for this header line - * @param value the value for this header line - */ - public VCFHeaderLine(String key, String value, VCFHeaderVersion version) { - mKey = key; - mValue = value; - mVersion = version; - } /** * create a VCF header line @@ -66,7 +53,6 @@ public class VCFHeaderLine implements Comparable { public VCFHeaderLine(String key, String value) { mKey = key; mValue = value; - mVersion = VCFHeaderVersion.VCF3_3; } /** @@ -127,18 +113,6 @@ public class VCFHeaderLine implements Comparable { return toString().compareTo(other.toString()); } - /** - * set the version string, which resets the current stored string representation if the version changed - * @param version - */ - public void setVersion(VCFHeaderVersion version) { - if (!version.equals(this.mVersion)) this.stringRep = null; - this.mVersion = version; - } - - public VCFHeaderVersion getVersion() { - return mVersion; - } /** * create a string of a mapping pair for the target VCF version diff --git a/java/src/org/broad/tribble/vcf/VCFReaderUtils.java b/java/src/org/broad/tribble/vcf/VCFReaderUtils.java index 9efe455c8..9eec59c56 100644 --- a/java/src/org/broad/tribble/vcf/VCFReaderUtils.java +++ b/java/src/org/broad/tribble/vcf/VCFReaderUtils.java @@ -17,7 +17,7 @@ public class VCFReaderUtils { * package protected so that the VCFReaderUtils can access this function * * @param headerStrings a list of header strings - * + * @param version Header version to parse * @return a VCF Header created from the list of stinrgs */ public static VCFHeader createHeader(List headerStrings, VCFHeaderVersion version) { @@ -53,7 +53,7 @@ public class VCFReaderUtils { else { int equals = str.indexOf("="); if ( equals != -1 ) - metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1),version)); + metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1))); } } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java index 014a3b5c5..9b78d3b8b 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java @@ -25,12 +25,11 @@ import java.util.*; */ public class VCF4Codec implements FeatureCodec, NameAwareCodec { - // a variant context flag for original allele strings - public static final String ORIGINAL_ALLELE_LIST = "ORIGINAL_ALLELE_LIST"; // we have to store the list of strings that make up the header until they're needed private VCFHeader header = null; + private VCFHeaderVersion version = VCFHeaderVersion.VCF4_0; // used to convert the index of the alternate allele in genotypes to a integer index private static int ZERO_CHAR = (byte)'0'; @@ -66,6 +65,9 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { private int lineNo = 0; + // some classes need to transform the line before + private LineTransform transformer = null; + /** * this method is a big hack, since I haven't gotten to updating the VCF header for the 4.0 updates * @param reader the line reader to take header lines from @@ -77,14 +79,22 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { String line = ""; try { + boolean foundHeaderVersion = false; while ((line = reader.readLine()) != null) { lineNo++; if (line.startsWith("##")) { - if ( line.startsWith("##fileformat") && ! line.startsWith("##fileformat=VCFv4" ) ) - throw new CodecLineParsingException("VCF4 codec can only parse VCF4 formated files. Your version line is " + line + ". If you want VCF3 parsing, use VCF as the rod type."); + String[] lineFields = line.substring(2).split("="); + if (lineFields.length == 2 && + VCFHeaderVersion.isVersionString(lineFields[1]) && VCFHeaderVersion.isFormatString(lineFields[0])) { + foundHeaderVersion = true; + this.version = VCFHeaderVersion.toHeaderVersion(lineFields[1]); + } headerStrings.add(line); } else if (line.startsWith("#")) { + if (!foundHeaderVersion) { + throw new CodecLineParsingException("We never saw a header line specifying VCF version"); + } return createHeader(headerStrings, line); } else { @@ -107,7 +117,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { */ public int createHeader(List headerStrings, String line) { headerStrings.add(line); - header = VCFReaderUtils.createHeader(headerStrings, VCFHeaderVersion.VCF4_0); + header = VCFReaderUtils.createHeader(headerStrings, this.version); // load the parsing fields Set headerLines = header.getMetaData(); @@ -226,12 +236,14 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { String[] split = str.split(","); for (String substring : split) { VCFHeaderLineType type = infoFields.get(key); - objects.add(type != null ? type.convert(substring,VCFCompoundHeaderLine.SupportedHeaderLineType.INFO) : substring); +// objects.add(type != null ? type.convert(substring,VCFCompoundHeaderLine.SupportedHeaderLineType.INFO) : substring); + objects.add(substring); } value = objects; } else { VCFHeaderLineType type = infoFields.get(key); - value = type != null ? type.convert(str,VCFCompoundHeaderLine.SupportedHeaderLineType.INFO) : str; + //value = type != null ? type.convert(str,VCFCompoundHeaderLine.SupportedHeaderLineType.INFO) : str; + value = str; } //System.out.printf("%s %s%n", key, value); } else { @@ -269,17 +281,16 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { * @param qualString the quality string * @return return a double */ - private static Double parseQual(String qualString) { - // todo -- remove double once we deal with annoying VCFs from 1KG - if ( qualString.equals(".") ) - return VariantContext.NO_NEG_LOG_10PERROR; - else { - double q = Double.valueOf(qualString); - if ( q == -1 ) - return VariantContext.NO_NEG_LOG_10PERROR; - else - return Double.valueOf(qualString) / 10; - } + private Double parseQual(String qualString) { + if (qualString.equals(VCFConstants.MISSING_VALUE_v4)) + return VariantContext.NO_NEG_LOG_10PERROR; + else { + double q = Double.valueOf(qualString); + if ( q == -1 ) + return VariantContext.NO_NEG_LOG_10PERROR; + else + return Double.valueOf(qualString) / 10; + } } /** @@ -346,7 +357,14 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { Set fFields; // a PASS is simple (no filters) - if ( filterString.equals("PASS") ) { + String passString = VCFConstants.PASSES_FILTERS_v3; + if (this.version == VCFHeaderVersion.VCF4_0) + passString = VCFConstants.PASSES_FILTERS_v4; + + if ( filterString.equals(passString) ) { + return null; + } + if ( filterString.equals(VCFConstants.UNFILTERED)) { return null; } // else do we have the filter string cached? @@ -373,6 +391,7 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { * parse out the VCF line * * @param parts the parts split up + * @param parseGenotypes whether to parse genotypes or not * @return a variant context object */ private VariantContext parseVCFLine(String[] parts, boolean parseGenotypes) { @@ -398,7 +417,8 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { // find out our current location, and clip the alleles down to their minimum length Pair> locAndAlleles; if ( !isSingleNucleotideEvent(alleles) ) { - attributes.put(ORIGINAL_ALLELE_LIST,alleles); + if (this.version != VCFHeaderVersion.VCF4_0) + throw new VCFParserException("Saw Indel/non SNP event on a VCF 3.3 or earlier file. Please convert file to VCF4.0 with VCFTools before using the GATK on it"); locAndAlleles = clipAlleles(contig, pos, ref, alleles); } else { locAndAlleles = new Pair>(GenomeLocParser.createGenomeLoc(contig, pos), alleles); @@ -469,8 +489,14 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { if (nGTKeys >= 1) { gtAttributes = new HashMap(nGTKeys - 1); for (int i = 0; i < nGTKeys; i++) { - if (i >= GTValueSplitSize) - gtAttributes.put(genotypeKeyArray[i],"."); + if (i >= GTValueSplitSize) { + if (genotypeKeyArray[i].equals("GQ")) + GTQual = parseQual(VCFConstants.MISSING_VALUE_v4); + else if (genotypeKeyArray[i].equals("FT")) // deal with genotype filters here + genotypeFilters = parseFilters(VCFConstants.MISSING_VALUE_v4); + else + gtAttributes.put(genotypeKeyArray[i],VCFConstants.MISSING_VALUE_v4); + } else if (genotypeKeyArray[i].equals("GT")) if (i != 0) throw new VCFParserException("Saw GT at position " + i + ", it must be at the first position for genotypes. At location = " + locAndAlleles.first); @@ -480,9 +506,11 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { GTQual = parseQual(GTValueArray[i]); else if (genotypeKeyArray[i].equals("FT")) // deal with genotype filters here genotypeFilters = parseFilters(GTValueArray[i]); - else + else { + if (this.version != VCFHeaderVersion.VCF4_0 && GTValueArray[i].equals(VCFConstants.MISSING_GENOTYPE_QUALITY_v3)) + GTValueArray[i] = VCFConstants.MISSING_VALUE_v4; gtAttributes.put(genotypeKeyArray[i], GTValueArray[i]); - + } } // validate the format fields validateFields(gtAttributes.keySet(), new ArrayList(formatFields.keySet())); @@ -517,7 +545,22 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { static Pair> clipAlleles(String contig, long position, String ref, List unclippedAlleles) { List newAlleleList = new ArrayList(); - // find the preceeding string common to all alleles and the reference +/* //+ + boolean clipping = true; + int forwardClipping = 0; + while(clipping) { + for (Allele a : unclippedAlleles) { + if (a.length()-forwardClipping == 0) + clipping = false; + else if (a.getBases()[forwardClipping] != ref.getBytes()[forwardClipping]) + clipping = false; + if (clipping) forwardClipping++; + } + + + } + //- + */ // find the preceeding string common to all alleles and the reference boolean clipping = true; for (Allele a : unclippedAlleles) if (a.length() < 1 || (a.getBases()[0] != ref.getBytes()[0])) { @@ -576,6 +619,19 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { return name; } + public static interface LineTransform { + public String lineTransform(String line); + } + + public LineTransform getTransformer() { + return transformer; + } + + public void setTransformer(LineTransform transformer) { + this.transformer = transformer; + } + + /** * set the name of this codec * @param name @@ -583,4 +639,6 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { public void setName(String name) { this.name = name; } + + } diff --git a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java index f6945c7d2..573668020 100755 --- a/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java +++ b/java/src/org/broadinstitute/sting/oneoffprojects/walkers/VCF4WriterTestWalker.java @@ -62,7 +62,7 @@ public class VCF4WriterTestWalker extends RodWalker { protected static String line = null; final TreeSet samples = new TreeSet(); - VCF4Codec vcf4codec = new VCF4Codec(); + VCFCodec vcf4codec = new VCFCodec(); /** @@ -86,32 +86,25 @@ public class VCF4WriterTestWalker extends RodWalker { for( final ReferenceOrderedDataSource source : dataSources ) { final RMDTrack rod = source.getReferenceOrderedData(); if(rod.getName().equalsIgnoreCase(INPUT_ROD_NAME)) { - if (rod.getType().equals(vcf4codec.getClass())) { - try { - AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath())); - int lineNumber = vcf4codec.readHeader(lineReader); - out.printf("Read %d header lines%n", lineNumber); + try { + AsciiLineReader lineReader = new AsciiLineReader(new FileInputStream(rod.getFile().getAbsolutePath())); + int lineNumber = vcf4codec.readHeader(lineReader); + out.printf("Read %d header lines%n", lineNumber); - header = vcf4codec.getHeader(VCFHeader.class); - } - catch (FileNotFoundException e ) { - throw new StingException(e.getMessage()); - } - } else { - final VCFReader reader = new VCFReader(rod.getFile()); - final Set vcfSamples = reader.getHeader().getGenotypeSamples(); - samples.addAll(vcfSamples); - reader.close(); - header = new VCFHeader(hInfo, samples); + header = vcf4codec.getHeader(VCFHeader.class); } + catch (FileNotFoundException e ) { + throw new StingException(e.getMessage()); + } + + final Set vcfSamples = header.getGenotypeSamples(); + samples.addAll(vcfSamples); + } } - if ( header != null ) - header.setVersion(VCFHeaderVersion.VCF4_0); - vcfWriter.writeHeader(header); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java index 6f8d0384c..1c7ba925f 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/ProduceBeagleInputWalker.java @@ -86,10 +86,10 @@ public class ProduceBeagleInputWalker extends RodWalker { if ( vc_eval == null || vc_eval.isFiltered() ) return 0; - if (vc_eval.getType() != VariantContext.Type.SNP) + if (!vc_eval.isSNP()) return 0; - if (vc_eval.getAlleles().size()!= 2) + if (!vc_eval.isBiallelic()) return 0; // output marker ID to Beagle input file diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index bb5516e63..98c74c8d8 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.genotype.vcf; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.utils.GenomeLocParser; @@ -61,7 +62,7 @@ public class VCFUtils { List dataSources = toolkit.getRodDataSources(); for ( ReferenceOrderedDataSource source : dataSources ) { RMDTrack rod = source.getReferenceOrderedData(); - if ( rod.getRecordType().equals(VCFRecord.class) ) { + if ( rod.getRecordType().equals(VariantContext.class) ) { fields.addAll(rod.getHeader(VCFHeader.class).getMetaData()); } } @@ -139,7 +140,7 @@ public class VCFUtils { if ( map.containsKey(key) ) { VCFHeaderLine other = map.get(key); - if ( line.equals(other) || line.getVersion() != VCFHeaderVersion.VCF4_0 ) // todo -- remove me when everything is 4 + if ( line.equals(other) ) continue; else if ( ! line.getClass().equals(other.getClass()) ) throw new IllegalStateException("Incompatible header types: " + line + " " + other ); @@ -173,7 +174,6 @@ public class VCFUtils { if ( logger != null ) logger.warn(String.format("Ignoring header line already in map: this header line = " + line + " already present header = " + other)); } } else { - line.setVersion(VCFHeaderVersion.VCF4_0); // todo -- remove this when we finally have vcf3/4 unified headers map.put(key, line); //System.out.printf("Adding header line %s%n", line); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index ee49c95e9..51cfb044f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -201,9 +201,6 @@ public class VCFWriter { String paddingBases = ""; String trailingBases = ""; - ArrayList originalAlleles = (ArrayList)vc.getAttribute("ORIGINAL_ALLELE_LIST"); - - // search for reference allele and find trailing and padding at the end. // See first if all alleles have a base encoding (ie no deletions) // if so, add one common base to all alleles (reference at this location) @@ -217,50 +214,12 @@ public class VCFWriter { refString = new String(a.getBases()); } - if (originalAlleles != null) { - // if original allele info was filled when reading from a VCF4 file, - // determine whether there was a padding base(s) at the beginning and end. - byte previousBase = 0; - int cnt=0; - boolean firstBaseCommonInAllAlleles = true; - Allele originalReferenceAllele = null; - for (Allele originalAllele : originalAlleles){ - // if first base of allele is common to all of them, there may have been a common base deleted from all - byte firstBase = originalAllele.getBases()[0]; - if (cnt > 0) { - if (firstBase != previousBase) - firstBaseCommonInAllAlleles = false; - } - previousBase = firstBase; - cnt++; - - if (originalAllele.isReference()) - originalReferenceAllele = originalAllele; - } - - numTrailingBases = (firstBaseCommonInAllAlleles)? 1:0; - position -= numTrailingBases; - - if (originalReferenceAllele == null) - throw new IllegalStateException("At least one Allele must be reference"); - - String originalRef = new String(originalReferenceAllele.getBases()); - numPaddingBases = originalRef.length()-refString.length()-numTrailingBases; - - if (numTrailingBases > 0) { - trailingBases = originalRef.substring(0,numTrailingBases); - } - if (numPaddingBases > 0) - paddingBases = originalRef.substring(originalRef.length()-numPaddingBases,originalRef.length()); - } - else { - // Case where there is no original allele info, e.g. when reading from VCF3.3 or when vc was produced by the GATK. - if (!hasBasesInAllAlleles) { - trailingBases = new String(refBases); - numTrailingBases = 1; - position--; - } + // Case where there is no original allele info, e.g. when reading from VCF3.3 or when vc was produced by the GATK. + if (!hasBasesInAllAlleles) { + trailingBases = new String(refBases); + numTrailingBases = 1; + position--; } @@ -295,6 +254,7 @@ public class VCFWriter { // this needs to be done in case all samples are no-calls vcfGenotypeAttributeKeys.add(VCFConstants.GENOTYPE_KEY); } + String genotypeFormatString = Utils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, vcfGenotypeAttributeKeys); List genotypeObjects = new ArrayList(vc.getGenotypes().size()); @@ -378,10 +338,6 @@ public class VCFWriter { if ( key.equals("ID") ) continue; - // Original alleles are not for reporting but only for internal bookkeeping - if (key.equals("ORIGINAL_ALLELE_LIST")) - continue; - String outputValue = formatVCFField(key, elt.getValue()); if ( outputValue != null ) infoFields.put(key, outputValue); @@ -587,14 +543,21 @@ public class VCFWriter { Set keys = new HashSet(); boolean sawGoodQual = false; + boolean sawGenotypeFilter = false; for ( Genotype g : vc.getGenotypes().values() ) { keys.addAll(g.getAttributes().keySet()); if ( g.hasNegLog10PError() ) sawGoodQual = true; + if (g.isFiltered() && g.isCalled()) + sawGenotypeFilter = true; } if ( sawGoodQual ) keys.add(VCFConstants.GENOTYPE_QUALITY_KEY); + + if (sawGenotypeFilter) + keys.add(VCFConstants.GENOTYPE_FILTER_KEY); + return Utils.sorted(new ArrayList(keys)); } diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java index 81aa76909..0a222e27f 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4UnitTest.java @@ -205,20 +205,20 @@ public class VCF4UnitTest extends BaseTest { String clippedAlleleLine = "20\t14370\trs6054257\tGGG\tG\t29\tPASS\tNS=3;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|1:48:1:51,51\t0|0:48:1:51,51\t0|0:48:1:51,51"; @Test public void testClippedAllelesAddedAsAnnotation() { - TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); + // TODO - either modify or kill this test +/* TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); VariantContext context = (VariantContext)testSetup.codec.decode(clippedAlleleLine); Assert.assertTrue(context.hasAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST)); List alleles = (List)context.getAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST); Assert.assertEquals("Expected allele list of 2, got " + alleles.size(),2,alleles.size()); Assert.assertTrue(alleles.get(0).basesMatch("GGG")); Assert.assertTrue(alleles.get(1).basesMatch("G")); - } + */ } // test that when we don't clip the alleles, we don't see the annotation @Test public void testNoClippedAllelesNoAddedAsAnnotation() { TestSetup testSetup = new TestSetup().invoke(vcfGenotypeFile); VariantContext context = (VariantContext)testSetup.codec.decode(twoFewInfoLine); - Assert.assertTrue(!context.hasAttribute(VCF4Codec.ORIGINAL_ALLELE_LIST)); } // test that we're getting the right genotype for a multi-base polymorphism diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java index 696ee9526..8eabe0d6d 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/VariantsToVCFIntegrationTest.java @@ -70,11 +70,11 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingVCFInput() { List md5 = new ArrayList(); - md5.add("b423141ca600d581dc73e9b3dff4f782"); + md5.add("919eb499bfcc980a14825a0265e575e3"); WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-R " + oneKGLocation + "reference/human_b36_both.fasta" + - " -B variant,VCF," + validationDataLocation + "complexExample.vcf" + + " -B variant,VCF," + validationDataLocation + "complexExample.vcf4" + " -T VariantsToVCF" + " -o %s", 1, // just one output file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index 093a1fdaa..3124e805b 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -16,9 +16,9 @@ public class FastaAlternateReferenceIntegrationTest extends WalkerTest { executeTest("testFastaReference", spec1); WalkerTestSpec spec2 = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B indels,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", + "-T FastaAlternateReferenceMaker -R " + oneKGLocation + "reference/human_b36_both.fasta -B indels,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 -B snpmask,dbsnp,/humgen/gsa-scr1/GATK_Data/dbsnp_129_b36.rod -L 1:10,075,000-10,075,380;1:10,093,447-10,093,847;1:10,271,252-10,271,452 -o %s", 1, - Arrays.asList("3a48986c3832a768b478c3e95f994b0f")); + Arrays.asList("a7bfc673eaa202a668c1424a933671ad")); executeTest("testFastaAlternateReferenceIndels", spec2); WalkerTestSpec spec4 = new WalkerTestSpec( diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index 1d48e0b7e..ac1643ced 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -window 10 -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("fa522e5b8c89c5a465ca03dd88bc2b8f")); + Arrays.asList("2385975931cd06fca452655bebf5c379")); executeTest("test clustered SNPs", spec); } @@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testMask() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -mask foo -B mask,VCF," + validationDataLocation + "vcfexample2.vcf -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("4d8a184417289c325410a5e545d9fc80")); + Arrays.asList("6743efa09985206819adcf8eaf5ff936")); executeTest("test mask", spec); } @@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("d48062f139d853a61e4f0bfb76bd695d")); + Arrays.asList("e054f57d3794ce8a57cae92f16886cf0")); executeTest("test filter #1", spec); } @@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("791f89c6da7bdde79913754d9ddb72eb")); + Arrays.asList("db09a1f7ff523087ea0f6a56f3febfe7")); executeTest("test filter #2", spec); } @@ -56,7 +56,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilterWithSeparateNames() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --filterName ABF -filter 'AlleleBalance < 70.0' --filterName FSF -filter 'FisherStrand == 1.4' -B variant,VCF," + validationDataLocation + "vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("174caf880477f3edf68fe6722f227017")); + Arrays.asList("edbd505d8d55b4ba71f99d7006871db0")); executeTest("test filter with separate names #2", spec); } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 31839e320..0a6f2e660 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("9ebe61dcb5112e7e745412d7767d101a")); + Arrays.asList("e3f402fbbb6bbb4f60b1aa0549989d85")); executeTest("testConfidence2", spec2); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index b441ff453..d9eb04bc1 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -29,9 +29,9 @@ public class IndelRealignerIntegrationTest extends WalkerTest { @Test public void testRealignerKnownsOnly() { - String[] md5s = {"7084d4e543bc756730ab306768028530", "1091436c40d5ba557d85662999cc0c1d"}; + String[] md5s = {"83bc0c9a7d8872b552c6cbd994672c3b", "92bf331b672a03d63c26e4d9d3615f5b"}; WalkerTestSpec spec = new WalkerTestSpec( - "-T IndelRealigner -noPG -LOD 1.0 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10076000 -compress 1 -targetIntervals " + validationDataLocation + "NA12878.indels.intervals -B knownIndels,VCF," + validationDataLocation + "NA12878.indels.vcf -O %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe -knownsOnly", + "-T IndelRealigner -noPG -LOD 1.0 -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10023000-10076000 -compress 1 -targetIntervals " + validationDataLocation + "NA12878.indels.intervals -B knownIndels,VCF," + validationDataLocation + "NA12878.indels.vcf4 -O %s -stats %s --sortInCoordinateOrderEvenThoughItIsHighlyUnsafe -knownsOnly", 2, Arrays.asList(md5s)); executeTest("test realigner known indels only", spec); diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java index 9a3ff8d3d..9722071c2 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/PickSequenomProbesIntegrationTest.java @@ -8,10 +8,10 @@ import java.util.Arrays; public class PickSequenomProbesIntegrationTest extends WalkerTest { @Test public void testProbes() { - String testVCF = validationDataLocation + "complexExample.vcf"; + String testVCF = validationDataLocation + "complexExample.vcf4"; String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T PickSequenomProbes -L 1:10,000,000-11,000,000 -B input,VCF,"+testVCF+" -o %s"; WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("0f356354a4a78ff62b2848431ec11262")); + Arrays.asList("46d2acea95d36725db63af61ee963ce6")); executeTest("Test probes", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverterIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverterIntegrationTest.java index 22e084659..675aa0fa2 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverterIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/sequenom/SequenomValidationConverterIntegrationTest.java @@ -11,7 +11,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest { String testPedFile = validationDataLocation + "Sequenom_Test_File.txt"; String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s"; WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("2e273d400b4b69e39c34e465b200b192")); + Arrays.asList("2dab4630f40b76c0762de83fcbb60d09")); executeTest("Test SNPs", spec); } @@ -20,7 +20,7 @@ public class SequenomValidationConverterIntegrationTest extends WalkerTest { String testPedFile = validationDataLocation + "pilot2_indel_validation.renamed.ped"; String testArgs = "-R "+oneKGLocation+"reference/human_b36_both.fasta -T SequenomValidationConverter -B sequenom,Plink,"+testPedFile+" -o %s"; WalkerTest.WalkerTestSpec spec = new WalkerTestSpec(testArgs, 1, - Arrays.asList("e15a63fc49ec25ebcae60a28a5f3f830")); + Arrays.asList("845b9a15ac947052ddded5b79228e5ec")); executeTest("Test Indels", spec); } } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index cdcd18fe0..65f70f7b8 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -37,7 +37,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { @Test public void testVariantRecalibrator() { HashMap e = new HashMap(); - e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "1f7adb28007d77e65c02112480f56663" ); + e.put( validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf", "a1acb90f0695cbe33c290403113ac3e1" ); for ( Map.Entry entry : e.entrySet() ) { String vcf = entry.getKey(); diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java index 0f920c27f..7bd2dabeb 100644 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/validation/RodSystemValidationIntegrationTest.java @@ -35,9 +35,9 @@ public class RodSystemValidationIntegrationTest extends WalkerTest { public void testComplexVCFPileup() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString1KG() + " -B eval,VCF," + validationDataLocation + "MultiSample.vcf" + - " -B eval2,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf" + " -B eval2,VCF," + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4" , 1, - Arrays.asList("c775c995c9fc09c66db51a694511d07b")); + Arrays.asList("216b5de6f58be6cf3286ed5261772904")); executeTest("testComplexVCFPileup", spec); } diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelectIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelectIntegrationTest.java index b6757da0b..e4f6ee26e 100755 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelectIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VariantSelectIntegrationTest.java @@ -41,7 +41,7 @@ public class VariantSelectIntegrationTest extends WalkerTest { public void testVCFSelect1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'AF == 0.50' -L 1:10001290-10048590 ", 1, - Arrays.asList("8b358e0cfa35de022a37360a6f28a839")); + Arrays.asList("8fd97a99174483920f84aaf54eeb9dd9")); executeTest("testVCFSelect1", spec); } @@ -49,7 +49,7 @@ public class VariantSelectIntegrationTest extends WalkerTest { public void testVCFSelect2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -L 1:10001290-10048590 ", 1, - Arrays.asList("8e991b9d6d610c8f89c8557756fc8e34")); + Arrays.asList("ecb61d798ff5e0b003d2986500bf4462")); executeTest("testVCFSelect2", spec); } @@ -57,7 +57,7 @@ public class VariantSelectIntegrationTest extends WalkerTest { public void testVCFSelectOr() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6' -match 'AF == 0.50' -L 1:10001290-10048590 ", 1, - Arrays.asList("7bd064c8d8bf5389fcd0b78a7c32b599")); + Arrays.asList("7c23412a0abe28057a78cd532b752019")); executeTest("testVCFSelectOr", spec); } @@ -65,7 +65,7 @@ public class VariantSelectIntegrationTest extends WalkerTest { public void testVCFSelectAnd() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF," + validationDataLocation + "vcfexample3.vcf -match 'HomopolymerRun == 6 && AF == 0.50' -L 1:10001290-10048590 ", 1, - Arrays.asList("5af565836fa926feaa130715b93188bc")); + Arrays.asList("71cc18348a9d5de7270613bedb79880a")); executeTest("testVCFSelectAnd", spec); } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java index a2c2b4027..a6224974c 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterUnitTest.java @@ -1,8 +1,13 @@ package org.broadinstitute.sting.utils.genotype.vcf; +import org.broad.tribble.util.AsciiLineReader; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele; +import org.broadinstitute.sting.gatk.contexts.variantcontext.Genotype; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; +import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.StingException; import org.broadinstitute.sting.utils.GenomeLocParser; import org.junit.Assert; @@ -10,7 +15,9 @@ import org.junit.Test; import org.junit.BeforeClass; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.IOException; import java.util.*; import net.sf.picard.reference.IndexedFastaSequenceFile; @@ -34,30 +41,47 @@ public class VCFWriterUnitTest extends BaseTest { GenomeLocParser.setupRefContigOrdering(seq); } - @Test - public void emptyTest() { - } - /** test, using the writer and reader, that we can output and input a VCF file without problems */ - //@Test + @Test public void testBasicWriteAndRead() { VCFHeader header = createFakeHeader(metaData,additionalColumns); VCFWriter writer = new VCFWriter(fakeVCFFile); writer.writeHeader(header); - writer.addRecord(createVCFRecord(header)); - writer.addRecord(createVCFRecord(header)); + writer.add(createVC(header),"A".getBytes()); + writer.add(createVC(header),"A".getBytes()); writer.close(); - VCFReader reader = new VCFReader(fakeVCFFile); - int counter = 0; - // validate what we're reading in - validateHeader(reader.getHeader()); - for (VCFRecord rec : reader) { - counter++; + VCFCodec reader = new VCFCodec(); + AsciiLineReader lineReader; + + try { + lineReader = new AsciiLineReader(new FileInputStream(fakeVCFFile)); + int lineNumber = reader.readHeader(lineReader); } - Assert.assertEquals(2,counter); - reader.close(); - new File(fakeVCFFile + TribbleRMDTrackBuilder.linearIndexExtension).delete(); - //fakeVCFFile.delete(); + catch (FileNotFoundException e ) { + throw new StingException(e.getMessage()); + } + + int counter = 0; + + // validate what we're reading in + validateHeader(reader.getHeader(VCFHeader.class)); + try { + while(true) { + String line = lineReader.readLine(); + if (line == null) + break; + + VariantContext vc = (VariantContext)reader.decode(line); + counter++; + } + Assert.assertEquals(2,counter); + new File(fakeVCFFile + TribbleRMDTrackBuilder.linearIndexExtension).delete(); + fakeVCFFile.delete(); + } + catch (IOException e ) { + throw new StingException(e.getMessage()); + } + } /** @@ -67,8 +91,8 @@ public class VCFWriterUnitTest extends BaseTest { * @return a fake VCF header */ public static VCFHeader createFakeHeader(Set metaData, Set additionalColumns) { - metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString(),VCFHeaderVersion.VCF3_3)); - metaData.add(new VCFHeaderLine("two", "2",VCFHeaderVersion.VCF3_3)); + metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString())); + metaData.add(new VCFHeaderLine("two", "2")); additionalColumns.add("FORMAT"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); @@ -80,23 +104,29 @@ public class VCFWriterUnitTest extends BaseTest { * @param header the VCF header * @return a VCFRecord */ - private VCFRecord createVCFRecord(VCFHeader header) { - List altBases = new ArrayList(); - altBases.add(new VCFGenotypeEncoding("C")); - altBases.add(new VCFGenotypeEncoding("D1")); - Map infoFields = new HashMap(); - infoFields.put("DP","50"); + private VariantContext createVC(VCFHeader header) { - List gt = new ArrayList(); + GenomeLoc loc = GenomeLocParser.createGenomeLoc("chr1",1); + List alleles = new ArrayList(); + Set filters = null; + Map attributes = new HashMap(); + Map genotypes = new HashMap(); + + alleles.add(Allele.create("A",true)); + alleles.add(Allele.create("CC",false)); + alleles.add(Allele.create("-",false)); + + attributes.put("DP","50"); for (String name : header.getGenotypeSamples()) { - List myAlleles = new ArrayList(); - myAlleles.add(new VCFGenotypeEncoding("C")); - myAlleles.add(new VCFGenotypeEncoding("D1")); - VCFGenotypeRecord rec = new VCFGenotypeRecord(name, myAlleles, VCFGenotypeRecord.PHASE.PHASED); - rec.setField("bb", "0"); - gt.add(rec); + Map gtattributes = new HashMap(); + gtattributes.put("BB","1"); + Genotype gt = new Genotype(name,alleles.subList(1,2),0,null,gtattributes,true); + + genotypes.put(name,gt); + } - return new VCFRecord("A","chr1",1,"RANDOM",altBases,0,".",infoFields, "GT:AA",gt); + return new VariantContext("RANDOM",loc, alleles, genotypes, 0, filters, attributes); + }