diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java index 708d87d6c..bc6a2b44d 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/RodVCF.java @@ -90,7 +90,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, public boolean hasStrandBias() { assertNotNull(); - return this.mCurrentRecord.getInfoValues().containsKey("SB"); + return this.mCurrentRecord.getInfoValues().containsKey(VCFRecord.STRAND_BIAS_KEY); } /** @@ -99,7 +99,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, * @return StrandBias with the stored slod */ public double getStrandBias() { - return hasStrandBias() ? Double.valueOf(this.mCurrentRecord.getInfoValues().get("SB")) : 0.0; + return hasStrandBias() ? Double.valueOf(this.mCurrentRecord.getInfoValues().get(VCFRecord.STRAND_BIAS_KEY)) : 0.0; } /** @return the VARIANT_TYPE of the current variant */ @@ -291,8 +291,7 @@ public class RodVCF extends BasicReferenceOrderedDatum implements VariationRod, } public RodVCF next() { - mCurrentRecord = mReader.next(); - return new RodVCF(name, mCurrentRecord, mReader); + return new RodVCF(name, mReader.next(), mReader); } public void remove() { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java index 12b02db72..c72de0155 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java @@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; public class DepthOfCoverage extends StandardVariantAnnotation { @@ -12,9 +13,9 @@ public class DepthOfCoverage extends StandardVariantAnnotation { return String.format("%d", depth); } - public String getKeyName() { return "DP"; } + public String getKeyName() { return VCFRecord.DEPTH_KEY; } - public String getDescription() { return "DP,1,Integer,\"Total Depth\""; } + public String getDescription() { return getKeyName() + ",1,Integer,\"Total Depth\""; } public boolean useZeroQualityReads() { return false; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java index 7c684d3ae..b1d86115b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java @@ -4,6 +4,8 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.utils.genotype.vcf.VCFRecord; + import net.sf.samtools.SAMRecord; import java.util.List; @@ -20,9 +22,9 @@ public class RMSMappingQuality extends StandardVariantAnnotation { return String.format("%.2f", rms); } - public String getKeyName() { return "MQ"; } + public String getKeyName() { return VCFRecord.RMS_MAPPING_QUALITY_KEY; } - public String getDescription() { return "MQ,1,Float,\"RMS Mapping Quality\""; } + public String getDescription() { return getKeyName() + ",1,Float,\"RMS Mapping Quality\""; } public boolean useZeroQualityReads() { return true; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index b43b0bc97..204acc37a 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -117,10 +117,11 @@ public class VariantAnnotator extends RodWalker { } // setup the header fields - Map hInfo = new HashMap(); - hInfo.put("format", VCFWriter.VERSION); - hInfo.put("source", "VariantAnnotator"); - hInfo.put("reference", this.getToolkit().getArguments().referenceFile.getName()); + Set hInfo = new HashSet(); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.add("source=VariantAnnotator"); + hInfo.add("annotatorReference=" + getToolkit().getArguments().referenceFile.getName()); + hInfo.addAll(getVCFAnnotationDescriptions(requestedAnnotations)); vcfHeader = new VCFHeader(hInfo, samples); vcfWriter = new VCFWriter(vcfHeader, VCF_OUT); @@ -172,6 +173,40 @@ public class VariantAnnotator extends RodWalker { return 1; } + // option #1: don't specify annotations to be used: standard annotations are used by default + public static Set getVCFAnnotationDescriptions() { + if ( standardAnnotations == null ) + determineAllAnnotations(); + + TreeSet descriptions = new TreeSet(); + for ( VariantAnnotation annotation : standardAnnotations.values() ) + descriptions.add("INFO=" + annotation.getDescription()); + + return descriptions; + } + + // option #2: specify that all possible annotations be used + public static Set getAllVCFAnnotationDescriptions() { + if ( standardAnnotations == null ) + determineAllAnnotations(); + + TreeSet descriptions = new TreeSet(); + for ( VariantAnnotation annotation : allAnnotations.values() ) + descriptions.add("INFO=" + annotation.getDescription()); + + return descriptions; + } + + // option #3: specify the exact annotations to be used + public static Set getVCFAnnotationDescriptions(Collection annotations) { + + TreeSet descriptions = new TreeSet(); + for ( VariantAnnotation annotation : annotations ) + descriptions.add("INFO=" + annotation.getDescription()); + + return descriptions; + } + // option #1: don't specify annotations to be used: standard annotations are used by default public static Map getAnnotations(ReferenceContext ref, AlignmentContext context, Variation variation) { if ( standardAnnotations == null ) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java index 2b1501f5c..2fd6bfe47 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceWalker.java @@ -61,16 +61,6 @@ public class CallsetConcordanceWalker extends RodWalker { logger.debug("Uniquified sample mapping: " + entry.getKey().first + "/" + entry.getKey().second + " -> " + entry.getValue()); } - // set up the header fields - Map hInfo = new HashMap(); - hInfo.put("format", VCFWriter.VERSION); - hInfo.put("source", "CallsetConcordance"); - hInfo.put("reference", getToolkit().getArguments().referenceFile.getName()); - hInfo.put("explanation", "This file represents a concordance test of various call sets - NOT the output from a multi-sample caller"); - VCFHeader header = new VCFHeader(hInfo, samples); - - vcfWriter = new VCFWriter(header, OUTPUT); - // initialize requested concordance types requestedTypes = new ArrayList(); if (TYPES != null) { @@ -110,6 +100,25 @@ public class CallsetConcordanceWalker extends RodWalker { throw new StingException("The requested concordance type (" + requestedType + ") isn't a valid concordance option"); } } + + // set up the header fields + Set hInfo = new HashSet(); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.add("source=CallsetConcordance"); + hInfo.add("note=\"This file represents a concordance test of various call sets - NOT the output from a multi-sample caller\""); + hInfo.addAll(getVCFAnnotationDescriptions(requestedTypes)); + VCFHeader header = new VCFHeader(hInfo, samples); + + vcfWriter = new VCFWriter(header, OUTPUT); + } + + public static Set getVCFAnnotationDescriptions(Collection types) { + + TreeSet descriptions = new TreeSet(); + for ( ConcordanceType type : types ) + descriptions.add("INFO=" + type.getInfoDescription()); + + return descriptions; } public Integer map(RefMetaDataTracker rodData, ReferenceContext ref, AlignmentContext context) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java index eb8e993cd..f28408948 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/ConcordanceType.java @@ -11,4 +11,5 @@ public interface ConcordanceType { public void initialize(Map args, Set samples); public String computeConcordance(Map samplesToRecords, ReferenceContext ref); public String getInfoName(); + public String getInfoDescription(); } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java index f1daf55d0..f45c15b6b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/IndelSubsets.java @@ -101,5 +101,6 @@ public class IndelSubsets implements ConcordanceType { return Math.max(leftRun, rightRun); } - public String getInfoName() { return "IndelSubsets"; } + public String getInfoName() { return "IndelSubsets"; } + public String getInfoDescription() { return getInfoName() + ",1,String,\"Indel-related subsets\""; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java index bd90f47f8..7db2cb5d3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/NWayVenn.java @@ -39,4 +39,5 @@ public class NWayVenn implements ConcordanceType { } public String getInfoName() { return "NwayVenn"; } + public String getInfoDescription() { return getInfoName() + ",1,String,\"N-way Venn split\""; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java index e0e1af943..0ccb0bfb3 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SNPGenotypeConcordance.java @@ -95,4 +95,5 @@ public class SNPGenotypeConcordance implements ConcordanceType { } public String getInfoName() { return "SnpConcordance"; } + public String getInfoDescription() { return getInfoName() + ",1,String,\"SNP concordance test\""; } } \ No newline at end of file diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java index fa14a3a20..642aba236 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/concordance/SimpleVenn.java @@ -59,4 +59,5 @@ public class SimpleVenn implements ConcordanceType { } public String getInfoName() { return "Venn"; } + public String getInfoDescription() { return getInfoName() + ",1,String,\"2-way Venn split\""; } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java index 97ff0b63b..0cabd3d1b 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationWalker.java @@ -43,10 +43,10 @@ public class VariantFiltrationWalker extends RodWalker { private void initializeVcfWriter(RodVCF rod) { // setup the header fields - Map hInfo = new HashMap(); - hInfo.put("format", VCFWriter.VERSION); - hInfo.put("source", "VariantFiltration"); - hInfo.put("reference", this.getToolkit().getArguments().referenceFile.getName()); + Set hInfo = new HashSet(); + hInfo.addAll(VCFUtils.getHeaderFields(getToolkit())); + hInfo.add("source=" + "VariantFiltration"); + hInfo.add("reference=" + getToolkit().getArguments().referenceFile.getName()); VCFHeader header = new VCFHeader(hInfo, rod.getHeader().getGenotypeSamples()); writer = new VCFWriter(header, out); @@ -151,6 +151,10 @@ public class VariantFiltrationWalker extends RodWalker { filterString.append(";" + rec.getFilterString()); rec.setFilterString(filterString.toString()); } + // otherwise, if it's not already filtered, set it to "passing filters" + else if ( !rec.isFiltered() ) { + rec.setFilterString(VCFRecord.PASSES_FILTERS); + } if ( writer == null ) initializeVcfWriter(variant); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 9e4b670c2..379d4f329 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -133,17 +133,31 @@ public class UnifiedGenotyper extends LocusWalker headerInfo = new HashSet(); + if ( UAC.ALL_ANNOTATIONS ) + headerInfo.addAll(VariantAnnotator.getAllVCFAnnotationDescriptions()); + else + headerInfo.addAll(VariantAnnotator.getVCFAnnotationDescriptions()); + headerInfo.add("INFO=AF,1,Float,\"Allele Frequency\""); + headerInfo.add("INFO=NS,1,Integer,\"Number of Samples With Data\""); + if ( !UAC.NO_SLOD ) + headerInfo.add("INFO=SB,1,Float,\"Strand Bias\""); + // create the output writer stream if ( VARIANTS_FILE != null ) writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), VARIANTS_FILE, "UnifiedGenotyper", this.getToolkit().getArguments().referenceFile.getName(), - samples); + samples, + headerInfo); else writer = GenotypeWriterFactory.create(UAC.VAR_FORMAT, GenomeAnalysisEngine.instance.getSAMFileHeader(), out, "UnifiedGenotyper", this.getToolkit().getArguments().referenceFile.getName(), - samples); + samples, + headerInfo); + callsMetrics = new CallMetrics(); } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java index 1719cdeb3..84bc2e1e8 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCF.java @@ -50,18 +50,17 @@ public class VariantsToVCF extends RefWalker { } public static VCFHeader getHeader(GATKArgumentCollection args, Set sampleNames) { - Map metaData = new HashMap(); - Set additionalColumns = new HashSet(); // Don't output the data for now because it kills our unit test MD5s and is optional // TODO - figure out what to do here //Calendar cal = Calendar.getInstance(); //metaData.put("fileDate", String.format("%d%02d%02d", cal.get(Calendar.YEAR), cal.get(Calendar.MONTH), cal.get(Calendar.DAY_OF_MONTH))); - metaData.put("format", "VCRv3.2"); - metaData.put("source", "VariantsToVCF"); - metaData.put("reference", args.referenceFile.getAbsolutePath()); + Set metaData = new HashSet(); + metaData.add("source=VariantsToVCF"); + metaData.add("reference=" + args.referenceFile.getAbsolutePath()); + Set additionalColumns = new HashSet(); additionalColumns.add("FORMAT"); additionalColumns.addAll(sampleNames); diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java index 86b0a6cab..f42dc83d1 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/vcftools/VCFSubsetWalker.java @@ -29,13 +29,12 @@ public class VCFSubsetWalker extends RefWalker, VCFWriter> private VCFWriter vwriter = null; public void initializeWriter() { - Map metaData = new HashMap(); + + Set metaData = new HashSet(); + metaData.add("source=VariantsToVCF"); + metaData.add("reference=" + this.getToolkit().getArguments().referenceFile.getAbsolutePath()); + Set additionalColumns = new HashSet(); - - metaData.put("format", "VCRv3.2"); - metaData.put("source", "VariantsToVCF"); - metaData.put("reference", this.getToolkit().getArguments().referenceFile.getAbsolutePath()); - additionalColumns.add("FORMAT"); additionalColumns.addAll(SAMPLES); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java index 4f8f5f0a7..ea82403aa 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriter.java @@ -30,9 +30,9 @@ import java.util.List; /** * @author aaron *

- * Class Genotype + * Class GenotypeWriter *

- * The interface for storing genotype calls. + * The interface for writing genotype calls. */ public interface GenotypeWriter { @@ -55,8 +55,9 @@ public interface GenotypeWriter { /** * add a multi-sample call if we support it * @param genotypes the list of genotypes, that are backed by sample information + * @param variation the variation */ - public void addMultiSampleCall(List genotypes, VariationCall metadata); + public void addMultiSampleCall(List genotypes, VariationCall variation); /** * @return true if we support multisample, false otherwise diff --git a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java index 19f585856..0f6840975 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/GenotypeWriterFactory.java @@ -33,6 +33,7 @@ public class GenotypeWriterFactory { * @param source the source * @param referenceName the ref name * @param sampleNames the sample names + * @param headerInfo the optional header info fields * @return the genotype writer object */ public static GenotypeWriter create(GENOTYPE_FORMAT format, @@ -40,7 +41,8 @@ public class GenotypeWriterFactory { File destination, String source, String referenceName, - Set sampleNames ) { + Set sampleNames, + Set headerInfo) { switch (format) { case GLF: return new GLFWriter(header.toString(), destination); @@ -49,7 +51,7 @@ public class GenotypeWriterFactory { case GELI_BINARY: return new GeliAdapter(destination, header); case VCF: - return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames); + return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames, headerInfo); default: throw new StingException("Genotype writer " + format.toString() + " is not implemented"); } @@ -60,14 +62,15 @@ public class GenotypeWriterFactory { PrintStream destination, String source, String referenceName, - Set sampleNames ) { + Set sampleNames, + Set headerInfo) { switch (format) { case GELI: return new GeliTextWriter(destination); case GLF: return new GLFWriter(header.toString(), destination); case VCF: - return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames); + return new VCFGenotypeWriterAdapter(source, referenceName, destination, sampleNames, headerInfo); default: throw new StingException("Genotype writer to " + format.toString() + " to standard output is not implemented"); } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java index f6c9ca700..a2f90b287 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeRecord.java @@ -16,12 +16,25 @@ import java.util.Map; *

*/ public class VCFGenotypeRecord implements Genotype { - // the symbols for an empty genotype + + // key names + public static final String GENOTYPE_KEY = "GT"; + public static final String GENOTYPE_QUALITY_KEY = "GQ"; + public static final String DEPTH_KEY = "DP"; + public static final String HAPLOTYPE_QUALITY_KEY = "HQ"; + public static final String FILTER_KEY = "FT"; + public static final String OLD_DEPTH_KEY = "RD"; + + // the values for empty fields public static final String EMPTY_GENOTYPE = "./."; public static final String EMPTY_ALLELE = "."; - + public static final int MISSING_GENOTYPE_QUALITY = -1; public static final int MISSING_DEPTH = -1; + public static final int MISSING_HAPLOTYPE_QUALITY = -1; + public static final String UNFILTERED = "."; + public static final double MAX_QUAL_VALUE = 99.0; + // what kind of phasing this genotype has public enum PHASE { UNPHASED, PHASED, PHASED_SWITCH_PROB, UNKNOWN @@ -51,14 +64,21 @@ public class VCFGenotypeRecord implements Genotype { * @param otherFlags other flags */ public VCFGenotypeRecord(String sampleName, List genotypes, PHASE phasing, Map otherFlags) { - this.mSampleName = sampleName; + mSampleName = sampleName; if (genotypes != null) this.mGenotypeAlleles.addAll(genotypes); - this.mPhaseType = phasing; - if (otherFlags != null) this.mFields.putAll(otherFlags); + mPhaseType = phasing; + if (otherFlags != null) { + // we need to be backwards compatible + if ( otherFlags.containsKey(OLD_DEPTH_KEY) ) { + otherFlags.put(DEPTH_KEY, otherFlags.get(OLD_DEPTH_KEY)); + otherFlags.remove(OLD_DEPTH_KEY); + } + mFields.putAll(otherFlags); + } } public void setVCFRecord(VCFRecord record) { - this.mRecord = record; + mRecord = record; } public void setSampleName(String name) { @@ -102,16 +122,11 @@ public class VCFGenotypeRecord implements Genotype { } public double getNegLog10PError() { - return ( mFields.containsKey("GQ") ? Double.valueOf(mFields.get("GQ")) / 10.0 : 0.0); + return ( mFields.containsKey(GENOTYPE_QUALITY_KEY) ? Double.valueOf(mFields.get(GENOTYPE_QUALITY_KEY)) / 10.0 : MISSING_GENOTYPE_QUALITY); } public int getReadCount() { - int depth = MISSING_DEPTH; - if ( mFields.containsKey("RD") ) - depth = Integer.valueOf(mFields.get("RD")); - else if ( mFields.containsKey("DP") ) - depth = Integer.valueOf(mFields.get("DP")); - return depth; + return ( mFields.containsKey(DEPTH_KEY) ? Integer.valueOf(mFields.get(DEPTH_KEY)) : MISSING_DEPTH); } public GenomeLoc getLocation() { @@ -227,11 +242,26 @@ public class VCFGenotypeRecord implements Genotype { StringBuilder builder = new StringBuilder(); builder.append(toGenotypeString(altAlleles)); for (String field : mFields.keySet()) { - if (mFields.get(field).equals("")) continue; builder.append(VCFRecord.GENOTYPE_FIELD_SEPERATOR); - builder.append(mFields.get(field)); - + if (mFields.get(field).equals("")) + builder.append(getMissingFieldValue(field)); + else + builder.append(mFields.get(field)); } return builder.toString(); } + + public static String getMissingFieldValue(String field) { + String result = ""; + if ( field.equals(GENOTYPE_QUALITY_KEY) ) + result = String.valueOf(MISSING_GENOTYPE_QUALITY); + else if ( field.equals(DEPTH_KEY) ) + result = String.valueOf(MISSING_DEPTH); + else if ( field.equals(FILTER_KEY) ) + result = UNFILTERED; + // TODO -- support haplotype quality + //else if ( field.equals(HAPLOTYPE_QUALITY_KEY) ) + // result = String.valueOf(MISSING_HAPLOTYPE_QUALITY); + return result; + } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index 9bf5f08bd..87c3d33de 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -27,23 +27,23 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class); - public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo, Set sampleNames) { + public VCFGenotypeWriterAdapter(String source, String referenceName, File writeTo, Set sampleNames, Set headerInfo) { mReferenceName = referenceName; mSource = source; mSampleNames.addAll(sampleNames); - initializeHeader(); + initializeHeader(headerInfo); if (writeTo == null) throw new RuntimeException("VCF output file must not be null"); mWriter = new VCFWriter(mHeader, writeTo); } - public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo, Set sampleNames) { + public VCFGenotypeWriterAdapter(String source, String referenceName, OutputStream writeTo, Set sampleNames, Set headerInfo) { mReferenceName = referenceName; mSource = source; mSampleNames.addAll(sampleNames); - initializeHeader(); + initializeHeader(headerInfo); if (writeTo == null) throw new RuntimeException("VCF output stream must not be null"); mWriter = new VCFWriter(mHeader, writeTo); @@ -51,15 +51,18 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { /** * initialize this VCF header + * + * @param optionalHeaderInfo the optional header fields */ - private void initializeHeader() { - Map hInfo = new HashMap(); + private void initializeHeader(Set optionalHeaderInfo) { + Set hInfo = new TreeSet(); // setup the header fields - hInfo.put("format", VCFWriter.VERSION); - hInfo.put("source", mSource); - hInfo.put("reference", mReferenceName); - + hInfo.add(VCFHeader.FULL_FORMAT_LINE); + hInfo.add("source=" + mSource); + hInfo.add("reference=" + mReferenceName); + hInfo.addAll(optionalHeaderInfo); + // setup the sample names mHeader = new VCFHeader(hInfo, mSampleNames); } @@ -114,7 +117,7 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { throw new IllegalArgumentException("Only VCFVariationCall objects should be passed in to the VCF writers"); VCFParameters params = new VCFParameters(); - params.addFormatItem("GT"); + params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY); // get the location and reference if ( genotypes.size() == 0 ) { @@ -166,10 +169,10 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { VCFRecord vcfRecord = new VCFRecord(params.getReferenceBase(), params.getContig(), params.getPosition(), - (dbSnpID == null ? "." : dbSnpID), + (dbSnpID == null ? VCFRecord.EMPTY_ID_FIELD : dbSnpID), params.getAlternateBases(), qual, - "0", + VCFRecord.UNFILTERED, infoFields, params.getFormatString(), params.getGenotypesRecords()); @@ -189,15 +192,15 @@ public class VCFGenotypeWriterAdapter implements GenotypeWriter { Map infoFields = new HashMap(); if ( locusdata != null ) { if ( locusdata.getSLOD() != null ) - infoFields.put("SB", String.format("%.2f", locusdata.getSLOD())); + infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", locusdata.getSLOD())); if ( locusdata.hasNonRefAlleleFrequency() ) - infoFields.put("AF", String.format("%.2f", locusdata.getNonRefAlleleFrequency())); + infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", locusdata.getNonRefAlleleFrequency())); Map otherFields = locusdata.getFields(); if ( otherFields != null ) { infoFields.putAll(otherFields); } } - infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size())); + infoFields.put(VCFRecord.SAMPLE_NUMBER_KEY, String.valueOf(params.getGenotypesRecords().size())); return infoFields; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java index d06bbdd59..4a8af1604 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFHeader.java @@ -1,7 +1,5 @@ package org.broadinstitute.sting.utils.genotype.vcf; -import org.apache.log4j.Logger; - import java.util.*; @@ -14,13 +12,25 @@ import java.util.*; */ public class VCFHeader { + public static final String FILE_FORMAT_KEY = "fileformat="; + public static final String OLD_FILE_FORMAT_KEY = "format="; // from version 3.2 + + + /** the current vcf version we support. */ + public static final String VCF_VERSION_HEADER = "VCFv"; + public static final String OLD_VCF_VERSION_HEADER = "VCRv"; // from version 3.2 + public static final double VCF_VERSION_NUMBER = 3.3; + public static final String VCF_VERSION = VCF_VERSION_HEADER + VCF_VERSION_NUMBER; + + public static final String FULL_FORMAT_LINE = FILE_FORMAT_KEY + VCF_VERSION; + // the manditory header fields public enum HEADER_FIELDS { CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO } // the associated meta data - private final Map mMetaData = new HashMap(); + private final Set mMetaData; // the list of auxillary tags private final Set mGenotypeSampleNames = new LinkedHashSet(); @@ -31,22 +41,17 @@ public class VCFHeader { // the header string indicator public static final String HEADER_INDICATOR = "#"; - /** our log, which we use to capture anything from this class */ - private static Logger logger = Logger.getLogger(VCFHeader.class); /** do we have genotying data? */ private boolean hasGenotypingData = false; - /** the current vcf version we support. */ - private static final String VCF_VERSION = "VCRv3.2"; - /** * create a VCF header, given a list of meta data and auxillary tags * * @param metaData the meta data associated with this header */ - public VCFHeader(Map metaData) { - for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key)); + public VCFHeader(Set metaData) { + mMetaData = new TreeSet(metaData); checkVCFVersion(); } @@ -56,8 +61,8 @@ public class VCFHeader { * @param metaData the meta data associated with this header * @param genotypeSampleNames the genotype format field, and the sample names */ - public VCFHeader(Map metaData, Set genotypeSampleNames) { - for (String key : metaData.keySet()) mMetaData.put(key, metaData.get(key)); + public VCFHeader(Set metaData, Set genotypeSampleNames) { + mMetaData = new TreeSet(metaData); for (String col : genotypeSampleNames) { if (!col.equals("FORMAT")) mGenotypeSampleNames.add(col); @@ -71,13 +76,34 @@ public class VCFHeader { * or the version is not present */ public void checkVCFVersion() { - if (mMetaData.containsKey("format")) { - if (mMetaData.get("format").equals(VCF_VERSION)) - return; - throw new RuntimeException("VCFHeader: VCF version of " + mMetaData.get("format") + - " doesn't match the supported version of " + VCF_VERSION); + String version = null; + for ( String field : mMetaData ) { + if ( field.startsWith(FILE_FORMAT_KEY) ) { + version = field.substring(FILE_FORMAT_KEY.length()); + break; + } + else if ( field.startsWith(OLD_FILE_FORMAT_KEY) ) { + version = field.substring(OLD_FILE_FORMAT_KEY.length()); + break; + } } - throw new RuntimeException("VCFHeader: VCF version isn't present"); + + if ( version == null ) + mMetaData.add(FULL_FORMAT_LINE); + else if ( !isSupportedVersion(version) ) + throw new RuntimeException("VCF version " + version + + " is not yet supported; only version " + VCF_VERSION + " and earlier can be used"); + } + + private boolean isSupportedVersion(String version) { + if ( !version.startsWith(VCF_VERSION_HEADER) && !version.startsWith(OLD_VCF_VERSION_HEADER) ) + return false; + try { + double dVersion = Double.valueOf(version.substring(VCF_VERSION_HEADER.length())); + return dVersion <= VCF_VERSION_NUMBER; + } catch (Exception e) { } + + return false; } /** @@ -96,9 +122,9 @@ public class VCFHeader { /** * get the meta data, associated with this header * - * @return a map of the meta data + * @return a set of the meta data */ - public Map getMetaData() { + public Set getMetaData() { return mMetaData; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 2e4ce0482..4d071a0ee 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -21,9 +21,6 @@ public class VCFReader implements Iterator, Iterable { // our next record private VCFRecord mNextRecord = null; - // a pattern we use for detecting meta data and header lines - private static Pattern pMeta = Pattern.compile("^" + VCFHeader.METADATA_INDICATOR + "\\s*(\\S+)\\s*=\\s*(\\S+)\\s*$"); - // our pattern matching for the genotype mFields private static final Pattern gtPattern = Pattern.compile("([0-9\\.]+)([\\\\|\\/])([0-9\\.]*)"); @@ -142,24 +139,11 @@ public class VCFReader implements Iterator, Iterable { * @return a VCF Header created from the list of stinrgs */ protected VCFHeader createHeader(List headerStrings) { - Map metaData = new HashMap(); + Set metaData = new TreeSet(); Set auxTags = new LinkedHashSet(); // iterate over all the passed in strings - for (String str : headerStrings) { - Matcher matcher = pMeta.matcher(str); - if (matcher.matches()) { - String metaKey; - String metaValue = ""; - if (matcher.groupCount() < 1) continue; - if (matcher.groupCount() == 2) metaValue = matcher.group(2); - metaKey = matcher.group(1); - metaData.put(metaKey, metaValue); - } - } - - // iterate over all the passed in strings - for (String str : headerStrings) { // TODO: fix, we shouldn't loop over every line - if (str.startsWith("#") && !str.startsWith("##")) { + for ( String str : headerStrings ) { + if ( !str.startsWith("##") ) { String[] strings = str.substring(1).split("\\s+"); // the columns should be in order according to Richard Durbin int arrayIndex = 0; @@ -177,8 +161,11 @@ public class VCFReader implements Iterator, Iterable { auxTags.add(strings[arrayIndex]); arrayIndex++; } + } else { + metaData.add(str.substring(2)); } } + return new VCFHeader(metaData, auxTags); } @@ -266,7 +253,7 @@ public class VCFReader implements Iterator, Iterable { nextDivider = (genotypeString.indexOf(":") > genotypeString.length()) ? genotypeString.length() : genotypeString.indexOf(":"); parse = genotypeString.substring(0, nextDivider); } - if (key.equals("GT")) { + if (key.equals(VCFGenotypeRecord.GENOTYPE_KEY)) { Matcher m = gtPattern.matcher(parse); if (!m.matches()) throw new RuntimeException("VCFReader: Unable to match GT genotype flag to it's expected pattern, the field was: " + parse); @@ -274,6 +261,8 @@ public class VCFReader implements Iterator, Iterable { addAllele(m.group(1), altAlleles, referenceBase, bases); if (m.group(3).length() > 0) addAllele(m.group(3), altAlleles, referenceBase, bases); } else { + if ( parse.length() == 0 ) + parse = VCFGenotypeRecord.getMissingFieldValue(key); tagToValue.put(key, parse); } if (nextDivider + 1 >= genotypeString.length()) nextDivider = genotypeString.length() - 1; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java index 9b863108e..b8a437b36 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFRecord.java @@ -11,13 +11,31 @@ import java.util.*; */ public class VCFRecord implements Variation, VariantBackedByGenotype { + // standard info field keys + public static final String ANCESTRAL_ALLELE_KEY = "AA"; + public static final String ALLELE_COUNT_KEY = "AC"; + public static final String ALLELE_FREQUENCY_KEY = "AF"; + public static final String ALLELE_NUMBER_KEY = "AN"; + public static final String RMS_BASE_QUALITY_KEY = "BQ"; + public static final String DBSNP_KEY = "DB"; + public static final String DEPTH_KEY = "DP"; + public static final String HAPMAP2_KEY = "H2"; + public static final String RMS_MAPPING_QUALITY_KEY = "MQ"; + public static final String SAMPLE_NUMBER_KEY = "NS"; + public static final String STRAND_BIAS_KEY = "SB"; + // commonly used strings that are in the standard public static final String FORMAT_FIELD_SEPERATOR = ":"; public static final String GENOTYPE_FIELD_SEPERATOR = ":"; public static final String FIELD_SEPERATOR = "\t"; public static final String FILTER_CODE_SEPERATOR = ";"; public static final String INFO_FIELD_SEPERATOR = ";"; + + // default values + public static final String UNFILTERED = "."; + public static final String PASSES_FILTERS = "0"; public static final String EMPTY_INFO_FIELD = "."; + public static final String EMPTY_ID_FIELD = "."; public static final String DOUBLE_PRECISION_FORMAT_STRING = "%.2f"; // the reference base @@ -139,13 +157,12 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { String vals[] = columnValues.get(val).split(";"); for (String alt : vals) { String keyVal[] = alt.split("="); - if (keyVal.length == 1 && keyVal[0].equals(".")) { + if ( keyVal.length == 1 ) this.addInfoField(keyVal[0], ""); - break; - } - if (keyVal.length != 2) + else if (keyVal.length == 2) + this.addInfoField(keyVal[0], keyVal[1]); + else throw new IllegalArgumentException("info field key-value pair did not parse into key->value pair: " + alt); - this.addInfoField(keyVal[0], keyVal[1]); } break; } @@ -231,14 +248,14 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { } public double getNonRefAlleleFrequency() { - if ( mInfoFields.containsKey("AF") ) { - return Double.valueOf(mInfoFields.get("AF")); + if ( mInfoFields.containsKey(ALLELE_FREQUENCY_KEY) ) { + return Double.valueOf(mInfoFields.get(ALLELE_FREQUENCY_KEY)); } else { // this is the poor man's AF - if ( mInfoFields.containsKey("AC") && mInfoFields.containsKey("AN")) { - String splt[] = mInfoFields.get("AC").split(","); + if ( mInfoFields.containsKey(ALLELE_COUNT_KEY) && mInfoFields.containsKey(ALLELE_NUMBER_KEY)) { + String splt[] = mInfoFields.get(ALLELE_COUNT_KEY).split(","); if ( splt.length > 0 ) { - return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get("AN"))); + return (Double.valueOf(splt[0]) / Double.valueOf(mInfoFields.get(ALLELE_NUMBER_KEY))); } } } @@ -250,9 +267,13 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { if ( !hasAlternateAllele() ) return VARIANT_TYPE.REFERENCE; - // TODO -- figure out what to do about records with more than one type - VCFGenotypeEncoding encoding = mAlts.get(0); - switch ( encoding.getType() ) { + VCFGenotypeEncoding.TYPE type = mAlts.get(0).getType(); + for (int i = 1; i < mAlts.size(); i++) { + if ( mAlts.get(i).getType() != type ) + throw new IllegalStateException("The record contains multiple encoding types"); + } + + switch ( type ) { case SINGLE_BASE: return VARIANT_TYPE.SNP; case DELETION: @@ -309,23 +330,20 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { /** * get the filter criteria * - * @return an array of strings representing the filtering criteria, or 0 is none are applied + * @return an array of strings representing the filtering criteria, or UNFILTERED if none are applied */ public String[] getFilteringCodes() { - if (mFilterString == null) return new String[]{"0"}; - return this.mFilterString.split(FILTER_CODE_SEPERATOR); + if (mFilterString == null) return new String[]{UNFILTERED}; + return mFilterString.split(FILTER_CODE_SEPERATOR); } public boolean isFiltered() { String[] codes = getFilteringCodes(); - if ( codes.length > 1 ) return true; - else if ( codes[0].equals(".") || codes[0].equals("0") ) return false; - else return true; + return !codes[0].equals(UNFILTERED) && !codes[0].equals(PASSES_FILTERS); } public boolean hasFilteringCodes() { - // todo --- currently always returns true - return getFilteringCodes() != null; + return mFilterString != null; } public String getFilterString() { @@ -567,7 +585,9 @@ public class VCFRecord implements Variation, VariantBackedByGenotype { if (!this.mLoc.equals(other.mLoc)) return false; if (!this.mID.equals(other.mID)) return false; if (this.mQual != other.mQual) return false; - if (!this.mFilterString.equals(other.mFilterString)) return false; + if ( this.mFilterString == null ) { + if ( other.mFilterString != null ) return false; + } else if ( !this.mFilterString.equals(other.mFilterString) ) return false; if (!this.mInfoFields.equals(other.mInfoFields)) return false; if (!this.mGenotypeFields.equals(other.mGenotypeFields)) return false; return true; diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java index a140d1fbe..9aa619100 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFUtils.java @@ -20,12 +20,38 @@ public class VCFUtils { private VCFUtils() { } + /** + * Gets the header fields from all VCF rods input by the user + * + * @param toolkit GATK engine + * + * @return a set of all fields + */ + public static Set getHeaderFields(GenomeAnalysisEngine toolkit) { + + // keep a map of sample name to occurrences encountered + TreeSet fields = new TreeSet(); + + // iterate to get all of the sample names + List dataSources = toolkit.getRodDataSources(); + for ( ReferenceOrderedDataSource source : dataSources ) { + ReferenceOrderedData rod = source.getReferenceOrderedData(); + if ( rod.getType().equals(RodVCF.class) ) { + VCFReader reader = new VCFReader(rod.getFile()); + fields.addAll(reader.getHeader().getMetaData()); + reader.close(); + } + } + + return fields; + } + /** * Gets the sample names from all VCF rods input by the user and uniquifies them if there is overlap * (e.g. sampleX.1, sampleX.2, ...) * When finished, samples contains the uniquified sample names and rodNamesToSampleNames contains a mapping * from rod/sample pairs to the new uniquified names - * + * * @param toolkit GATK engine * @param samples set to store the sample names * @param rodNamesToSampleNames mapping of rod/sample pairs to new uniquified sample names @@ -104,7 +130,7 @@ public class VCFUtils { public static VCFRecord mergeRecords(List rods, Map, String> rodNamesToSampleNames) { VCFParameters params = new VCFParameters(); - params.addFormatItem("GT"); + params.addFormatItem(VCFGenotypeRecord.GENOTYPE_KEY); // keep track of the locus specific data so we can merge them intelligently int totalReadDepth = 0; @@ -122,7 +148,9 @@ public class VCFUtils { if ( params.getPosition() < 1 ) params.setLocations(rod.getLocation(), call.getReference()); params.addGenotypeRecord(createVCFGenotypeRecord(params, call, rod.mCurrentRecord)); - totalReadDepth += call.getReadCount(); + int depth = call.getReadCount(); + if ( depth > 0 ) + totalReadDepth += call.getReadCount(); } // set the overall confidence to be the max entry we see @@ -142,14 +170,14 @@ public class VCFUtils { } Map infoFields = new HashMap(); - infoFields.put("DP", String.format("%d", totalReadDepth)); - infoFields.put("NS", String.valueOf(params.getGenotypesRecords().size())); + infoFields.put(VCFRecord.DEPTH_KEY, String.format("%d", totalReadDepth)); + infoFields.put(VCFRecord.SAMPLE_NUMBER_KEY, String.valueOf(params.getGenotypesRecords().size())); // set the overall strand bias and allele frequency to be the average of all entries we've seen if ( SLODsSeen > 0 ) - infoFields.put("SB", String.format("%.2f", (totalSLOD/(double)SLODsSeen))); + infoFields.put(VCFRecord.STRAND_BIAS_KEY, String.format("%.2f", (totalSLOD/(double)SLODsSeen))); if ( freqsSeen > 0 ) - infoFields.put("AF", String.format("%.2f", (totalFreq/(double)freqsSeen))); + infoFields.put(VCFRecord.ALLELE_FREQUENCY_KEY, String.format("%.2f", (totalFreq/(double)freqsSeen))); return new VCFRecord(params.getReferenceBase(), params.getContig(), @@ -175,13 +203,12 @@ public class VCFUtils { public static VCFGenotypeRecord createVCFGenotypeRecord(VCFParameters params, VCFGenotypeRecord gtype, VCFRecord vcfrecord) { Map map = new HashMap(); - // calculate the RMS mapping qualities and the read depth - int readDepth = gtype.getReadCount(); - map.put("RD", String.valueOf(readDepth)); - params.addFormatItem("RD"); - double qual = 10.0 * gtype.getNegLog10PError(); - map.put("GQ", String.format("%.2f", qual)); - params.addFormatItem("GQ"); + // calculate the genotype quality and the read depth + map.put(VCFGenotypeRecord.DEPTH_KEY, String.valueOf(gtype.getReadCount())); + params.addFormatItem(VCFGenotypeRecord.DEPTH_KEY); + double qual = Math.min(10.0 * gtype.getNegLog10PError(), VCFGenotypeRecord.MAX_QUAL_VALUE); + map.put(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, String.format("%.2f", qual)); + params.addFormatItem(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY); List alleles = createAlleleArray(gtype); for (VCFGenotypeEncoding allele : alleles) { @@ -208,12 +235,11 @@ public class VCFUtils { Map map = new HashMap(); // calculate the RMS mapping qualities and the read depth - int readDepth = gtype.getReadCount(); - map.put("RD", String.valueOf(readDepth)); - params.addFormatItem("RD"); - double qual = gtype.getNegLog10PError(); - map.put("GQ", String.format("%.2f", qual)); - params.addFormatItem("GQ"); + map.put(VCFGenotypeRecord.DEPTH_KEY, String.valueOf(gtype.getReadCount())); + params.addFormatItem(VCFGenotypeRecord.DEPTH_KEY); + double qual = Math.min(10.0 * gtype.getNegLog10PError(), VCFGenotypeRecord.MAX_QUAL_VALUE); + map.put(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY, String.format("%.2f", qual)); + params.addFormatItem(VCFGenotypeRecord.GENOTYPE_QUALITY_KEY); List alleles = createAlleleArray(gtype); for (VCFGenotypeEncoding allele : alleles) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index 34bbc0de1..c133d5619 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -2,14 +2,14 @@ package org.broadinstitute.sting.utils.genotype.vcf; import java.io.*; +import java.util.TreeSet; /** * this class writers VCF files */ public class VCFWriter { - public static final String VERSION = "VCRv3.2"; - + // the VCF header we're storing private VCFHeader mHeader; @@ -49,10 +49,25 @@ public class VCFWriter { mWriter = new BufferedWriter( new OutputStreamWriter(location)); try { - // write the header meta-data out - for (String metadata : header.getMetaData().keySet()) { - mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "=" + header.getMetaData().get(metadata) + "\n"); + // the fileformat field needs to be written first + TreeSet allMetaData = new TreeSet(header.getMetaData()); + for ( String metadata : allMetaData ) { + if ( metadata.startsWith(VCFHeader.FILE_FORMAT_KEY) ) { + mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "\n"); + break; + } + else if ( metadata.startsWith(VCFHeader.OLD_FILE_FORMAT_KEY) ) { + mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeader.FILE_FORMAT_KEY + metadata.substring(VCFHeader.OLD_FILE_FORMAT_KEY.length()) + "\n"); + break; + } } + + // write the rest of the header meta-data out + for ( String metadata : header.getMetaData() ) { + if ( !metadata.startsWith(VCFHeader.FILE_FORMAT_KEY) && !metadata.startsWith(VCFHeader.OLD_FILE_FORMAT_KEY) ) + mWriter.write(VCFHeader.METADATA_INDICATOR + metadata + "\n"); + } + // write out the column line StringBuilder b = new StringBuilder(); b.append(VCFHeader.HEADER_INDICATOR); diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java index 99f5501a6..278860b0d 100755 --- a/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/RodVCFTest.java @@ -91,7 +91,7 @@ public class RodVCFTest extends BaseTest { @Test public void testToString() { // slightly altered line, due to map ordering - final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tAF=0.786;DP=258;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5\n"; + final String firstLine = "20\t14370\trs6054257\tG\tA\t29.00\t0\tAF=0.786;DP=258;NS=58\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:\n"; RodVCF vcf = getVCFObject(); VCFReader reader = new VCFReader(vcfFile); Iterator iter = vcf.createIterator("VCF", vcfFile); @@ -105,7 +105,7 @@ public class RodVCFTest extends BaseTest { // verify the first line too if (first) { if (!firstLine.equals(rec1.toStringEncoding(mHeader) + "\n")) { - fail("VCF record rec1.toStringEncoding() != expected string :\n" + rec1.toStringEncoding(mHeader) + firstLine); + fail("VCF record rec1.toStringEncoding() != expected string :\n" + rec1.toStringEncoding(mHeader) + "\n" + firstLine); } first = false; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java index e8a6c50c6..accfd5d4d 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SecondBaseSkewIntegrationTest.java @@ -33,7 +33,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest { +"-B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli " +"-vcf %s -sample variant -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list"; - String md5_for_this_test = "a2ec1d36e77da56b4a11eef34d73296e"; + String md5_for_this_test = "f7e67c353d3113447d1b9c8c39de6ed0"; WalkerTestSpec spec = new WalkerTestSpec(test_args,1, Arrays.asList(md5_for_this_test)); executeTest("Testing on E2 annotated but not Q2 annotated file ",spec); @@ -49,7 +49,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest { +"-B variant,Variants,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pilot_pool3_raw_calls.geli " +"-vcf %s -sample variant -L /humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_test_intervals.interval_list"; - String md5_for_this_test = "f105fd8a7ae7026a55107b86e768553a"; + String md5_for_this_test = "3eee411119888fc4633870a91ed2093d"; WalkerTestSpec spec = new WalkerTestSpec(test_args,1, Arrays.asList(md5_for_this_test)); executeTest("Testing on bam file without 2bb annotations ",spec); @@ -61,7 +61,7 @@ public class SecondBaseSkewIntegrationTest extends WalkerTest { + " -R /seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta -A SecondBaseSkew" + " -sample variant -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/FHS_pileup_test_chr15.vcf" + " -vcf %s -L chr15:46347148"; - String expected_md5 = "160a8e3826eb745bcfe2f463f73e1ec7"; + String expected_md5 = "c70dfb30c3caa9184604f88bc7f62a07"; WalkerTestSpec spec = new WalkerTestSpec(test_args,1,Arrays.asList(expected_md5)); executeTest("Testing on locus with many indels", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index e2495f6e5..3b603d11a 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -34,8 +34,8 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public static String secondBaseTestmd5( int testNo ) { switch ( testNo ) { - case 1: return "8f5b4b29eefb25d6b7b9e32a1c90f144"; - case 2: return "3a53d945b38e1fc87a801f23115222fe"; + case 1: return "bf64bac186fd682018dd7f0419d90190"; + case 2: return "67f40627b12be31efe02c9d853fbcf37"; default: throw new StingException("Impossible test has been run: secondbasetest number "+testNo); } } @@ -50,7 +50,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("0ff01251afdabafb4e137357b25be72a")); + Arrays.asList("6903f8b31820ce7a56230b99f9a9309c")); executeTest("test file has annotations, not asking for annotations, #1", spec); } @@ -58,7 +58,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("227f38b2f322780187eacd0c98ace3e6")); + Arrays.asList("5298c24e956361d209f14ac6138a3bbd")); executeTest("test file has annotations, not asking for annotations, #2", spec); } @@ -66,7 +66,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("9a30487ad885f4d49569032fe6463af3")); + Arrays.asList("31ddecd3118bd6aea00bcd0369a1b32f")); executeTest("test file has annotations, asking for annotations, #1", spec); } @@ -74,7 +74,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testHasAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("ef01d735ea0bcfeb6e7394c65f2a1938")); + Arrays.asList("553c55f3bcf31a6b1d9a2c1d27fde480")); executeTest("test file has annotations, asking for annotations, #2", spec); } @@ -82,7 +82,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("18d85a9711d56bdf7e2327b83d6745e2")); + Arrays.asList("b46f395864cb71b887d69342f56e7fdb")); executeTest("test file doesn't have annotations, not asking for annotations, #1", spec); } @@ -90,7 +90,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsNotAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("da51f18d189fb0fd804822c81b786e0f")); + Arrays.asList("aaae89c48fcab615fe4204220ec62859")); executeTest("test file doesn't have annotations, not asking for annotations, #2", spec); } @@ -98,7 +98,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1, - Arrays.asList("7e6ceb79e9a1f104723299ed68b236c6")); + Arrays.asList("fa1071b292fd2e9db5c36410ffd44fdb")); executeTest("test file doesn't have annotations, asking for annotations, #1", spec); } @@ -106,7 +106,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { public void testNoAnnotsAsking2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -standard -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample3empty.vcf -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1, - Arrays.asList("87748d4c80ff76701dd01d7b0f803249")); + Arrays.asList("9d133a91949e477523a039a0a52ac2a8")); executeTest("test file doesn't have annotations, asking for annotations, #2", spec); } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java index f8a39a6f5..8fc2e3b36 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/concordance/CallsetConcordanceIntegrationTest.java @@ -14,7 +14,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testSimpleVenn() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SimpleVenn", 1, - Arrays.asList("851b68004874f3a2e76d795e7401f8a0")); + Arrays.asList("2c7e18901dbf27bac9f36b3dbee063c6")); executeTest("testSimpleVenn", spec); } @@ -22,7 +22,7 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testSNPConcordance() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SNPGenotypeConcordance:qscore=5", 1, - Arrays.asList("7afb56b30257fe2d66bee7a029d75685")); + Arrays.asList("c21d59fc3194c39c662d2e74b53dcf9c")); executeTest("testSNPConcordance", spec); } @@ -30,7 +30,15 @@ public class CallsetConcordanceIntegrationTest extends WalkerTest { public void testNWayVenn() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -B set3,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/CEU.sample.vcf -CT NWayVenn", 1, - Arrays.asList("f452c04c600ad10c054f18b0c77b53d5")); + Arrays.asList("2b38ae235edd10773dbee0bfae036e35")); executeTest("testNWayVenn", spec); } + + @Test + public void testMulti() { + WalkerTestSpec spec = new WalkerTestSpec( + baseTestString() + " -B set1,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example1.vcf -B set2,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.example2.vcf -CT SimpleVenn -CT NWayVenn -CT SNPGenotypeConcordance:qscore=5", 1, + Arrays.asList("9bcc83aadac00a160cef20e7126368ee")); + executeTest("testMulti", spec); + } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java index d00775a12..2e66a323b 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltrationIntegrationTest.java @@ -16,7 +16,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testNoAction() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("2408d449fbe7bf74099cc53d2d97c248")); + Arrays.asList("49817e684effce7b6f3d5776dc781988")); executeTest("test no action", spec); } @@ -24,7 +24,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testClusteredSnps() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -window 10 -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("58a2d4cd3d3ba1460833b45b9b8455c2")); + Arrays.asList("ee5e4e00bf25a912e8cab3e768fa0e7d")); executeTest("test clustered SNPs", spec); } @@ -32,7 +32,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testMask() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -mask foo -B mask,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("9fc2cb210b3595159f34ddfba5a2e572")); + Arrays.asList("77451c3ba8a070343e69157cdaf2be92")); executeTest("test mask", spec); } @@ -40,7 +40,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter1() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'DoC < 20 || FisherStrand > 20.0' -filterName foo -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("5effa4b4fdd4dd33a373561637a5d86e")); + Arrays.asList("3b44be79d676536bd6c0f32774091fee")); executeTest("test filter #1", spec); } @@ -48,7 +48,7 @@ public class VariantFiltrationIntegrationTest extends WalkerTest { public void testFilter2() { WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " -filter 'AlleleBalance < 70.0 && FisherStrand == 1.4' -filterName bar -B variant,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample2.vcf -L 1:10,020,000-10,021,000", 1, - Arrays.asList("5e0077148eda3b4274fbef1048902d47")); + Arrays.asList("baa425fa8e0761a05733a1a9d62d02ff")); executeTest("test filter #2", spec); } } \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 49a3bf108..07cc2371b 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -47,7 +47,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,400-10,024,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("ad7024c3c880a451d2f5537797b49beb")); + Arrays.asList("68fcdca40df72b3c703bab846c5f0bbd")); executeTest("testMultiSamplePilot1 - Point Estimate EM", spec); } @@ -55,7 +55,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2PointEM() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,010,000 -bm empirical -gm EM_POINT_ESTIMATE -confidence 30", 1, - Arrays.asList("7709af47dde8e127e0e36e86073e2cb1")); + Arrays.asList("eb0cd5494ae4c1781bfa00b6c4146993")); executeTest("testMultiSamplePilot2 - Point Estimate EM", spec); } @@ -68,7 +68,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testPooled1() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, - Arrays.asList("1905bc65b1abb56c776558d562de5ea1")); + Arrays.asList("ec1aeb69d7d54a7ced1ce625146d1d59")); executeTest("testPooled1", spec); } @@ -81,7 +81,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("aaecb9fe822a42576500a91973baff03")); + Arrays.asList("e27552dad05ddf17403aaa7176b9cfe2")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @@ -89,7 +89,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("d36a8ba5ddf1265ab5be2ed390fa56e1")); + Arrays.asList("3618711e41b7e37f47b995d39adbc76b")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @@ -97,7 +97,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R /broad/1KG/reference/human_b36_both.fasta -I /humgen/gsa-scr1/GATK_Data/Validation_Data/NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm empirical -gm JOINT_ESTIMATE -confidence 30", 1, - Arrays.asList("029706a60440660c6a636091e9489122")); + Arrays.asList("2a32add40319ab2de44951624df2be4b")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java index f8e94f0ed..89cb7ff5c 100755 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/variantstovcf/VariantsToVCFIntegrationTest.java @@ -21,7 +21,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testVariantsToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("0b96a8046d2a06bd87f57df8bac1678d"); + md5.add("a94c15f2e8905fd3e98301375cf0f42a"); /** * the above MD5 was calculated from running the following command: @@ -50,7 +50,7 @@ public class VariantsToVCFIntegrationTest extends WalkerTest { @Test public void testGenotypesToVCFUsingGeliInput() { List md5 = new ArrayList(); - md5.add("09660faa7cfad8af36602f79461c0605"); + md5.add("6b18f33e25edbd2154c17a949656644b"); /** * the above MD5 was calculated from running the following command: diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java index 0d56101f3..ecf2c3a8e 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFHeaderTest.java @@ -18,7 +18,7 @@ import java.util.*; public class VCFHeaderTest extends BaseTest { private Set headerFields = new LinkedHashSet(); - private Map metaData = new HashMap(); + private Set metaData = new HashSet(); private Set additionalColumns = new HashSet(); /** @@ -26,8 +26,8 @@ public class VCFHeaderTest extends BaseTest { */ @Test public void testHeaderConstructor() { - metaData.put("format","VCRv3.2"); - metaData.put("two","2"); + metaData.add(VCFHeader.FULL_FORMAT_LINE); // required + metaData.add("two=2"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); // this should create a header that is valid @@ -40,12 +40,7 @@ public class VCFHeaderTest extends BaseTest { Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index],field); index++; } - index = 0; - for (String key: header.getMetaData().keySet()) { - Assert.assertEquals(header.getMetaData().get(key),metaData.get(key)); - index++; - } - Assert.assertEquals(metaData.size(),index); + Assert.assertEquals(metaData.size(), header.getMetaData().size()); index = 0; for (String key: header.getGenotypeSamples()) { Assert.assertTrue(additionalColumns.contains(key)); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFIntegrationTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFIntegrationTest.java new file mode 100755 index 000000000..46cc91051 --- /dev/null +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFIntegrationTest.java @@ -0,0 +1,18 @@ +package org.broadinstitute.sting.utils.genotype.vcf; + +import org.broadinstitute.sting.WalkerTest; +import org.junit.Test; + +import java.util.Arrays; + +public class VCFIntegrationTest extends WalkerTest { + + @Test + public void test1() { + // Read in and then emit each record + WalkerTestSpec spec = new WalkerTestSpec( + "-T PrintRODs -R /broad/1KG/reference/human_b36_both.fasta -L 1:10,000,000-10,050,000 -o %s -B vcf,VCF,/humgen/gsa-scr1/GATK_Data/Validation_Data/complexExample.vcf", 1, + Arrays.asList("26ad7a663d0f247ac26ce5490edd7ec0")); + executeTest("test vcf", spec); + } +} \ No newline at end of file diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java index 15f32b443..a198ea16b 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFReaderTest.java @@ -20,7 +20,7 @@ public class VCFReaderTest extends BaseTest { private static final File vcfFile = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/vcfexample.vcf"); private static final File multiSampleVCF = new File("/humgen/gsa-scr1/GATK_Data/Validation_Data/MultiSample.vcf"); - private static final String VCF_MIXUP_FILE = "/humgen/gsa-scr1/GATK_Data/Validation_Data/mixedup.vcf"; + private static final String VCF_MIXUP_FILE = "/humgen/gsa-scr1/GATK_Data/Validation_Data/mixedup.v2.vcf"; private static IndexedFastaSequenceFile seq; diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java index 09a0c8ba7..537506413 100755 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFRecordTest.java @@ -140,10 +140,10 @@ public class VCFRecordTest extends BaseTest { * @return a fake VCF header */ public static VCFHeader createFakeHeader() { - Map metaData = new HashMap(); + Set metaData = new HashSet(); + metaData.add(VCFHeader.FULL_FORMAT_LINE); // required + metaData.add("two=2"); Set additionalColumns = new HashSet(); - metaData.put("format", "VCRv3.2"); // required - metaData.put("two", "2"); additionalColumns.add("FORMAT"); additionalColumns.add("sample1"); return new VCFHeader(metaData, additionalColumns); diff --git a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java index edfd3fca3..61b6da2c1 100644 --- a/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java +++ b/java/test/org/broadinstitute/sting/utils/genotype/vcf/VCFWriterTest.java @@ -22,7 +22,7 @@ import java.util.*; */ public class VCFWriterTest extends BaseTest { private Set headerFields = new LinkedHashSet(); - private Map metaData = new HashMap(); + private Set metaData = new HashSet(); private Set additionalColumns = new HashSet(); private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf"); @@ -62,9 +62,9 @@ public class VCFWriterTest extends BaseTest { * create a fake header of known quantity * @return a fake VCF header */ - public static VCFHeader createFakeHeader(Map metaData, Set additionalColumns) { - metaData.put("format", "VCRv3.2"); // required - metaData.put("two", "2"); + public static VCFHeader createFakeHeader(Set metaData, Set additionalColumns) { + metaData.add(VCFHeader.FULL_FORMAT_LINE); // required + metaData.add("two=2"); additionalColumns.add("FORMAT"); additionalColumns.add("extra1"); additionalColumns.add("extra2"); @@ -109,12 +109,7 @@ public class VCFWriterTest extends BaseTest { Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index], field); index++; } - index = 0; - for (String key : header.getMetaData().keySet()) { - Assert.assertEquals(header.getMetaData().get(key), metaData.get(key)); - index++; - } - Assert.assertEquals(metaData.size(), index); + Assert.assertEquals(metaData.size(), header.getMetaData().size()); index = 0; for (String key : header.getGenotypeSamples()) { Assert.assertTrue(additionalColumns.contains(key));