diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/GenotypeConcordance.java b/archive/java/src/org/broadinstitute/sting/vcftools/GenotypeConcordance.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/GenotypeConcordance.java rename to archive/java/src/org/broadinstitute/sting/vcftools/GenotypeConcordance.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFApplyCuts.java b/archive/java/src/org/broadinstitute/sting/vcftools/VCFApplyCuts.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/VCFApplyCuts.java rename to archive/java/src/org/broadinstitute/sting/vcftools/VCFApplyCuts.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java b/archive/java/src/org/broadinstitute/sting/vcftools/VCFCallRates.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/VCFCallRates.java rename to archive/java/src/org/broadinstitute/sting/vcftools/VCFCallRates.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFHomogenizer.java b/archive/java/src/org/broadinstitute/sting/vcftools/VCFHomogenizer.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/VCFHomogenizer.java rename to archive/java/src/org/broadinstitute/sting/vcftools/VCFHomogenizer.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFMerge.java b/archive/java/src/org/broadinstitute/sting/vcftools/VCFMerge.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/VCFMerge.java rename to archive/java/src/org/broadinstitute/sting/vcftools/VCFMerge.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFOptimize.java b/archive/java/src/org/broadinstitute/sting/vcftools/VCFOptimize.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/VCFOptimize.java rename to archive/java/src/org/broadinstitute/sting/vcftools/VCFOptimize.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java b/archive/java/src/org/broadinstitute/sting/vcftools/VCFSequenomAnalysis.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis.java rename to archive/java/src/org/broadinstitute/sting/vcftools/VCFSequenomAnalysis.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java b/archive/java/src/org/broadinstitute/sting/vcftools/VCFSequenomAnalysis2.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/VCFSequenomAnalysis2.java rename to archive/java/src/org/broadinstitute/sting/vcftools/VCFSequenomAnalysis2.java diff --git a/java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java b/archive/java/src/org/broadinstitute/sting/vcftools/VCFTool.java similarity index 100% rename from java/src/org/broadinstitute/sting/playground/tools/vcf/VCFTool.java rename to archive/java/src/org/broadinstitute/sting/vcftools/VCFTool.java diff --git a/java/src/org/broad/tribble/vcf/VCFHeader.java b/java/src/org/broad/tribble/vcf/VCFHeader.java index 02544d94d..800056e16 100644 --- a/java/src/org/broad/tribble/vcf/VCFHeader.java +++ b/java/src/org/broad/tribble/vcf/VCFHeader.java @@ -53,7 +53,9 @@ public class VCFHeader { * @param genotypeSampleNames the genotype format field, and the sample names */ public VCFHeader(Set metaData, Set genotypeSampleNames) { - mMetaData = new TreeSet(metaData); + mMetaData = new TreeSet(); + if ( metaData != null ) + mMetaData.addAll(metaData); for (String col : genotypeSampleNames) { if (!col.equals("FORMAT")) mGenotypeSampleNames.add(col); diff --git a/java/src/org/broad/tribble/vcf/VCFHeaderLine.java b/java/src/org/broad/tribble/vcf/VCFHeaderLine.java index 627651892..a0021187c 100644 --- a/java/src/org/broad/tribble/vcf/VCFHeaderLine.java +++ b/java/src/org/broad/tribble/vcf/VCFHeaderLine.java @@ -83,7 +83,7 @@ public class VCFHeaderLine implements Comparable { /** * Should be overloaded in sub classes to do subclass specific * - * @return + * @return the string encoding */ protected String toStringEncoding() { return mKey + "=" + mValue; @@ -99,6 +99,13 @@ public class VCFHeaderLine implements Comparable { return toString().compareTo(other.toString()); } + /** + * @param line the line + * @return true if the line is a VCF meta data line, or false if it is not + */ + public static boolean isHeaderLine(String line) { + return line != null && line.length() > 0 && VCFHeader.HEADER_INDICATOR.equals(line.substring(0,1)); + } /** * create a string of a mapping pair for the target VCF version diff --git a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java index ca2b32ffd..e86efa408 100755 --- a/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java +++ b/java/src/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContext.java @@ -163,6 +163,7 @@ import java.util.*; public class VariantContext implements Feature { // to enable tribble intergration protected InferredGeneticContext commonInfo = null; public final static double NO_NEG_LOG_10PERROR = InferredGeneticContext.NO_NEG_LOG_10PERROR; + public final static String REFERENCE_BASE_FOR_INDEL_KEY = "REFERENCE_BASE_FOR_INDEL"; /** The location of this VariantContext */ private GenomeLoc loc; @@ -913,6 +914,16 @@ public class VariantContext implements Feature { // to enable tribble intergrati // // --------------------------------------------------------------------------------------------------------- + // the indel base that gets stripped off for indels + public boolean hasReferenceBaseForIndel() { + return hasAttribute(REFERENCE_BASE_FOR_INDEL_KEY); + } + + // the indel base that gets stripped off for indels + public byte getReferenceBaseForIndel() { + return hasReferenceBaseForIndel() ? (Byte)getAttribute(REFERENCE_BASE_FOR_INDEL_KEY) : (byte)'N'; + } + private void determineType() { if ( type == null ) { switch ( getNAlleles() ) { diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java index 0cab18e38..c635cd0fc 100755 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/GenotypeWriterStorage.java @@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.genotype.*; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.StingException; +import org.broad.tribble.vcf.VCFHeader; /** * Provides temporary storage for GenotypeWriters. @@ -71,11 +72,11 @@ public abstract class GenotypeWriterStorage implements this.stream = null; writer = GenotypeWriterFactory.create(stub.getFormat(), file); Set samples = SampleUtils.getSAMFileSamples(stub.getSAMFileHeader()); - GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), samples, null); + GenotypeWriterFactory.writeHeader(writer, stub.getSAMFileHeader(), new VCFHeader(null, samples)); } - public void addCall(VariantContext vc, String refAllele) { - writer.addCall(vc,refAllele); + public void addCall(VariantContext vc, byte ref) { + writer.addCall(vc, ref); } public void close() { diff --git a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java index cd0f6cbef..d4edf7fdd 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java +++ b/java/src/org/broadinstitute/sting/gatk/io/storage/VCFGenotypeWriterStorage.java @@ -1,13 +1,10 @@ package org.broadinstitute.sting.gatk.io.storage; -import org.broad.tribble.vcf.VCFHeaderLine; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFHeader; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; import org.broadinstitute.sting.gatk.io.stubs.GenotypeWriterStub; -import org.broadinstitute.sting.utils.genotype.vcf.VCFReader; import java.io.File; -import java.util.Set; /** * Provides temporary and permanent storage for genotypes in VCF format. @@ -36,19 +33,18 @@ public class VCFGenotypeWriterStorage extends GenotypeWriterStorage sampleNames, Set headerInfo) { - ((VCFGenotypeWriter)writer).writeHeader(sampleNames,headerInfo); + public void writeHeader(VCFHeader header) { + ((VCFGenotypeWriter)writer).writeHeader(header); } /** - * Add a given VCF record to the given output. - * @param vcfRecord Record to add. + * Add a given VCF file to the writer. + * @param file file from which to add records */ - public void addRecord(VCFRecord vcfRecord) { - ((VCFGenotypeWriter)writer).addRecord(vcfRecord); + public void append(File file) { + ((VCFGenotypeWriter)writer).append(file); } /** @@ -56,11 +52,7 @@ public class VCFGenotypeWriterStorage extends GenotypeWriterStorage implements St /** * @{inheritDoc} */ - public void addCall(VariantContext vc, String refAllele) { - outputTracker.getStorage(this).addCall(vc,refAllele); + public void addCall(VariantContext vc, byte ref) { + outputTracker.getStorage(this).addCall(vc,ref); } /** diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java index d0cec6619..26b9be20e 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFGenotypeWriterStub.java @@ -1,14 +1,12 @@ package org.broadinstitute.sting.gatk.io.stubs; -import org.broad.tribble.vcf.VCFHeaderLine; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFHeader; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.broadinstitute.sting.utils.genotype.vcf.VCFGenotypeWriter; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import java.io.File; import java.io.PrintStream; -import java.util.Set; /** * Stub providing a passthrough for VCF files. @@ -46,18 +44,17 @@ public class VCFGenotypeWriterStub extends GenotypeWriterStub /** * initialize this VCF header * - * @param sampleNames the sample names - * @param headerInfo the optional header fields + * @param header the header */ - public void writeHeader(Set sampleNames, Set headerInfo) { - outputTracker.getStorage(this).writeHeader(sampleNames,headerInfo); + public void writeHeader(VCFHeader header) { + outputTracker.getStorage(this).writeHeader(header); } /** - * Add a given VCF record to the given output. - * @param vcfRecord Record to add. + * Add a given VCF file to the writer. + * @param file file from which to add records */ - public void addRecord(VCFRecord vcfRecord) { - outputTracker.getStorage(this).addRecord(vcfRecord); + public void append(File file) { + outputTracker.getStorage(this).append(file); } } diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java index 2542e1bd0..03c0889c0 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/vcf4/VCF4Codec.java @@ -417,6 +417,9 @@ public class VCF4Codec implements FeatureCodec, NameAwareCodec { Set filters = parseFilters(filter); Map attributes = parseInfo(info, id); + // set the reference base for indels in the attributes + attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte((byte)ref.charAt(0))); + // find out our current location, and clip the alleles down to their minimum length Pair> locAndAlleles; if ( !isSingleNucleotideEvent(alleles) ) { diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java index 3adc29e5e..fb164bc9f 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/BatchedCallsMerger.java @@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFHeader; import java.util.*; @@ -108,7 +109,7 @@ public class BatchedCallsMerger extends LocusWalker imp UG_engine.samples = samples; // initialize the header - GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), samples, headerLines); + GenotypeWriterFactory.writeHeader(writer, GenomeAnalysisEngine.instance.getSAMFileHeader(), new VCFHeader(headerLines, samples)); } public VariantContext map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { @@ -175,7 +176,7 @@ public class BatchedCallsMerger extends LocusWalker imp return sum; try { - writer.addCall(value, new String(value.getReference().getBases())); + writer.addCall(value, value.getReference().getBases()[0]); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(e.getMessage() + "; this is often caused by using the --assume_single_sample_reads argument with the wrong sample name"); } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index e9f399766..0b151c101 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -123,7 +123,7 @@ public class UnifiedGenotyper extends LocusWalker getHeaderInfo() { @@ -211,7 +211,7 @@ public class UnifiedGenotyper extends LocusWalker sampleNames, - Set headerInfo) { + VCFHeader vcfHeader) { // VCF if ( writer instanceof VCFGenotypeWriter ) { - if ( headerInfo == null ) - headerInfo = new HashSet(VCFGenotypeRecord.getSupportedHeaderStrings(VCFHeaderVersion.VCF3_3)); - ((VCFGenotypeWriter)writer).writeHeader(sampleNames, headerInfo); + ((VCFGenotypeWriter)writer).writeHeader(vcfHeader); } // GELI else if ( writer instanceof GeliGenotypeWriter ) { diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java index 373a0ea00..779d6bfa5 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliAdapter.java @@ -107,9 +107,9 @@ public class GeliAdapter implements GeliGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add - * @param refAllele not used by this writer + * @param refBase not used by this writer */ - public void addCall(VariantContext vc, String refAllele) { + public void addCall(VariantContext vc, byte refBase) { if ( writer == null ) throw new IllegalStateException("The Geli Header must be written before calls can be added"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java index da79dcff3..5338a0ae3 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/geli/GeliTextWriter.java @@ -68,9 +68,9 @@ public class GeliTextWriter implements GeliGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add - * @param refAllele required by the inteface; not used by this writer. + * @param refBase required by the inteface; not used by this writer. */ - public void addCall(VariantContext vc, String refAllele) { + public void addCall(VariantContext vc, byte refBase) { char ref = vc.getReference().toString().charAt(0); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java index c5b26bb32..17b2b71bb 100755 --- a/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/glf/GLFWriter.java @@ -148,9 +148,9 @@ public class GLFWriter implements GLFGenotypeWriter { * Add a genotype, given a variant context * * @param vc the variant context representing the call to add - * @param refAllele not used by this writer + * @param refBase not used by this writer */ - public void addCall(VariantContext vc, String refAllele) { + public void addCall(VariantContext vc, byte refBase) { if ( headerText == null ) throw new IllegalStateException("The GLF Header must be written before calls can be added"); diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java index 63abb54fd..035d9e22f 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriter.java @@ -1,10 +1,9 @@ package org.broadinstitute.sting.utils.genotype.vcf; -import org.broad.tribble.vcf.VCFHeaderLine; -import org.broad.tribble.vcf.VCFRecord; +import org.broad.tribble.vcf.VCFHeader; import org.broadinstitute.sting.utils.genotype.GenotypeWriter; -import java.util.Set; +import java.io.File; /** * An extension of the GenotypeWriter interface with support @@ -17,14 +16,14 @@ public interface VCFGenotypeWriter extends GenotypeWriter { /** * initialize this VCF header * - * @param sampleNames the sample names - * @param headerInfo the optional header fields + * @param header the header */ - public void writeHeader(Set sampleNames, Set headerInfo); + public void writeHeader(VCFHeader header); /** - * Add a given VCF record to the given output. - * @param vcfRecord Record to add. + * Add a given VCF file to the writer. + * @param file file from which to add records */ - public void addRecord(VCFRecord vcfRecord); + public void append(File file); + } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java index c32f6c0f3..91008f7af 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFGenotypeWriterAdapter.java @@ -1,96 +1,127 @@ +/* + * Copyright (c) 2010. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + package org.broadinstitute.sting.utils.genotype.vcf; -import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils; -import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; +import org.broadinstitute.sting.utils.StingException; +import org.broad.tribble.vcf.VCFHeaderLine; +import org.broad.tribble.vcf.VCFHeader; +import org.broad.tribble.vcf.VCFFormatHeaderLine; +import org.broad.tribble.vcf.VCFCodec; +import org.broad.tribble.source.BasicFeatureSource; +import org.broad.tribble.index.Index; -import java.io.File; -import java.io.OutputStream; -import java.util.*; +import java.io.*; +import java.util.HashSet; +import java.util.Iterator; /** - * @author aaron, ebanks + * @author ebanks *

* Class VCFGenotypeWriterAdapter *

* Adapt the VCF writter to the genotype output system */ -public class VCFGenotypeWriterAdapter implements VCFGenotypeWriter { - - // our VCF objects - private VCFWriter mWriter = null; - private VCFHeader mHeader = null; - private final Set mSampleNames = new LinkedHashSet(); - - // our log, which we want to capture anything from this class - protected static Logger logger = Logger.getLogger(VCFGenotypeWriterAdapter.class); +public class VCFGenotypeWriterAdapter extends VCFWriter implements VCFGenotypeWriter { // allowed genotype format strings - private Set allowedGenotypeFormatStrings = null; + private HashSet allowedGenotypeFormatKeys = null; public VCFGenotypeWriterAdapter(File writeTo) { - if (writeTo == null) throw new RuntimeException("VCF output file must not be null"); - mWriter = new VCFWriter(writeTo); + super(writeTo); } public VCFGenotypeWriterAdapter(OutputStream writeTo) { - if (writeTo == null) throw new RuntimeException("VCF output stream must not be null"); - mWriter = new VCFWriter(writeTo); + super(writeTo); } - public void addRecord(VCFRecord vcfRecord) { - mWriter.addRecord(vcfRecord); + public void addCall(VariantContext vc, byte ref) { + vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatKeys); + add(vc, new byte[]{ref}); } - /** - * initialize this VCF header - * - * @param sampleNames the sample names - * @param headerInfo the optional header fields - */ - public void writeHeader(Set sampleNames, Set headerInfo) { - mSampleNames.addAll(sampleNames); - - // set up the header fields - Set hInfo = new TreeSet(); - hInfo.add(new VCFHeaderLine(VCFHeaderVersion.VCF3_3.getFormatString(), VCFHeaderVersion.VCF3_3.getVersionString())); - - // set up the allowed genotype format fields - if ( headerInfo != null ) { - for ( VCFHeaderLine field : headerInfo ) { - hInfo.add(field); - if ( field instanceof VCFFormatHeaderLine) { - if ( allowedGenotypeFormatStrings == null ) - allowedGenotypeFormatStrings = new HashSet(); - allowedGenotypeFormatStrings.add(((VCFFormatHeaderLine)field).getName()); - } + public void writeHeader(VCFHeader header) { + for ( VCFHeaderLine field : header.getMetaData() ) { + if ( field instanceof VCFFormatHeaderLine ) { + if ( allowedGenotypeFormatKeys == null ) + allowedGenotypeFormatKeys = new HashSet(); + allowedGenotypeFormatKeys.add(((VCFFormatHeaderLine)field).getName()); } } - - // set up the sample names - mHeader = new VCFHeader(hInfo, mSampleNames); - mWriter.writeHeader(mHeader); + super.writeHeader(header); } - /** finish writing, closing any open files. */ - public void close() { - mWriter.close(); + public void append(File file) { + // if we don't need to restrict the FORMAT fields, then read blindly + if ( allowedGenotypeFormatKeys == null ) + blindAppend(file); + else + smartAppend(file); } - /** - * Add a genotype, given a variant context - * - * @param vc the variant context representing the call to add - * @param refAllele currently has to be a single character representing the reference base (the base - * immediately preceding the event in case of indels) - */ - public void addCall(VariantContext vc, String refAllele) { - if ( mHeader == null ) - throw new IllegalStateException("The VCF Header must be written before records can be added"); + private void blindAppend(File file) { + try { + BufferedReader reader = new BufferedReader(new FileReader(file)); + String line = reader.readLine(); + while ( line != null ) { + if ( !VCFHeaderLine.isHeaderLine(line) ) { + mWriter.write(line); + mWriter.write("\n"); + } + line = reader.readLine(); + } + + reader.close(); + } catch (IOException e) { + throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e); + } + } + + private void smartAppend(File file) { + try { + VCFCodec codec = new VCFCodec(); + Index index = TribbleRMDTrackBuilder.loadIndex(file, codec, false); + BasicFeatureSource vcfReader = new BasicFeatureSource(file.getAbsolutePath(),index,codec); + Iterator iterator = vcfReader.iterator(); + while ( iterator.hasNext() ) { + VariantContext vc = iterator.next(); + vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatKeys); + add(vc, new byte[]{vc.getReferenceBaseForIndel()}); + } + vcfReader.close(); + } catch (FileNotFoundException e) { + throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e); + } catch (IOException e) { + throw new StingException("Error reading file " + file + " in VCFGenotypeWriter: ", e); + } + + - vc = VariantContextUtils.purgeUnallowedGenotypeAttributes(vc, allowedGenotypeFormatStrings); - mWriter.add(vc, new byte[]{(byte)refAllele.charAt(0)}); } } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java index 109e54408..c125451f9 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFReader.java @@ -10,19 +10,19 @@ import org.broad.tribble.index.Index; import org.broad.tribble.source.BasicFeatureSource; import org.broad.tribble.vcf.*; import org.broadinstitute.sting.gatk.refdata.tracks.builders.TribbleRMDTrackBuilder; +import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext; import org.broadinstitute.sting.utils.StingException; -/** The VCFReader class, which given a valid vcf file, parses out the header and VCF records */ -public class VCFReader implements Iterator, Iterable { +/** The VCFReader class, which given a valid vcf file, parses out the header and VariantContexts */ +public class VCFReader implements Iterator, Iterable { // our VCF header private VCFHeader mHeader; // our iterator - private Iterator iterator; + private Iterator iterator; - // our feature reader; so we can close it - private FeatureSource vcfReader = null; + private FeatureSource vcfReader = null; /** * Create a VCF reader, given a VCF file @@ -37,6 +37,7 @@ public class VCFReader implements Iterator, Iterable { * Create a VCF reader, given a VCF file * * @param vcfFile the vcf file to write + * @param createIndexOnDisk do we need to create an index on disk? */ public VCFReader(File vcfFile, boolean createIndexOnDisk) { initialize(vcfFile, null, createIndexOnDisk); @@ -46,6 +47,7 @@ public class VCFReader implements Iterator, Iterable { * Create a VCF reader, given a VCF file * * @param vcfFile the vcf file to write + * @param transform the line transformer to use, if any */ public VCFReader(File vcfFile, VCFCodec.LineTransform transform) { initialize(vcfFile, transform, true); @@ -79,7 +81,7 @@ public class VCFReader implements Iterator, Iterable { * @return an instance of an index */ private Index createIndex(File vcfFile, boolean createIndexOnDisk) { - Index index = null; + Index index; try { index = TribbleRMDTrackBuilder.loadIndex(vcfFile, new VCFCodec(), createIndexOnDisk); } catch (IOException e) { @@ -96,11 +98,11 @@ public class VCFReader implements Iterator, Iterable { } /** - * return the next available VCF record. Make sure to check availability with a call to hasNext! + * return the next available VariantContext. Make sure to check availability with a call to hasNext! * - * @return a VCFRecord, representing the next record in the file + * @return a VariantContext, representing the next record in the file */ - public VCFRecord next() { + public VariantContext next() { return iterator.next(); } @@ -116,7 +118,7 @@ public class VCFReader implements Iterator, Iterable { return this.mHeader; } - public Iterator iterator() { + public Iterator iterator() { return this; } diff --git a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java index e4920f915..c7878e7ec 100644 --- a/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java +++ b/java/src/org/broadinstitute/sting/utils/genotype/vcf/VCFWriter.java @@ -21,13 +21,13 @@ import java.util.*; public class VCFWriter { // the VCF header we're storing - private VCFHeader mHeader = null; + protected VCFHeader mHeader = null; // the print stream we're writting to - private BufferedWriter mWriter; + protected BufferedWriter mWriter; // were filters applied? - private boolean filtersWereAppliedToContext = false; + protected boolean filtersWereAppliedToContext = false; /** * create a VCF writer, given a file to write to @@ -164,7 +164,7 @@ public class VCFWriter { mWriter.write(VCFConstants.FIELD_SEPARATOR); // ALT - if ( vc.getAlternateAlleles().size() > 0 ) { + if ( vc.isVariant() ) { Allele altAllele = vc.getAlternateAllele(0); alleleMap.put(altAllele, "1"); String alt = makeAlleleString(altAllele, vc.isIndel(), refBases[0]); @@ -198,7 +198,7 @@ public class VCFWriter { Map infoFields = new TreeMap(); for ( Map.Entry field : vc.getAttributes().entrySet() ) { String key = field.getKey(); - if ( key.equals("ID") ) + if ( key.equals("ID") || key.equals(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY) ) continue; String outputValue = formatVCFField(field.getValue()); diff --git a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java index aa54609a3..07661ab89 100755 --- a/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/contexts/variantcontext/VariantContextIntegrationTest.java @@ -18,11 +18,11 @@ public class VariantContextIntegrationTest extends WalkerTest { static HashMap expectations = new HashMap(); static { - expectations.put("-L 1:1-10000 --printPerLocus", "63fd69e4ab430b79fb213dd27b58ae1c"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "276ed96efaaffc2fc1c3b3deb4e04d1d"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "a37f7bc34c1824688d3e475945c19d5a"); - expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "1715a6e0daf873f2e2cd10cb56085174"); - expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "bf33ab1ed65da7f56c02ca7956d9c31e"); + expectations.put("-L 1:1-10000 --printPerLocus", "cdd4cb53670a6c0f26e21bc220579654"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly", "7e1d9e181cc489aff847528664cf35bf"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsStartinAtCurrentPosition", "2226dc7ec9a21d8f0e86dc1b934b1d3e"); + expectations.put("-L 1:1-10000 --printPerLocus --takeFirstOnly --onlyContextsStartinAtCurrentPosition", "272a9dff802d1d9f391ad53bc8c23da8"); + expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType SNP", "7444609b931d10cfc1fdccca9b4a2ab5"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL", "629ffd6b3b9ea1bce29cb715576f5c8a"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType INDEL --onlyContextsStartinAtCurrentPosition", "d4b812b2fec231f8f5b61d6f26cf86a5"); expectations.put("-L 1:1-10000 --printPerLocus --onlyContextsOfType MIXED", "546e8e546f2cdfba31f91ed083137c42"); diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java index ee7c0d784..07139898b 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/VariantContextAdaptorsUnitTest.java @@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.refdata; import edu.mit.broad.picard.genotype.geli.GeliFileReader; import edu.mit.broad.picard.genotype.geli.GenotypeLikelihoods; -import net.sf.samtools.SAMFileReader; import net.sf.samtools.util.CloseableIterator; import net.sf.picard.reference.IndexedFastaSequenceFile; import org.broad.tribble.gelitext.GeliTextCodec; @@ -15,8 +14,6 @@ import org.broadinstitute.sting.utils.genotype.GenotypeWriter; import org.broadinstitute.sting.utils.genotype.GenotypeWriterFactory; import org.broadinstitute.sting.utils.genotype.geli.GeliGenotypeWriter; import org.broadinstitute.sting.utils.genotype.geli.GeliTextWriter; -import org.broadinstitute.sting.utils.genotype.glf.GLFSingleCall; -import org.broadinstitute.sting.utils.genotype.glf.GLFWriter; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -81,7 +78,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { geliText = (GeliTextFeature)codec.decode(line); records.add(geliText); // we know they're all single calls in the reference file VariantContext vc = VariantContextAdaptors.toVariantContext("Geli",geliText, null); - if (vc != null) gw.addCall(vc,null); + if (vc != null) gw.addCall(vc,(byte)' '); line = readLine(reader); } gw.close(); // close the file @@ -144,7 +141,7 @@ public class VariantContextAdaptorsUnitTest extends BaseTest { rodGELI gel = new rodGELI("myROD",iterator.next()); records.add(gel); VariantContext vc = VariantContextAdaptors.toVariantContext("myROD",gel, null); - if (vc != null) gw.addCall(vc,null); + if (vc != null) gw.addCall(vc,(byte)' '); } iterator.close(); gw.close(); // close the file diff --git a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java index 2d68a9e5d..158ac9602 100644 --- a/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java +++ b/java/test/org/broadinstitute/sting/gatk/refdata/tracks/builders/TribbleRMDTrackBuilderUnitTest.java @@ -60,7 +60,7 @@ public class TribbleRMDTrackBuilderUnitTest extends BaseTest { Assert.assertTrue(classes.size() > 0); } - @Test + //@Test public void testBuilderIndexUnwriteable() { File vcfFile = new File(validationDataLocation + "/ROD_validation/relic.vcf"); try { diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 0a6f2e660..9b1074146 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -61,7 +61,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { // // -------------------------------------------------------------------------------------------------------------- - //@Test + @Test public void testParallelization() { String md5 = "fc5798b2ef700e60fa032951bab9607d"; @@ -73,7 +73,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 2", 1, Arrays.asList(md5)); - executeTest("test parallelization (multithread)", spec2); + executeTest("test parallelization (2 threads)", spec2); + + WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( + "-T UnifiedGenotyper -R " + oneKGLocation + "reference/human_b36_both.fasta -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 4", 1, + Arrays.asList(md5)); + executeTest("test parallelization (4 threads)", spec3); } // --------------------------------------------------------------------------------------------------------------