From a9a1c499fd7ec9295d488d5c272337dad852703a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 09:28:03 -0400 Subject: [PATCH 1/7] Update md5 in VariantRecalibrationWalkers test for BCF2 -- only encoding differences --- .../VariantRecalibrationWalkersIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java index d1ecbb0bf..b780bcd00 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrationWalkersIntegrationTest.java @@ -76,7 +76,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest { VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", "a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches - "1cdf8c9ee77d91d1ba7f002573108bad", // recal file + "74c10fc15f9739a938b7138909fbde04", // recal file "62fda105e14b619a1c263855cf56af1d"); // cut VCF @DataProvider(name = "VRBCFTest") From 9dc694b2e9fe4a476636d751297d2bed26e783df Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 10:01:10 -0400 Subject: [PATCH 2/7] Meaningful error message and keeping tmp file when mergeInfo fails -- BCF2 is failing for some reason when merging tmp. files with parallel combine variants. ThreadLocalOutputTracker no longer sets deleteOnExit on the tmp file, as this prevents debugging. And it's unnecessary because each mergeInto was deleting files as appropriate -- MergeInfo in VariantContextWriterStorage only deletes the intermediate output if an error occurs --- .../broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java | 2 +- .../sting/gatk/io/storage/VariantContextWriterStorage.java | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java index 999deddd1..636787c69 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/ThreadLocalOutputTracker.java @@ -119,7 +119,7 @@ public class ThreadLocalOutputTracker extends OutputTracker { try { tempFile = File.createTempFile( stub.getClass().getName(), null ); - tempFile.deleteOnExit(); + //tempFile.deleteOnExit(); } catch( IOException ex ) { throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() ); diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index 161179f84..72f8581dd 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -194,6 +194,9 @@ public class VariantContextWriterStorage implements Storage Date: Thu, 16 Aug 2012 10:53:22 -0400 Subject: [PATCH 3/7] Cleanup BCF2Codec -- Remove FORBID_SYMBOLIC global that is no longer necessary -- all error handling goes via error() function --- .../sting/utils/codecs/bcf2/BCF2Codec.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index ac6348f80..fc0b3c4a9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -51,7 +51,6 @@ import java.util.Map; */ public final class BCF2Codec implements FeatureCodec { final protected static Logger logger = Logger.getLogger(BCF2Codec.class); - private final static boolean FORBID_SYMBOLICS = false; private final static int ALLOWED_MAJOR_VERSION = 2; private final static int MIN_MINOR_VERSION = 1; @@ -178,7 +177,7 @@ public final class BCF2Codec implements FeatureCodec { contigNames.add(contig.getID()); } } else { - throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header"); + error("Didn't find any contig lines in BCF2 file header"); } // create the string dictionary @@ -271,7 +270,7 @@ public final class BCF2Codec implements FeatureCodec { final int nSamples = nFormatSamples & 0x00FFFFF; if ( header.getNGenotypeSamples() != nSamples ) - throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " + + error("GATK currently doesn't support reading BCF2 files with " + "different numbers of samples per record. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples"); @@ -343,9 +342,6 @@ public final class BCF2Codec implements FeatureCodec { if ( isRef ) ref = alleleBases; alleles.add(allele); - - if ( FORBID_SYMBOLICS && allele.isSymbolic() ) - throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles"); } assert ref != null; @@ -496,7 +492,7 @@ public final class BCF2Codec implements FeatureCodec { return gtFieldDecoders.getDecoder(field); } - private final void error(final String message) throws RuntimeException { + private void error(final String message) throws RuntimeException { throw new UserException.MalformedBCF2(String.format("%s, at record %d with position %d:", message, recordNo, pos)); } } From 7a247df922d7cdea7f3348f3b1c3737e92d2df6b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 10:54:52 -0400 Subject: [PATCH 4/7] Added -bcf argument to VCFWriter output to force BCF regardless of file extension -- Now possible to do -o /dev/stdout -bcf -l DEBUG > tmp.bcf and create a valid BCF2 file -- Cleanup code to make sure extensions easier by moving to a setX model in VariantContextWriterStub --- .../VCFWriterArgumentTypeDescriptor.java | 53 ++++++++++++++----- .../io/stubs/VariantContextWriterStub.java | 45 +++++++++------- 2 files changed, 66 insertions(+), 32 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index 09766f127..5e1132d45 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -47,6 +47,7 @@ import java.util.List; public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header"; public static final String SITES_ONLY_ARG_NAME = "sites_only"; + public static final String FORCE_BCF = "bcf"; public static final HashSet SUPPORTED_ZIPPED_SUFFIXES = new HashSet(); // @@ -96,7 +97,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { @Override public List createArgumentDefinitions( ArgumentSource source ) { - return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition()); + return Arrays.asList( + createDefaultArgumentDefinition(source), + createNoCommandLineHeaderArgumentDefinition(), + createSitesOnlyArgumentDefinition(), + createBCFArgumentDefinition() ); } /** @@ -117,7 +122,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { if(!source.isRequired()) throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); - VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false); + VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); engine.addOutput(stub); return stub; } @@ -141,15 +146,15 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { if(writerFile == null && !source.isRequired()) throw new MissingArgumentValueException(defaultArgumentDefinition); - // Should we compress the output stream? - boolean compress = isCompressed(writerFileName); - - boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches); - boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches); - // Create a stub for the given object. - VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes) - : new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes); + final VariantContextWriterStub stub = (writerFile != null) + ? new VariantContextWriterStub(engine, writerFile, argumentSources) + : new VariantContextWriterStub(engine, defaultOutputStream, argumentSources); + + stub.setCompressed(isCompressed(writerFileName)); + stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches)); + stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches)); + stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches)); // WARNING: Side effects required by engine! parsingEngine.addTags(stub,getArgumentTags(matches)); @@ -159,8 +164,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { } /** - * Creates the optional compression level argument for the BAM file. - * @return Argument definition for the BAM file itself. Will not be null. + * Creates the optional no_header argument for the VCF file. + * @return Argument definition for the VCF file itself. Will not be null. */ private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() { return new ArgumentDefinition( ArgumentIOType.ARGUMENT, @@ -179,8 +184,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { } /** - * Creates the optional compression level argument for the BAM file. - * @return Argument definition for the BAM file itself. Will not be null. + * Creates the optional sites_only argument definition + * @return Argument definition for the VCF file itself. Will not be null. */ private ArgumentDefinition createSitesOnlyArgumentDefinition() { return new ArgumentDefinition( ArgumentIOType.ARGUMENT, @@ -198,6 +203,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { null ); } + /** + * Creates the optional bcf argument definition + * @return Argument definition for the VCF file itself. Will not be null. + */ + private ArgumentDefinition createBCFArgumentDefinition() { + return new ArgumentDefinition( ArgumentIOType.ARGUMENT, + boolean.class, + FORCE_BCF, + FORCE_BCF, + "force BCF output, regardless of the file's extension", + false, + true, + false, + true, + null, + null, + null, + null ); + } + /** * Returns true if the file will be compressed. * @param writerFileName Name of the file diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java index bea7172ea..260a7efda 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java @@ -79,7 +79,7 @@ public class VariantContextWriterStub implements Stub, Var /** * Should we emit a compressed output stream? */ - private final boolean isCompressed; + private boolean isCompressed = false; /** * A hack: push the argument sources into the VCF header so that the VCF header @@ -90,12 +90,17 @@ public class VariantContextWriterStub implements Stub, Var /** * Should the header be written out? A hidden argument. */ - private final boolean skipWritingCommandLineHeader; + private boolean skipWritingCommandLineHeader = false; /** * Should we not write genotypes even when provided? */ - private final boolean doNotWriteGenotypes; + private boolean doNotWriteGenotypes = false; + + /** + * Should we force BCF writing regardless of the file extension? + */ + private boolean forceBCF = false; /** * Connects this stub with an external stream capable of serving the @@ -108,19 +113,13 @@ public class VariantContextWriterStub implements Stub, Var * * @param engine engine. * @param genotypeFile file to (ultimately) create. - * @param isCompressed should we compress the output stream? * @param argumentSources sources. - * @param skipWritingCommandLineHeader skip writing header. - * @param doNotWriteGenotypes do not write genotypes. */ - public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { + public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection argumentSources) { this.engine = engine; this.genotypeFile = genotypeFile; this.genotypeStream = null; - this.isCompressed = isCompressed; this.argumentSources = argumentSources; - this.skipWritingCommandLineHeader = skipWritingCommandLineHeader; - this.doNotWriteGenotypes = doNotWriteGenotypes; } /** @@ -128,19 +127,13 @@ public class VariantContextWriterStub implements Stub, Var * * @param engine engine. * @param genotypeStream stream to (ultimately) write. - * @param isCompressed should we compress the output stream? * @param argumentSources sources. - * @param skipWritingCommandLineHeader skip writing header. - * @param doNotWriteGenotypes do not write genotypes. */ - public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { + public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection argumentSources) { this.engine = engine; this.genotypeFile = null; this.genotypeStream = new PrintStream(genotypeStream); - this.isCompressed = isCompressed; this.argumentSources = argumentSources; - this.skipWritingCommandLineHeader = skipWritingCommandLineHeader; - this.doNotWriteGenotypes = doNotWriteGenotypes; } /** @@ -167,6 +160,22 @@ public class VariantContextWriterStub implements Stub, Var return isCompressed; } + public void setCompressed(boolean compressed) { + isCompressed = compressed; + } + + public void setSkipWritingCommandLineHeader(boolean skipWritingCommandLineHeader) { + this.skipWritingCommandLineHeader = skipWritingCommandLineHeader; + } + + public void setDoNotWriteGenotypes(boolean doNotWriteGenotypes) { + this.doNotWriteGenotypes = doNotWriteGenotypes; + } + + public void setForceBCF(boolean forceBCF) { + this.forceBCF = forceBCF; + } + /** * Gets the master sequence dictionary from the engine associated with this stub * @link GenomeAnalysisEngine.getMasterSequenceDictionary @@ -187,7 +196,7 @@ public class VariantContextWriterStub implements Stub, Var if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER); if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY); - if ( getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile()) ) + if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) ) options.add(Options.FORCE_BCF); return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options); From 52bfe8db8a7e472f2282be89372b0c10021ab10d Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 10:56:07 -0400 Subject: [PATCH 5/7] Make sure the storage writer is closed before running mergeInfo in multi-threaded output management -- It's not clear this is cause of GSA-484 but it will help confirm that it's not the cause --- .../sting/gatk/io/storage/VariantContextWriterStorage.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java index 72f8581dd..0f5290db7 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VariantContextWriterStorage.java @@ -61,6 +61,7 @@ public class VariantContextWriterStorage implements Storage Date: Thu, 16 Aug 2012 12:39:54 -0400 Subject: [PATCH 6/7] GSA-485: Remove repairVCFHeader from GATK codebase -- Removed half-a*ssed attempt to automatically repair VCF files with bad headers, which allowed users to provide a replacement header overwriting the file's actually header on the fly. Not a good idea, really. Eric has promised to create a utility that walks through a VCF file and creates a meaningful header field based on the file's contents (if this ever becomes a priority) --- .../sting/gatk/GenomeAnalysisEngine.java | 15 ++--------- .../arguments/GATKArgumentCollection.java | 9 ------- .../gatk/refdata/tracks/FeatureManager.java | 8 ++---- .../gatk/refdata/tracks/RMDTrackBuilder.java | 16 +---------- .../sting/utils/codecs/vcf/VCFCodec.java | 27 ------------------- .../variantcontext/writer/VCFWriter.java | 2 +- .../utils/codecs/vcf/VCFIntegrationTest.java | 7 +---- 7 files changed, 7 insertions(+), 77 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 56fcf0652..55107833d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -849,20 +849,9 @@ public class GenomeAnalysisEngine { SAMSequenceDictionary sequenceDictionary, GenomeLocParser genomeLocParser, ValidationExclusion.TYPE validationExclusionType) { - VCFHeader header = null; - if ( getArguments().repairVCFHeader != null ) { - try { - final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader)); - header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue(); - pbs.close(); - } catch ( IOException e ) { - throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e); - } - } + final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType); - RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType); - - List dataSources = new ArrayList(); + final List dataSources = new ArrayList(); for (RMDTriplet fileDescriptor : referenceMetaDataFiles) dataSources.add(new ReferenceOrderedDataSource(fileDescriptor, builder, diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java index 4c9235b58..06177868a 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java @@ -384,14 +384,5 @@ public class GATKArgumentCollection { @Hidden public boolean USE_SLOW_GENOTYPES = false; // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed - - /** - * The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file - * and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other - * VCF file that GATK reads in. This allows us to have in effect a master set of header records and use these - * to fill in any missing ones in input VCF files. - */ - @Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false) - public File repairVCFHeader = null; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java index b5d5deedb..a2fe94641 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/FeatureManager.java @@ -85,18 +85,16 @@ public class FeatureManager { private final PluginManager pluginManager; private final Collection featureDescriptors = new TreeSet(); - private final VCFHeader headerForRepairs; private final boolean lenientVCFProcessing; /** * Construct a FeatureManager without a master VCF header */ public FeatureManager() { - this(null, false); + this(false); } - public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) { - this.headerForRepairs = headerForRepairs; + public FeatureManager(final boolean lenientVCFProcessing) { this.lenientVCFProcessing = lenientVCFProcessing; pluginManager = new PluginManager(FeatureCodec.class, "Codecs", "Codec"); @@ -255,8 +253,6 @@ public class FeatureManager { ((NameAwareCodec)codex).setName(name); if ( codex instanceof ReferenceDependentFeatureCodec ) ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); - if ( codex instanceof VCFCodec ) - ((VCFCodec)codex).setHeaderForRepairs(headerForRepairs); if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing ) ((AbstractVCFCodec)codex).disableOnTheFlyModifications(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java index e183fe169..81fe73075 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java +++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java @@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager { * please talk through your approach with the SE team. * @param dict Sequence dictionary to use. * @param genomeLocParser Location parser to use. - * @param headerForRepairs a VCF header that should be used to repair VCF headers. Can be null * @param validationExclusionType Types of validations to exclude, for sequence dictionary verification. */ public RMDTrackBuilder(final SAMSequenceDictionary dict, final GenomeLocParser genomeLocParser, - final VCFHeader headerForRepairs, ValidationExclusion.TYPE validationExclusionType) { this.dict = dict; this.validationExclusionType = validationExclusionType; this.genomeLocParser = genomeLocParser; - this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType)); + this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType)); } /** @@ -111,18 +109,6 @@ public class RMDTrackBuilder { // extends PluginManager { return featureManager; } - /** - * Same as full constructor but makes one without a header for repairs - * @param dict - * @param genomeLocParser - * @param validationExclusionType - */ - public RMDTrackBuilder(final SAMSequenceDictionary dict, - final GenomeLocParser genomeLocParser, - ValidationExclusion.TYPE validationExclusionType) { - this(dict, genomeLocParser, null, validationExclusionType); - } - /** * create a RMDTrack of the specified type * diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java index da5b18831..4df1efee7 100755 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFCodec.java @@ -49,13 +49,6 @@ public class VCFCodec extends AbstractVCFCodec { // Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4"; - /** - * A VCF header the contains master info/filter/format records that we use to 'fill in' - * any missing records from our input VCF header. This allows us to repair headers on - * the fly - */ - private VCFHeader headerForRepairs = null; - /** * @param reader the line reader to take header lines from * @return the number of header lines @@ -88,8 +81,6 @@ public class VCFCodec extends AbstractVCFCodec { } headerStrings.add(line); super.parseHeaderFromLines(headerStrings, version); - if ( headerForRepairs != null ) - this.header = repairHeader(this.header, headerForRepairs); return this.header; } else { @@ -103,24 +94,6 @@ public class VCFCodec extends AbstractVCFCodec { throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file"); } - private final VCFHeader repairHeader(final VCFHeader readHeader, final VCFHeader masterHeader) { - final Set lines = VCFUtils.smartMergeHeaders(Arrays.asList(readHeader, masterHeader), log); - return new VCFHeader(lines, readHeader.getGenotypeSamples()); - } - - /** - * Tells this VCFCodec to repair the incoming header files with the information in masterHeader - * - * @param headerForRepairs - */ - public void setHeaderForRepairs(final VCFHeader headerForRepairs) { - if ( headerForRepairs != null ) - log.info("Using master VCF header to repair missing files from incoming VCFs"); - this.headerForRepairs = headerForRepairs; - } - - - /** * parse the filter string, first checking to see if we already have parsed it in a previous attempt * diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index ea968e153..db74f2263 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -563,6 +563,6 @@ class VCFWriter extends IndexingVariantContextWriter { + " at " + vc.getChr() + ":" + vc.getStart() + " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have" + " complete VCF headers by default. This error can be disabled with the engine argument" - + " -U LENIENT_VCF_PROCESSING or repair the VCF file header using repairVCFHeader"); + + " -U LENIENT_VCF_PROCESSING"); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index 71fc1d464..b2a4ac2da 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -92,7 +92,7 @@ public class VCFIntegrationTest extends WalkerTest { // // - // Tests to ensure that -U LENIENT_VCF_PROCESS and header repairs are working + // Tests to ensure that -U LENIENT_VCF_PROCESS // // @@ -106,11 +106,6 @@ public class VCFIntegrationTest extends WalkerTest { runVCFWithoutHeaders("-U LENIENT_VCF_PROCESSING", "6de8cb7457154dd355aa55befb943f88", null, true); } - @Test - public void testPassingOnVCFWithoutHeadersRepairingHeaders() { - runVCFWithoutHeaders("-repairVCFHeader " + privateTestDir + "vcfexample2.justHeader.vcf", "ff61e9cad6653c7f93d82d391f7ecdcb", null, false); - } - private void runVCFWithoutHeaders(final String moreArgs, final String expectedMD5, final Class expectedException, final boolean disableBCF) { final String testVCF = privateTestDir + "vcfexample2.noHeader.vcf"; final String baseCommand = "-R " + b37KGReference From 132cdfd9c16efd506b448f1ceb315b0240393f4b Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 16 Aug 2012 13:00:35 -0400 Subject: [PATCH 7/7] GSA-488: MLEAC > AN error when running variant eval fixed --- .../varianteval/stratifications/AlleleCount.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java index 50c5526e4..00a593768 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java @@ -19,6 +19,8 @@ import java.util.*; * it computes the AC from the genotypes themselves. If no AC can be computed, 0 is used. */ public class AlleleCount extends VariantStratifier { + int nchrom; + @Override public void initialize() { // we can only work with a single eval VCF, and it must have genotypes @@ -26,7 +28,8 @@ public class AlleleCount extends VariantStratifier { throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf"); // There are 2 x n sample chromosomes for diploids - int nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2; + // TODO -- generalize to handle multiple ploidy + nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2; if ( nchrom < 2 ) throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample"); @@ -52,8 +55,10 @@ public class AlleleCount extends VariantStratifier { } // make sure that the AC isn't invalid - if ( AC > eval.getCalledChrCount() ) - throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d is larger than the possible called chromosome count (%d)", AC, eval.getChr(), eval.getStart(), eval.getCalledChrCount())); + if ( AC > nchrom ) + throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d " + + "is larger than the number of chromosomes over all samples (%d)", AC, + eval.getChr(), eval.getStart(), nchrom)); return Collections.singletonList((Object) AC); } else {