Merge branch 'master' of ssh://gsa2.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2012-08-16 13:05:36 -04:00
commit 611d9b61e2
14 changed files with 93 additions and 121 deletions

View File

@ -849,20 +849,9 @@ public class GenomeAnalysisEngine {
SAMSequenceDictionary sequenceDictionary, SAMSequenceDictionary sequenceDictionary,
GenomeLocParser genomeLocParser, GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) { ValidationExclusion.TYPE validationExclusionType) {
VCFHeader header = null; final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType);
if ( getArguments().repairVCFHeader != null ) {
try {
final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader));
header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue();
pbs.close();
} catch ( IOException e ) {
throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
}
}
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType); final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (RMDTriplet fileDescriptor : referenceMetaDataFiles) for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
dataSources.add(new ReferenceOrderedDataSource(fileDescriptor, dataSources.add(new ReferenceOrderedDataSource(fileDescriptor,
builder, builder,

View File

@ -384,14 +384,5 @@ public class GATKArgumentCollection {
@Hidden @Hidden
public boolean USE_SLOW_GENOTYPES = false; public boolean USE_SLOW_GENOTYPES = false;
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed // TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
/**
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
* VCF file that GATK reads in. This allows us to have in effect a master set of header records and use these
* to fill in any missing ones in input VCF files.
*/
@Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false)
public File repairVCFHeader = null;
} }

View File

@ -119,7 +119,7 @@ public class ThreadLocalOutputTracker extends OutputTracker {
try { try {
tempFile = File.createTempFile( stub.getClass().getName(), null ); tempFile = File.createTempFile( stub.getClass().getName(), null );
tempFile.deleteOnExit(); //tempFile.deleteOnExit();
} }
catch( IOException ex ) { catch( IOException ex ) {
throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() ); throw new UserException.BadTmpDir("Unable to create temporary file for stub: " + stub.getClass().getName() );

View File

@ -61,6 +61,7 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
protected final File file; protected final File file;
protected OutputStream stream; protected OutputStream stream;
protected final VariantContextWriter writer; protected final VariantContextWriter writer;
boolean closed = false;
/** /**
* Constructs an object which will write directly into the output file provided by the stub. * Constructs an object which will write directly into the output file provided by the stub.
@ -172,10 +173,13 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
if(file != null) if(file != null)
logger.debug("Closing temporary file " + file.getAbsolutePath()); logger.debug("Closing temporary file " + file.getAbsolutePath());
writer.close(); writer.close();
closed = true;
} }
public void mergeInto(VariantContextWriterStorage target) { public void mergeInto(VariantContextWriterStorage target) {
try { try {
if ( ! closed )
throw new ReviewedStingException("Writer not closed, but we are merging into the file!");
final String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin"; final String targetFilePath = target.file != null ? target.file.getAbsolutePath() : "/dev/stdin";
logger.debug(String.format("Merging %s into %s",file.getAbsolutePath(),targetFilePath)); logger.debug(String.format("Merging %s into %s",file.getAbsolutePath(),targetFilePath));
@ -194,6 +198,9 @@ public class VariantContextWriterStorage implements Storage<VariantContextWriter
} }
source.close(); source.close();
file.delete(); // this should be last to aid in debugging when the process fails
} catch (UserException e) {
throw new ReviewedStingException("BUG: intermediate file " + file + " is malformed, got error while reading", e);
} catch (IOException e) { } catch (IOException e) {
throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e); throw new UserException.CouldNotReadInputFile(file, "Error reading file in VCFWriterStorage: ", e);
} }

View File

@ -47,6 +47,7 @@ import java.util.List;
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header"; public static final String NO_HEADER_ARG_NAME = "no_cmdline_in_header";
public static final String SITES_ONLY_ARG_NAME = "sites_only"; public static final String SITES_ONLY_ARG_NAME = "sites_only";
public static final String FORCE_BCF = "bcf";
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>(); public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
// //
@ -96,7 +97,11 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) { public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
return Arrays.asList( createDefaultArgumentDefinition(source), createNoCommandLineHeaderArgumentDefinition(),createSitesOnlyArgumentDefinition()); return Arrays.asList(
createDefaultArgumentDefinition(source),
createNoCommandLineHeaderArgumentDefinition(),
createSitesOnlyArgumentDefinition(),
createBCFArgumentDefinition() );
} }
/** /**
@ -117,7 +122,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired()) if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, false, argumentSources, false, false); VariantContextWriterStub stub = new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
engine.addOutput(stub); engine.addOutput(stub);
return stub; return stub;
} }
@ -141,15 +146,15 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if(writerFile == null && !source.isRequired()) if(writerFile == null && !source.isRequired())
throw new MissingArgumentValueException(defaultArgumentDefinition); throw new MissingArgumentValueException(defaultArgumentDefinition);
// Should we compress the output stream?
boolean compress = isCompressed(writerFileName);
boolean skipWritingCmdLineHeader = argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches);
boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches);
// Create a stub for the given object. // Create a stub for the given object.
VariantContextWriterStub stub = (writerFile != null) ? new VariantContextWriterStub(engine, writerFile, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes) final VariantContextWriterStub stub = (writerFile != null)
: new VariantContextWriterStub(engine, defaultOutputStream, compress, argumentSources, skipWritingCmdLineHeader, doNotWriteGenotypes); ? new VariantContextWriterStub(engine, writerFile, argumentSources)
: new VariantContextWriterStub(engine, defaultOutputStream, argumentSources);
stub.setCompressed(isCompressed(writerFileName));
stub.setDoNotWriteGenotypes(argumentIsPresent(createSitesOnlyArgumentDefinition(),matches));
stub.setSkipWritingCommandLineHeader(argumentIsPresent(createNoCommandLineHeaderArgumentDefinition(),matches));
stub.setForceBCF(argumentIsPresent(createBCFArgumentDefinition(),matches));
// WARNING: Side effects required by engine! // WARNING: Side effects required by engine!
parsingEngine.addTags(stub,getArgumentTags(matches)); parsingEngine.addTags(stub,getArgumentTags(matches));
@ -159,8 +164,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
/** /**
* Creates the optional compression level argument for the BAM file. * Creates the optional no_header argument for the VCF file.
* @return Argument definition for the BAM file itself. Will not be null. * @return Argument definition for the VCF file itself. Will not be null.
*/ */
private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() { private ArgumentDefinition createNoCommandLineHeaderArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT, return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -179,8 +184,8 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
/** /**
* Creates the optional compression level argument for the BAM file. * Creates the optional sites_only argument definition
* @return Argument definition for the BAM file itself. Will not be null. * @return Argument definition for the VCF file itself. Will not be null.
*/ */
private ArgumentDefinition createSitesOnlyArgumentDefinition() { private ArgumentDefinition createSitesOnlyArgumentDefinition() {
return new ArgumentDefinition( ArgumentIOType.ARGUMENT, return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
@ -198,6 +203,26 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
null ); null );
} }
/**
 * Builds the definition of the optional {@code bcf} command-line argument.
 *
 * Both the full name and the short name of the argument are {@link #FORCE_BCF}, and its
 * value type is {@code boolean}. The remaining boolean and null values are positional
 * {@code ArgumentDefinition} constructor arguments whose meaning is defined by that class
 * (not visible here -- check the constructor before changing their order).
 *
 * @return a newly constructed argument definition for the bcf argument.
 */
private ArgumentDefinition createBCFArgumentDefinition() {
    final ArgumentDefinition bcfDefinition = new ArgumentDefinition(
            ArgumentIOType.ARGUMENT,
            boolean.class,
            FORCE_BCF,
            FORCE_BCF,
            "force BCF output, regardless of the file's extension",
            false,
            true,
            false,
            true,
            null,
            null,
            null,
            null);
    return bcfDefinition;
}
/** /**
* Returns true if the file will be compressed. * Returns true if the file will be compressed.
* @param writerFileName Name of the file * @param writerFileName Name of the file

View File

@ -79,7 +79,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/** /**
* Should we emit a compressed output stream? * Should we emit a compressed output stream?
*/ */
private final boolean isCompressed; private boolean isCompressed = false;
/** /**
* A hack: push the argument sources into the VCF header so that the VCF header * A hack: push the argument sources into the VCF header so that the VCF header
@ -90,12 +90,17 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
/** /**
* Should the header be written out? A hidden argument. * Should the header be written out? A hidden argument.
*/ */
private final boolean skipWritingCommandLineHeader; private boolean skipWritingCommandLineHeader = false;
/** /**
* Should we not write genotypes even when provided? * Should we not write genotypes even when provided?
*/ */
private final boolean doNotWriteGenotypes; private boolean doNotWriteGenotypes = false;
/**
* Should we force BCF writing regardless of the file extension?
*/
private boolean forceBCF = false;
/** /**
* Connects this stub with an external stream capable of serving the * Connects this stub with an external stream capable of serving the
@ -108,19 +113,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* *
* @param engine engine. * @param engine engine.
* @param genotypeFile file to (ultimately) create. * @param genotypeFile file to (ultimately) create.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources. * @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/ */
public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { public VariantContextWriterStub(GenomeAnalysisEngine engine, File genotypeFile, Collection<Object> argumentSources) {
this.engine = engine; this.engine = engine;
this.genotypeFile = genotypeFile; this.genotypeFile = genotypeFile;
this.genotypeStream = null; this.genotypeStream = null;
this.isCompressed = isCompressed;
this.argumentSources = argumentSources; this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
} }
/** /**
@ -128,19 +127,13 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
* *
* @param engine engine. * @param engine engine.
* @param genotypeStream stream to (ultimately) write. * @param genotypeStream stream to (ultimately) write.
* @param isCompressed should we compress the output stream?
* @param argumentSources sources. * @param argumentSources sources.
* @param skipWritingCommandLineHeader skip writing header.
* @param doNotWriteGenotypes do not write genotypes.
*/ */
public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, boolean isCompressed, Collection<Object> argumentSources, boolean skipWritingCommandLineHeader, boolean doNotWriteGenotypes) { public VariantContextWriterStub(GenomeAnalysisEngine engine, OutputStream genotypeStream, Collection<Object> argumentSources) {
this.engine = engine; this.engine = engine;
this.genotypeFile = null; this.genotypeFile = null;
this.genotypeStream = new PrintStream(genotypeStream); this.genotypeStream = new PrintStream(genotypeStream);
this.isCompressed = isCompressed;
this.argumentSources = argumentSources; this.argumentSources = argumentSources;
this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
this.doNotWriteGenotypes = doNotWriteGenotypes;
} }
/** /**
@ -167,6 +160,22 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
return isCompressed; return isCompressed;
} }
/**
 * Sets whether this stub should emit a compressed output stream.
 *
 * @param compressed the new value of the compression flag.
 */
public void setCompressed(final boolean compressed) {
    this.isCompressed = compressed;
}
/**
 * Stores the flag recording whether the command-line header should be skipped.
 *
 * @param skipWritingCommandLineHeader the new value of the flag.
 */
public void setSkipWritingCommandLineHeader(final boolean skipWritingCommandLineHeader) {
    this.skipWritingCommandLineHeader = skipWritingCommandLineHeader;
}
/**
 * Stores the flag recording whether genotypes should be left out of the output
 * even when they are provided.
 *
 * @param doNotWriteGenotypes the new value of the flag.
 */
public void setDoNotWriteGenotypes(final boolean doNotWriteGenotypes) {
    this.doNotWriteGenotypes = doNotWriteGenotypes;
}
/**
 * Stores the flag recording whether BCF writing should be forced regardless of
 * the output file's extension.
 *
 * @param forceBCF the new value of the flag.
 */
public void setForceBCF(final boolean forceBCF) {
    this.forceBCF = forceBCF;
}
/** /**
* Gets the master sequence dictionary from the engine associated with this stub * Gets the master sequence dictionary from the engine associated with this stub
* @link GenomeAnalysisEngine.getMasterSequenceDictionary * @link GenomeAnalysisEngine.getMasterSequenceDictionary
@ -187,7 +196,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER); if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY); if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
if ( getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile()) ) if ( forceBCF || (getFile() != null && VariantContextWriterFactory.isBCFOutput(getFile())) )
options.add(Options.FORCE_BCF); options.add(Options.FORCE_BCF);
return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options); return options.isEmpty() ? EnumSet.noneOf(Options.class) : EnumSet.copyOf(options);

View File

@ -85,18 +85,16 @@ public class FeatureManager {
private final PluginManager<FeatureCodec> pluginManager; private final PluginManager<FeatureCodec> pluginManager;
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>(); private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
private final VCFHeader headerForRepairs;
private final boolean lenientVCFProcessing; private final boolean lenientVCFProcessing;
/** /**
* Construct a FeatureManager without a master VCF header * Construct a FeatureManager without a master VCF header
*/ */
public FeatureManager() { public FeatureManager() {
this(null, false); this(false);
} }
public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) { public FeatureManager(final boolean lenientVCFProcessing) {
this.headerForRepairs = headerForRepairs;
this.lenientVCFProcessing = lenientVCFProcessing; this.lenientVCFProcessing = lenientVCFProcessing;
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec"); pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
@ -255,8 +253,6 @@ public class FeatureManager {
((NameAwareCodec)codex).setName(name); ((NameAwareCodec)codex).setName(name);
if ( codex instanceof ReferenceDependentFeatureCodec ) if ( codex instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser); ((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
if ( codex instanceof VCFCodec )
((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing ) if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
((AbstractVCFCodec)codex).disableOnTheFlyModifications(); ((AbstractVCFCodec)codex).disableOnTheFlyModifications();

View File

@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
* please talk through your approach with the SE team. * please talk through your approach with the SE team.
* @param dict Sequence dictionary to use. * @param dict Sequence dictionary to use.
* @param genomeLocParser Location parser to use. * @param genomeLocParser Location parser to use.
* @param headerForRepairs a VCF header that should be used to repair VCF headers. Can be null
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification. * @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
*/ */
public RMDTrackBuilder(final SAMSequenceDictionary dict, public RMDTrackBuilder(final SAMSequenceDictionary dict,
final GenomeLocParser genomeLocParser, final GenomeLocParser genomeLocParser,
final VCFHeader headerForRepairs,
ValidationExclusion.TYPE validationExclusionType) { ValidationExclusion.TYPE validationExclusionType) {
this.dict = dict; this.dict = dict;
this.validationExclusionType = validationExclusionType; this.validationExclusionType = validationExclusionType;
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType)); this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
} }
/** /**
@ -111,18 +109,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
return featureManager; return featureManager;
} }
/**
* Same as full constructor but makes one without a header for repairs
* @param dict
* @param genomeLocParser
* @param validationExclusionType
*/
public RMDTrackBuilder(final SAMSequenceDictionary dict,
final GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
this(dict, genomeLocParser, null, validationExclusionType);
}
/** /**
* create a RMDTrack of the specified type * create a RMDTrack of the specified type
* *

View File

@ -19,6 +19,8 @@ import java.util.*;
* it computes the AC from the genotypes themselves. If no AC can be computed, 0 is used. * it computes the AC from the genotypes themselves. If no AC can be computed, 0 is used.
*/ */
public class AlleleCount extends VariantStratifier { public class AlleleCount extends VariantStratifier {
int nchrom;
@Override @Override
public void initialize() { public void initialize() {
// we can only work with a single eval VCF, and it must have genotypes // we can only work with a single eval VCF, and it must have genotypes
@ -26,7 +28,8 @@ public class AlleleCount extends VariantStratifier {
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf"); throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification only works with a single eval vcf");
// There are 2 x n sample chromosomes for diploids // There are 2 x n sample chromosomes for diploids
int nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2; // TODO -- generalize to handle multiple ploidy
nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * 2;
if ( nchrom < 2 ) if ( nchrom < 2 )
throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample"); throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");
@ -52,8 +55,10 @@ public class AlleleCount extends VariantStratifier {
} }
// make sure that the AC isn't invalid // make sure that the AC isn't invalid
if ( AC > eval.getCalledChrCount() ) if ( AC > nchrom )
throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d is larger than the possible called chromosome count (%d)", AC, eval.getChr(), eval.getStart(), eval.getCalledChrCount())); throw new UserException.MalformedVCF(String.format("The AC or MLEAC value (%d) at position %s:%d " +
"is larger than the number of chromosomes over all samples (%d)", AC,
eval.getChr(), eval.getStart(), nchrom));
return Collections.singletonList((Object) AC); return Collections.singletonList((Object) AC);
} else { } else {

View File

@ -51,7 +51,6 @@ import java.util.Map;
*/ */
public final class BCF2Codec implements FeatureCodec<VariantContext> { public final class BCF2Codec implements FeatureCodec<VariantContext> {
final protected static Logger logger = Logger.getLogger(BCF2Codec.class); final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
private final static boolean FORBID_SYMBOLICS = false;
private final static int ALLOWED_MAJOR_VERSION = 2; private final static int ALLOWED_MAJOR_VERSION = 2;
private final static int MIN_MINOR_VERSION = 1; private final static int MIN_MINOR_VERSION = 1;
@ -178,7 +177,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
contigNames.add(contig.getID()); contigNames.add(contig.getID());
} }
} else { } else {
throw new UserException.MalformedBCF2("Didn't find any contig lines in BCF2 file header"); error("Didn't find any contig lines in BCF2 file header");
} }
// create the string dictionary // create the string dictionary
@ -271,7 +270,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
final int nSamples = nFormatSamples & 0x00FFFFF; final int nSamples = nFormatSamples & 0x00FFFFF;
if ( header.getNGenotypeSamples() != nSamples ) if ( header.getNGenotypeSamples() != nSamples )
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " + error("GATK currently doesn't support reading BCF2 files with " +
"different numbers of samples per record. Saw " + header.getNGenotypeSamples() + "different numbers of samples per record. Saw " + header.getNGenotypeSamples() +
" samples in header but have a record with " + nSamples + " samples"); " samples in header but have a record with " + nSamples + " samples");
@ -343,9 +342,6 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
if ( isRef ) ref = alleleBases; if ( isRef ) ref = alleleBases;
alleles.add(allele); alleles.add(allele);
if ( FORBID_SYMBOLICS && allele.isSymbolic() )
throw new ReviewedStingException("LIMITATION: GATK BCF2 codec does not yet support symbolic alleles");
} }
assert ref != null; assert ref != null;
@ -496,7 +492,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
return gtFieldDecoders.getDecoder(field); return gtFieldDecoders.getDecoder(field);
} }
private final void error(final String message) throws RuntimeException { private void error(final String message) throws RuntimeException {
throw new UserException.MalformedBCF2(String.format("%s, at record %d with position %d:", message, recordNo, pos)); throw new UserException.MalformedBCF2(String.format("%s, at record %d with position %d:", message, recordNo, pos));
} }
} }

View File

@ -49,13 +49,6 @@ public class VCFCodec extends AbstractVCFCodec {
// Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters. // Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4"; public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
/**
* A VCF header the contains master info/filter/format records that we use to 'fill in'
* any missing records from our input VCF header. This allows us to repair headers on
* the fly
*/
private VCFHeader headerForRepairs = null;
/** /**
* @param reader the line reader to take header lines from * @param reader the line reader to take header lines from
* @return the number of header lines * @return the number of header lines
@ -88,8 +81,6 @@ public class VCFCodec extends AbstractVCFCodec {
} }
headerStrings.add(line); headerStrings.add(line);
super.parseHeaderFromLines(headerStrings, version); super.parseHeaderFromLines(headerStrings, version);
if ( headerForRepairs != null )
this.header = repairHeader(this.header, headerForRepairs);
return this.header; return this.header;
} }
else { else {
@ -103,24 +94,6 @@ public class VCFCodec extends AbstractVCFCodec {
throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file"); throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
} }
private final VCFHeader repairHeader(final VCFHeader readHeader, final VCFHeader masterHeader) {
final Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(Arrays.asList(readHeader, masterHeader), log);
return new VCFHeader(lines, readHeader.getGenotypeSamples());
}
/**
* Tells this VCFCodec to repair the incoming header files with the information in masterHeader
*
* @param headerForRepairs
*/
public void setHeaderForRepairs(final VCFHeader headerForRepairs) {
if ( headerForRepairs != null )
log.info("Using master VCF header to repair missing files from incoming VCFs");
this.headerForRepairs = headerForRepairs;
}
/** /**
* parse the filter string, first checking to see if we already have parsed it in a previous attempt * parse the filter string, first checking to see if we already have parsed it in a previous attempt
* *

View File

@ -563,6 +563,6 @@ class VCFWriter extends IndexingVariantContextWriter {
+ " at " + vc.getChr() + ":" + vc.getStart() + " at " + vc.getChr() + ":" + vc.getStart()
+ " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have" + " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have"
+ " complete VCF headers by default. This error can be disabled with the engine argument" + " complete VCF headers by default. This error can be disabled with the engine argument"
+ " -U LENIENT_VCF_PROCESSING or repair the VCF file header using repairVCFHeader"); + " -U LENIENT_VCF_PROCESSING");
} }
} }

View File

@ -76,7 +76,7 @@ public class VariantRecalibrationWalkersIntegrationTest extends WalkerTest {
VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf", VRTest bcfTest = new VRTest(privateTestDir + "vqsr.bcf_test.snps.unfiltered.bcf",
"a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches "a8ce3cd3dccafdf7d580bcce7d660a9a", // tranches
"1cdf8c9ee77d91d1ba7f002573108bad", // recal file "74c10fc15f9739a938b7138909fbde04", // recal file
"62fda105e14b619a1c263855cf56af1d"); // cut VCF "62fda105e14b619a1c263855cf56af1d"); // cut VCF
@DataProvider(name = "VRBCFTest") @DataProvider(name = "VRBCFTest")

View File

@ -92,7 +92,7 @@ public class VCFIntegrationTest extends WalkerTest {
// //
// //
// Tests to ensure that -U LENIENT_VCF_PROCESS and header repairs are working // Tests to ensure that -U LENIENT_VCF_PROCESS
// //
// //
@ -106,11 +106,6 @@ public class VCFIntegrationTest extends WalkerTest {
runVCFWithoutHeaders("-U LENIENT_VCF_PROCESSING", "6de8cb7457154dd355aa55befb943f88", null, true); runVCFWithoutHeaders("-U LENIENT_VCF_PROCESSING", "6de8cb7457154dd355aa55befb943f88", null, true);
} }
@Test
public void testPassingOnVCFWithoutHeadersRepairingHeaders() {
runVCFWithoutHeaders("-repairVCFHeader " + privateTestDir + "vcfexample2.justHeader.vcf", "ff61e9cad6653c7f93d82d391f7ecdcb", null, false);
}
private void runVCFWithoutHeaders(final String moreArgs, final String expectedMD5, final Class expectedException, final boolean disableBCF) { private void runVCFWithoutHeaders(final String moreArgs, final String expectedMD5, final Class expectedException, final boolean disableBCF) {
final String testVCF = privateTestDir + "vcfexample2.noHeader.vcf"; final String testVCF = privateTestDir + "vcfexample2.noHeader.vcf";
final String baseCommand = "-R " + b37KGReference final String baseCommand = "-R " + b37KGReference