GSA-485: Remove repairVCFHeader from GATK codebase
-- Removed half-a*ssed attempt to automatically repair VCF files with bad headers, which allowed users to provide a replacement header overwriting the file's actually header on the fly. Not a good idea, really. Eric has promised to create a utility that walks through a VCF file and creates a meaningful header field based on the file's contents (if this ever becomes a priority)
This commit is contained in:
parent
52bfe8db8a
commit
4e42988c66
|
|
@ -849,20 +849,9 @@ public class GenomeAnalysisEngine {
|
|||
SAMSequenceDictionary sequenceDictionary,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
VCFHeader header = null;
|
||||
if ( getArguments().repairVCFHeader != null ) {
|
||||
try {
|
||||
final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(getArguments().repairVCFHeader));
|
||||
header = (VCFHeader)new VCFCodec().readHeader(pbs).getHeaderValue();
|
||||
pbs.close();
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(getArguments().repairVCFHeader, e);
|
||||
}
|
||||
}
|
||||
final RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, validationExclusionType);
|
||||
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser, header, validationExclusionType);
|
||||
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
final List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
|
||||
dataSources.add(new ReferenceOrderedDataSource(fileDescriptor,
|
||||
builder,
|
||||
|
|
|
|||
|
|
@ -384,14 +384,5 @@ public class GATKArgumentCollection {
|
|||
@Hidden
|
||||
public boolean USE_SLOW_GENOTYPES = false;
|
||||
// TODO -- remove all code tagged with TODO -- remove me when argument generateShadowBCF is removed
|
||||
|
||||
/**
|
||||
* The file pointed to by this argument must be a VCF file. The GATK will read in just the header of this file
|
||||
* and then use the INFO, FORMAT, and FILTER field values from this file to repair the header file of any other
|
||||
* VCF file that GATK reads in. This allows us to have in effect a master set of header records and use these
|
||||
* to fill in any missing ones in input VCF files.
|
||||
*/
|
||||
@Argument(fullName="repairVCFHeader", shortName = "repairVCFHeader", doc="If provided, whenever we read a VCF file we will use the header in this file to repair the header of the input VCF files", required=false)
|
||||
public File repairVCFHeader = null;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -85,18 +85,16 @@ public class FeatureManager {
|
|||
|
||||
private final PluginManager<FeatureCodec> pluginManager;
|
||||
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
|
||||
private final VCFHeader headerForRepairs;
|
||||
private final boolean lenientVCFProcessing;
|
||||
|
||||
/**
|
||||
* Construct a FeatureManager without a master VCF header
|
||||
*/
|
||||
public FeatureManager() {
|
||||
this(null, false);
|
||||
this(false);
|
||||
}
|
||||
|
||||
public FeatureManager(final VCFHeader headerForRepairs, final boolean lenientVCFProcessing) {
|
||||
this.headerForRepairs = headerForRepairs;
|
||||
public FeatureManager(final boolean lenientVCFProcessing) {
|
||||
this.lenientVCFProcessing = lenientVCFProcessing;
|
||||
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
|
||||
|
||||
|
|
@ -255,8 +253,6 @@ public class FeatureManager {
|
|||
((NameAwareCodec)codex).setName(name);
|
||||
if ( codex instanceof ReferenceDependentFeatureCodec )
|
||||
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||
if ( codex instanceof VCFCodec )
|
||||
((VCFCodec)codex).setHeaderForRepairs(headerForRepairs);
|
||||
if ( codex instanceof AbstractVCFCodec && lenientVCFProcessing )
|
||||
((AbstractVCFCodec)codex).disableOnTheFlyModifications();
|
||||
|
||||
|
|
|
|||
|
|
@ -89,17 +89,15 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
* please talk through your approach with the SE team.
|
||||
* @param dict Sequence dictionary to use.
|
||||
* @param genomeLocParser Location parser to use.
|
||||
* @param headerForRepairs a VCF header that should be used to repair VCF headers. Can be null
|
||||
* @param validationExclusionType Types of validations to exclude, for sequence dictionary verification.
|
||||
*/
|
||||
public RMDTrackBuilder(final SAMSequenceDictionary dict,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
final VCFHeader headerForRepairs,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
this.dict = dict;
|
||||
this.validationExclusionType = validationExclusionType;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.featureManager = new FeatureManager(headerForRepairs, GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
|
||||
this.featureManager = new FeatureManager(GenomeAnalysisEngine.lenientVCFProcessing(validationExclusionType));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -111,18 +109,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
return featureManager;
|
||||
}
|
||||
|
||||
/**
|
||||
* Same as full constructor but makes one without a header for repairs
|
||||
* @param dict
|
||||
* @param genomeLocParser
|
||||
* @param validationExclusionType
|
||||
*/
|
||||
public RMDTrackBuilder(final SAMSequenceDictionary dict,
|
||||
final GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
this(dict, genomeLocParser, null, validationExclusionType);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a RMDTrack of the specified type
|
||||
*
|
||||
|
|
|
|||
|
|
@ -49,13 +49,6 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
// Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
|
||||
public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
|
||||
|
||||
/**
|
||||
* A VCF header the contains master info/filter/format records that we use to 'fill in'
|
||||
* any missing records from our input VCF header. This allows us to repair headers on
|
||||
* the fly
|
||||
*/
|
||||
private VCFHeader headerForRepairs = null;
|
||||
|
||||
/**
|
||||
* @param reader the line reader to take header lines from
|
||||
* @return the number of header lines
|
||||
|
|
@ -88,8 +81,6 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
}
|
||||
headerStrings.add(line);
|
||||
super.parseHeaderFromLines(headerStrings, version);
|
||||
if ( headerForRepairs != null )
|
||||
this.header = repairHeader(this.header, headerForRepairs);
|
||||
return this.header;
|
||||
}
|
||||
else {
|
||||
|
|
@ -103,24 +94,6 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
|
||||
}
|
||||
|
||||
private final VCFHeader repairHeader(final VCFHeader readHeader, final VCFHeader masterHeader) {
|
||||
final Set<VCFHeaderLine> lines = VCFUtils.smartMergeHeaders(Arrays.asList(readHeader, masterHeader), log);
|
||||
return new VCFHeader(lines, readHeader.getGenotypeSamples());
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this VCFCodec to repair the incoming header files with the information in masterHeader
|
||||
*
|
||||
* @param headerForRepairs
|
||||
*/
|
||||
public void setHeaderForRepairs(final VCFHeader headerForRepairs) {
|
||||
if ( headerForRepairs != null )
|
||||
log.info("Using master VCF header to repair missing files from incoming VCFs");
|
||||
this.headerForRepairs = headerForRepairs;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* parse the filter string, first checking to see if we already have parsed it in a previous attempt
|
||||
*
|
||||
|
|
|
|||
|
|
@ -563,6 +563,6 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
+ " at " + vc.getChr() + ":" + vc.getStart()
|
||||
+ " but this key isn't defined in the VCFHeader. The GATK now requires all VCFs to have"
|
||||
+ " complete VCF headers by default. This error can be disabled with the engine argument"
|
||||
+ " -U LENIENT_VCF_PROCESSING or repair the VCF file header using repairVCFHeader");
|
||||
+ " -U LENIENT_VCF_PROCESSING");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
|
||||
//
|
||||
//
|
||||
// Tests to ensure that -U LENIENT_VCF_PROCESS and header repairs are working
|
||||
// Tests to ensure that -U LENIENT_VCF_PROCESS
|
||||
//
|
||||
//
|
||||
|
||||
|
|
@ -106,11 +106,6 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
runVCFWithoutHeaders("-U LENIENT_VCF_PROCESSING", "6de8cb7457154dd355aa55befb943f88", null, true);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPassingOnVCFWithoutHeadersRepairingHeaders() {
|
||||
runVCFWithoutHeaders("-repairVCFHeader " + privateTestDir + "vcfexample2.justHeader.vcf", "ff61e9cad6653c7f93d82d391f7ecdcb", null, false);
|
||||
}
|
||||
|
||||
private void runVCFWithoutHeaders(final String moreArgs, final String expectedMD5, final Class expectedException, final boolean disableBCF) {
|
||||
final String testVCF = privateTestDir + "vcfexample2.noHeader.vcf";
|
||||
final String baseCommand = "-R " + b37KGReference
|
||||
|
|
|
|||
Loading…
Reference in New Issue