More BCF2 improvements
-- Refactored setting of contigs from VCFWriterStub to VCFUtils. Necessary for proper BCF working -- Added VCFContigHeaderLine that manages the order for sorting, so we now emit contigs in the proper order. -- Cleaned up VCFHeader operations -- BCF now uses the right header files correctly when encoding / decoding contigs -- Clean up unused tools -- Refactored header parsing routines to make them more accessible -- More minor header changes from Intellij
This commit is contained in:
parent
7541b523e1
commit
3afbc50511
|
|
@ -25,13 +25,13 @@
|
|||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFWriter;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -210,12 +210,7 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
}
|
||||
|
||||
// also put in the reference contig header lines
|
||||
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
|
||||
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
|
||||
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
|
||||
|
||||
vcfHeader.addMetaDataLine(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
|
||||
vcfHeader = VCFUtils.withUpdatedContigs(vcfHeader, engine);
|
||||
}
|
||||
|
||||
outputTracker.getStorage(this).writeHeader(vcfHeader);
|
||||
|
|
@ -252,27 +247,4 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
|
||||
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
|
||||
}
|
||||
|
||||
private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
|
||||
String val;
|
||||
if ( assembly != null )
|
||||
val = String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
|
||||
else
|
||||
val = String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength());
|
||||
return new VCFHeaderLine(VCFHeader.CONTIG_KEY, val);
|
||||
}
|
||||
|
||||
private String getReferenceAssembly(String refPath) {
|
||||
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
|
||||
String assembly = null;
|
||||
if (refPath.contains("b37") || refPath.contains("v37"))
|
||||
assembly = "b37";
|
||||
else if (refPath.contains("b36"))
|
||||
assembly = "b36";
|
||||
else if (refPath.contains("hg18"))
|
||||
assembly = "hg18";
|
||||
else if (refPath.contains("hg19"))
|
||||
assembly = "hg19";
|
||||
return assembly;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,8 +29,6 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
protected final static Logger log = Logger.getLogger(VCFCodec.class);
|
||||
protected final static int NUM_STANDARD_FIELDS = 8; // INFO is the 8th column
|
||||
|
||||
protected VCFHeaderVersion version;
|
||||
|
||||
// we have to store the list of strings that make up the header until they're needed
|
||||
protected VCFHeader header = null;
|
||||
|
||||
|
|
@ -122,16 +120,13 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
|
||||
/**
|
||||
* create a VCF header
|
||||
* @param headerStrings a list of strings that represent all the ## entries
|
||||
* @param line the single # line (column names)
|
||||
* @return the count of header lines
|
||||
* @param headerStrings a list of strings that represent all the ## and # entries
|
||||
* @return a VCFHeader object
|
||||
*/
|
||||
protected Object createHeader(List<String> headerStrings, String line) {
|
||||
|
||||
headerStrings.add(line);
|
||||
|
||||
public static VCFHeader parseHeader(final List<String> headerStrings, final VCFHeaderVersion version) {
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
Set<String> sampleNames = new LinkedHashSet<String>();
|
||||
int contigCounter = 0;
|
||||
// iterate over all the passed in strings
|
||||
for ( String str : headerStrings ) {
|
||||
if ( !str.startsWith(VCFHeader.METADATA_INDICATOR) ) {
|
||||
|
|
@ -166,19 +161,16 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
|
||||
} else {
|
||||
if ( str.startsWith(VCFConstants.INFO_HEADER_START) ) {
|
||||
final VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7),version);
|
||||
final VCFInfoHeaderLine info = new VCFInfoHeaderLine(str.substring(7), version);
|
||||
metaData.add(info);
|
||||
infoFields.put(info.getID(), info.getType());
|
||||
} else if ( str.startsWith(VCFConstants.FILTER_HEADER_START) ) {
|
||||
final VCFFilterHeaderLine filter = new VCFFilterHeaderLine(str.substring(9), version);
|
||||
metaData.add(filter);
|
||||
filterFields.add(filter.getID());
|
||||
} else if ( str.startsWith(VCFConstants.FORMAT_HEADER_START) ) {
|
||||
final VCFFormatHeaderLine format = new VCFFormatHeaderLine(str.substring(9), version);
|
||||
metaData.add(format);
|
||||
formatFields.put(format.getID(), format.getType());
|
||||
} else if ( str.startsWith(VCFConstants.CONTIG_HEADER_START) ) {
|
||||
final VCFSimpleHeaderLine contig = new VCFSimpleHeaderLine(str.substring(9), version, VCFConstants.CONTIG_HEADER_START.substring(2), null);
|
||||
final VCFContigHeaderLine contig = new VCFContigHeaderLine(str.substring(9), version, VCFConstants.CONTIG_HEADER_START.substring(2), contigCounter++);
|
||||
metaData.add(contig);
|
||||
} else if ( str.startsWith(VCFConstants.ALT_HEADER_START) ) {
|
||||
final VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.substring(6), version, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description"));
|
||||
|
|
@ -191,8 +183,12 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
|
|||
}
|
||||
}
|
||||
|
||||
header = new VCFHeader(metaData, sampleNames);
|
||||
header.buildVCFReaderMaps(new ArrayList<String>(sampleNames));
|
||||
return new VCFHeader(metaData, sampleNames);
|
||||
}
|
||||
|
||||
protected VCFHeader createAndSetVCFHeader(final List<String> headerStrings, final String line, final VCFHeaderVersion version) {
|
||||
headerStrings.add(line);
|
||||
header = parseHeader(headerStrings, version);
|
||||
return header;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,8 @@ import java.util.*;
|
|||
* this class writes VCF files
|
||||
*/
|
||||
public class StandardVCFWriter extends IndexingVCFWriter {
|
||||
private final static String VERSION_LINE = VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString();
|
||||
|
||||
// the print stream we're writing to
|
||||
final protected BufferedWriter mWriter;
|
||||
|
||||
|
|
@ -87,50 +89,62 @@ public class StandardVCFWriter extends IndexingVCFWriter {
|
|||
|
||||
@Override
|
||||
public void writeHeader(VCFHeader header) {
|
||||
mHeader = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
||||
mHeader = header;
|
||||
writeHeader(mHeader, mWriter, doNotWriteGenotypes, VERSION_LINE, getStreamName());
|
||||
|
||||
// determine if we use filters, so we should FORCE pass the records
|
||||
// TODO -- this might not be necessary any longer as we have unfiltered, filtered, and PASS VCs
|
||||
for ( final VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line instanceof VCFFilterHeaderLine)
|
||||
filtersWereAppliedToContext = true;
|
||||
}
|
||||
}
|
||||
|
||||
public static void writeHeader(VCFHeader header,
|
||||
final Writer writer,
|
||||
final boolean doNotWriteGenotypes,
|
||||
final String versionLine,
|
||||
final String streamNameForError) {
|
||||
header = doNotWriteGenotypes ? new VCFHeader(header.getMetaData()) : header;
|
||||
|
||||
try {
|
||||
// the file format field needs to be written first
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString() + "\n");
|
||||
writer.write(versionLine + "\n");
|
||||
|
||||
for ( VCFHeaderLine line : mHeader.getMetaData() ) {
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( VCFHeaderVersion.isFormatString(line.getKey()) )
|
||||
continue;
|
||||
|
||||
// are the records filtered (so we know what to put in the FILTER column of passing records) ?
|
||||
if ( line instanceof VCFFilterHeaderLine)
|
||||
filtersWereAppliedToContext = true;
|
||||
|
||||
mWriter.write(VCFHeader.METADATA_INDICATOR);
|
||||
mWriter.write(line.toString());
|
||||
mWriter.write("\n");
|
||||
writer.write(VCFHeader.METADATA_INDICATOR);
|
||||
writer.write(line.toString());
|
||||
writer.write("\n");
|
||||
}
|
||||
|
||||
// write out the column line
|
||||
mWriter.write(VCFHeader.HEADER_INDICATOR);
|
||||
writer.write(VCFHeader.HEADER_INDICATOR);
|
||||
boolean isFirst = true;
|
||||
for ( VCFHeader.HEADER_FIELDS field : mHeader.getHeaderFields() ) {
|
||||
for ( VCFHeader.HEADER_FIELDS field : header.getHeaderFields() ) {
|
||||
if ( isFirst )
|
||||
isFirst = false; // don't write out a field separator
|
||||
else
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
mWriter.write(field.toString());
|
||||
writer.write(VCFConstants.FIELD_SEPARATOR);
|
||||
writer.write(field.toString());
|
||||
}
|
||||
|
||||
if ( mHeader.hasGenotypingData() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
mWriter.write("FORMAT");
|
||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
mWriter.write(sample);
|
||||
if ( header.hasGenotypingData() ) {
|
||||
writer.write(VCFConstants.FIELD_SEPARATOR);
|
||||
writer.write("FORMAT");
|
||||
for ( String sample : header.getGenotypeSamples() ) {
|
||||
writer.write(VCFConstants.FIELD_SEPARATOR);
|
||||
writer.write(sample);
|
||||
}
|
||||
}
|
||||
|
||||
mWriter.write("\n");
|
||||
mWriter.flush(); // necessary so that writing to an output stream will work
|
||||
writer.write("\n");
|
||||
writer.flush(); // necessary so that writing to an output stream will work
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new ReviewedStingException("IOException writing the VCF header to " + getStreamName(), e);
|
||||
throw new ReviewedStingException("IOException writing the VCF header to " + streamNameForError, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
List<String> headerStrings = new ArrayList<String>();
|
||||
|
||||
String line;
|
||||
VCFHeaderVersion version = null;
|
||||
try {
|
||||
boolean foundHeaderVersion = false;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
|
|
@ -57,7 +58,7 @@ public class VCF3Codec extends AbstractVCFCodec {
|
|||
if (!foundHeaderVersion) {
|
||||
throw new TribbleException.InvalidHeader("We never saw a header line specifying VCF version");
|
||||
}
|
||||
return createHeader(headerStrings, line);
|
||||
return createAndSetVCFHeader(headerStrings, line, version);
|
||||
}
|
||||
else {
|
||||
throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ import java.util.*;
|
|||
*/
|
||||
public class VCFCodec extends AbstractVCFCodec {
|
||||
// Our aim is to read in the records and convert to VariantContext as quickly as possible, relying on VariantContext to do the validation of any contradictory (or malformed) record parameters.
|
||||
|
||||
public final static String VCF4_MAGIC_HEADER = "##fileformat=VCFv4";
|
||||
private VCFHeaderVersion version = null;
|
||||
|
||||
/**
|
||||
* @param reader the line reader to take header lines from
|
||||
|
|
@ -80,7 +80,7 @@ public class VCFCodec extends AbstractVCFCodec {
|
|||
if (!foundHeaderVersion) {
|
||||
throw new TribbleException.InvalidHeader("We never saw a header line specifying VCF version");
|
||||
}
|
||||
return createHeader(headerStrings, line);
|
||||
return createAndSetVCFHeader(headerStrings, line, version);
|
||||
}
|
||||
else {
|
||||
throw new TribbleException.InvalidHeader("We never saw the required CHROM header line (starting with one #) for the input VCF file");
|
||||
|
|
|
|||
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* A special class representing a contig VCF header line. Nows the true contig order and sorts on that
|
||||
*
|
||||
* @author mdepristo
|
||||
*/
|
||||
public class VCFContigHeaderLine extends VCFSimpleHeaderLine {
|
||||
final Integer contigIndex;
|
||||
|
||||
|
||||
/**
|
||||
* create a VCF contig header line
|
||||
*
|
||||
* @param line the header line
|
||||
* @param version the vcf header version
|
||||
* @param key the key for this header line
|
||||
*/
|
||||
public VCFContigHeaderLine(final String line, final VCFHeaderVersion version, final String key, int contigIndex) {
|
||||
super(line, version, key, null);
|
||||
this.contigIndex = contigIndex;
|
||||
}
|
||||
|
||||
public VCFContigHeaderLine(final String key, final Map<String, String> mapping, int contigIndex) {
|
||||
super(key, mapping, null);
|
||||
this.contigIndex = contigIndex;
|
||||
}
|
||||
|
||||
public Integer getContigIndex() {
|
||||
return contigIndex;
|
||||
}
|
||||
|
||||
/**
|
||||
* IT IS CRITIAL THAT THIS BE OVERRIDDEN SO WE SORT THE CONTIGS IN THE CORRECT ORDER
|
||||
*
|
||||
* @param other
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(final Object other) {
|
||||
if ( other instanceof VCFContigHeaderLine )
|
||||
return contigIndex.compareTo(((VCFContigHeaderLine) other).contigIndex);
|
||||
else {
|
||||
return super.compareTo(other);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -44,10 +44,11 @@ public class VCFHeader {
|
|||
}
|
||||
|
||||
// the associated meta data
|
||||
private final Set<VCFHeaderLine> mMetaData;
|
||||
private final Set<VCFHeaderLine> mMetaData = new TreeSet<VCFHeaderLine>();
|
||||
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
|
||||
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
|
||||
private final Map<String, VCFHeaderLine> mOtherMetaData = new HashMap<String, VCFHeaderLine>();
|
||||
private final List<VCFContigHeaderLine> contigMetaData = new ArrayList<VCFContigHeaderLine>();
|
||||
|
||||
// the list of auxillary tags
|
||||
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
|
||||
|
|
@ -79,7 +80,7 @@ public class VCFHeader {
|
|||
* @param metaData the meta data associated with this header
|
||||
*/
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>(metaData);
|
||||
mMetaData.addAll(metaData);
|
||||
loadVCFVersion();
|
||||
loadMetaDataMaps();
|
||||
}
|
||||
|
|
@ -91,16 +92,10 @@ public class VCFHeader {
|
|||
* @param genotypeSampleNames the sample names
|
||||
*/
|
||||
public VCFHeader(Set<VCFHeaderLine> metaData, Set<String> genotypeSampleNames) {
|
||||
mMetaData = new TreeSet<VCFHeaderLine>();
|
||||
if ( metaData != null )
|
||||
mMetaData.addAll(metaData);
|
||||
|
||||
this(metaData);
|
||||
mGenotypeSampleNames.addAll(genotypeSampleNames);
|
||||
|
||||
loadVCFVersion();
|
||||
loadMetaDataMaps();
|
||||
|
||||
samplesWereAlreadySorted = ParsingUtils.isSorted(genotypeSampleNames);
|
||||
buildVCFReaderMaps(genotypeSampleNames);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -109,10 +104,9 @@ public class VCFHeader {
|
|||
* using this header (i.e., read by the VCFCodec) will have genotypes
|
||||
* occurring in the same order
|
||||
*
|
||||
* @param genotypeSampleNamesInAppearenceOrder genotype sample names
|
||||
* @param genotypeSampleNamesInAppearenceOrder genotype sample names, must iterator in order of appearence
|
||||
*/
|
||||
|
||||
public void buildVCFReaderMaps(List<String> genotypeSampleNamesInAppearenceOrder) {
|
||||
private void buildVCFReaderMaps(Collection<String> genotypeSampleNamesInAppearenceOrder) {
|
||||
sampleNamesInOrder = new ArrayList<String>(genotypeSampleNamesInAppearenceOrder.size());
|
||||
sampleNameToOffset = new HashMap<String, Integer>(genotypeSampleNamesInAppearenceOrder.size());
|
||||
|
||||
|
|
@ -131,7 +125,14 @@ public class VCFHeader {
|
|||
* @param headerLine Line to add to the existing metadata component.
|
||||
*/
|
||||
public void addMetaDataLine(VCFHeaderLine headerLine) {
|
||||
mMetaData.add(headerLine);
|
||||
mMetaData.add(headerLine);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return all of the VCF header lines of the ##contig form in order, or an empty set if none were present
|
||||
*/
|
||||
public List<VCFContigHeaderLine> getContigLines() {
|
||||
return Collections.unmodifiableList(contigMetaData);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -157,12 +158,12 @@ public class VCFHeader {
|
|||
if ( line instanceof VCFInfoHeaderLine ) {
|
||||
VCFInfoHeaderLine infoLine = (VCFInfoHeaderLine)line;
|
||||
mInfoMetaData.put(infoLine.getID(), infoLine);
|
||||
}
|
||||
else if ( line instanceof VCFFormatHeaderLine ) {
|
||||
} else if ( line instanceof VCFFormatHeaderLine ) {
|
||||
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
|
||||
mFormatMetaData.put(formatLine.getID(), formatLine);
|
||||
}
|
||||
else {
|
||||
} else if ( line instanceof VCFContigHeaderLine ) {
|
||||
contigMetaData.add((VCFContigHeaderLine)line);
|
||||
} else {
|
||||
mOtherMetaData.put(line.getKey(), line);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,18 +14,6 @@ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin
|
|||
private String name;
|
||||
private Map<String, String> genericFields = new LinkedHashMap<String, String>();
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
*
|
||||
* @param key the key for this header line
|
||||
* @param name the name for this header line
|
||||
* @param genericFields other fields for this header line
|
||||
*/
|
||||
public VCFSimpleHeaderLine(String key, String name, Map<String, String> genericFields) {
|
||||
super(key, "");
|
||||
initialize(name, genericFields);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a VCF filter header line
|
||||
*
|
||||
|
|
@ -48,9 +36,12 @@ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin
|
|||
* @param key the key for this header line
|
||||
* @param expectedTagOrdering the tag ordering expected for this header line
|
||||
*/
|
||||
public VCFSimpleHeaderLine(String line, VCFHeaderVersion version, String key, List<String> expectedTagOrdering) {
|
||||
public VCFSimpleHeaderLine(final String line, final VCFHeaderVersion version, final String key, final List<String> expectedTagOrdering) {
|
||||
this(key, VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering), expectedTagOrdering);
|
||||
}
|
||||
|
||||
public VCFSimpleHeaderLine(final String key, final Map<String, String> mapping, final List<String> expectedTagOrdering) {
|
||||
super(key, "");
|
||||
Map<String, String> mapping = VCFHeaderLineTranslator.parseLine(version, line, expectedTagOrdering);
|
||||
name = mapping.get("ID");
|
||||
initialize(name, mapping);
|
||||
}
|
||||
|
|
@ -87,8 +78,4 @@ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin
|
|||
public String getID() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public Map<String, String> getGenericFields() {
|
||||
return genericFields;
|
||||
}
|
||||
}
|
||||
|
|
@ -25,6 +25,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.vcf;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
|
|
@ -32,6 +34,7 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -225,4 +228,62 @@ public class VCFUtils {
|
|||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add / replace the contig header lines in the VCFHeader with the information in the GATK engine
|
||||
*
|
||||
* @param header the header to update
|
||||
* @param engine the GATK engine containing command line arguments and the master sequence dictionary
|
||||
*/
|
||||
public final static VCFHeader withUpdatedContigs(final VCFHeader header, final GenomeAnalysisEngine engine) {
|
||||
return VCFUtils.withUpdatedContigs(header, engine.getArguments().referenceFile, engine.getMasterSequenceDictionary());
|
||||
}
|
||||
|
||||
/**
|
||||
* Add / replace the contig header lines in the VCFHeader with the in the reference file and master reference dictionary
|
||||
*
|
||||
* @param oldHeader the header to update
|
||||
* @param referenceFile the file path to the reference sequence used to generate this vcf
|
||||
* @param refDict the SAM formatted reference sequence dictionary
|
||||
*/
|
||||
public final static VCFHeader withUpdatedContigs(final VCFHeader oldHeader, final File referenceFile, final SAMSequenceDictionary refDict) {
|
||||
final Set<VCFHeaderLine> lines = new LinkedHashSet<VCFHeaderLine>(oldHeader.getMetaData().size());
|
||||
|
||||
for ( final VCFHeaderLine line : oldHeader.getMetaData() ) {
|
||||
if ( line instanceof VCFContigHeaderLine )
|
||||
continue; // skip old contig lines
|
||||
if ( line.getKey().equals(VCFHeader.REFERENCE_KEY) )
|
||||
continue; // skip the old reference key
|
||||
lines.add(line);
|
||||
}
|
||||
|
||||
final String assembly = getReferenceAssembly(referenceFile.getName());
|
||||
for ( SAMSequenceRecord contig : refDict.getSequences() )
|
||||
lines.add(getContigHeaderLine(contig, assembly));
|
||||
|
||||
lines.add(new VCFHeaderLine(VCFHeader.REFERENCE_KEY, "file://" + referenceFile.getAbsolutePath()));
|
||||
return new VCFHeader(lines, oldHeader.getGenotypeSamples());
|
||||
}
|
||||
|
||||
private final static VCFHeaderLine getContigHeaderLine(final SAMSequenceRecord contig, final String assembly) {
|
||||
final Map<String, String> map = new LinkedHashMap<String, String>(3);
|
||||
map.put("ID", contig.getSequenceName());
|
||||
map.put("length", String.valueOf(contig.getSequenceLength()));
|
||||
if ( assembly != null ) map.put("assembly", assembly);
|
||||
return new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, map, contig.getSequenceIndex());
|
||||
}
|
||||
|
||||
private final static String getReferenceAssembly(final String refPath) {
|
||||
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
|
||||
String assembly = null;
|
||||
if (refPath.contains("b37") || refPath.contains("v37"))
|
||||
assembly = "b37";
|
||||
else if (refPath.contains("b36"))
|
||||
assembly = "b36";
|
||||
else if (refPath.contains("hg18"))
|
||||
assembly = "hg18";
|
||||
else if (refPath.contains("hg19"))
|
||||
assembly = "hg19";
|
||||
return assembly;
|
||||
}
|
||||
}
|
||||
|
|
@ -32,9 +32,8 @@ import org.apache.log4j.Logger;
|
|||
import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.AbstractVCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue