VCF headers now include 'contig' lines describing the name, length, and assembly (when easily parsable) for each contig in the reference.
This commit is contained in:
parent
7afb5c9f1c
commit
ec76bf6d4a
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
|
|
@ -177,14 +178,21 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
vcfHeader = header;
|
||||
|
||||
// Check for the command-line argument header line. If not present, add it in.
|
||||
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
|
||||
boolean foundCommandLineHeaderLine = false;
|
||||
for(VCFHeaderLine line: vcfHeader.getMetaData()) {
|
||||
if(line.getKey().equals(commandLineArgHeaderLine.getKey()))
|
||||
foundCommandLineHeaderLine = true;
|
||||
if ( !skipWritingHeader ) {
|
||||
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
|
||||
boolean foundCommandLineHeaderLine = false;
|
||||
for (VCFHeaderLine line: vcfHeader.getMetaData()) {
|
||||
if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
|
||||
foundCommandLineHeaderLine = true;
|
||||
}
|
||||
if ( !foundCommandLineHeaderLine )
|
||||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
|
||||
// also put in the reference contig header lines
|
||||
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
|
||||
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
|
||||
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
|
||||
}
|
||||
if(!foundCommandLineHeaderLine && !skipWritingHeader)
|
||||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
|
||||
outputTracker.getStorage(this).writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
@ -220,4 +228,27 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
|
||||
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
|
||||
}
|
||||
|
||||
private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
|
||||
String val;
|
||||
if ( assembly != null )
|
||||
val = String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
|
||||
else
|
||||
val = String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength());
|
||||
return new VCFHeaderLine("contig", val);
|
||||
}
|
||||
|
||||
private String getReferenceAssembly(String refPath) {
|
||||
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
|
||||
String assembly = null;
|
||||
if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 )
|
||||
assembly = "b37";
|
||||
else if ( refPath.indexOf("b36") != -1 )
|
||||
assembly = "b36";
|
||||
else if ( refPath.indexOf("hg18") != -1 )
|
||||
assembly = "hg18";
|
||||
else if ( refPath.indexOf("hg19") != -1 )
|
||||
assembly = "hg19";
|
||||
return assembly;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue