VCF headers now include 'contig' lines describing the name, length, and assembly (when easily parsable) for each contig in the reference.

This commit is contained in:
Eric Banks 2011-08-09 11:24:48 -04:00
parent 7afb5c9f1c
commit ec76bf6d4a
1 changed files with 38 additions and 7 deletions

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker;
@ -177,14 +178,21 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
vcfHeader = header;
// Check for the command-line argument header line. If not present, add it in.
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
boolean foundCommandLineHeaderLine = false;
for(VCFHeaderLine line: vcfHeader.getMetaData()) {
if(line.getKey().equals(commandLineArgHeaderLine.getKey()))
foundCommandLineHeaderLine = true;
if ( !skipWritingHeader ) {
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
boolean foundCommandLineHeaderLine = false;
for (VCFHeaderLine line: vcfHeader.getMetaData()) {
if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
foundCommandLineHeaderLine = true;
}
if ( !foundCommandLineHeaderLine )
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
// also put in the reference contig header lines
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
}
if(!foundCommandLineHeaderLine && !skipWritingHeader)
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
outputTracker.getStorage(this).writeHeader(vcfHeader);
}
@ -220,4 +228,27 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
}
/**
 * Builds the VCF 'contig' meta-data header line for a single reference contig.
 *
 * The value is assembled by plain concatenation rather than String.format so that
 * the length is always written with ASCII digits, independent of the JVM default
 * locale (Formatter localizes the digits of %d conversions).
 *
 * @param contig   the sequence record supplying the contig name and length
 * @param assembly the assembly name to record, or null to omit the assembly field
 * @return a header line with key "contig" and a value of the form
 *         {@code <ID=name,length=len>} or {@code <ID=name,length=len,assembly=asm>}
 */
private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
    StringBuilder val = new StringBuilder("<ID=");
    val.append(contig.getSequenceName());
    val.append(",length=").append(contig.getSequenceLength());
    // the assembly field is optional and only written when one was determined
    if ( assembly != null )
        val.append(",assembly=").append(assembly);
    val.append('>');
    return new VCFHeaderLine("contig", val.toString());
}
/**
 * Makes a best-effort guess at the reference assembly from the given reference name.
 *
 * This doesn't need to be perfect, as the result only feeds a non-required VCF
 * header field. The name is searched (case-sensitively) for a few well-known
 * build tokens, checked in a fixed order; the first match wins.
 *
 * @param refPath the reference name to inspect
 * @return "b37" (for names containing "b37" or "v37"), "b36", "hg18" or "hg19";
 *         null when none of the known tokens is present
 */
private String getReferenceAssembly(String refPath) {
    if ( refPath.contains("b37") || refPath.contains("v37") )
        return "b37";
    if ( refPath.contains("b36") )
        return "b36";
    if ( refPath.contains("hg18") )
        return "hg18";
    if ( refPath.contains("hg19") )
        return "hg19";
    // no recognizable build token
    return null;
}
}