From ec76bf6d4ac1e8887bfafc461cbb3bd538f4ece5 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 9 Aug 2011 11:24:48 -0400 Subject: [PATCH] VCF headers now include 'contig' lines describing the name, length, and assembly (when easily parsable) for each contig in the reference. --- .../sting/gatk/io/stubs/VCFWriterStub.java | 45 ++++++++++++++++--- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java index 7a110fde5..6ade8e78c 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java +++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.io.stubs; +import net.sf.samtools.SAMSequenceRecord; import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.io.OutputTracker; @@ -177,14 +178,21 @@ public class VCFWriterStub implements Stub, VCFWriter { vcfHeader = header; // Check for the command-line argument header line. If not present, add it in. - VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); - boolean foundCommandLineHeaderLine = false; - for(VCFHeaderLine line: vcfHeader.getMetaData()) { - if(line.getKey().equals(commandLineArgHeaderLine.getKey())) - foundCommandLineHeaderLine = true; + if ( !skipWritingHeader ) { + VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); + boolean foundCommandLineHeaderLine = false; + for (VCFHeaderLine line: vcfHeader.getMetaData()) { + if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) ) + foundCommandLineHeaderLine = true; + } + if ( !foundCommandLineHeaderLine ) + vcfHeader.addMetaDataLine(commandLineArgHeaderLine); + + // also put in the reference contig header lines + String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName()); + for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() ) + vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly)); } - if(!foundCommandLineHeaderLine && !skipWritingHeader) - vcfHeader.addMetaDataLine(commandLineArgHeaderLine); outputTracker.getStorage(this).writeHeader(vcfHeader); } @@ -220,4 +228,27 @@ public class VCFWriterStub implements Stub, VCFWriter { CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class); return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\""); } + + private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) { + String val; + if ( assembly != null ) + val = String.format("", contig.getSequenceName(), contig.getSequenceLength(), assembly); + else + val = String.format("", contig.getSequenceName(), contig.getSequenceLength()); + return new VCFHeaderLine("contig", val); + } + + private String getReferenceAssembly(String refPath) { + // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot + String assembly = null; + if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 ) + assembly = "b37"; + else if ( refPath.indexOf("b36") != -1 ) + assembly = "b36"; + else if ( refPath.indexOf("hg18") != -1 ) + assembly = "hg18"; + else if ( refPath.indexOf("hg19") != -1 ) + assembly = "hg19"; + return assembly; + } } \ No newline at end of file