diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GATKVCFUtils.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GATKVCFUtils.java index d6925c890..da3763f1a 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GATKVCFUtils.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/GATKVCFUtils.java @@ -79,13 +79,15 @@ public class GATKVCFUtils { /** * Gets the appropriately formatted header for a VCF file describing this GATK run * + * @param header the existing VCFHeader that we will be adding this command line argument header line to. Existing + * command line argument header lines will be used to generate a unique header line key. * @param engine the GATK engine that holds the walker name, GATK version, and other information * @param argumentSources contains information on the argument values provided to the GATK for converting to a * command line string. Should be provided from the data in the parsing engine. Can be * empty in which case the command line will be the empty string. * @return VCF header line describing this run of the GATK. */ - public static VCFHeaderLine getCommandLineArgumentHeaderLine(final GenomeAnalysisEngine engine, final Collection argumentSources) { + public static VCFHeaderLine getCommandLineArgumentHeaderLine(final VCFHeader header, final GenomeAnalysisEngine engine, final Collection argumentSources) { if ( engine == null ) throw new IllegalArgumentException("engine cannot be null"); if ( argumentSources == null ) throw new IllegalArgumentException("argumentSources cannot be null"); @@ -96,7 +98,36 @@ public class GATKVCFUtils { attributes.put("Date", date.toString()); attributes.put("Epoch", Long.toString(date.getTime())); attributes.put("CommandLineOptions", engine.createApproximateCommandLineArgumentString(argumentSources.toArray())); - return new VCFSimpleHeaderLine(GATK_COMMAND_LINE_KEY, attributes); + + // in case the walker name contains space, remove any spaces + String key = getCommandLineKey(header, engine.getWalkerName().replaceAll("\\s", "")); + return new VCFSimpleHeaderLine(key, attributes); + } + + // create a unique command line argument header line key. This method will look for existing + // keys using the same walker name and append a count after it to make it unique. + private static String getCommandLineKey(final VCFHeader header, final String walkerName) { + final Iterator existingMetaDataIterator = header.getMetaDataInInputOrder().iterator(); + + // the command line argument keys are in the format GATK_COMMAND_LINE_KEY.(walker name) + final String searchKey = String.format("%s.%s", GATK_COMMAND_LINE_KEY, walkerName); + + int commandLineKeyCount = 0; + VCFHeaderLine line; + while ( existingMetaDataIterator.hasNext() ) { + line = existingMetaDataIterator.next(); + // if we find another key that starts with the same text as the walker + if ( line.getKey().startsWith(searchKey) ) + commandLineKeyCount++; + } + + // if there are no existing keys with this same walker name, then just return the + // GATK_COMMAND_LINE_KEY.(walker name) format + if ( commandLineKeyCount == 0 ) + return searchKey; + // otherwise append the count associated with this new command (existing + 1) + else + return String.format("%s.%d", searchKey, commandLineKeyCount+1); } public static Map getVCFHeadersFromRods(GenomeAnalysisEngine toolkit, List> rodBindings) { diff --git a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java index 75832ec02..548bee887 100644 --- a/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java +++ b/public/gatk-engine/src/main/java/org/broadinstitute/gatk/engine/io/stubs/VariantContextWriterStub.java @@ -253,7 +253,7 @@ public class VariantContextWriterStub implements Stub, Var // skip writing the command line header if requested if ( ! skipWritingCommandLineHeader && header.isWriteCommandLine() ) { // Always add the header line, as the current format allows multiple entries - final VCFHeaderLine commandLineArgHeaderLine = GATKVCFUtils.getCommandLineArgumentHeaderLine(engine, argumentSources); + final VCFHeaderLine commandLineArgHeaderLine = GATKVCFUtils.getCommandLineArgumentHeaderLine(vcfHeader, engine, argumentSources); vcfHeader.addMetaDataLine(commandLineArgHeaderLine); } diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java index bae83b072..fb498412a 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/EngineFeaturesIntegrationTest.java @@ -232,7 +232,15 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf))); - final VCFHeaderLine versionLine = header.getMetaDataLine(GATKVCFUtils.GATK_COMMAND_LINE_KEY); + + // go through the metadata headers and look for ones that start with the GATK_COMMAND_LINE_KEY + VCFHeaderLine versionLine = null; + for ( final VCFHeaderLine headerLine : header.getMetaDataInInputOrder()) { + if(headerLine.getKey().startsWith(GATKVCFUtils.GATK_COMMAND_LINE_KEY)) { + versionLine = headerLine; + break; + } + } Assert.assertNotNull(versionLine); Assert.assertTrue(versionLine.toString().contains("TestPrintVariantsWalker")); } @@ -251,7 +259,7 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { boolean foundHC = false; boolean foundPV = false; for ( final VCFHeaderLine line : header.getMetaDataInInputOrder() ) { - if ( line.getKey().equals(GATKVCFUtils.GATK_COMMAND_LINE_KEY) ) { + if ( line.getKey().startsWith(GATKVCFUtils.GATK_COMMAND_LINE_KEY) ) { if ( line.toString().contains("HaplotypeCaller") ) { Assert.assertFalse(foundHC); foundHC = true; @@ -267,6 +275,41 @@ public class EngineFeaturesIntegrationTest extends WalkerTest { Assert.assertTrue(foundPV, "Didn't find TestPrintVariantsWalker command line header field"); } + @Test(enabled = true) + public void testMultipleGATKVersionsSameWalkerInVCF() throws Exception { + WalkerTestSpec spec = new WalkerTestSpec("-T TestPrintVariantsWalker -R " + b37KGReference + + " -V " + privateTestDir + "gatkCommandLineExistsInHeader.vcf" + + " -o %s", + 1, Arrays.asList("")); + spec.disableShadowBCF(); + final File vcf = executeTest("testMultipleGATKVersionsSameWalkerInVCF", spec).first.get(0); + final VCFCodec codec = new VCFCodec(); + final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf))); + + boolean foundFirstWalker = false; + boolean foundSecondWalker = false; + for ( final VCFHeaderLine line : header.getMetaDataInInputOrder() ) { + if ( line.getKey().startsWith(GATKVCFUtils.GATK_COMMAND_LINE_KEY) ) { + // check if we found the second walker command line header field key + if ( line.getKey().contains("TestPrintVariantsWalker.2") ) { + Assert.assertFalse(foundSecondWalker); + foundSecondWalker = true; + } + // otherwise if this is not the second walker command but contains the same + // walker name, then it is the first occurrence. If we somehow got more than + // two occurrences of this walker, the Assert.assertFalse(foundFirstWalker); + // will catch this + else if ( line.getKey().contains("TestPrintVariantsWalker") ) { + Assert.assertFalse(foundFirstWalker); + foundFirstWalker = true; + } + } + } + + Assert.assertTrue(foundFirstWalker, "Didn't find TestPrintVariantsWalker command line header field"); + Assert.assertTrue(foundSecondWalker, "Didn't find (second) TestPrintVariantsWalker command line header field"); + } + // -------------------------------------------------------------------------------- // // Test that defaultBaseQualities actually works diff --git a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/GATKVCFUtilsUnitTest.java b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/GATKVCFUtilsUnitTest.java index cd8b19bd0..3881eb719 100644 --- a/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/GATKVCFUtilsUnitTest.java +++ b/public/gatk-engine/src/test/java/org/broadinstitute/gatk/engine/GATKVCFUtilsUnitTest.java @@ -70,17 +70,21 @@ public class GATKVCFUtilsUnitTest extends BaseTest { final GenomeAnalysisEngine testEngine2 = new GenomeAnalysisEngine(); testEngine2.setWalker(walker2); - final VCFHeaderLine line1 = GATKVCFUtils.getCommandLineArgumentHeaderLine(testEngine1, Collections.EMPTY_LIST); + final VCFHeaderLine line1 = GATKVCFUtils.getCommandLineArgumentHeaderLine(header, testEngine1, Collections.EMPTY_LIST); logger.warn(line1); Assert.assertNotNull(line1); - Assert.assertEquals(line1.getKey(), GATKVCFUtils.GATK_COMMAND_LINE_KEY); - for ( final String field : Arrays.asList("Version", "ID", "Date", "CommandLineOptions")) + // assert the key matches the expected format (GATKVCFUtils.GATK_COMMAND_LINE_KEY).(walker name) + final String expectedLine1Key = String.format("%s.%s", GATKVCFUtils.GATK_COMMAND_LINE_KEY, testEngine1.getWalkerName()); + Assert.assertEquals(line1.getKey(), expectedLine1Key); + + for (final String field : Arrays.asList("Version", "ID", "Date", "CommandLineOptions")) Assert.assertTrue(line1.toString().contains(field), "Couldn't find field " + field + " in " + line1.getValue()); Assert.assertTrue(line1.toString().contains("ID=" + testEngine1.getWalkerName())); - final VCFHeaderLine line2 = GATKVCFUtils.getCommandLineArgumentHeaderLine(testEngine2, Collections.EMPTY_LIST); + final VCFHeaderLine line2 = GATKVCFUtils.getCommandLineArgumentHeaderLine(header, testEngine2, Collections.EMPTY_LIST); logger.warn(line2); + header.addMetaDataLine(line1); final Set lines1 = header.getMetaDataInInputOrder(); Assert.assertTrue(lines1.contains(line1)); @@ -89,6 +93,23 @@ public class GATKVCFUtilsUnitTest extends BaseTest { final Set lines2 = header.getMetaDataInInputOrder(); Assert.assertTrue(lines2.contains(line1)); Assert.assertTrue(lines2.contains(line2)); + + // create a new header line using the same engine as used by line 1 + final VCFHeaderLine line3 = GATKVCFUtils.getCommandLineArgumentHeaderLine(header, testEngine1, Collections.EMPTY_LIST); + logger.warn(line3); + + // ensure convention followed by getCommandLineArgumentHeaderLine is to append ".(number of duplicate engine runs)" + // line3 uses the same walker as line1, whereas line2 uses a different walker. line3 is the second occurrence of walker1 + // so a ".2" gets appended afterwards + final String expectedLine3Key = String.format("%s.%s.2", GATKVCFUtils.GATK_COMMAND_LINE_KEY, testEngine1.getWalkerName()); + Assert.assertEquals(line3.getKey(), expectedLine3Key); + + header.addMetaDataLine(line3); + + final Set lines3 = header.getMetaDataInInputOrder(); + Assert.assertTrue(lines3.contains(line1)); + Assert.assertTrue(lines3.contains(line2)); + Assert.assertTrue(lines3.contains(line3)); } private class IndexCreatorTest extends TestDataProvider {