From 594b7912f1bb2dcd5272db9f99a84dbd6cfb5839 Mon Sep 17 00:00:00 2001 From: ebanks Date: Sun, 8 Aug 2010 03:53:07 +0000 Subject: [PATCH] Added a generic method for returning the complete command-line used when calling a walker, to be used in the bam/vcf headers. As requested, every possible engine/walker argument is included. I've added it to the Unified Genotyper output, so people can try it out and let me know what they think. Something that needs to be discussed in group meeting: what happens when we merge VCFs? Do we keep all of the command-lines? git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3969 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/commandline/CommandLineUtils.java | 30 +++++++-- .../walkers/genotyper/UnifiedGenotyper.java | 29 ++++----- .../UnifiedGenotyperIntegrationTest.java | 62 ++++++++----------- 3 files changed, 63 insertions(+), 58 deletions(-) diff --git a/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java b/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java index 17d1080f2..9b5200b43 100644 --- a/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java +++ b/java/src/org/broadinstitute/sting/commandline/CommandLineUtils.java @@ -26,11 +26,10 @@ package org.broadinstitute.sting.commandline; import org.broadinstitute.sting.utils.classloader.JVMUtils; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.gatk.walkers.Walker; -import java.util.Map; -import java.util.List; -import java.util.LinkedHashMap; -import java.util.Collection; +import java.util.*; /** * Static utility methods for working with command-line arguments. @@ -66,4 +65,27 @@ public class CommandLineUtils { return commandLineArguments; } + + public static String createApproximateCommandLineArgumentString(GenomeAnalysisEngine toolkit, Collection otherArgumentProviders, Class walkerType) { + + StringBuffer sb = new StringBuffer(); + sb.append("analysis_type="); + sb.append(toolkit.getWalkerName(walkerType)); + + ArrayList allArgumentProviders = new ArrayList(); + allArgumentProviders.add(toolkit.getArguments()); + allArgumentProviders.addAll(otherArgumentProviders); + + Map commandLineArgs = getApproximateCommandLineArguments(allArgumentProviders); + + for ( Map.Entry commandLineArg : commandLineArgs.entrySet() ) { + sb.append(" "); + sb.append(commandLineArg.getKey()); + sb.append("="); + sb.append(commandLineArg.getValue()); + } + + return sb.toString(); + } + } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java index 4de777755..858853f60 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java @@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.genotype.vcf.*; import java.util.*; import java.io.PrintStream; -import java.io.File; /** @@ -56,10 +55,10 @@ public class UnifiedGenotyper extends LocusWalker args = new HashSet(); - args.add(UAC); - args.addAll(getToolkit().getFilters()); - Map commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(args); - for ( Map.Entry commandLineArg : commandLineArgs.entrySet() ) - headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue())); - // also, the list of input bams - for ( File file : getToolkit().getArguments().samFiles ) - headerInfo.add(new VCFHeaderLine("UG_bam_file_used", file.getName())); - + if ( !NO_VCF_HEADER_LINE ) { + Set args = new HashSet(); + args.add(UAC); + headerInfo.add(new VCFHeaderLine("UnifiedGenotyper", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\"")); + } + return headerInfo; } diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java index 6bd1c8bab..8a423c2e0 100755 --- a/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java @@ -13,18 +13,7 @@ import java.util.Map; public class UnifiedGenotyperIntegrationTest extends WalkerTest { - // -------------------------------------------------------------------------------------------------------------- - // - // testing pooled model - // - // -------------------------------------------------------------------------------------------------------------- -// @Test -// public void testPooled1() { -// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( -// "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1, -// Arrays.asList("c91f44a198cd7222520118726ea806ca")); -// executeTest("testPooled1", spec); -// } + private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER"; // -------------------------------------------------------------------------------------------------------------- // @@ -34,24 +23,24 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testMultiSamplePilot1Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1, - Arrays.asList("680292498be02796787bc4b2393a003c")); + baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1, + Arrays.asList("73d514f53f1630832b3bed65c67fe869")); executeTest("testMultiSamplePilot1 - Joint Estimate", spec); } @Test public void testMultiSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1, - Arrays.asList("1bf2dc92f97f7bd661e61453b657477a")); + baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1, + Arrays.asList("e21ff78ca74bdaacaea562795a90e979")); executeTest("testMultiSamplePilot2 - Joint Estimate", spec); } @Test public void testSingleSamplePilot2Joint() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1, - Arrays.asList("6cab256a3c984c8938ffe026ed620c36")); + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1, + Arrays.asList("20878adf2f56687ef03f32e6bac8fb30")); executeTest("testSingleSamplePilot2 - Joint Estimate", spec); } @@ -63,20 +52,20 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParallelization() { - String md5 = "8f464769be0920ee2bdfd72da2193161"; + String md5 = "664f02681a0ff09bdf3269f97284009c"; WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1, Arrays.asList(md5)); executeTest("test parallelization (single thread)", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 2", 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 2", 1, Arrays.asList(md5)); executeTest("test parallelization (2 threads)", spec2); WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 4", 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 4", 1, Arrays.asList(md5)); executeTest("test parallelization (4 threads)", spec3); } @@ -90,15 +79,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testParameter() { HashMap e = new HashMap(); - e.put( "-genotype", "e4735f7d20f8d82d359f89824cfafbad" ); - e.put( "-all_bases", "c41acfac9f7908609a8d0ccdcefbd9e4" ); - e.put( "--min_base_quality_score 26", "6fea507d70c1b7b74fb6b553bd8904c2" ); - e.put( "--min_mapping_quality_score 26", "f8b1a0449f0a49c1c5edb70f4e90b5f8" ); - e.put( "--max_mismatches_in_40bp_window 5", "bf29fc94431f7b18fd4c65025294de3c" ); + e.put( "-genotype", "5d3f4a25039959e1758a035b8dc595b7" ); + e.put( "-all_bases", "402740e2e1ef23a246120dde91a42dcd" ); + e.put( "--min_base_quality_score 26", "a6fe67665b1ffe1d53194199b1d28b9a" ); + e.put( "--min_mapping_quality_score 26", "3fcb922f5939221ee73661f83d604050" ); + e.put( "--max_mismatches_in_40bp_window 5", "93a0d181625c2c98c35acd03599f6f8b" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testParameter[%s]", entry.getKey()), spec); } @@ -107,13 +96,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testConfidence() { WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, - Arrays.asList("065be668bbea5342c7700017e131eb2a")); + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, + Arrays.asList("292ecd223494a2cff64a9cb72b0850d9")); executeTest("testConfidence1", spec1); WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, - Arrays.asList("b58578df1d05926fb7ecd9fecffa9c5e")); + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, + Arrays.asList("65e29fad296e3d03312087e7e9168095")); executeTest("testConfidence2", spec2); } @@ -131,7 +120,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -vf " + entry.getKey(), 1, + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -vf " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testOtherFormat[%s]", entry.getKey()), spec); } @@ -154,7 +143,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -vf GELI -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1, + baseCommand + " -vf GELI -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); } @@ -174,7 +163,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper -vf GELI -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1, + baseCommand + " -vf GELI -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1, Arrays.asList(entry.getValue())); executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec); } @@ -188,8 +177,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest { @Test public void testMultiTechnologies() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( - "-T UnifiedGenotyper" + - " -R " + b36KGReference + + baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" + " -varout %s" + " -L 1:10,000,000-10,100,000" +