Added a generic method for returning the complete command-line used when calling a walker, to be used in the bam/vcf headers. As requested, every possible engine/walker argument is included. I've added it to the Unified Genotyper output, so people can try it out and let me know what they think. Something that needs to be discussed in group meeting: what happens when we merge VCFs? Do we keep all of the command-lines?

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3969 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-08-08 03:53:07 +00:00
parent 6e389059cf
commit 594b7912f1
3 changed files with 63 additions and 58 deletions

View File

@ -26,11 +26,10 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.walkers.Walker;
import java.util.Map; import java.util.*;
import java.util.List;
import java.util.LinkedHashMap;
import java.util.Collection;
/** /**
* Static utility methods for working with command-line arguments. * Static utility methods for working with command-line arguments.
@ -66,4 +65,27 @@ public class CommandLineUtils {
return commandLineArguments; return commandLineArguments;
} }
/**
 * Builds a single-line, approximate rendering of the command line used to run a walker.
 *
 * The result begins with "analysis_type=" followed by the walker name reported by the
 * engine, and then one " key=value" pair for every entry returned by
 * getApproximateCommandLineArguments. The engine's own argument collection is always
 * placed first in the list of providers, followed by the caller-supplied providers.
 * Pairs are emitted in the iteration order of the returned map.
 *
 * @param toolkit                the engine; supplies the walker name and its argument collection
 * @param otherArgumentProviders additional argument-bearing objects to include; a null
 *                               value is treated the same as an empty collection
 * @param walkerType             the walker class whose name is reported as analysis_type
 * @return the assembled "analysis_type=... key=value ..." string
 */
public static String createApproximateCommandLineArgumentString(GenomeAnalysisEngine toolkit, Collection<Object> otherArgumentProviders, Class<? extends Walker> walkerType) {
    // The buffer never escapes this method, so the unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder();
    sb.append("analysis_type=");
    sb.append(toolkit.getWalkerName(walkerType));

    // Engine-level arguments go first, then whatever the caller supplied.
    List<Object> allArgumentProviders = new ArrayList<Object>();
    allArgumentProviders.add(toolkit.getArguments());
    if ( otherArgumentProviders != null ) {
        // Guard: addAll(null) would otherwise fail with a bare NullPointerException.
        allArgumentProviders.addAll(otherArgumentProviders);
    }

    Map<String,String> commandLineArgs = getApproximateCommandLineArguments(allArgumentProviders);

    for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() ) {
        sb.append(" ");
        sb.append(commandLineArg.getKey());
        sb.append("=");
        sb.append(commandLineArg.getValue());
    }

    return sb.toString();
}
} }

View File

@ -39,7 +39,6 @@ import org.broadinstitute.sting.utils.genotype.vcf.*;
import java.util.*; import java.util.*;
import java.io.PrintStream; import java.io.PrintStream;
import java.io.File;
/** /**
@ -56,10 +55,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
public GenotypeWriter writer = null; public GenotypeWriter writer = null;
@Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false) @Argument(fullName = "verbose_mode", shortName = "verbose", doc = "File to print all of the annotated and detailed debugging output", required = false)
public PrintStream verboseWriter = null; protected PrintStream verboseWriter = null;
@Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print any relevant callability metrics output", required = false) @Argument(fullName = "metrics_file", shortName = "metrics", doc = "File to print any relevant callability metrics output", required = false)
public PrintStream metricsWriter = null; protected PrintStream metricsWriter = null;
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false) @Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected String[] annotationsToUse = {}; protected String[] annotationsToUse = {};
@ -67,6 +66,10 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false) @Argument(fullName="group", shortName="G", doc="One or more classes/groups of annotations to apply to variant calls", required=false)
protected String[] annotationClassesToUse = { "Standard" }; protected String[] annotationClassesToUse = { "Standard" };
@Argument(fullName = "NO_HEADER", shortName = "NO_HEADER", doc = "Don't output the usual VCF header tag with the command line. FOR DEBUGGING PURPOSES ONLY. This option is required in order to pass integration tests.", required = false)
protected Boolean NO_VCF_HEADER_LINE = false;
// the calculation arguments // the calculation arguments
private UnifiedGenotyperEngine UG_engine = null; private UnifiedGenotyperEngine UG_engine = null;
@ -133,10 +136,6 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( !(writer instanceof VCFGenotypeWriter) ) if ( !(writer instanceof VCFGenotypeWriter) )
return headerInfo; return headerInfo;
// first, the basic info
headerInfo.add(new VCFHeaderLine("source", "UnifiedGenotyper"));
headerInfo.add(new VCFHeaderLine("reference", getToolkit().getArguments().referenceFile.getName()));
// all annotation fields from VariantAnnotatorEngine // all annotation fields from VariantAnnotatorEngine
headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions()); headerInfo.addAll(annotationEngine.getVCFAnnotationDescriptions());
@ -155,16 +154,12 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality")); headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotyperEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
// all of the arguments from the argument collection // all of the arguments from the argument collection
Set<Object> args = new HashSet<Object>(); if ( !NO_VCF_HEADER_LINE ) {
args.add(UAC); Set<Object> args = new HashSet<Object>();
args.addAll(getToolkit().getFilters()); args.add(UAC);
Map<String,String> commandLineArgs = CommandLineUtils.getApproximateCommandLineArguments(args); headerInfo.add(new VCFHeaderLine("UnifiedGenotyper", "\"" + CommandLineUtils.createApproximateCommandLineArgumentString(getToolkit(), args, getClass()) + "\""));
for ( Map.Entry<String, String> commandLineArg : commandLineArgs.entrySet() ) }
headerInfo.add(new VCFHeaderLine(String.format("UG_%s", commandLineArg.getKey()), commandLineArg.getValue()));
// also, the list of input bams
for ( File file : getToolkit().getArguments().samFiles )
headerInfo.add(new VCFHeaderLine("UG_bam_file_used", file.getName()));
return headerInfo; return headerInfo;
} }

View File

@ -13,18 +13,7 @@ import java.util.Map;
public class UnifiedGenotyperIntegrationTest extends WalkerTest { public class UnifiedGenotyperIntegrationTest extends WalkerTest {
// -------------------------------------------------------------------------------------------------------------- private final static String baseCommand = "-T UnifiedGenotyper -R " + b36KGReference + " -NO_HEADER";
//
// testing pooled model
//
// --------------------------------------------------------------------------------------------------------------
// @Test
// public void testPooled1() {
// WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
// "-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,023,000-10,024,000 -bm empirical -gm POOLED -ps 60 -confidence 30", 1,
// Arrays.asList("c91f44a198cd7222520118726ea806ca"));
// executeTest("testPooled1", spec);
// }
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
@ -34,24 +23,24 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testMultiSamplePilot1Joint() { public void testMultiSamplePilot1Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1, baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -varout %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("680292498be02796787bc4b2393a003c")); Arrays.asList("73d514f53f1630832b3bed65c67fe869"));
executeTest("testMultiSamplePilot1 - Joint Estimate", spec); executeTest("testMultiSamplePilot1 - Joint Estimate", spec);
} }
@Test @Test
public void testMultiSamplePilot2Joint() { public void testMultiSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1, baseCommand + " -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -varout %s -L 20:10,000,000-10,050,000", 1,
Arrays.asList("1bf2dc92f97f7bd661e61453b657477a")); Arrays.asList("e21ff78ca74bdaacaea562795a90e979"));
executeTest("testMultiSamplePilot2 - Joint Estimate", spec); executeTest("testMultiSamplePilot2 - Joint Estimate", spec);
} }
@Test @Test
public void testSingleSamplePilot2Joint() { public void testSingleSamplePilot2Joint() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000", 1,
Arrays.asList("6cab256a3c984c8938ffe026ed620c36")); Arrays.asList("20878adf2f56687ef03f32e6bac8fb30"));
executeTest("testSingleSamplePilot2 - Joint Estimate", spec); executeTest("testSingleSamplePilot2 - Joint Estimate", spec);
} }
@ -63,20 +52,20 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testParallelization() { public void testParallelization() {
String md5 = "8f464769be0920ee2bdfd72da2193161"; String md5 = "664f02681a0ff09bdf3269f97284009c";
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000", 1,
Arrays.asList(md5)); Arrays.asList(md5));
executeTest("test parallelization (single thread)", spec1); executeTest("test parallelization (single thread)", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 2", 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 2", 1,
Arrays.asList(md5)); Arrays.asList(md5));
executeTest("test parallelization (2 threads)", spec2); executeTest("test parallelization (2 threads)", spec2);
WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec3 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 4", 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,075,000 -nt 4", 1,
Arrays.asList(md5)); Arrays.asList(md5));
executeTest("test parallelization (4 threads)", spec3); executeTest("test parallelization (4 threads)", spec3);
} }
@ -90,15 +79,15 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testParameter() { public void testParameter() {
HashMap<String, String> e = new HashMap<String, String>(); HashMap<String, String> e = new HashMap<String, String>();
e.put( "-genotype", "e4735f7d20f8d82d359f89824cfafbad" ); e.put( "-genotype", "5d3f4a25039959e1758a035b8dc595b7" );
e.put( "-all_bases", "c41acfac9f7908609a8d0ccdcefbd9e4" ); e.put( "-all_bases", "402740e2e1ef23a246120dde91a42dcd" );
e.put( "--min_base_quality_score 26", "6fea507d70c1b7b74fb6b553bd8904c2" ); e.put( "--min_base_quality_score 26", "a6fe67665b1ffe1d53194199b1d28b9a" );
e.put( "--min_mapping_quality_score 26", "f8b1a0449f0a49c1c5edb70f4e90b5f8" ); e.put( "--min_mapping_quality_score 26", "3fcb922f5939221ee73661f83d604050" );
e.put( "--max_mismatches_in_40bp_window 5", "bf29fc94431f7b18fd4c65025294de3c" ); e.put( "--max_mismatches_in_40bp_window 5", "93a0d181625c2c98c35acd03599f6f8b" );
for ( Map.Entry<String, String> entry : e.entrySet() ) { for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 " + entry.getKey(), 1,
Arrays.asList(entry.getValue())); Arrays.asList(entry.getValue()));
executeTest(String.format("testParameter[%s]", entry.getKey()), spec); executeTest(String.format("testParameter[%s]", entry.getKey()), spec);
} }
@ -107,13 +96,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testConfidence() { public void testConfidence() {
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
Arrays.asList("065be668bbea5342c7700017e131eb2a")); Arrays.asList("292ecd223494a2cff64a9cb72b0850d9"));
executeTest("testConfidence1", spec1); executeTest("testConfidence1", spec1);
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
Arrays.asList("b58578df1d05926fb7ecd9fecffa9c5e")); Arrays.asList("65e29fad296e3d03312087e7e9168095"));
executeTest("testConfidence2", spec2); executeTest("testConfidence2", spec2);
} }
@ -131,7 +120,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
for ( Map.Entry<String, String> entry : e.entrySet() ) { for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -vf " + entry.getKey(), 1, baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -vf " + entry.getKey(), 1,
Arrays.asList(entry.getValue())); Arrays.asList(entry.getValue()));
executeTest(String.format("testOtherFormat[%s]", entry.getKey()), spec); executeTest(String.format("testOtherFormat[%s]", entry.getKey()), spec);
} }
@ -154,7 +143,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
for ( Map.Entry<Double, String> entry : e.entrySet() ) { for ( Map.Entry<Double, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -vf GELI -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1, baseCommand + " -vf GELI -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 --heterozygosity " + entry.getKey(), 1,
Arrays.asList(entry.getValue())); Arrays.asList(entry.getValue()));
executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec); executeTest(String.format("testHeterozyosity[%s]", entry.getKey()), spec);
} }
@ -174,7 +163,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
for ( Map.Entry<String, String> entry : e.entrySet() ) { for ( Map.Entry<String, String> entry : e.entrySet() ) {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper -vf GELI -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1, baseCommand + " -vf GELI -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -varout %s -L 1:10,000,000-10,100,000 -bm " + entry.getKey(), 1,
Arrays.asList(entry.getValue())); Arrays.asList(entry.getValue()));
executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec); executeTest(String.format("testOtherBaseCallModel[%s]", entry.getKey()), spec);
} }
@ -188,8 +177,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
@Test @Test
public void testMultiTechnologies() { public void testMultiTechnologies() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
"-T UnifiedGenotyper" + baseCommand +
" -R " + b36KGReference +
" -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.allTechs.bam" +
" -varout %s" + " -varout %s" +
" -L 1:10,000,000-10,100,000" + " -L 1:10,000,000-10,100,000" +