From 0a58d7aa1a5de0298c0e44c80c40ce6ba3fcf784 Mon Sep 17 00:00:00 2001 From: kshakir Date: Fri, 8 Apr 2011 18:44:32 +0000 Subject: [PATCH] Marked boolean SAMFileWriterATD arguments as flags so scala generator maps them to Boolean instead of Option[Boolean]. Using the VCFWriterATD isCompressed to check if the VCF index will be auto generated. Tracking BAM and Tribble indexes as @Inputs and @Outputs in generated QFunctions. Updates to the BamGatherFunction to disable the index during merge when disable_bam_indexing = true. Made a shortcut for live-running pipelinetest, pipelinetestrun. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5606 348d0f76-0448-11de-a6fe-93d51630548a --- build.xml | 17 ++- .../SAMFileWriterArgumentTypeDescriptor.java | 4 +- .../VCFWriterArgumentTypeDescriptor.java | 12 +- .../gatk/ArgumentDefinitionField.java | 133 +++++++++++++++--- .../gatk/AutoIndexGatherFunction.scala | 36 +++++ .../extensions/gatk/BamGatherFunction.scala | 4 +- 6 files changed, 173 insertions(+), 33 deletions(-) create mode 100644 scala/src/org/broadinstitute/sting/queue/extensions/gatk/AutoIndexGatherFunction.scala diff --git a/build.xml b/build.xml index 416fce4bd..4e7c642ba 100644 --- a/build.xml +++ b/build.xml @@ -191,11 +191,6 @@ - - - - - @@ -624,6 +619,11 @@ + + + + + @@ -687,6 +687,13 @@ + + + + + + + diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java index 77903bbc0..bf2623f25 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java @@ -178,7 +178,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor null, "Turn off on-the-fly creation of indices for output BAM files.", false, - false, + true, false, source.isHidden(), null, @@ -194,7 +194,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor SIMPLIFY_BAM_SHORTNAME, "If provided, output BAM files will be simplified to include just key reads for downstream variation discovery analyses (removing duplicates, PF-, non-primary reads), as well stripping all extended tags from the kept reads except the read group identifier", false, - false, + true, false, source.isHidden(), null, diff --git a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java index f36e02f23..888f8b36e 100644 --- a/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java +++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterArgumentTypeDescriptor.java @@ -133,7 +133,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { throw new MissingArgumentValueException(defaultArgumentDefinition); // Should we compress the output stream? - boolean compress = writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName)); + boolean compress = isCompressed(writerFileName); boolean skipWritingHeader = argumentIsPresent(createNoHeaderArgumentDefinition(),matches); boolean doNotWriteGenotypes = argumentIsPresent(createSitesOnlyArgumentDefinition(),matches); @@ -189,13 +189,21 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor { null ); } + /** + * Returns true if the file will be compressed. + * @param writerFileName Name of the file + * @return true if the file will be compressed. + */ + public static boolean isCompressed(String writerFileName) { + return writerFileName != null && SUPPORTED_ZIPPED_SUFFIXES.contains(getFileSuffix(writerFileName)); + } /** * Returns a lower-cased version of the suffix of the provided file. * @param fileName the file name. Must not be null. * @return lower-cased version of the file suffix. Will not be null. */ - private String getFileSuffix(String fileName) { + private static String getFileSuffix(String fileName) { int indexOfLastDot = fileName.lastIndexOf("."); if ( indexOfLastDot == -1 ) return ""; diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java index c8c62fbd1..0ac08b7f7 100644 --- a/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/ArgumentDefinitionField.java @@ -24,9 +24,12 @@ package org.broadinstitute.sting.queue.extensions.gatk; +import net.sf.samtools.BAMIndex; import net.sf.samtools.SAMFileWriter; +import org.broad.tribble.Tribble; import org.broad.tribble.vcf.VCFWriter; import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import java.io.File; import java.lang.annotation.Annotation; @@ -70,7 +73,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { " * Short name of %1$s%n" + " * @param value Short name of %1$s%n" + " */%n" + - "def %4$s(value: %2$s) = this.%1$s = value%n", + "def %4$s(value: %2$s) { this.%1$s = value }%n", getFieldName(), getFieldType(), getShortFieldGetter(), @@ -132,17 +135,37 @@ public abstract class ArgumentDefinitionField extends ArgumentField { // ROD Bindings are set by the RodBindField } else if (RodBindField.ROD_BIND_FIELD.equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { // TODO: Once everyone is using @Allows and @Requires correctly, we can stop blindly allowing Triplets - return Collections.singletonList(new RodBindArgumentField(argumentDefinition)); + return Arrays.asList(new RodBindArgumentField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, Tribble.STANDARD_INDEX_EXTENSION)); //return Collections.emptyList(); } else if ("input_file".equals(argumentDefinition.fullName) && argumentDefinition.ioType == ArgumentIOType.INPUT) { - return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new IndexFilesField()); + return Arrays.asList(new InputTaggedFileDefinitionField(argumentDefinition), new InputIndexesArgumentField(argumentDefinition, BAMIndex.BAMIndexSuffix, ".bam")); } else if (argumentDefinition.ioType == ArgumentIOType.INPUT) { return Collections.singletonList(new InputArgumentField(argumentDefinition)); } else if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) { - return Collections.singletonList(new OutputArgumentField(argumentDefinition, gatherer)); + + List fields = new ArrayList(); + + String gatherClass; + if (gatherer != null) + gatherClass = gatherer.getName(); + else if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) + gatherClass = "BamGatherFunction"; + else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType)) + gatherClass = "VcfGatherFunction"; + else + gatherClass = "org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction"; + + fields.add(new OutputArgumentField(argumentDefinition, gatherClass)); + + if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) + fields.add(new SAMFileWriterIndexArgumentField(argumentDefinition)); + else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType)) + fields.add(new VCFWriterIndexArgumentField(argumentDefinition)); + + return fields; } else if (argumentDefinition.isFlag) { return Collections.singletonList(new FlagArgumentField(argumentDefinition)); @@ -228,10 +251,10 @@ public abstract class ArgumentDefinitionField extends ArgumentField { // if (argumentDefinition.ioType == ArgumentIOType.OUTPUT) // Map all outputs to files. private static class OutputArgumentField extends ArgumentDefinitionField { - private final Class gatherer; - public OutputArgumentField(ArgumentDefinition argumentDefinition, Class gatherer) { + private final String gatherClass; + public OutputArgumentField(ArgumentDefinition argumentDefinition, String gatherClass) { super(argumentDefinition); - this.gatherer = gatherer; + this.gatherClass = gatherClass; } @Override protected Class getInnerType() { return File.class; } @@ -240,16 +263,7 @@ public abstract class ArgumentDefinitionField extends ArgumentField { @Override public boolean isGather() { return true; } @Override protected String getGatherAnnotation() { - String gather; - if (gatherer != null) - gather = "@Gather(classOf[" + gatherer.getName() + "])%n"; - else if (SAMFileWriter.class.isAssignableFrom(argumentDefinition.argumentType)) - gather = "@Gather(classOf[BamGatherFunction])%n"; - else if (VCFWriter.class.isAssignableFrom(argumentDefinition.argumentType)) - gather = "@Gather(classOf[VcfGatherFunction])%n"; - else - gather = "@Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])%n"; - return String.format(gather); + return String.format("@Gather(classOf[%s])%n", gatherClass); } } @@ -348,19 +362,94 @@ public abstract class ArgumentDefinitionField extends ArgumentField { /** * Adds optional inputs for the indexes of any bams or sams added to this function. */ - private static class IndexFilesField extends ArgumentField { + private static class InputIndexesArgumentField extends ArgumentField { + private final String indexFieldName; + private final String originalFieldName; + private final String indexSuffix; + private final String originalSuffix; + public InputIndexesArgumentField(ArgumentDefinition originalArgumentDefinition, String indexSuffix) { + this(originalArgumentDefinition, indexSuffix, null); + } + public InputIndexesArgumentField(ArgumentDefinition originalArgumentDefinition, String indexSuffix, String originalSuffix) { + this.indexFieldName = originalArgumentDefinition.fullName + "Indexes"; + this.originalFieldName = originalArgumentDefinition.fullName; + this.indexSuffix = indexSuffix; + this.originalSuffix = originalSuffix; + } @Override protected Class getAnnotationIOClass() { return Input.class; } @Override public String getCommandLineAddition() { return ""; } - @Override protected String getDoc() { return "Dependencies on any index files for any bams added to input_files"; } - @Override protected String getFullName() { return "index_files"; } + @Override protected String getDoc() { return "Dependencies on any indexes of " + this.originalFieldName; } + @Override protected String getFullName() { return this.indexFieldName; } @Override protected boolean isRequired() { return false; } @Override protected String getFieldType() { return "List[File]"; } @Override protected String getDefaultValue() { return "Nil"; } @Override protected Class getInnerType() { return File.class; } - @Override protected String getRawFieldName() { return "index_files"; } + @Override protected String getRawFieldName() { return this.indexFieldName; } + @Override protected String getFreezeFields() { + if (originalSuffix == null) { + return String.format( + ("%1$s ++= %2$s" + + ".filter(orig => orig != null)" + + ".map(orig => new File(orig.getPath + \"%3$s\"))%n"), + indexFieldName, originalFieldName, indexSuffix); + } else { + return String.format( + ("%1$s ++= %2$s" + + ".filter(orig => orig != null && orig.getName.endsWith(\"%4$s\"))" + + ".flatMap(orig => Array(" + + " new File(orig.getPath + \"%3$s\")," + + " new File(orig.getPath.stripSuffix(\"%4$s\") + \"%3$s\") ))%n"), + indexFieldName, originalFieldName, indexSuffix, originalSuffix); + } + } + } + + private static abstract class OutputIndexArgumentField extends ArgumentField { + protected final String indexFieldName; + protected final String originalFieldName; + public OutputIndexArgumentField(ArgumentDefinition originalArgumentDefinition) { + this.indexFieldName = originalArgumentDefinition.fullName + "Index"; + this.originalFieldName = originalArgumentDefinition.fullName; + } + @Override protected Class getAnnotationIOClass() { return Output.class; } + @Override public String getCommandLineAddition() { return ""; } + @Override protected String getDoc() { return "Automatically generated index for " + this.originalFieldName; } + @Override protected String getFullName() { return this.indexFieldName; } + @Override protected boolean isRequired() { return false; } + @Override protected String getFieldType() { return "File"; } + @Override protected String getDefaultValue() { return "_"; } + @Override protected Class getInnerType() { return File.class; } + @Override protected String getRawFieldName() { return this.indexFieldName; } + + @Override public boolean isGather() { return true; } + @Override protected String getGatherAnnotation() { + return String.format("@Gather(classOf[AutoIndexGatherFunction])%n"); + } + } + + private static class VCFWriterIndexArgumentField extends OutputIndexArgumentField { + public VCFWriterIndexArgumentField(ArgumentDefinition originalArgumentDefinition) { + super(originalArgumentDefinition); + } @Override protected String getFreezeFields() { return String.format( - "index_files ++= input_file.filter(bam => bam != null && bam.getName.endsWith(\".bam\")).map(bam => new File(bam.getPath + \".bai\"))%n"); + ("if (%2$s != null)%n" + + " if (!org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor.isCompressed(%2$s.getPath))%n" + + " %1$s = new File(%2$s.getPath + \"%3$s\")%n"), + indexFieldName, originalFieldName, Tribble.STANDARD_INDEX_EXTENSION); + } + } + + private static class SAMFileWriterIndexArgumentField extends OutputIndexArgumentField { + public SAMFileWriterIndexArgumentField(ArgumentDefinition originalArgumentDefinition) { + super(originalArgumentDefinition); + } + @Override protected String getFreezeFields() { + return String.format( + ("if (%2$s != null)%n" + + " if (!%3$s)%n" + + " %1$s = new File(%2$s.getPath.stripSuffix(\".bam\") + \"%4$s\")%n"), + indexFieldName, originalFieldName, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME, BAMIndex.BAMIndexSuffix); } } diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/AutoIndexGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/AutoIndexGatherFunction.scala new file mode 100644 index 000000000..7fb96e074 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/AutoIndexGatherFunction.scala @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.queue.extensions.gatk + +import org.broadinstitute.sting.queue.function.scattergather.GatherFunction +import org.broadinstitute.sting.queue.function.InProcessFunction + +/** + * A no-op for index files that were automatically generated during the gather step. + * TODO: Allow graph to know that this isn't needed, and/or that one gather job can actually gather N-outputs, and/or look more into generic source->sinks. + */ +class AutoIndexGatherFunction extends InProcessFunction with GatherFunction { + def run() {} +} diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala index bbc4cd67e..9751012a4 100644 --- a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/BamGatherFunction.scala @@ -50,8 +50,8 @@ class BamGatherFunction extends GatherFunction with PicardBamFunction { val compression = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME) this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]] - val indexBam = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) - this.createIndex = originalGATK.getFieldValue(indexBam).asInstanceOf[Option[Boolean]] + val disableIndex = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.DISABLE_INDEXING_FULLNAME) + this.createIndex = Some(!originalGATK.getFieldValue(disableIndex).asInstanceOf[Boolean]) super.freezeFieldValues }