diff --git a/build.xml b/build.xml index 766c987a7..c63e1d4cb 100644 --- a/build.xml +++ b/build.xml @@ -339,13 +339,13 @@ - - - - + + + + @@ -710,12 +710,12 @@ - - - - - + + + + + diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java index 516ece4a1..74e8848cd 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/LocusWalker.java @@ -19,6 +19,7 @@ import java.util.Arrays; */ @By(DataSource.READS) @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) +@PartitionBy(PartitionType.Interval) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class}) public abstract class LocusWalker extends Walker { // Do we actually want to operate on the context? diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PartitionBy.java b/java/src/org/broadinstitute/sting/gatk/walkers/PartitionBy.java new file mode 100644 index 000000000..bf6cb0d18 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PartitionBy.java @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers; + +import java.lang.annotation.*; + +/** + * Allows the walker to indicate how to partition data it wants to consume. + * The annotation is placed on types, is inherited by subclasses and is + * retained at runtime; its {@link #value()} names the {@link PartitionType} + * to use. + */ +@Documented +@Inherited +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.TYPE) +public @interface PartitionBy { + PartitionType value(); +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/PartitionType.java b/java/src/org/broadinstitute/sting/gatk/walkers/PartitionType.java new file mode 100644 index 000000000..f6bb705a3 --- /dev/null +++ b/java/src/org/broadinstitute/sting/gatk/walkers/PartitionType.java @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers; + +/** + * Defines the ways walker inputs can be partitioned before + * being passed to multiple instances of the walker. + */ +public enum PartitionType { + /** + * Do not partition the walker inputs. + */ + None, + + /** + * The walker inputs can be chunked down to the + * per-locus level. + */ + Locus, + + /** + * The walker inputs should be processed as complete + * intervals defined by -L or the reference contigs. + */ + Interval, + + /** + * The walker inputs should always be processed as complete + * contigs, even if there are multiple intervals per contig. + */ + Contig +} diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java index 5de608c64..fd4dc0522 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/ReadWalker.java @@ -12,6 +12,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; * To change this template use File | Settings | File Templates. */ @Requires({DataSource.READS, DataSource.REFERENCE_BASES}) +@PartitionBy(PartitionType.Contig) public abstract class ReadWalker extends Walker { public boolean requiresOrderedReads() { return false; } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java index fc652ed8f..296887d87 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java @@ -42,6 +42,7 @@ import org.apache.log4j.Logger; * To change this template use File | Settings | File Templates. 
*/ @ReadFilters(MalformedReadFilter.class) +@PartitionBy(PartitionType.None) @BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) public abstract class Walker { final protected static Logger logger = Logger.getLogger(Walker.class); diff --git a/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java index 144aad026..dbb52eb66 100644 --- a/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java +++ b/java/src/org/broadinstitute/sting/queue/extensions/gatk/GATKExtensionsGenerator.java @@ -39,6 +39,8 @@ import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; +import org.broadinstitute.sting.gatk.walkers.PartitionBy; +import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.classloader.JVMUtils; @@ -209,10 +211,10 @@ public class GATKExtensionsGenerator extends CommandLineProgram { * @return The scatter type for the walker. 
*/ private String getScatterClass(Class walkerType) { - if (ReadWalker.class.isAssignableFrom(walkerType)) - return "ContigScatterFunction"; - else - return "IntervalScatterFunction"; + PartitionType partitionType = walkerType.getAnnotation(PartitionBy.class).value(); + if (partitionType == PartitionType.None) + return null; /* None: the inputs are not partitioned, so no scatter function is named. */ + return partitionType.name() + "ScatterFunction"; /* Name is the enum constant plus a fixed suffix, e.g. Locus gives LocusScatterFunction. */ } /** diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/LocusScatterFunction.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/LocusScatterFunction.scala new file mode 100644 index 000000000..50482033f --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/LocusScatterFunction.scala @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.queue.extensions.gatk + +/** + * Placeholder scatter function for locus-level partitioning. + * For now it only extends IntervalScatterFunction without overriding anything. + * TODO: Implement a scatter function that divides down to the locus level. + */ +class LocusScatterFunction extends IntervalScatterFunction {}