From 3759d9dd679ed26ce67e9b75bca90291d91f797c Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Tue, 5 Mar 2013 15:57:44 -0500 Subject: [PATCH] Added the functionality to impose a relative ordering on ReadTransformers in the GATK engine. * ReadTransformers can say they must be first, must be last, or don't care. * By default, none of the existing ones care about ordering except BQSR (must be first). * This addresses a bug reported on the forum where BAQ is incorrectly applied before BQSR. * The engine now orders the read transformers up front before applying iterators. * The engine checks for enabled RTs that are not compatible (e.g. both must be first) and blows up (gracefully). * Added unit tests. --- .../recalibration/BQSRReadTransformer.java | 3 + .../sting/gatk/GenomeAnalysisEngine.java | 37 +++++++- .../sting/gatk/iterators/ReadTransformer.java | 36 +++++++ .../sting/utils/exceptions/UserException.java | 6 ++ .../gatk/GenomeAnalysisEngineUnitTest.java | 93 +++++++++++++++++++ 5 files changed, 173 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java b/protected/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java index 113ea2222..3f8fd0e88 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java +++ b/protected/java/src/org/broadinstitute/sting/utils/recalibration/BQSRReadTransformer.java @@ -62,6 +62,9 @@ public class BQSRReadTransformer extends ReadTransformer { private boolean enabled; private BaseRecalibration bqsr = null; + @Override + public OrderingConstraint getOrderingConstraint() { return OrderingConstraint.MUST_BE_FIRST; } + @Override public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { this.enabled = engine.hasBQSRArgumentSet(); diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index 85c94cc92..e45a750ba 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -372,7 +372,8 @@ public class GenomeAnalysisEngine { * @param walker the walker we need to apply read transformers too */ public void initializeReadTransformers(final Walker walker) { - final List activeTransformers = new ArrayList(); + // keep a list of the active read transformers sorted based on priority ordering + List activeTransformers = new ArrayList(); final ReadTransformersMode overrideMode = WalkerManager.getWalkerAnnotation(walker, ReadTransformersMode.class); final ReadTransformer.ApplicationTime overrideTime = overrideMode != null ? overrideMode.ApplicationTime() : null; @@ -392,9 +393,41 @@ public class GenomeAnalysisEngine { return readTransformers; } - private void setReadTransformers(final List readTransformers) { + /* + * Sanity checks that incompatible read transformers are not active together (and throws an exception if they are). + * + * @param readTransformers the active read transformers + */ + protected void checkActiveReadTransformers(final List readTransformers) { + if ( readTransformers == null ) + throw new IllegalArgumentException("read transformers cannot be null"); + + ReadTransformer sawMustBeFirst = null; + ReadTransformer sawMustBeLast = null; + + for ( final ReadTransformer r : readTransformers ) { + if ( r.getOrderingConstraint() == ReadTransformer.OrderingConstraint.MUST_BE_FIRST ) { + if ( sawMustBeFirst != null ) + throw new UserException.IncompatibleReadFiltersException(sawMustBeFirst.toString(), r.toString()); + sawMustBeFirst = r; + } else if ( r.getOrderingConstraint() == ReadTransformer.OrderingConstraint.MUST_BE_LAST ) { + if ( sawMustBeLast != null ) + throw new UserException.IncompatibleReadFiltersException(sawMustBeLast.toString(), r.toString()); + sawMustBeLast = r; + } + } + } + + protected void setReadTransformers(final List readTransformers) { if ( readTransformers == null ) throw new ReviewedStingException("read transformers cannot be null"); + + // sort them in priority order + Collections.sort(readTransformers, new ReadTransformer.ReadTransformerComparator()); + + // make sure we don't have an invalid set of active read transformers + checkActiveReadTransformers(readTransformers); + this.readTransformers = readTransformers; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java index f026b8f6c..799014cd4 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java @@ -31,6 +31,8 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import java.util.Comparator; + /** * Baseclass used to describe a read transformer like BAQ and BQSR * @@ -65,6 +67,11 @@ abstract public class ReadTransformer { protected ReadTransformer() {} + /* + * @return the ordering constraint for the given read transformer + */ + public OrderingConstraint getOrderingConstraint() { return OrderingConstraint.DO_NOT_CARE; } + /** * Master initialization routine. Called to setup a ReadTransform, using it's overloaded initializeSub routine. * @@ -166,4 +173,33 @@ abstract public class ReadTransformer { */ HANDLED_IN_WALKER } + + /* + * This enum specifies the constraints that the given read transformer has relative to any other read transformers being used + */ + public enum OrderingConstraint { + /* + * If 2 read transformers are both active and MUST_BE_FIRST, then an error will be generated + */ + MUST_BE_FIRST, + + /* + * No constraints on the ordering for this read transformer + */ + DO_NOT_CARE, + + /* + * If 2 read transformers are both active and MUST_BE_LAST, then an error will be generated + */ + MUST_BE_LAST + } + + public static class ReadTransformerComparator implements Comparator { + + public int compare(final ReadTransformer r1, final ReadTransformer r2) { + if ( r1.getOrderingConstraint() == r2.getOrderingConstraint() ) + return 0; + return ( r1.getOrderingConstraint() == OrderingConstraint.MUST_BE_FIRST || r2.getOrderingConstraint() == OrderingConstraint.MUST_BE_LAST ) ? -1 : 1; + } + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java index 1c461748e..b3c5bd2c7 100644 --- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java +++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java @@ -75,6 +75,12 @@ public class UserException extends ReviewedStingException { } } + public static class IncompatibleReadFiltersException extends CommandLineException { + public IncompatibleReadFiltersException(final String filter1, final String filter2) { + super(String.format("Two read filters are enabled that are incompatible and cannot be used simultaneously: %s and %s", filter1, filter2)); + } + } + public static class MalformedWalkerArgumentsException extends CommandLineException { public MalformedWalkerArgumentsException(String message) { super(String.format("Malformed walker argument: %s",message)); diff --git a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java index 0b6e08fa7..3f74e0eae 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/GenomeAnalysisEngineUnitTest.java @@ -29,15 +29,23 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.commandline.ArgumentException; import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID; +import org.broadinstitute.sting.gatk.iterators.ReadTransformer; +import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.readutils.PrintReads; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.testng.Assert; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.List; /** * Tests selected functionality in the GenomeAnalysisEngine class @@ -81,4 +89,89 @@ public class GenomeAnalysisEngineUnitTest extends BaseTest { testEngine.validateSuppliedIntervals(); } + + + /////////////////////////////////////////////////// + // Test the ReadTransformer ordering enforcement // + /////////////////////////////////////////////////// + + public static class TestReadTransformer extends ReadTransformer { + + private OrderingConstraint orderingConstraint = OrderingConstraint.DO_NOT_CARE; + private boolean enabled; + + protected TestReadTransformer(final OrderingConstraint orderingConstraint) { + this.orderingConstraint = orderingConstraint; + enabled = true; + } + + // need this because PackageUtils will pick up this class as a possible ReadTransformer + protected TestReadTransformer() { + enabled = false; + } + + @Override + public OrderingConstraint getOrderingConstraint() { return orderingConstraint; } + + @Override + public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) { return ApplicationTime.HANDLED_IN_WALKER; } + + @Override + public boolean enabled() { return enabled; } + + @Override + public GATKSAMRecord apply(final GATKSAMRecord read) { return read; } + + } + + @DataProvider(name = "ReadTransformerData") + public Object[][] makeReadTransformerData() { + List tests = new ArrayList(); + + for ( final ReadTransformer.OrderingConstraint orderingConstraint1 : ReadTransformer.OrderingConstraint.values() ) { + for ( final ReadTransformer.OrderingConstraint orderingConstraint2 : ReadTransformer.OrderingConstraint.values() ) { + for ( final ReadTransformer.OrderingConstraint orderingConstraint3 : ReadTransformer.OrderingConstraint.values() ) { + tests.add(new Object[]{orderingConstraint1, orderingConstraint2, orderingConstraint3}); + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "ReadTransformerData") + public void testReadTransformer(final ReadTransformer.OrderingConstraint oc1, final ReadTransformer.OrderingConstraint oc2, final ReadTransformer.OrderingConstraint oc3) { + + final GenomeAnalysisEngine testEngine = new GenomeAnalysisEngine(); + final List readTransformers = new ArrayList(3); + readTransformers.add(new TestReadTransformer(oc1)); + readTransformers.add(new TestReadTransformer(oc2)); + readTransformers.add(new TestReadTransformer(oc3)); + + final boolean shouldThrowException = numWithConstraint(ReadTransformer.OrderingConstraint.MUST_BE_FIRST, oc1, oc2, oc3) > 1 || + numWithConstraint(ReadTransformer.OrderingConstraint.MUST_BE_LAST, oc1, oc2, oc3) > 1; + + try { + testEngine.setReadTransformers(readTransformers); + + Assert.assertFalse(shouldThrowException); + Assert.assertEquals(testEngine.getReadTransformers().size(), 3); + + Assert.assertTrue(testEngine.getReadTransformers().get(1).getOrderingConstraint() != ReadTransformer.OrderingConstraint.MUST_BE_FIRST); + Assert.assertTrue(testEngine.getReadTransformers().get(2).getOrderingConstraint() != ReadTransformer.OrderingConstraint.MUST_BE_FIRST); + Assert.assertTrue(testEngine.getReadTransformers().get(0).getOrderingConstraint() != ReadTransformer.OrderingConstraint.MUST_BE_LAST); + Assert.assertTrue(testEngine.getReadTransformers().get(1).getOrderingConstraint() != ReadTransformer.OrderingConstraint.MUST_BE_LAST); + } catch (UserException.IncompatibleReadFiltersException e) { + Assert.assertTrue(shouldThrowException); + } + } + + private int numWithConstraint(final ReadTransformer.OrderingConstraint target, final ReadTransformer.OrderingConstraint... constraints ) { + int count = 0; + for ( final ReadTransformer.OrderingConstraint constraint : constraints ) { + if ( constraint == target ) + count++; + } + return count; + } }