From 7b8b06416553c6b60f4092dd4cd45340de219359 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Fri, 18 Jan 2013 16:11:53 -0500 Subject: [PATCH 02/46] Last manual license update (hopefully) if everyone updates their git hook accordingly, this will be the last time I have to manually run the script. GSATDG-5 --- .../GenotypeConcordanceIntegrationTest.java | 67 ++++++++++++------- .../IntervalArgumentCollection.java | 6 +- .../sting/gatk/iterators/GATKSAMIterator.java | 46 ++++++------- .../locusiterator/AlignmentStateMachine.java | 46 ++++++------- .../locusiterator/LIBSDownsamplingInfo.java | 46 ++++++------- .../utils/locusiterator/LIBSPerformance.java | 46 ++++++------- .../utils/locusiterator/LocusIterator.java | 25 +++++++ .../PerSampleReadStateManager.java | 46 ++++++------- .../utils/locusiterator/ReadStateManager.java | 46 ++++++------- .../locusiterator/SamplePartitioner.java | 46 ++++++------- .../sting/utils/sam/ArtificialBAMBuilder.java | 46 ++++++------- .../traversals/DummyActiveRegionWalker.java | 46 ++++++------- .../TraverseActiveRegionsUnitTest.java | 46 ++++++------- .../AlignmentStateMachineUnitTest.java | 46 ++++++------- .../utils/locusiterator/LIBS_position.java | 46 ++++++------- .../locusiterator/LocusIteratorBenchmark.java | 46 ++++++------- .../LocusIteratorByStateBaseTest.java | 46 ++++++------- .../PerSampleReadStateManagerUnitTest.java | 46 ++++++------- .../utils/pileup/PileupElementUnitTest.java | 46 ++++++------- .../sam/ArtificialBAMBuilderUnitTest.java | 46 ++++++------- 20 files changed, 463 insertions(+), 417 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java index e69d1ee60..2ebb1d7d8 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java +++ 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordanceIntegrationTest.java @@ -1,27 +1,48 @@ /* - * Copyright (c) 2010. - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ package org.broadinstitute.sting.gatk.walkers.variantutils; diff --git a/public/java/src/org/broadinstitute/sting/commandline/IntervalArgumentCollection.java b/public/java/src/org/broadinstitute/sting/commandline/IntervalArgumentCollection.java index 3f76ae652..b491c9f3d 100644 --- a/public/java/src/org/broadinstitute/sting/commandline/IntervalArgumentCollection.java +++ b/public/java/src/org/broadinstitute/sting/commandline/IntervalArgumentCollection.java @@ -1,6 +1,6 @@ /* * Copyright (c) 2012 The Broad Institute -* +* * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without @@ -9,10 +9,10 @@ * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: -* +* * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
-* +* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/GATKSAMIterator.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/GATKSAMIterator.java index 30a520e09..f5f4ec6f6 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/iterators/GATKSAMIterator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/GATKSAMIterator.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.gatk.iterators; diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java index 50bc9e25b..c4b566582 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachine.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSDownsamplingInfo.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSDownsamplingInfo.java index fc282163e..c7875354f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSDownsamplingInfo.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSDownsamplingInfo.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java index 2d074f420..8069ea29f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LIBSPerformance.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java index f830dcb30..fc114b4f0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java @@ -1,3 +1,28 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + package org.broadinstitute.sting.utils.locusiterator; import net.sf.samtools.util.CloseableIterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java index 3f3bc706f..2caaf9d27 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManager.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java index 09ec3b264..8fbd302a8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/ReadStateManager.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/SamplePartitioner.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/SamplePartitioner.java index 9bb474e4d..49a8d10aa 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/SamplePartitioner.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/SamplePartitioner.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java index ab539c9dc..82b5b29cc 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.sam; diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java index bc1e1d7b0..76be54d72 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.gatk.traversals; diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index 319af5ec5..76eac3a8d 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.gatk.traversals; diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachineUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachineUnitTest.java index 2f1e95a1f..7cb148b61 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachineUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/AlignmentStateMachineUnitTest.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LIBS_position.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LIBS_position.java index 31be5a25a..d856805f8 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LIBS_position.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LIBS_position.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java index c0938676e..e52cd46cc 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorBenchmark.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java index 7c8c6108c..1a51440ad 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/LocusIteratorByStateBaseTest.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/test/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManagerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManagerUnitTest.java index b9f2fb29a..77dd29e60 100644 --- a/public/java/test/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManagerUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/locusiterator/PerSampleReadStateManagerUnitTest.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.locusiterator; diff --git a/public/java/test/org/broadinstitute/sting/utils/pileup/PileupElementUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/pileup/PileupElementUnitTest.java index a760833f5..888ab7f7f 100644 --- a/public/java/test/org/broadinstitute/sting/utils/pileup/PileupElementUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/pileup/PileupElementUnitTest.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ package org.broadinstitute.sting.utils.pileup; diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilderUnitTest.java index 2a638eb69..a2aec1b1e 100644 --- a/public/java/test/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilderUnitTest.java @@ -1,27 +1,27 @@ /* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ package org.broadinstitute.sting.utils.sam; From 7060e01a8e0bcbd20d98f6601c48705ed939be74 Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Tue, 22 Jan 2013 15:14:41 -0500 Subject: [PATCH 07/46] Fix for broken unit test plus some minor changes to comments. Unit tests were broken by my pulling the site status utility function into the enum. Thankfully the unit tests caught my silly duplication of a line. 
--- .../gatk/walkers/variantutils/ConcordanceMetrics.java | 2 +- .../gatk/walkers/variantutils/GenotypeConcordance.java | 5 +++-- .../walkers/variantutils/ConcordanceMetricsUnitTest.java | 7 ++++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java index 8a87c9957..9d5495351 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java @@ -299,7 +299,7 @@ public class ConcordanceMetrics { return EVAL_ONLY; boolean evalSubsetTruth = VariantContextUtils.allelesAreSubset(eval,truth); - boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(eval,truth); + boolean truthSubsetEval = VariantContextUtils.allelesAreSubset(truth,eval); if ( evalSubsetTruth && truthSubsetEval ) return ALLELES_MATCH; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index e8965dfc8..6902f864e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -168,8 +168,9 @@ public class GenotypeConcordance extends RodWalker data = getData2(); VariantContext eval = data.getFirst(); VariantContext truth = data.getSecond(); @@ -709,8 +709,13 @@ public class ConcordanceMetricsUnitTest extends BaseTest { List> data = getData7(); + int idx = 0; + int[] expecNotMatch = new int[]{0,0,0,0,0,1,1}; for ( Pair varPair : data ) { metrics.update(varPair.getFirst(),varPair.getSecond()); + 
Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH),expecNotMatch[idx]); + logger.info(idx); + idx++; } Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH),1); From d33c755aeaafcef933b474fba01c82a0a71dbe7b Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Tue, 22 Jan 2013 15:29:33 -0500 Subject: [PATCH 08/46] Adding docs. --- .../variantutils/GenotypeConcordance.java | 50 ++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java index 6902f864e..47e2ea052 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java @@ -62,27 +62,75 @@ import java.io.PrintStream; import java.util.*; /** - * A simple walker for performing genotype concordance calculations between two callsets + * A simple walker for performing genotype concordance calculations between two callsets. Outputs a GATK table with + * per-sample and aggregate counts and frequencies, a summary table for NRD/NRS, and a table for site allele overlaps. + * + *

+ * Genotype concordance takes in two callsets (vcfs) and tabulates the number of sites which overlap and share alleles, + * and for each sample, the genotype-by-genotype counts (for instance, the number of sites at which a sample was + * called homozygous reference in the EVAL callset, but homozygous variant in the COMP callset). It outputs these + * counts as well as convenient proportions (such as the proportion of het calls in the EVAL which were called REF in + * the COMP) and metrics (such as NRD and NRS). + * + *

INPUT

+ *

+ * Genotype concordance requires two callsets (as it does a comparison): an EVAL and a COMP callset, specified via + * the -eval and -comp arguments + *

+ * (Optional) Jexl expressions for genotype-level filtering of EVAL or COMP genotypes, specified via the -gfe and + * -gfc arguments, respectively. + *

OUTPUT

+ * Genotype Concordance writes a GATK report to the specified (via -o) file, consisting of multiple tables of counts + * and proportions. These tables may be optionally moltenized via the -moltenize argument. + * */ public class GenotypeConcordance extends RodWalker>,ConcordanceMetrics> { + /** + * The callset you want to evaluate, typically this is where you'd put 'unassessed' callsets. + */ @Input(fullName="eval",shortName="eval",doc="The variants and genotypes to evaluate",required=true) RodBinding evalBinding; + /** + * The callset you want to treat as 'truth'. Can also be of unknown quality for the sake of callset comparisons. + */ @Input(fullName="comp",shortName="comp",doc="The variants and genotypes to compare against",required=true) RodBinding compBinding; + /** + * The FILTER field of the eval and comp VCFs will be ignored. If this flag is not included, all FILTER sites will + * be treated as not being present in the VCF. (That is, the genotypes will be assigned UNAVAILABLE, as distinct + * from NO_CALL). + */ @Argument(fullName="ignoreFilters",doc="Filters will be ignored",required=false) boolean ignoreFilters = false; + /** + * A genotype level JEXL expression to apply to eval genotypes. Genotypes filtered in this way will be replaced by NO_CALL. + * For instance: -gfe 'GQ<20' will set to no-call any genotype with genotype quality less than 20. + */ @Argument(shortName="gfe", fullName="genotypeFilterExpressionEval", doc="One or more criteria to use to set EVAL genotypes to no-call. "+ "These genotype-level filters are only applied to the EVAL rod.", required=false) public ArrayList genotypeFilterExpressionsEval = new ArrayList(); + /** + * Identical to -gfe except the filter is applied to genotypes in the comp rod. + */ @Argument(shortName="gfc", fullName="genotypeFilterExpressionComp", doc="One or more criteria to use to set COMP genotypes to no-call. 
"+ "These genotype-level filters are only applied to the COMP rod.", required=false) public ArrayList genotypeFilterExpressionsComp = new ArrayList(); + /** + * Moltenize the count and proportion tables. Rather than moltenizing per-sample data into a 2x2 table, it is fully + * moltenized into elements. That is, WITHOUT this argument, each row of the table begins with the sample name and + * proceeds directly with counts/proportions of eval/comp counts (for instance HOM_REF/HOM_REF, HOM_REF/NO_CALL). + * + * If the Moltenize argument is given, the output will begin with a sample name, followed by the contrastive genotype + * type (such as HOM_REF/HOM_REF), followed by the count or proportion. This will significantly increase the number of + * rows. + */ @Argument(shortName="moltenize",fullName="moltenize",doc="Molten rather than tabular output") public boolean moltenize = false; From c5e1bb678b9e80fbc331d41328186db8db058c6e Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Tue, 22 Jan 2013 15:18:19 -0700 Subject: [PATCH 09/46] Refrain from pushing symlinks into the repo... not all filesystems treat it correctly --- licensing/private_license.txt | 44 ++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) mode change 120000 => 100644 licensing/private_license.txt diff --git a/licensing/private_license.txt b/licensing/private_license.txt deleted file mode 120000 index d83474e7a..000000000 --- a/licensing/private_license.txt +++ /dev/null @@ -1 +0,0 @@ -protected_license.txt \ No newline at end of file diff --git a/licensing/private_license.txt b/licensing/private_license.txt new file mode 100644 index 000000000..2f40c5089 --- /dev/null +++ b/licensing/private_license.txt @@ -0,0 +1,43 @@ + By downloading the PROGRAM you agree to the following terms of use: + + BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY + + This Agreement is made between the Broad Institute, Inc. 
with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). + + WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and + WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. + NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: + + 1. DEFINITIONS + 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. + + 2. LICENSE + 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. + The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. + 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. + 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. + + 3. OWNERSHIP OF INTELLECTUAL PROPERTY + LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. + Copyright 2012 Broad Institute, Inc. + Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. + LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. + + 4. 
INDEMNIFICATION + LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. + + 5. NO REPRESENTATIONS OR WARRANTIES + THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. + IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. + + 6. ASSIGNMENT + This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. + + 7. MISCELLANEOUS + 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. + 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. + 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. + 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. + 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. + 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. + 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. From 42b807a5fe806011f8594554e3a4355ecf9c3df8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 17 Jan 2013 14:51:26 -0500 Subject: [PATCH 12/46] Unit tests for ActivityProfileResult --- .../ActivityProfileResultTest.java | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java new file mode 100644 index 000000000..d131666fa --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The 
above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.activeregion; + +import net.sf.picard.reference.ReferenceSequenceFile; +import net.sf.samtools.SAMFileHeader; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.broadinstitute.sting.utils.sam.ArtificialBAMBuilder; +import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.LinkedList; +import java.util.List; + +/** + * Created with IntelliJ IDEA. + * User: depristo + * Date: 1/17/13 + * Time: 2:30 PM + * To change this template use File | Settings | File Templates. 
+ */ +public class ActivityProfileResultTest { + private GenomeLocParser genomeLocParser; + + @BeforeClass + public void init() throws FileNotFoundException { + // sequence + final SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 100); + genomeLocParser = new GenomeLocParser(header.getSequenceDictionary()); + } + + @DataProvider(name = "ActiveProfileResultProvider") + public Object[][] makeActiveProfileResultProvider() { + final List tests = new LinkedList(); + + final String chr = genomeLocParser.getContigs().getSequence(0).getSequenceName(); + for ( final GenomeLoc loc : Arrays.asList( + genomeLocParser.createGenomeLoc(chr, 10, 10), + genomeLocParser.createGenomeLoc(chr, 100, 100) )) { + for ( final double prob : Arrays.asList(0.0, 0.5, 1.0) ) { + for ( final ActivityProfileResult.ActivityProfileResultState state : ActivityProfileResult.ActivityProfileResultState.values() ) { + for ( final Number value : Arrays.asList(1, 2, 4) ) { + tests.add(new Object[]{ loc, prob, state, value}); + } + } + tests.add(new Object[]{ loc, prob, null, null}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "ActiveProfileResultProvider") + public void testActiveProfileResultProvider(GenomeLoc loc, final double prob, ActivityProfileResult.ActivityProfileResultState maybeState, final Number maybeNumber) { + final ActivityProfileResult apr = maybeState == null + ? new ActivityProfileResult(loc, prob) + : new ActivityProfileResult(loc, prob, maybeState, maybeNumber); + + Assert.assertEquals(apr.getLoc(), loc); + Assert.assertNotNull(apr.toString()); + Assert.assertEquals(apr.isActiveProb, prob); + Assert.assertEquals(apr.resultState, maybeState == null ? ActivityProfileResult.ActivityProfileResultState.NONE : maybeState); + Assert.assertEquals(apr.resultValue, maybeState == null ? 
null : maybeNumber); + } +} From 8d9b0f1bd57d4d569d6ec27e7d372a923e11b9bd Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 17 Jan 2013 17:31:56 -0500 Subject: [PATCH 13/46] Restructure ActivityProfiler into root class ActivityProfile and derived class BandPassActivityProfile -- Required before I jump in and redo the entire activity profile so it can be run incrementally -- This restructuring makes the differences between the two functionalities clearer, as almost all of the functionality is in the base class. The only functionality provided by the BandPassActivityProfile is isolated to a finalizeProfile function overloaded from the base class. -- Renamed ActivityProfileResult to ActivityProfileState, as this is a clearer indication of its actual functionality. Almost all of the misc. walker changes are due to this name update -- Code cleanup and docs for TraverseActiveRegions -- Expanded unit tests for ActivityProfile and ActivityProfileState --- .../targets/FindCoveredIntervals.java | 6 +- .../haplotypecaller/HaplotypeCaller.java | 13 +- .../traversals/TraverseActiveRegions.java | 95 +++++---- .../gatk/walkers/ActiveRegionWalker.java | 4 +- .../utils/activeregion/ActivityProfile.java | 183 ++++++++++-------- ...eResult.java => ActivityProfileState.java} | 22 +-- .../activeregion/BandPassActivityProfile.java | 84 ++++++++ .../traversals/DummyActiveRegionWalker.java | 6 +- ...java => ActivityProfileStateUnitTest.java} | 20 +- .../activeregion/ActivityProfileUnitTest.java | 55 ++++-- 10 files changed, 308 insertions(+), 180 deletions(-) rename public/java/src/org/broadinstitute/sting/utils/activeregion/{ActivityProfileResult.java => ActivityProfileState.java} (77%) create mode 100644 public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java rename public/java/test/org/broadinstitute/sting/utils/activeregion/{ActivityProfileResultTest.java => ActivityProfileStateUnitTest.java} (79%) diff --git
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 08de5a6aa..74ff77e4b 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -57,7 +57,7 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.io.PrintStream; @@ -74,12 +74,12 @@ public class FindCoveredIntervals extends ActiveRegionWalker { @Override // Look to see if the region has sufficient coverage - public ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { + public ActivityProfileState isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) { int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup()); // note the linear probability scale - return new ActivityProfileResult(ref.getLocus(), Math.min(depth / coverageThreshold, 1)); + return new ActivityProfileState(ref.getLocus(), Math.min(depth / coverageThreshold, 1)); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 26f2560b7..9bb04421c 100644 --- 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -56,7 +56,6 @@ import org.broadinstitute.sting.gatk.arguments.StandardCallerArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.downsampling.DownsampleType; import org.broadinstitute.sting.gatk.filters.BadMateFilter; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.iterators.ReadTransformer; @@ -71,7 +70,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext; import org.broadinstitute.sting.utils.*; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.clipping.ReadClipper; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.vcf.*; @@ -382,7 +381,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem @Override @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"}) - public ActivityProfileResult isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { + public ActivityProfileState isActive( final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context ) { if( UG_engine.getUAC().GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) { for( final VariantContext vc : tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()) ) { @@ 
-391,15 +390,15 @@ public class HaplotypeCaller extends ActiveRegionWalker implem } } if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) { - return new ActivityProfileResult(ref.getLocus(), 1.0); + return new ActivityProfileState(ref.getLocus(), 1.0); } } if( USE_ALLELES_TRIGGER ) { - return new ActivityProfileResult( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); + return new ActivityProfileState( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 ); } - if( context == null ) { return new ActivityProfileResult(ref.getLocus(), 0.0); } + if( context == null ) { return new ActivityProfileState(ref.getLocus(), 0.0); } final List noCall = new ArrayList(); // used to noCall all genotypes until the exact model is applied noCall.add(Allele.NO_CALL); @@ -436,7 +435,7 @@ public class HaplotypeCaller extends ActiveRegionWalker implem final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL); final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() ); - return new ActivityProfileResult( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() ); + return new ActivityProfileState( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? 
ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileState.Type.NONE, averageHQSoftClips.mean() ); } //--------------------------------------------------------------------------------------------------------------- diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index a7e4d7649..de0bfd1f1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -41,7 +41,8 @@ import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActivityProfile; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; +import org.broadinstitute.sting.utils.activeregion.BandPassActivityProfile; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -71,6 +72,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine myReads = new LinkedList(); private GenomeLoc spanOfLastReadSeen = null; + @Override + public void initialize(GenomeAnalysisEngine engine, Walker walker, ProgressMeter progressMeter) { + super.initialize(engine, walker, progressMeter); + + final ActiveRegionWalker arWalker = (ActiveRegionWalker)walker; + if ( arWalker.wantsExtendedReads() && ! arWalker.wantsNonPrimaryReads() ) { + throw new IllegalArgumentException("Active region walker " + arWalker + " requested extended events but not " + + "non-primary reads, an inconsistent state. 
Please modify the walker"); + } + + activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension(); + maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion(); + walkerHasPresetRegions = arWalker.hasPresetActiveRegions(); + } + + // ------------------------------------------------------------------------------------- + // + // Utility functions + // + // ------------------------------------------------------------------------------------- + protected int getActiveRegionExtension() { return activeRegionExtension; } @@ -97,19 +120,6 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine walker, + final LocusShardDataProvider dataProvider, + final LocusView locusView) { + if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) + return new ManagingReferenceOrderedView( dataProvider ); + else + return (RodLocusView)locusView; + } + + + // ------------------------------------------------------------------------------------- + // + // Working with ActivityProfiles and Active Regions + // + // ------------------------------------------------------------------------------------- + /** * Take the individual isActive calls and integrate them into contiguous active regions and * add these blocks of work to the work queue @@ -133,28 +159,26 @@ public class TraverseActiveRegions extends TraversalEngine walker, + protected final ActivityProfileState walkerActiveProb(final ActiveRegionWalker walker, final RefMetaDataTracker tracker, final ReferenceContext refContext, final AlignmentContext locus, final GenomeLoc location) { - if ( walker.hasPresetActiveRegions() ) { - return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); + if ( walkerHasPresetRegions ) { + return new ActivityProfileState(location, walker.presetActiveRegions.overlaps(location) ? 
1.0 : 0.0); } else { return walker.isActive( tracker, refContext, locus ); } } - protected ReferenceOrderedView getReferenceOrderedView(final ActiveRegionWalker walker, - final LocusShardDataProvider dataProvider, - final LocusView locusView) { - if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA ) - return new ManagingReferenceOrderedView( dataProvider ); + private ActivityProfile makeNewActivityProfile() { + if ( walkerHasPresetRegions ) + return new ActivityProfile(engine.getGenomeLocParser()); else - return (RodLocusView)locusView; + return new BandPassActivityProfile(engine.getGenomeLocParser()); } /** @@ -171,6 +195,12 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine activeRegions = new LinkedList(); - ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() ); + ActivityProfile profile = makeNewActivityProfile(); ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); @@ -245,9 +269,8 @@ public class TraverseActiveRegions extends TraversalEngine reads = locusView.getLIBS().transferReadsFromAllPreviousPileups(); for( final GATKSAMRecord read : reads ) { if ( appearedInLastShard(locOfLastReadAtTraversalStart, read) ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index f937c2458..820100f7f 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -40,7 +40,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; -import 
org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalUtils; @@ -114,7 +114,7 @@ public abstract class ActiveRegionWalker extends Walker= 0.0", "result.isActiveProb <= 1.0"}) - public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context); + public abstract ActivityProfileState isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context); // Map over the ActiveRegion public abstract MapType map(final ActiveRegion activeRegion, final RefMetaDataTracker metaDataTracker); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 909d99424..fd05ddd7b 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -1,35 +1,34 @@ /* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. 
-* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ package org.broadinstitute.sting.utils.activeregion; +import com.google.java.contract.Ensures; import com.google.java.contract.Requires; -import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; import java.util.ArrayList; import java.util.Collections; @@ -43,36 +42,37 @@ import java.util.List; * @since Date created */ public class ActivityProfile { - final GenomeLocParser parser; - final boolean presetRegions; - GenomeLoc regionStartLoc = null; - GenomeLoc regionStopLoc = null; - final List isActiveList; - private static final int FILTER_SIZE = 80; - private static final double[] GaussianKernel; + private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author - static { - GaussianKernel = new double[2*FILTER_SIZE + 1]; - for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { - GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); - } + protected final List isActiveList; + protected final GenomeLocParser parser; + + protected GenomeLoc regionStartLoc = null; + protected GenomeLoc regionStopLoc = null; + + public ActivityProfile(final GenomeLocParser parser) { + this(parser, new ArrayList(), null); } - // todo -- add upfront the start and stop of the intervals - // todo -- check that no regions are unexpectedly missing - // todo -- add unit tests - // TODO -- own preset regions - public ActivityProfile(final GenomeLocParser parser, final boolean presetRegions) { - this(parser, presetRegions, new ArrayList(), null); - } - - protected ActivityProfile(final GenomeLocParser parser, final boolean presetRegions, final List isActiveList, final GenomeLoc regionStartLoc) { + protected ActivityProfile(final GenomeLocParser parser, final List isActiveList, final GenomeLoc regionStartLoc) { this.parser = parser; - this.presetRegions = presetRegions; this.isActiveList = 
isActiveList; this.regionStartLoc = regionStartLoc; } + /** + * Create a profile of the same class as this object containing just the provided isActiveList + * + * Used by clients to create derived activity profiles (such as ones without the starting X + * sites because they've been removed in an ActiveRegion) of the same class. + * + * @param isActiveList the active results list to use in the derived instance + * @return a freshly allocated data set + */ + protected ActivityProfile createDerivedProfile(final List isActiveList) { + return new ActivityProfile(parser, isActiveList, regionStartLoc); + } + @Override public String toString() { return "ActivityProfile{" + @@ -82,14 +82,14 @@ public class ActivityProfile { } /** - * Add the next ActivityProfileResult to this profile. + * Add the next ActivityProfileState to this profile. * * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown * - * @param result a well-formed ActivityProfileResult result to incorporate into this profile + * @param result a well-formed ActivityProfileState result to incorporate into this profile */ @Requires("result != null") - public void add(final ActivityProfileResult result) { + public void add(final ActivityProfileState result) { final GenomeLoc loc = result.getLoc(); if ( regionStartLoc == null ) { @@ -104,31 +104,67 @@ public class ActivityProfile { isActiveList.add(result); } + /** + * How many profile results are in this profile? + * @return the number of profile results + */ + @Ensures("result >= 0") public int size() { return isActiveList.size(); } + /** + * Is this profile empty? 
+ * @return true if the profile is empty + */ + @Ensures("isEmpty() == (size() == 0)") public boolean isEmpty() { return isActiveList.isEmpty(); } - public boolean hasPresetRegions() { - return presetRegions; + /** + * Get the list of active profile results in this object + * @return a non-null, ordered list of active profile results + */ + @Ensures("result != null") + protected List getActiveList() { + return isActiveList; } /** - * Band pass this ActivityProfile, producing a new profile that's band pass filtered - * @return a new ActivityProfile that's the band-pass filtered version of this profile + * Finalize the probabilities in this activity profile, preparing it for a future + * call to createActiveRegions. This function returns a new profile with cleaned + * up activity estimates. + * + * This code looks at the current list of states, cleans them up, and then returns + * a newly allocated ActivityProfile + * + * @return a newly allocated ActivityProfile based on the current state of this + * profile, but that has been "finalized" as required by the profile implementation */ - public ActivityProfile bandPassFilter() { - final double[] activeProbArray = new double[isActiveList.size()]; + public ActivityProfile finalizeProfile() { int iii = 0; - for( final ActivityProfileResult result : isActiveList ) { + for( final double prob : finalizeProbabilities() ) { + final ActivityProfileState result = isActiveList.get(iii++); + result.isActiveProb = prob; + result.resultState = ActivityProfileState.Type.NONE; + result.resultValue = null; + } + + return createDerivedProfile(isActiveList); + } + + public double[] finalizeProbabilities() { + final double[] activeProbArray = new double[isActiveList.size()]; + + int iii = 0; + for( final ActivityProfileState result : isActiveList ) { activeProbArray[iii++] = result.isActiveProb; } + iii = 0; - for( final ActivityProfileResult result : isActiveList ) { - if( 
result.resultState.equals(ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups + for( final ActivityProfileState result : isActiveList ) { + if( result.resultState.equals(ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups final int numHQClips = result.resultValue.intValue(); for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]); @@ -137,29 +173,7 @@ public class ActivityProfile { iii++; } - final double[] filteredProbArray; - if( !presetRegions ) { - // if we aren't using preset regions, actually apply the band pass filter for activeProbArray into filteredProbArray - filteredProbArray = new double[activeProbArray.length]; - for( iii = 0; iii < activeProbArray.length; iii++ ) { - final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); - final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); - filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); - } - } else { - // otherwise we simply use the activeProbArray directly - filteredProbArray = activeProbArray; - } - - iii = 0; - for( final double prob : filteredProbArray ) { - final ActivityProfileResult result = isActiveList.get(iii++); - result.isActiveProb = prob; - result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE; - result.resultValue = null; - } - - return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc); + return activeProbArray; } /** @@ -168,7 +182,6 @@ public class 
ActivityProfile { * @return the list of active regions */ public List createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) { - final double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author final ArrayList returnList = new ArrayList(); if( isActiveList.size() == 0 ) { @@ -203,11 +216,11 @@ public class ActivityProfile { * @param activeRegionExtension the amount of margin overlap in the active region * @return a fully initialized ActiveRegion with the above properties */ - private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { + private List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); } - private final List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List returnList) { + private List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List returnList) { if( !isActive || curEnd - curStart < maxRegionSize ) { final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd); returnList.add(new ActiveRegion(loc, isActive, parser, activeRegionExtension)); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java similarity index 77% rename from public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java rename to 
public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java index bf2636465..38e89b605 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java @@ -34,40 +34,40 @@ import org.broadinstitute.sting.utils.GenomeLoc; * User: rpoplin * Date: 7/27/12 */ -public class ActivityProfileResult { +public class ActivityProfileState { private GenomeLoc loc; public double isActiveProb; - public ActivityProfileResultState resultState; + public Type resultState; public Number resultValue; - public enum ActivityProfileResultState { + public enum Type { NONE, HIGH_QUALITY_SOFT_CLIPS } /** - * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb + * Create a new ActivityProfileState at loc with probability of being active of isActiveProb * * @param loc the position of the result profile (for debugging purposes) * @param isActiveProb the probability of being active (between 0 and 1) */ @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"}) - public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb ) { - this(loc, isActiveProb, ActivityProfileResultState.NONE, null); + public ActivityProfileState(final GenomeLoc loc, final double isActiveProb) { + this(loc, isActiveProb, Type.NONE, null); } /** - * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb that maintains some + * Create a new ActivityProfileState at loc with probability of being active of isActiveProb that maintains some * information about the result state and value (TODO RYAN -- what do these mean?) 
* * @param loc the position of the result profile (for debugging purposes) * @param isActiveProb the probability of being active (between 0 and 1) */ @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"}) - public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) { + public ActivityProfileState(final GenomeLoc loc, final double isActiveProb, final Type resultState, final Number resultValue) { // make sure the location of that activity profile is 1 if ( loc.size() != 1 ) - throw new IllegalArgumentException("Location for an ActivityProfileResult must have to size 1 bp but saw " + loc); + throw new IllegalArgumentException("Location for an ActivityProfileState must have to size 1 bp but saw " + loc); this.loc = loc; this.isActiveProb = isActiveProb; @@ -76,7 +76,7 @@ public class ActivityProfileResult { } /** - * Get the genome loc associated with the ActivityProfileResult + * Get the genome loc associated with the ActivityProfileState * @return the location of this result */ @Ensures("result != null") @@ -86,7 +86,7 @@ public class ActivityProfileResult { @Override public String toString() { - return "ActivityProfileResult{" + + return "ActivityProfileState{" + "loc=" + loc + ", isActiveProb=" + isActiveProb + ", resultState=" + resultState + diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java new file mode 100644 index 000000000..cef700419 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java @@ -0,0 +1,84 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, 
including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.sting.utils.activeregion; + +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; + +import java.util.ArrayList; +import java.util.List; + +/** + * + * + * @author Mark DePristo + * @since 2011 + */ +public class BandPassActivityProfile extends ActivityProfile { + private static final int FILTER_SIZE = 80; + private static final double[] GaussianKernel; + + static { + GaussianKernel = new double[2*FILTER_SIZE + 1]; + for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { + GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); + } + } + + public BandPassActivityProfile(final GenomeLocParser parser) { + this(parser, new ArrayList(), null); + } + + public BandPassActivityProfile(final GenomeLocParser parser, final List isActiveList, final GenomeLoc regionStartLoc) { + super(parser, isActiveList, regionStartLoc); + } + + @Override + protected ActivityProfile createDerivedProfile(List isActiveList) { + return new 
BandPassActivityProfile(parser, isActiveList, regionStartLoc); + } + + /** + * Band pass the probabilities in the ActivityProfile, producing a new profile that's band pass filtered + * @return a new double[] that's the band-pass filtered version of this profile + */ + @Override + public double[] finalizeProbabilities() { + final double[] activeProbArray = super.finalizeProbabilities(); + final double[] bandPassProbArray = new double[activeProbArray.length]; + + // apply the band pass filter for activeProbArray into filteredProbArray + for( int iii = 0; iii < activeProbArray.length; iii++ ) { + final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); + final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); + bandPassProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); + } + + return bandPassProbArray; + } +} diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java index 76be54d72..f09a4b3e8 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java @@ -33,7 +33,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; import org.broadinstitute.sting.utils.activeregion.ActiveRegion; import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult; +import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; import java.util.*; @@ -80,10 +80,10 @@ class DummyActiveRegionWalker extends ActiveRegionWalker { } @Override - public 
ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { isActiveCalls.add(ref.getLocus()); final double p = activeRegions == null || activeRegions.overlaps(ref.getLocus()) ? prob : 0.0; - return new ActivityProfileResult(ref.getLocus(), p); + return new ActivityProfileState(ref.getLocus(), p); } @Override diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileStateUnitTest.java similarity index 79% rename from public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java rename to public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileStateUnitTest.java index d131666fa..019cf82da 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileResultTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileStateUnitTest.java @@ -25,23 +25,17 @@ package org.broadinstitute.sting.utils.activeregion; -import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.samtools.SAMFileHeader; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.GenomeLocSortedSet; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.broadinstitute.sting.utils.sam.ArtificialBAMBuilder; import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File; import java.io.FileNotFoundException; import java.util.Arrays; -import java.util.EnumSet; import java.util.LinkedList; import java.util.List; @@ -52,7 
+46,7 @@ import java.util.List; * Time: 2:30 PM * To change this template use File | Settings | File Templates. */ -public class ActivityProfileResultTest { +public class ActivityProfileStateUnitTest { private GenomeLocParser genomeLocParser; @BeforeClass @@ -71,7 +65,7 @@ public class ActivityProfileResultTest { genomeLocParser.createGenomeLoc(chr, 10, 10), genomeLocParser.createGenomeLoc(chr, 100, 100) )) { for ( final double prob : Arrays.asList(0.0, 0.5, 1.0) ) { - for ( final ActivityProfileResult.ActivityProfileResultState state : ActivityProfileResult.ActivityProfileResultState.values() ) { + for ( final ActivityProfileState.Type state : ActivityProfileState.Type.values() ) { for ( final Number value : Arrays.asList(1, 2, 4) ) { tests.add(new Object[]{ loc, prob, state, value}); } @@ -84,15 +78,15 @@ public class ActivityProfileResultTest { } @Test(dataProvider = "ActiveProfileResultProvider") - public void testActiveProfileResultProvider(GenomeLoc loc, final double prob, ActivityProfileResult.ActivityProfileResultState maybeState, final Number maybeNumber) { - final ActivityProfileResult apr = maybeState == null - ? new ActivityProfileResult(loc, prob) - : new ActivityProfileResult(loc, prob, maybeState, maybeNumber); + public void testActiveProfileResultProvider(GenomeLoc loc, final double prob, ActivityProfileState.Type maybeState, final Number maybeNumber) { + final ActivityProfileState apr = maybeState == null + ? new ActivityProfileState(loc, prob) + : new ActivityProfileState(loc, prob, maybeState, maybeNumber); Assert.assertEquals(apr.getLoc(), loc); Assert.assertNotNull(apr.toString()); Assert.assertEquals(apr.isActiveProb, prob); - Assert.assertEquals(apr.resultState, maybeState == null ? ActivityProfileResult.ActivityProfileResultState.NONE : maybeState); + Assert.assertEquals(apr.resultState, maybeState == null ? ActivityProfileState.Type.NONE : maybeState); Assert.assertEquals(apr.resultValue, maybeState == null ? 
null : maybeNumber); } } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index ff27037d3..430e0b5c6 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -42,9 +42,7 @@ import org.testng.annotations.Test; import java.io.File; import java.io.FileNotFoundException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; +import java.util.*; public class ActivityProfileUnitTest extends BaseTest { @@ -70,23 +68,26 @@ public class ActivityProfileUnitTest extends BaseTest { List expectedRegions; int extension = 0; GenomeLoc regionStart = startLoc; + final ProfileType type; - public BasicActivityProfileTestProvider(final List probs, final List expectedRegions) { - super(BasicActivityProfileTestProvider.class); - this.probs = probs; - this.expectedRegions = expectedRegions; - setName(getName()); - } - - public BasicActivityProfileTestProvider(final List probs, boolean startActive, int ... startsAndStops) { + public BasicActivityProfileTestProvider(final ProfileType type, final List probs, boolean startActive, int ... 
startsAndStops) { super(BasicActivityProfileTestProvider.class); + this.type = type; this.probs = probs; this.expectedRegions = toRegions(startActive, startsAndStops); setName(getName()); } private String getName() { - return String.format("probs=%s expectedRegions=%s", Utils.join(",", probs), Utils.join(",", expectedRegions)); + return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions)); + } + + public ActivityProfile makeProfile() { + switch ( type ) { + case Base: return new ActivityProfile(genomeLocParser); + case BandPass: return new BandPassActivityProfile(genomeLocParser); + default: throw new IllegalStateException(type.toString()); + } } private List toRegions(boolean isActive, int[] startsAndStops) { @@ -103,27 +104,36 @@ public class ActivityProfileUnitTest extends BaseTest { } } + private enum ProfileType { + Base, BandPass + } + @DataProvider(name = "BasicActivityProfileTestProvider") public Object[][] makeQualIntervalTestProvider() { - new BasicActivityProfileTestProvider(Arrays.asList(1.0), true, 0, 1); - new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0), true, 0, 1, 2); - new BasicActivityProfileTestProvider(Arrays.asList(0.0, 1.0), false, 0, 1, 2); - new BasicActivityProfileTestProvider(Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); - new BasicActivityProfileTestProvider(Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); + for ( final ProfileType type : ProfileType.values() ) { + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0), true, 0, 1); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0), true, 0, 1, 2); + new BasicActivityProfileTestProvider(type, Arrays.asList(0.0, 1.0), false, 0, 1, 2); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); + } return 
BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); } @Test(dataProvider = "BasicActivityProfileTestProvider") public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { - ActivityProfile profile = new ActivityProfile(genomeLocParser, false); + ActivityProfile profile = cfg.makeProfile(); + + Assert.assertTrue(profile.isEmpty()); Assert.assertEquals(profile.parser, genomeLocParser); for ( int i = 0; i < cfg.probs.size(); i++ ) { double p = cfg.probs.get(i); GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(new ActivityProfileResult(loc, p)); + profile.add(new ActivityProfileState(loc, p)); + Assert.assertFalse(profile.isEmpty()); } Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); @@ -131,6 +141,11 @@ public class ActivityProfileUnitTest extends BaseTest { assertProbsAreEqual(profile.isActiveList, cfg.probs); assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); + + Assert.assertEquals(profile.createDerivedProfile(profile.isActiveList).getClass(), profile.getClass()); + + final List empty = new LinkedList(); + Assert.assertEquals(profile.createDerivedProfile(empty).size(), 0); } private void assertRegionsAreEqual(List actual, List expected) { @@ -140,7 +155,7 @@ public class ActivityProfileUnitTest extends BaseTest { } } - private void assertProbsAreEqual(List actual, List expected) { + private void assertProbsAreEqual(List actual, List expected) { Assert.assertEquals(actual.size(), expected.size()); for ( int i = 0; i < actual.size(); i++ ) { Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i)); From e050f649fdb838338d76a242f9e62d7ad1e19ebf Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 18 Jan 2013 10:18:04 -0500 Subject: [PATCH 14/46] 
IncrementalActivityProfile, complete with extensive unit tests -- This is an activity profile compatible with fetching its implied active regions incrementally, as activity profile states are added --- .../utils/activeregion/ActivityProfile.java | 2 +- .../activeregion/ActivityProfileState.java | 12 +- .../IncrementalActivityProfile.java | 373 ++++++++++++++++++ .../IncrementalActivityProfileUnitTest.java | 350 ++++++++++++++++ 4 files changed, 735 insertions(+), 2 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index fd05ddd7b..8d6012fac 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -61,7 +61,7 @@ public class ActivityProfile { } /** - * Create a profile of the same class as this object containing just the provided isActiveList + * Create a profile of the same class as this object containing just the provided stateList * * Used by clients to create derived activity profiles (such as ones without the starting X * sites because they've been removed in an ActiveRegion) of the same class. 
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java index 38e89b605..df21672a9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java @@ -35,7 +35,7 @@ import org.broadinstitute.sting.utils.GenomeLoc; * Date: 7/27/12 */ public class ActivityProfileState { - private GenomeLoc loc; + final private GenomeLoc loc; public double isActiveProb; public Type resultState; public Number resultValue; @@ -75,6 +75,16 @@ public class ActivityProfileState { this.resultValue = resultValue; } + /** + * The offset of state w.r.t. our current region's start location + * @param regionStartLoc the start of the region, as a genome loc + * @return the position of this profile relative to the start of this region + */ + public int getOffset(final GenomeLoc regionStartLoc) { + return getLoc().getStart() - regionStartLoc.getStart(); + } + + /** * Get the genome loc associated with the ActivityProfileState * @return the location of this result diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java new file mode 100644 index 000000000..e71f177f4 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java @@ -0,0 +1,373 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit 
persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.activeregion; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; + +import java.util.*; + +/** + * Class holding information about per-base activity scores for the + * active region traversal + * + * @author Mark DePristo + * @since Date created + */ +public class IncrementalActivityProfile { + private final static int MAX_PROB_PROPOGATION_DISTANCE = 10; + private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author + + protected final List stateList; + protected final GenomeLocParser parser; + + protected GenomeLoc regionStartLoc = null; + protected GenomeLoc regionStopLoc = null; + + /** + * Create a new empty IncrementalActivityProfile + * @param parser the parser we can use to create genome locs + */ + public IncrementalActivityProfile(final GenomeLocParser parser) { + this(parser, new ArrayList(), null); + } + + /** + * Create a new IncrementalActivityProfile using state list (not copied) and starting at regionStartLoc + * @param parser the parser we can use to create genome locs + */ + @Deprecated + 
protected IncrementalActivityProfile(final GenomeLocParser parser, final List stateList, final GenomeLoc regionStartLoc) { + this.parser = parser; + this.stateList = stateList; + this.regionStartLoc = regionStartLoc; + } + + /** + * Create a profile of the same class as this object containing just the provided stateList + * + * Used by clients to create derived activity profiles (such as ones without the starting X + * sites because they've been removed in an ActiveRegion) of the same class. + * + * @param isActiveList the active results list to use in the derived instance + * @return a freshly allocated data set + */ + @Deprecated + protected IncrementalActivityProfile createDerivedProfile(final List isActiveList) { + return new IncrementalActivityProfile(parser, isActiveList, regionStartLoc); + } + + @Override + public String toString() { + return "ActivityProfile{" + + "start=" + regionStartLoc + + ", stop=" + regionStopLoc + + '}'; + } + + /** + * How far away can probability mass be moved around in this profile? + * + * This distance puts an upper limit on how far, in bp, we will ever propogate probability max around + * when adding a new ActivityProfileState. For example, if the value of this function is + * 10, and you are looking at a state at bp 5, and we know that no states beyond 5 + 10 will have + * their probability propograted back to that state. + * + * @return a positive integer distance in bp + */ + @Ensures("result >= 0") + public int getMaxProbPropogationDistance() { + return MAX_PROB_PROPOGATION_DISTANCE; + } + + /** + * How many profile results are in this profile? + * @return the number of profile results + */ + @Ensures("result >= 0") + public int size() { + return stateList.size(); + } + + /** + * Is this profile empty? 
+ * @return true if the profile is empty + */ + @Ensures("isEmpty() == (size() == 0)") + public boolean isEmpty() { + return stateList.isEmpty(); + } + + /** + * Get the list of active profile results in this object + * @return a non-null, ordered list of active profile results + */ + @Ensures("result != null") + protected List getStateList() { + return stateList; + } + + /** + * Helper function that gets the genome loc for a site offset from relativeLoc, protecting ourselves from + * falling off the edge of the contig. + * + * @param relativeLoc the location offset is relative to + * @param offset the offset from relativeLoc where we'd like to create a GenomeLoc + * @return a genome loc with relativeLoc.start + offset, if this is on the contig, null otherwise + */ + @Requires("relativeLoc != null") + protected GenomeLoc getLocForOffset(final GenomeLoc relativeLoc, final int offset) { + final int start = relativeLoc.getStart() + offset; + if ( start < 0 || start > getCurrentContigLength() ) { + return null; + } else { + return parser.createGenomeLoc(regionStartLoc.getContig(), start); + } + } + + /** + * Get the length of the current contig + * @return the length in bp + */ + @Requires("regionStartLoc != null") + @Ensures("result > 0") + private int getCurrentContigLength() { + // TODO -- fix performance problem with getContigInfo + return parser.getContigInfo(regionStartLoc.getContig()).getSequenceLength(); + } + + // -------------------------------------------------------------------------------- + // + // routines to add states to a profile + // + // -------------------------------------------------------------------------------- + + /** + * Add the next ActivityProfileState to this profile. 
+ * + * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown + * + * @param state a well-formed ActivityProfileState result to incorporate into this profile + */ + @Requires("state != null") + public void add(final ActivityProfileState state) { + final GenomeLoc loc = state.getLoc(); + + if ( regionStartLoc == null ) { + regionStartLoc = loc; + regionStopLoc = loc; + } else { + // TODO -- need to figure out where to add loc as the regions will be popping off the front + if ( regionStopLoc.getStart() != loc.getStart() - 1 ) + throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc ); + regionStopLoc = loc; + } + + final Collection processedStates = processState(state); + for ( final ActivityProfileState processedState : processedStates ) { + incorporateSingleState(processedState); + } + } + + /** + * Incorporate a single activity profile state into the current list of states + * + * If state's position occurs immediately after the last position in this profile, then + * the state is appended to the state list. If it's within the existing states list, + * the prob of stateToAdd is added to its corresponding state in the list. If the + * position would be before the start of this profile, stateToAdd is simply ignored. + * + * @param stateToAdd the state we want to add to the states list + */ + @Requires("stateToAdd != null") + private void incorporateSingleState(final ActivityProfileState stateToAdd) { + final int position = stateToAdd.getOffset(regionStartLoc); + + if ( position > size() ) + // should we allow this? 
probably not + throw new IllegalArgumentException("Must add state contiguous to existing states"); + + if ( position >= 0 ) { + // ignore states starting before this regions start + if ( position < size() ) { + stateList.get(position).isActiveProb += stateToAdd.isActiveProb; + } else { + if ( position != size() ) throw new IllegalStateException("position == size but it wasn't"); + stateList.add(stateToAdd); + } + } + } + + /** + * Process justAddedState, returning a collection of derived states that actually be added to the stateList + * + * The purpose of this function is to transform justAddedStates, if needed, into a series of atomic states + * that we actually want to track. For example, if state is for soft clips, we transform that single + * state into a list of states that surround the state up to the distance of the soft clip. + * + * Can be overridden by subclasses to transform states in any way + * + * There's no particular contract for the output states, except that they can never refer to states + * beyond the current end of the stateList unless the explictly include preceding states before + * the reference. So for example if the current state list is [1, 2, 3] this function could return + * [1,2,3,4,5] but not [1,2,3,5]. 
+ * + * @param justAddedState the state our client provided to use to add to the list + * @return a list of derived states that should actually be added to this profile's state list + */ + protected Collection processState(final ActivityProfileState justAddedState) { + if ( justAddedState.resultState.equals(ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS) ) { + // special code to deal with the problem that high quality soft clipped bases aren't added to pileups + final List states = new LinkedList(); + final int numHQClips = justAddedState.resultValue.intValue(); + for( int jjj = - numHQClips; jjj <= numHQClips; jjj++ ) { + final GenomeLoc loc = getLocForOffset(justAddedState.getLoc(), jjj); + if ( loc != null ) + states.add(new ActivityProfileState(loc, justAddedState.isActiveProb)); + } + + return states; + } else { + return Collections.singletonList(justAddedState); + } + } + + // -------------------------------------------------------------------------------- + // + // routines to get active regions from the profile + // + // -------------------------------------------------------------------------------- + + /** + * Get the next completed active regions from this profile, and remove all states supporting them from this profile + * + * Takes the current profile and finds all of the active / inactive from the start of the profile that are + * ready. By ready we mean unable to have their probability modified any longer by future additions to the + * profile. The regions that are popped off the profile take their states with them, so the start of this + * profile will always be after the end of the last region returned here. + * + * The regions are returned sorted by genomic position. + * + * This function may not return anything in the list, if no regions are ready + * + * No returned region will be larger than maxRegionSize. 
+ * + * @param activeRegionExtension the extension value to provide to the constructed regions + * @param maxRegionSize the maximize size of the returned region + * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the + * stateList. Used to close out the active region when we've hit some kind of end (such + * as the end of the contig) + * @return a non-null list of active regions + */ + @Ensures("result != null") + public List popReadyActiveRegions(final int activeRegionExtension, final int maxRegionSize, final boolean forceConversion) { + if ( activeRegionExtension < 0 ) throw new IllegalArgumentException("activeRegionExtension must be >= 0 but got " + activeRegionExtension); + if ( maxRegionSize < 1 ) throw new IllegalArgumentException("maxRegionSize must be >= 1 but got " + maxRegionSize); + + final LinkedList regions = new LinkedList(); + + while ( true ) { + final ActiveRegion nextRegion = popNextReadyActiveRegion(activeRegionExtension, maxRegionSize, forceConversion); + if ( nextRegion == null ) + return regions; + else { + regions.add(nextRegion); + } + } + } + + /** + * Helper function for popReadyActiveRegions that pops the first ready region off the front of this profile + * + * If a region is returned, modifies the state of this profile so that states used to make the region are + * no longer part of the profile. Associated information (like the region start position) of this profile + * are also updated. + * + * @param activeRegionExtension the extension value to provide to the constructed regions + * @param maxRegionSize the maximize size of the returned region + * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the + * stateList. 
Used to close out the active region when we've hit some kind of end (such + * as the end of the contig) + * @return a fully formed active region, or null if none can be made + */ + private ActiveRegion popNextReadyActiveRegion(final int activeRegionExtension, final int maxRegionSize, final boolean forceConversion) { + if ( stateList.isEmpty() ) + return null; + + final ActivityProfileState first = stateList.get(0); + final boolean isActiveRegion = first.isActiveProb > ACTIVE_PROB_THRESHOLD; + final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, maxRegionSize, forceConversion); + if ( offsetOfNextRegionEnd == -1 ) + // couldn't find a valid ending offset, so we return null + return null; + + // we need to create the active region, and clip out the states we're extracting from this profile + stateList.subList(0, offsetOfNextRegionEnd + 1).clear(); + + // update the start and stop locations as necessary + if ( stateList.isEmpty() ) { + regionStartLoc = regionStopLoc = null; + } else { + regionStartLoc = stateList.get(0).getLoc(); + } + final GenomeLoc regionLoc = parser.createGenomeLoc(first.getLoc().getContig(), first.getLoc().getStart(), first.getLoc().getStart() + offsetOfNextRegionEnd); + return new ActiveRegion(regionLoc, isActiveRegion, parser, activeRegionExtension); + } + + /** + * Find the end of the current region, returning the index into the element isActive element, or -1 if the region isn't done + * + * The current region is defined from the start of the stateList, looking for elements that have the same isActiveRegion + * flag (i.e., if isActiveRegion is true we are looking for states with isActiveProb > threshold, or alternatively + * for states < threshold). The maximize size of the returned region is maxRegionSize. If forceConversion is + * true, then we'll return the region end even if this isn't safely beyond the max prob propogation distance. 
+ * + * @param isActiveRegion is the region we're looking for an active region or inactive region? + * @param maxRegionSize the maximize size of the returned region + * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the + * stateList. Used to close out the active region when we've hit some kind of end (such + * as the end of the contig) + * @return the index into stateList of the last element of this region, or -1 if it cannot be found + */ + @Ensures({ + "result >= -1", + "result == -1 || result < maxRegionSize", + "! (result == -1 && forceConversion)"}) + private int findEndOfRegion(final boolean isActiveRegion, final int maxRegionSize, final boolean forceConversion) { + int i = 0; + while ( i < stateList.size() && i < maxRegionSize ) { + if ( stateList.get(i).isActiveProb > ACTIVE_PROB_THRESHOLD != isActiveRegion ) { + break; + } + i++; + } + + // we're one past the end, so i must be decremented + return forceConversion || i + getMaxProbPropogationDistance() < stateList.size() ? 
i - 1 : -1; + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java new file mode 100644 index 000000000..16b9b1877 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.activeregion; + + +// the imports for unit testing. 
+ + +import net.sf.picard.reference.ReferenceSequenceFile; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + + +public class IncrementalActivityProfileUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; + private GenomeLoc startLoc; + + @BeforeClass + public void init() throws FileNotFoundException { + // sequence + ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); + genomeLocParser = new GenomeLocParser(seq); + startLoc = genomeLocParser.createGenomeLoc("chr1", 1, 1, 100); + } + + // -------------------------------------------------------------------------------- + // + // Basic tests Provider + // + // -------------------------------------------------------------------------------- + + private class BasicActivityProfileTestProvider extends TestDataProvider { + List probs; + List expectedRegions; + int extension = 0; + GenomeLoc regionStart = startLoc; + final ProfileType type; + + public BasicActivityProfileTestProvider(final ProfileType type, final List probs, boolean startActive, int ... 
startsAndStops) { + super(BasicActivityProfileTestProvider.class); + this.type = type; + this.probs = probs; + this.expectedRegions = toRegions(startActive, startsAndStops); + setName(getName()); + } + + private String getName() { + return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions)); + } + + public IncrementalActivityProfile makeProfile() { + switch ( type ) { + case Base: return new IncrementalActivityProfile(genomeLocParser); + case BandPass: //return new BandPassActivityProfile(genomeLocParser); + default: throw new IllegalStateException(type.toString()); + } + } + + private List toRegions(boolean isActive, int[] startsAndStops) { + List l = new ArrayList(); + for ( int i = 0; i < startsAndStops.length - 1; i++) { + int start = regionStart.getStart() + startsAndStops[i]; + int end = regionStart.getStart() + startsAndStops[i+1] - 1; + GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); + ActiveRegion r = new ActiveRegion(activeLoc, isActive, genomeLocParser, extension); + l.add(r); + isActive = ! 
isActive; + } + return l; + } + } + + private enum ProfileType { + Base, BandPass + } + + @DataProvider(name = "BasicActivityProfileTestProvider") + public Object[][] makeQualIntervalTestProvider() { + for ( final ProfileType type : ProfileType.values() ) { + if ( type != ProfileType.BandPass ) { // todo -- re-enable + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0), true, 0, 1); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0), true, 0, 1, 2); + new BasicActivityProfileTestProvider(type, Arrays.asList(0.0, 1.0), false, 0, 1, 2); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); + } + } + + return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); + } + + @Test(dataProvider = "BasicActivityProfileTestProvider") + public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { + IncrementalActivityProfile profile = cfg.makeProfile(); + + Assert.assertTrue(profile.isEmpty()); + + Assert.assertEquals(profile.parser, genomeLocParser); + + for ( int i = 0; i < cfg.probs.size(); i++ ) { + double p = cfg.probs.get(i); + GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); + profile.add(new ActivityProfileState(loc, p)); + Assert.assertFalse(profile.isEmpty()); + } + Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); + + Assert.assertEquals(profile.size(), cfg.probs.size()); + assertProbsAreEqual(profile.stateList, cfg.probs); + + // TODO -- reanble tests + //assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); + + Assert.assertEquals(profile.createDerivedProfile(profile.stateList).getClass(), profile.getClass()); + + final List empty = new 
LinkedList(); + Assert.assertEquals(profile.createDerivedProfile(empty).size(), 0); + } + + private void assertRegionsAreEqual(List actual, List expected) { + Assert.assertEquals(actual.size(), expected.size()); + for ( int i = 0; i < actual.size(); i++ ) { + Assert.assertTrue(actual.get(i).equalExceptReads(expected.get(i))); + } + } + + private void assertProbsAreEqual(List actual, List expected) { + Assert.assertEquals(actual.size(), expected.size()); + for ( int i = 0; i < actual.size(); i++ ) { + Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i)); + } + } + + // ------------------------------------------------------------------------------------- + // + // Hardcore tests for adding to the profile and constructing active regions + // + // ------------------------------------------------------------------------------------- + + private static class SizeToStringList extends ArrayList { + @Override public String toString() { return "List[" + size() + "]"; } + } + + @DataProvider(name = "RegionCreationTests") + public Object[][] makeRegionCreationTests() { + final List tests = new LinkedList(); + + final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength(); + for ( int start : Arrays.asList(1, 10, 100, contigLength - 100, contigLength - 10) ) { + for ( int regionSize : Arrays.asList(1, 10, 100, 1000, 10000) ) { + for ( int maxRegionSize : Arrays.asList(10, 50, 200) ) { + for ( final boolean waitUntilEnd : Arrays.asList(false, true) ) { + for ( final boolean forceConversion : Arrays.asList(false, true) ) { + // what do I really want to test here? I'd like to test a few cases: + // -- region is all active (1.0) + // -- region is all inactive (0.0) + // -- cut the interval into 1, 2, 3, 4, 5 ... 
10 regions, each with alternating activity values + for ( final boolean startWithActive : Arrays.asList(true, false) ) { + for ( int nParts : Arrays.asList(1, 2, 3, 4, 5, 7, 10, 11, 13) ) { + +// for ( int start : Arrays.asList(1) ) { +// for ( int regionSize : Arrays.asList(100) ) { +// for ( int maxRegionSize : Arrays.asList(10) ) { +// for ( final boolean waitUntilEnd : Arrays.asList(true) ) { +// for ( final boolean forceConversion : Arrays.asList(false) ) { +// for ( final boolean startWithActive : Arrays.asList(true) ) { +// for ( int nParts : Arrays.asList(3) ) { + regionSize = Math.min(regionSize, contigLength - start); + final List regions = makeRegions(regionSize, startWithActive, nParts); + tests.add(new Object[]{ start, regions, maxRegionSize, nParts, forceConversion, waitUntilEnd }); + } + } + } + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + private List makeRegions(final int totalRegionSize, + final boolean startWithActive, + final int nParts) { + final List regions = new SizeToStringList(); + + boolean isActive = startWithActive; + final int activeRegionSize = Math.max(totalRegionSize / nParts, 1); + for ( int i = 0; i < totalRegionSize; i += activeRegionSize ) { + for ( int j = 0; j < activeRegionSize && j + i < totalRegionSize; j++ ) { + regions.add(isActive); + } + isActive = ! 
isActive; + } + + return regions; + } + + + @Test(enabled = true, dataProvider = "RegionCreationTests") + public void testRegionCreation(final int start, final List probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) { + final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser); + Assert.assertNotNull(profile.toString()); + + final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); + final List seenSites = new ArrayList(Collections.nCopies(probs.size(), false)); + ActiveRegion lastRegion = null; + for ( int i = 0; i < probs.size(); i++ ) { + final boolean isActive = probs.get(i); + final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, i + start); + final ActivityProfileState state = new ActivityProfileState(loc, isActive ? 1.0 : 0.0); + profile.add(state); + Assert.assertNotNull(profile.toString()); + + if ( ! waitUntilEnd ) { + final List regions = profile.popReadyActiveRegions(0, maxRegionSize, false); + lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites); + } + } + + if ( waitUntilEnd || forceConversion ) { + final List regions = profile.popReadyActiveRegions(0, maxRegionSize, forceConversion); + lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites); + } + + for ( int i = 0; i < probs.size(); i++ ) { + if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropogationDistance() < probs.size())) + // only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end + Assert.assertTrue(seenSites.get(i), "Missed site " + i); + } + + Assert.assertNotNull(profile.toString()); + } + + private ActiveRegion assertGoodRegions(final int start, final List regions, final int maxRegionSize, ActiveRegion lastRegion, final List probs, final List seenSites) { + for ( final ActiveRegion region : regions ) { + 
Assert.assertTrue(region.getLocation().size() > 0, "Region " + region + " has a bad size"); + Assert.assertTrue(region.getLocation().size() <= maxRegionSize, "Region " + region + " has a bad size: it's big than the max region size " + maxRegionSize); + if ( lastRegion != null ) { + Assert.assertTrue(region.getLocation().getStart() == lastRegion.getLocation().getStop() + 1, "Region " + region + " doesn't start immediately after previous region" + lastRegion); + } + + // check that all active bases are actually active + final int regionOffset = region.getLocation().getStart() - start; + Assert.assertTrue(regionOffset >= 0 && regionOffset < probs.size(), "Region " + region + " has a bad offset w.r.t. start"); + for ( int j = 0; j < region.getLocation().size(); j++ ) { + final int siteOffset = j + regionOffset; + Assert.assertEquals(region.isActive, probs.get(siteOffset).booleanValue()); + Assert.assertFalse(seenSites.get(siteOffset), "Site " + j + " in " + region + " was seen already"); + seenSites.set(siteOffset, true); + } + + lastRegion = region; + } + + return lastRegion; + } + + // ------------------------------------------------------------------------------------- + // + // Hardcore tests for adding to the profile and constructing active regions + // + // ------------------------------------------------------------------------------------- + + @DataProvider(name = "SoftClipsTest") + public Object[][] makeSoftClipsTest() { + final List tests = new LinkedList(); + + final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength(); + for ( int start : Arrays.asList(1, 10, 100, contigLength - 100, contigLength - 10, contigLength - 1) ) { + for ( int precedingSites: Arrays.asList(0, 1, 10) ) { + if ( precedingSites + start < contigLength ) { + for ( int softClipSize : Arrays.asList(1, 2, 10, 100) ) { +// for ( int start : Arrays.asList(10) ) { +// for ( int precedingSites: Arrays.asList(10) ) { +// for ( int softClipSize : 
Arrays.asList(1) ) { + tests.add(new Object[]{ start, precedingSites, softClipSize }); + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "SoftClipsTest") + public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) { + final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser); + + final int contigLength = genomeLocParser.getContigs().getSequences().get(0).getSequenceLength(); + final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); + for ( int i = 0; i < nPrecedingSites; i++ ) { + final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, i + start); + final ActivityProfileState state = new ActivityProfileState(loc, 0.0); + profile.add(state); + } + + final GenomeLoc softClipLoc = genomeLocParser.createGenomeLoc(contig, nPrecedingSites + start); + profile.add(new ActivityProfileState(softClipLoc, 1.0, ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS, softClipSize)); + + if ( nPrecedingSites == 0 ) { + final int profileSize = Math.min(start + softClipSize, contigLength) - start + 1; + Assert.assertEquals(profile.size(), profileSize, "Wrong number of states in the profile"); + } + + for ( int i = 0; i < profile.size(); i++ ) { + final ActivityProfileState state = profile.getStateList().get(i); + final boolean withinSCRange = state.getLoc().distance(softClipLoc) <= softClipSize; + if ( withinSCRange ) { + Assert.assertTrue(state.isActiveProb > 0.0, "active prob should be changed within soft clip size"); + } else { + Assert.assertEquals(state.isActiveProb, 0.0, "active prob shouldn't be changed outside of clip size"); + } + } + } +} \ No newline at end of file From ce160931d5c65cba4e355129b7328c2b56f08ead Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 18 Jan 2013 16:52:49 -0500 Subject: [PATCH 15/46] Optimize creation of reads in ArtificialBAMBuilder -- Now caches the reads so subsequent calls to makeReads() don't 
reallocate the reads from scratch each time --- .../sting/utils/sam/ArtificialBAMBuilder.java | 52 ++++++++++++------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java index 82b5b29cc..bf3045c71 100644 --- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java +++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialBAMBuilder.java @@ -62,6 +62,7 @@ public class ArtificialBAMBuilder { int alignmentStart = 1; int readLength = 10; private final ArrayList samples = new ArrayList(); + private List createdReads = null; private LinkedList additionalReads = new LinkedList(); @@ -102,6 +103,7 @@ public class ArtificialBAMBuilder { } public ArtificialBAMBuilder createAndSetHeader(final int nSamples) { + createdReads = null; this.header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.coordinate); header.setSequenceDictionary(parser.getContigs()); @@ -120,10 +122,12 @@ public class ArtificialBAMBuilder { } public void addReads(final GATKSAMRecord readToAdd) { + createdReads = null; additionalReads.add(readToAdd); } public void addReads(final Collection readsToAdd) { + createdReads = null; additionalReads.addAll(readsToAdd); } @@ -140,26 +144,34 @@ public class ArtificialBAMBuilder { * @return a ordered list of reads */ public List makeReads() { - final String baseName = "read"; - List reads = new ArrayList(nReadsPerLocus*nLoci); - for ( int locusI = 0; locusI < nLoci; locusI++) { - final int locus = locusI * (skipNLoci + 1); - for ( int readI = 0; readI < nReadsPerLocus; readI++ ) { - for ( final SAMReadGroupRecord rg : header.getReadGroups() ) { - final String readName = String.format("%s.%d.%d.%s", baseName, locus, readI, rg.getId()); - final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, readName, 0, alignmentStart + locus, 
readLength); - read.setReadGroup(new GATKSAMReadGroupRecord(rg)); - reads.add(read); + if ( createdReads == null ) { + final String baseName = "read"; + final LinkedList readGroups = new LinkedList(); + for ( final SAMReadGroupRecord rg : header.getReadGroups()) + readGroups.add(new GATKSAMReadGroupRecord(rg)); + + List reads = new ArrayList(nReadsPerLocus*nLoci); + for ( int locusI = 0; locusI < nLoci; locusI++) { + final int locus = locusI * (skipNLoci + 1); + for ( int readI = 0; readI < nReadsPerLocus; readI++ ) { + for ( final GATKSAMReadGroupRecord rg : readGroups ) { + final String readName = String.format("%s.%d.%d.%s", baseName, locus, readI, rg.getId()); + final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, readName, 0, alignmentStart + locus, readLength); + read.setReadGroup(rg); + reads.add(read); + } } } + + if ( ! additionalReads.isEmpty() ) { + reads.addAll(additionalReads); + Collections.sort(reads, new SAMRecordCoordinateComparator()); + } + + createdReads = new ArrayList(reads); } - if ( ! 
additionalReads.isEmpty() ) { - reads.addAll(additionalReads); - Collections.sort(reads, new SAMRecordCoordinateComparator()); - } - - return reads; + return createdReads; } /** @@ -192,13 +204,13 @@ public class ArtificialBAMBuilder { public int getnReadsPerLocus() { return nReadsPerLocus; } public int getnLoci() { return nLoci; } public int getSkipNLoci() { return skipNLoci; } - public ArtificialBAMBuilder setSkipNLoci(int skipNLoci) { this.skipNLoci = skipNLoci; return this; } + public ArtificialBAMBuilder setSkipNLoci(int skipNLoci) { this.skipNLoci = skipNLoci; createdReads = null; return this; } public int getAlignmentStart() { return alignmentStart; } - public ArtificialBAMBuilder setAlignmentStart(int alignmentStart) { this.alignmentStart = alignmentStart; return this; } + public ArtificialBAMBuilder setAlignmentStart(int alignmentStart) { this.alignmentStart = alignmentStart; createdReads = null; return this; } public int getReadLength() { return readLength; } - public ArtificialBAMBuilder setReadLength(int readLength) { this.readLength = readLength; return this; } + public ArtificialBAMBuilder setReadLength(int readLength) { this.readLength = readLength; createdReads = null; return this; } public SAMFileHeader getHeader() { return header; } - public ArtificialBAMBuilder setHeader(SAMFileHeader header) { this.header = header; return this; } + public ArtificialBAMBuilder setHeader(SAMFileHeader header) { this.header = header; createdReads = null; return this; } public int getAlignmentEnd() { return alignmentStart + nLoci * (skipNLoci + 1) + readLength; From eb60235dcd46aef1260cdad111708644aa08d4e5 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 18 Jan 2013 16:57:28 -0500 Subject: [PATCH 16/46] Working version of incremental active region traversals -- The incremental version now processes active regions as soon as they are ready to be processed, instead of waiting until the end of the shard as in the previous version. 
This means that ART walkers will now take much less memory than previously. On chr20 of NA12878 the majority of regions are processed with as few as 500 reads in memory. Over the whole chr20 only 5K reads were ever held in ART at one time. -- Fixed bug in the way active regions worked with shard boundaries. The new implementation no longer see shard boundaries in any meaningful way, and that uncovered a problem that active regions were always being closed across shard boundaries. This behavior was actually encoded in the unit tests, so those needed to be updated as well. -- Changed the way that preset regions work in ART. The new contract ensures that you get exactly the regions you requested. the isActive function is still called, but its result has no impact on the regions. With this functionality is should be possible to use the HC as a generic assembly by forcing it to operate over very large regions -- Added a few misc. useful functions to IncrementalActivityProfile --- .../traversals/TraverseActiveRegions.java | 240 ++++++++---------- .../gatk/walkers/ActiveRegionWalker.java | 22 +- .../utils/activeregion/ActiveRegion.java | 17 +- .../IncrementalActivityProfile.java | 18 ++ .../traversals/DummyActiveRegionWalker.java | 18 +- .../TraverseActiveRegionsUnitTest.java | 54 ++-- 6 files changed, 176 insertions(+), 193 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index de0bfd1f1..436edbdf1 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -32,17 +32,13 @@ import org.broadinstitute.sting.gatk.WalkerManager; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import 
org.broadinstitute.sting.gatk.datasources.providers.*; -import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.activeregion.ActiveRegion; -import org.broadinstitute.sting.utils.activeregion.ActivityProfile; -import org.broadinstitute.sting.utils.activeregion.ActivityProfileState; -import org.broadinstitute.sting.utils.activeregion.BandPassActivityProfile; +import org.broadinstitute.sting.utils.activeregion.*; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; @@ -70,6 +66,7 @@ import java.util.*; public class TraverseActiveRegions extends TraversalEngine,LocusShardDataProvider> { protected final static Logger logger = Logger.getLogger(TraversalEngine.class); protected final static boolean DEBUG = false; + protected final static boolean LOG_READ_CARRYING = false; // set by the tranversal private boolean walkerHasPresetRegions = false; @@ -80,6 +77,8 @@ public class TraverseActiveRegions extends TraversalEngine myReads = new LinkedList(); private GenomeLoc spanOfLastReadSeen = null; + private IncrementalActivityProfile activityProfile = null; + int maxReadsInMemory = 0; @Override public void initialize(GenomeAnalysisEngine engine, Walker walker, ProgressMeter progressMeter) { @@ -94,6 +93,14 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine activeRegions) { - if ( profile.isEmpty() ) - throw new IllegalStateException("trying to incorporate an empty active profile " + profile); - - final ActivityProfile finalizedProfile = profile.finalizeProfile(); - 
activeRegions.addAll(finalizedProfile.createActiveRegions(getActiveRegionExtension(), getMaxRegionSize())); - return makeNewActivityProfile(); - } - - protected final ActivityProfileState walkerActiveProb(final ActiveRegionWalker walker, - final RefMetaDataTracker tracker, final ReferenceContext refContext, - final AlignmentContext locus, final GenomeLoc location) { - if ( walkerHasPresetRegions ) { - return new ActivityProfileState(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0); - } else { - return walker.isActive( tracker, refContext, locus ); - } - } - - private ActivityProfile makeNewActivityProfile() { - if ( walkerHasPresetRegions ) - return new ActivityProfile(engine.getGenomeLocParser()); - else - return new BandPassActivityProfile(engine.getGenomeLocParser()); - } - - /** - * Write out each active region to the walker activeRegionOutStream - * - * @param walker - */ - protected void writeActiveRegionsToStream( final ActiveRegionWalker walker ) { - // Just want to output the active regions to a file, not actually process them - for( final ActiveRegion activeRegion : workQueue ) { - if( activeRegion.isActive ) { - walker.activeRegionOutStream.println( activeRegion.getLocation() ); - } - } - } - // ------------------------------------------------------------------------------------- // // Actual traverse function @@ -219,7 +170,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine walker, final LocusShardDataProvider dataProvider, T sum) { - logger.debug(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider)); + if ( LOG_READ_CARRYING || logger.isDebugEnabled() ) + logger.info(String.format("TraverseActiveRegions.traverse: Shard is %s", dataProvider)); final LocusView locusView = new AllLocusView(dataProvider); - final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider ); - - final List activeRegions = new LinkedList(); - ActivityProfile profile = 
makeNewActivityProfile(); - - ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); + final ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView); // We keep processing while the next reference location is within the interval final GenomeLoc locOfLastReadAtTraversalStart = spanOfLastSeenRead(); - // if we've moved onto a new contig, process all of the active regions - if ( onNewContig(dataProvider.getShard()) ) - sum = processActiveRegions(walker, sum, true); - - GenomeLoc prevLoc = null; while( locusView.hasNext() ) { final AlignmentContext locus = locusView.next(); final GenomeLoc location = locus.getLocation(); @@ -273,9 +205,7 @@ public class TraverseActiveRegions extends TraversalEngine reads = locusView.getLIBS().transferReadsFromAllPreviousPileups(); for( final GATKSAMRecord read : reads ) { - if ( appearedInLastShard(locOfLastReadAtTraversalStart, read) ) { - if ( DEBUG ) logger.warn("Skipping duplicated " + read.getReadName()); - } else { + if ( ! 
appearedInLastShard(locOfLastReadAtTraversalStart, read) ) { if ( DEBUG ) logger.warn("Adding read " + read.getReadName() + " at " + engine.getGenomeLocParser().createGenomeLoc(read) + " from provider " + dataProvider); rememberLastReadLocation(read); myReads.add(read); @@ -286,10 +216,11 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine walker, T sum) { - return processActiveRegions((ActiveRegionWalker)walker, sum, true); + return processActiveRegions((ActiveRegionWalker)walker, sum, true, true); } // ------------------------------------------------------------------------------------- @@ -383,8 +290,15 @@ public class TraverseActiveRegions extends TraversalEngine 0 ) + throw new IllegalStateException("Active region " + region + " on a contig after last seen read " + spanOfLastSeenRead()); + else { + return contigCmp < 0 || region.getExtendedLoc().getStop() < spanOfLastSeenRead().getStart(); + } } /** @@ -408,7 +322,9 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine walker, T sum, final boolean forceRegionsToBeActive) { - if( walker.activeRegionOutStream != null ) { + /** + * Invoke the walker isActive function, and incorporate its result into the activity profile + * + * @param walker the walker we're running + * @param tracker the ref meta data tracker to pass on to the isActive function of walker + * @param refContext the refContext to pass on to the isActive function of walker + * @param locus the AlignmentContext to pass on to the isActive function of walker + */ + private void addIsActiveResult(final ActiveRegionWalker walker, + final RefMetaDataTracker tracker, final ReferenceContext refContext, + final AlignmentContext locus) { + // must be called, even if we won't use the result, to satisfy walker contract + final ActivityProfileState state = walker.isActive( tracker, refContext, locus ); + if ( ! 
walkerHasPresetRegions ) { + activityProfile.add(state); + } + } + + /** + * Write out each active region to the walker activeRegionOutStream + * + * @param walker + */ + private void writeActiveRegionsToStream( final ActiveRegionWalker walker ) { + // Just want to output the active regions to a file, not actually process them + for( final ActiveRegion activeRegion : workQueue ) { + if( activeRegion.isActive ) { + walker.activeRegionOutStream.println( activeRegion.getLocation() ); + } + } + } + + /** + * Take the individual isActive calls and integrate them into contiguous active regions and + * add these blocks of work to the work queue + * band-pass filter the list of isActive probabilities and turn into active regions + */ + private T processActiveRegions(final ActiveRegionWalker walker, T sum, final boolean flushActivityProfile, final boolean forceAllRegionsToBeActive) { + if ( ! walkerHasPresetRegions ) { + // We don't have preset regions, so we get our regions from the activity profile + final Collection activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile); + workQueue.addAll(activeRegions); + if ( logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + } + + if ( walker.activeRegionOutStream != null ) { writeActiveRegionsToStream(walker); return sum; } else { - return callWalkerMapOnActiveRegions(walker, sum, forceRegionsToBeActive); - } - } - - private T callWalkerMapOnActiveRegions(final ActiveRegionWalker walker, T sum, final boolean forceRegionsToBeActive) { - // Since we've traversed sufficiently past this point (or this contig!) 
in the workQueue we can unload those regions and process them - // TODO can implement parallel traversal here - while( workQueue.peek() != null ) { - final ActiveRegion activeRegion = workQueue.peek(); - if ( forceRegionsToBeActive || regionCompletelyWithinDeadZone(activeRegion) ) { - if ( DEBUG ) logger.warn("Processing active region " + activeRegion + " dead zone " + spanOfLastSeenRead()); - sum = processActiveRegion( workQueue.remove(), sum, walker ); - } else { - break; + // Since we've traversed sufficiently past this point (or this contig!) in the workQueue we can unload those regions and process them + while( workQueue.peek() != null ) { + final ActiveRegion activeRegion = workQueue.peek(); + if ( forceAllRegionsToBeActive || regionCompletelyWithinDeadZone(activeRegion) ) { + if ( DEBUG ) logger.warn("Processing active region " + activeRegion + " dead zone " + spanOfLastSeenRead()); + sum = processActiveRegion( workQueue.remove(), sum, walker ); + } else { + break; + } } - } - return sum; + return sum; + } } - protected T processActiveRegion(final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker walker) { + private T processActiveRegion(final ActiveRegion activeRegion, final T sum, final ActiveRegionWalker walker) { final Iterator liveReads = myReads.iterator(); while ( liveReads.hasNext() ) { boolean killed = false; @@ -468,6 +423,11 @@ public class TraverseActiveRegions extends TraversalEngine> Map call with " + activeRegion.getReads().size() + " " + (activeRegion.isActive ? "active" : "inactive") + " reads @ " + activeRegion.getLocation() + " with full extent: " + activeRegion.getReferenceLoc()); + + if ( LOG_READ_CARRYING ) + logger.info(String.format("Processing region %20s span=%3d active?=%5b with %4d reads. 
Overall max reads carried is %s", + activeRegion.getLocation(), activeRegion.getLocation().size(), activeRegion.isActive, activeRegion.size(), maxReadsInMemory)); + final M x = walker.map(activeRegion, null); return walker.reduce( x, sum ); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 820100f7f..85d7c8293 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -68,11 +68,7 @@ public abstract class ActiveRegionWalker extends Walker> activeRegionBindings = null; - public GenomeLocSortedSet presetActiveRegions = null; - - public boolean hasPresetActiveRegions() { - return presetActiveRegions != null; - } + private GenomeLocSortedSet presetActiveRegions = null; @Override public void initialize() { @@ -91,6 +87,22 @@ public abstract class ActiveRegionWalker extends Walker { - - public ActiveRegionStartLocationComparator() {} - - @Override - public int compare(final ActiveRegion left, final ActiveRegion right) { - return left.getLocation().compareTo(right.getLocation()); - } - } - */ } \ No newline at end of file diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java index e71f177f4..3cbad54e9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java @@ -123,6 +123,24 @@ public class IncrementalActivityProfile { return stateList.isEmpty(); } + /** + * Get the span of this activity profile, which is from the start of the first state to the stop of the last + * @return a potentially null GenomeLoc. 
Will be null if this profile is empty + */ + public GenomeLoc getSpan() { + return isEmpty() ? null : regionStartLoc.endpointSpan(regionStopLoc); + } + + @Requires("! isEmpty()") + public int getContigIndex() { + return regionStartLoc.getContigIndex(); + } + + @Requires("! isEmpty()") + public int getStop() { + return regionStopLoc.getStop(); + } + /** * Get the list of active profile results in this object * @return a non-null, ordered list of active profile results diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java index f09a4b3e8..e2cad88a1 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/DummyActiveRegionWalker.java @@ -51,6 +51,7 @@ class DummyActiveRegionWalker extends ActiveRegionWalker { protected List isActiveCalls = new ArrayList(); protected Map mappedActiveRegions = new LinkedHashMap(); + private boolean declareHavingPresetRegions = false; public DummyActiveRegionWalker() { this(1.0); @@ -60,20 +61,31 @@ class DummyActiveRegionWalker extends ActiveRegionWalker { this.prob = constProb; } - public DummyActiveRegionWalker(EnumSet wantStates) { - this(1.0); + public DummyActiveRegionWalker(GenomeLocSortedSet activeRegions, EnumSet wantStates, final boolean declareHavingPresetRegions) { + this(activeRegions, declareHavingPresetRegions); this.states = wantStates; } - public DummyActiveRegionWalker(GenomeLocSortedSet activeRegions) { + public DummyActiveRegionWalker(GenomeLocSortedSet activeRegions, final boolean declareHavingPresetRegions) { this(1.0); this.activeRegions = activeRegions; + this.declareHavingPresetRegions = declareHavingPresetRegions; } public void setStates(EnumSet states) { this.states = states; } + @Override + public boolean hasPresetActiveRegions() { + return declareHavingPresetRegions; + } 
+ + @Override + public GenomeLocSortedSet getPresetActiveRegions() { + return declareHavingPresetRegions ? activeRegions : null; + } + @Override public EnumSet desiredReadStates() { return states; diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java index 76eac3a8d..a574932a7 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsUnitTest.java @@ -179,7 +179,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { @Test(enabled = true && ! DEBUG, dataProvider = "TraversalEngineProvider") public void testActiveRegionCoverage(TraverseActiveRegions t) { - DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(new GenomeLocSortedSet(genomeLocParser, intervals), true); Collection activeRegions = getActiveRegions(t, walker, intervals).values(); verifyActiveRegionCoverage(intervals, activeRegions); @@ -242,9 +242,11 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { } } - @Test(enabled = true, dataProvider = "TraversalEngineProvider") + @Test(enabled = true && !DEBUG, dataProvider = "TraversalEngineProvider") public void testPrimaryReadMapping(TraverseActiveRegions t) { - DummyActiveRegionWalker walker = new DummyActiveRegionWalker(); + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(new GenomeLocSortedSet(genomeLocParser, intervals), + EnumSet.of(ActiveRegionReadState.PRIMARY), + true); // Contract: Each read has the Primary state in a single region (or none) // This is the region of maximum overlap for the read (earlier if tied) @@ -275,20 +277,18 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 
2999)); verifyReadMapping(region); - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384)); + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 10000, 20000)); verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927)); - verifyReadMapping(region); - region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); verifyReadMapping(region, "simple20"); } @Test(enabled = true && ! DEBUG, dataProvider = "TraversalEngineProvider") public void testNonPrimaryReadMapping(TraverseActiveRegions t) { - DummyActiveRegionWalker walker = new DummyActiveRegionWalker( - EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY)); + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(new GenomeLocSortedSet(genomeLocParser, intervals), + EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY), + true); // Contract: Each read has the Primary state in a single region (or none) // This is the region of maximum overlap for the read (earlier if tied) @@ -321,10 +321,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); verifyReadMapping(region, "boundary_equal", "boundary_unequal", "boundary_1_pre", "boundary_1_post"); - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384)); - verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); - - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927)); + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 10000, 20000)); verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); @@ -333,8 +330,9 @@ public class 
TraverseActiveRegionsUnitTest extends BaseTest { @Test(enabled = true && ! DEBUG, dataProvider = "TraversalEngineProvider") public void testExtendedReadMapping(TraverseActiveRegions t) { - DummyActiveRegionWalker walker = new DummyActiveRegionWalker( - EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED)); + DummyActiveRegionWalker walker = new DummyActiveRegionWalker(new GenomeLocSortedSet(genomeLocParser, intervals), + EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED), + true); // Contract: Each read has the Primary state in a single region (or none) // This is the region of maximum overlap for the read (earlier if tied) @@ -368,10 +366,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999)); verifyReadMapping(region, "boundary_equal", "boundary_unequal", "extended_and_np", "boundary_1_pre", "boundary_1_post"); - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 14908, 16384)); - verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); - - region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 16385, 16927)); + region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 10000, 20000)); verifyReadMapping(region, "shard_boundary_1_pre", "shard_boundary_1_post", "shard_boundary_equal"); region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100)); @@ -384,6 +379,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { } private void verifyReadMapping(ActiveRegion region, String... 
reads) { + Assert.assertNotNull(region, "Region was unexpectedly null"); final Set regionReads = new HashSet(); for (SAMRecord read : region.getReads()) { Assert.assertFalse(regionReads.contains(read.getReadName()), "Duplicate reads detected in region " + region + " read " + read.getReadName()); @@ -530,12 +526,11 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { for ( final int start : starts ) { for ( final int nReadsPerLocus : Arrays.asList(1, 2) ) { for ( final int nLoci : Arrays.asList(1, 1000) ) { + final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(reference, nReadsPerLocus, nLoci); + bamBuilder.setReadLength(readLength); + bamBuilder.setSkipNLoci(skips); + bamBuilder.setAlignmentStart(start); for ( EnumSet readStates : allReadStates ) { - final ArtificialBAMBuilder bamBuilder = new ArtificialBAMBuilder(reference, nReadsPerLocus, nLoci); - bamBuilder.setReadLength(readLength); - bamBuilder.setSkipNLoci(skips); - bamBuilder.setAlignmentStart(start); - for ( final GenomeLocSortedSet activeRegions : enumerateActiveRegions(bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd())) { nTests++; if ( nTests < maxTests ) // && nTests == 1238 ) @@ -595,7 +590,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd()) ); - final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions); + final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions, false); walker.setStates(readStates); final TraverseActiveRegions traversal = new TraverseActiveRegions(); @@ -619,8 +614,9 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { alreadySeenReads.add(read.getReadName()); } - Assert.assertEquals(readNamesInRegion.contains(read.getReadName()), shouldBeInRegion, "Region " + region + - " failed contains read check: read " + read + " with span " + readLoc + " should be in region is " + 
shouldBeInRegion + " but I got the opposite"); + String msg = readNamesInRegion.contains(read.getReadName()) == shouldBeInRegion ? "" : "Region " + region + + " failed contains read check: read " + read + " with span " + readLoc + " should be in region is " + shouldBeInRegion + " but I got the opposite"; + Assert.assertEquals(readNamesInRegion.contains(read.getReadName()), shouldBeInRegion, msg); nReadsExpectedInRegion += shouldBeInRegion ? 1 : 0; } @@ -642,7 +638,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { // // --------------------------------------------------------------------------------------------------------- - @Test + @Test(enabled = true && ! DEBUG) public void ensureAllInsertionReadsAreInActiveRegions() { final int readLength = 10; @@ -667,7 +663,7 @@ public class TraverseActiveRegionsUnitTest extends BaseTest { genomeLocParser.createGenomeLoc("1", bamBuilder.getAlignmentStart(), bamBuilder.getAlignmentEnd()) ); - final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions); + final DummyActiveRegionWalker walker = new DummyActiveRegionWalker(activeRegions, false); final TraverseActiveRegions traversal = new TraverseActiveRegions(); final Map activeRegionsMap = getActiveRegions(traversal, walker, intervals, bamBuilder.makeTemporarilyBAMFile().toString()); From 7fd27a5167ca0fd0eda062954565ed19831bc6e9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 22 Jan 2013 15:40:09 -0500 Subject: [PATCH 17/46] Add band pass filtering activity profile -- Based on the new incremental activity profile -- Unit Tested! 
Fixed a few bugs with the old band pass filter -- Expand IncrementalActivityProfileUnitTest to test the band pass filter as well for basic properties -- Add new UnitTest for BandPassIncrementalActivityProfile -- Added normalizeFromRealSpace to MathUtils -- Cleanup unused code in new activity profiles --- .../broadinstitute/sting/utils/MathUtils.java | 24 +++ .../BandPassIncrementalActivityProfile.java | 127 +++++++++++++ .../IncrementalActivityProfile.java | 41 ++--- ...assIncrementalActivityProfileUnitTest.java | 167 ++++++++++++++++++ .../IncrementalActivityProfileUnitTest.java | 27 ++- 5 files changed, 345 insertions(+), 41 deletions(-) create mode 100644 public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java create mode 100644 public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index 7462416bc..f1f0ab9b1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -634,6 +634,30 @@ public class MathUtils { return normalizeFromLog10(array, false); } + /** + * normalizes the real-space probability array. + * + * Does not assume anything about the values in the array, beyond that no elements are below 0. It's ok + * to have values in the array of > 1, or have the sum go above 1.
+ * + * @param array the array to be normalized + * @return a newly allocated array corresponding the normalized values in array + */ + @Requires("array != null") + @Ensures({"result != null"}) + public static double[] normalizeFromRealSpace(final double[] array) { + if ( array.length == 0 ) + return array; + + final double sum = sum(array); + final double[] normalized = new double[array.length]; + if ( sum < 0.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum); + for ( int i = 0; i < array.length; i++ ) { + normalized[i] = array[i] / sum; + } + return normalized; + } + public static int maxElementIndex(final double[] array) { return maxElementIndex(array, array.length); } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java new file mode 100644 index 000000000..805a0b60a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.activeregion; + +import com.google.java.contract.Ensures; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; + +import java.util.Collection; +import java.util.LinkedList; + +/** + * A band pass filtering version of the activity profile + * + * Applies a band pass filter with a Gaussian kernel to the input state probabilities to smooth + * them out of an interval + * + * @author Mark DePristo + * @since 2011 + */ +public class BandPassIncrementalActivityProfile extends IncrementalActivityProfile { + public static final int DEFAULT_FILTER_SIZE = 80; + + private final int filterSize; + private final double[] GaussianKernel; + + /** + * Create a band pass activity profile with the default band size + * @param parser our genome loc parser + */ + public BandPassIncrementalActivityProfile(final GenomeLocParser parser) { + this(parser, DEFAULT_FILTER_SIZE); + } + + /** + * Create an activity profile that implements a band pass filter on the states + * @param parser our genome loc parser + * @param filterSize the size (in bp) of the band pass filter. The filter size is the number of bp to each + * side that are included in the band. So a filter size of 1 implies that the actual band + * is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc. 
+ */ + public BandPassIncrementalActivityProfile(final GenomeLocParser parser, final int filterSize) { + super(parser); + + if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize); + + // setup the Gaussian kernel for the band pass filter + this.filterSize = filterSize; + final double[] kernel = new double[getBandSize()]; + for( int iii = 0; iii < 2* filterSize + 1; iii++ ) { + kernel[iii] = MathUtils.NormalDistribution(filterSize, 55.0, iii); + } + this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel); + } + + /** + * Get the size (in bp) of the band pass filter + * @return a positive integer + */ + @Ensures("result >= 1") + public int getBandSize() { + return 2 * filterSize + 1; + } + + /** + * Get the filter size (which is the size of each wing of the band, minus the center point) + * @return a positive integer + */ + @Ensures("result >= 0") + public int getFilteredSize() { + return filterSize; + } + + /** + * Get the kernel of this band pass filter. 
Do not modify returned result + * @return the kernel used in this band pass filter + */ + @Ensures({"result != null", "result.length == getBandSize()"}) + protected double[] getKernel() { + return GaussianKernel; + } + + /** + * Band pass the probabilities in the ActivityProfile, producing a new profile that's band pass filtered + * @return a new double[] that's the band-pass filtered version of this profile + */ + @Override + protected Collection processState(final ActivityProfileState justAddedState) { + final Collection states = new LinkedList(); + + for ( final ActivityProfileState superState : super.processState(justAddedState) ) { + for( int jjj = -filterSize; jjj <= filterSize; jjj++ ) { + final GenomeLoc loc = getLocForOffset(justAddedState.getLoc(), jjj); + if ( loc != null ) { + final double newProb = superState.isActiveProb * GaussianKernel[jjj + filterSize]; + states.add(new ActivityProfileState(loc, newProb)); + } + } + } + + return states; + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java index 3cbad54e9..1292b3176 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java @@ -51,35 +51,13 @@ public class IncrementalActivityProfile { /** * Create a new empty IncrementalActivityProfile - * @param parser the parser we can use to create genome locs + * @param parser the parser we can use to create genome locs, cannot be null */ public IncrementalActivityProfile(final GenomeLocParser parser) { - this(parser, new ArrayList(), null); - } + if ( parser == null ) throw new IllegalArgumentException("parser cannot be null"); - /** - * Create a new IncrementalActivityProfile using state list (not copied) and starting at regionStartLoc - * @param parser the parser we 
can use to create genome locs - */ - @Deprecated - protected IncrementalActivityProfile(final GenomeLocParser parser, final List stateList, final GenomeLoc regionStartLoc) { this.parser = parser; - this.stateList = stateList; - this.regionStartLoc = regionStartLoc; - } - - /** - * Create a profile of the same class as this object containing just the provided stateList - * - * Used by clients to create derived activity profiles (such as ones without the starting X - * sites because they've been removed in an ActiveRegion) of the same class. - * - * @param isActiveList the active results list to use in the derived instance - * @return a freshly allocated data set - */ - @Deprecated - protected IncrementalActivityProfile createDerivedProfile(final List isActiveList) { - return new IncrementalActivityProfile(parser, isActiveList, regionStartLoc); + this.stateList = new ArrayList(); } @Override @@ -150,6 +128,19 @@ public class IncrementalActivityProfile { return stateList; } + /** + * Get the probabilities of the states as a single linear array of doubles + * @return a non-null array + */ + @Ensures("result != null") + protected double[] getProbabilitiesAsArray() { + final double[] probs = new double[getStateList().size()]; + int i = 0; + for ( final ActivityProfileState state : getStateList() ) + probs[i++] = state.isActiveProb; + return probs; + } + /** * Helper function that gets the genome loc for a site offset from relativeLoc, protecting ourselves from * falling off the edge of the contig. 
diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java new file mode 100644 index 000000000..be90353b3 --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2012 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + * THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.utils.activeregion; + + +// the imports for unit testing. 
+ + +import net.sf.picard.reference.ReferenceSequenceFile; +import org.apache.commons.lang.ArrayUtils; +import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; +import org.testng.Assert; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.util.*; + + +public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { + private GenomeLocParser genomeLocParser; + + @BeforeClass + public void init() throws FileNotFoundException { + // sequence + ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); + genomeLocParser = new GenomeLocParser(seq); + } + + @DataProvider(name = "BandPassBasicTest") + public Object[][] makeBandPassTest() { + final List tests = new LinkedList(); + + for ( int start : Arrays.asList(1, 10, 100, 1000) ) { + for ( boolean precedingIsActive : Arrays.asList(true, false) ) { + for ( int precedingSites: Arrays.asList(0, 1, 10, 100) ) { + for ( int bandPassSize : Arrays.asList(0, 1, 10, 100) ) { +// for ( int start : Arrays.asList(10) ) { +// for ( boolean precedingIsActive : Arrays.asList(false) ) { +// for ( int precedingSites: Arrays.asList(0) ) { +// for ( int bandPassSize : Arrays.asList(1) ) { + tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize }); + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "BandPassBasicTest") + public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) { + final BandPassIncrementalActivityProfile profile = new 
BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize); + + final int expectedBandSize = bandPassSize * 2 + 1; + Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size"); + + final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); + final double precedingProb = precedingIsActive ? 1.0 : 0.0; + for ( int i = 0; i < nPrecedingSites; i++ ) { + final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, i + start); + final ActivityProfileState state = new ActivityProfileState(loc, precedingProb); + profile.add(state); + } + + final GenomeLoc nextLoc = genomeLocParser.createGenomeLoc(contig, nPrecedingSites + start); + profile.add(new ActivityProfileState(nextLoc, 1.0)); + + if ( precedingIsActive == false && nPrecedingSites >= bandPassSize && bandPassSize < start ) { + // we have enough space that all probs fall on the genome + final double[] probs = profile.getProbabilitiesAsArray(); + Assert.assertEquals(MathUtils.sum(probs), 1.0 * (nPrecedingSites * precedingProb + 1), 1e-3, "Activity profile doesn't sum to number of non-zero prob states"); + } + } + + private double[] bandPassInOnePass(final BandPassIncrementalActivityProfile profile, final double[] activeProbArray) { + final double[] bandPassProbArray = new double[activeProbArray.length]; + + // apply the band pass filter for activeProbArray into filteredProbArray + final double[] GaussianKernel = profile.getKernel(); + for( int iii = 0; iii < activeProbArray.length; iii++ ) { + final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(profile.getFilteredSize() - iii, 0), Math.min(GaussianKernel.length, profile.getFilteredSize() + activeProbArray.length - iii)); + final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - profile.getFilteredSize()), Math.min(activeProbArray.length,iii + profile.getFilteredSize() + 1)); + bandPassProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, 
kernel); + } + + return bandPassProbArray; + } + + @DataProvider(name = "BandPassComposition") + public Object[][] makeBandPassComposition() { + final List tests = new LinkedList(); + + for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassIncrementalActivityProfile.DEFAULT_FILTER_SIZE) ) { + for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) { + tests.add(new Object[]{ bandPassSize, integrationLength }); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test( dataProvider = "BandPassComposition") + public void testBandPassComposition(final int bandPassSize, final int integrationLength) { + final int start = 1; + final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize); + final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2]; + + // add a buffer so that we can get all of the band pass values + final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); + int pos = start; + int rawProbsOffset = 0; + for ( int i = 0; i < bandPassSize; i++ ) { + final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, pos++); + final ActivityProfileState state = new ActivityProfileState(loc, 0.0); + profile.add(state); + rawActiveProbs[rawProbsOffset++] = 0.0; + rawActiveProbs[rawActiveProbs.length - rawProbsOffset] = 0.0; + } + + for ( int i = 0; i < integrationLength; i++ ) { + final GenomeLoc nextLoc = genomeLocParser.createGenomeLoc(contig, pos++); + profile.add(new ActivityProfileState(nextLoc, 1.0)); + rawActiveProbs[rawProbsOffset++] = 1.0; + + for ( int j = 0; j < profile.size(); j++ ) { + Assert.assertTrue(profile.getStateList().get(j).isActiveProb >= 0.0, "State probability < 0 at " + j); + Assert.assertTrue(profile.getStateList().get(j).isActiveProb <= 1.0 + 1e-3, "State probability > 1 at " + j); + } + } + + final double[] expectedProbs = bandPassInOnePass(profile, rawActiveProbs); + for ( int j = 0; j < 
profile.size(); j++ ) { + Assert.assertEquals(profile.getStateList().get(j).isActiveProb, expectedProbs[j], "State probability not expected at " + j); + } + } +} diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java index 16b9b1877..64065029c 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java @@ -85,7 +85,9 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { public IncrementalActivityProfile makeProfile() { switch ( type ) { case Base: return new IncrementalActivityProfile(genomeLocParser); - case BandPass: //return new BandPassActivityProfile(genomeLocParser); + case BandPass: + // zero size => equivalent to IncrementalActivityProfile + return new BandPassIncrementalActivityProfile(genomeLocParser, 0); default: throw new IllegalStateException(type.toString()); } } @@ -111,13 +113,11 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { @DataProvider(name = "BasicActivityProfileTestProvider") public Object[][] makeQualIntervalTestProvider() { for ( final ProfileType type : ProfileType.values() ) { - if ( type != ProfileType.BandPass ) { // todo -- re-enable - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0), true, 0, 1); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0), true, 0, 1, 2); - new BasicActivityProfileTestProvider(type, Arrays.asList(0.0, 1.0), false, 0, 1, 2); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); - } + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0), true, 0, 1); + new 
BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0), true, 0, 1, 2); + new BasicActivityProfileTestProvider(type, Arrays.asList(0.0, 1.0), false, 0, 1, 2); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); + new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); } return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); @@ -135,20 +135,15 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { double p = cfg.probs.get(i); GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); profile.add(new ActivityProfileState(loc, p)); - Assert.assertFalse(profile.isEmpty()); + Assert.assertFalse(profile.isEmpty(), "Profile shouldn't be empty after adding a state"); } - Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); + Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ), "Start loc should be the start of the region"); - Assert.assertEquals(profile.size(), cfg.probs.size()); + Assert.assertEquals(profile.size(), cfg.probs.size(), "Should have exactly the number of states we expected to add"); assertProbsAreEqual(profile.stateList, cfg.probs); // TODO -- reanble tests //assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); - - Assert.assertEquals(profile.createDerivedProfile(profile.stateList).getClass(), profile.getClass()); - - final List empty = new LinkedList(); - Assert.assertEquals(profile.createDerivedProfile(empty).size(), 0); } private void assertRegionsAreEqual(List actual, List expected) { From e917f56df8e248dc472f2516a43acb280fbfb3ed Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Tue, 22 Jan 2013 
16:02:09 -0500 Subject: [PATCH 18/46] Remove old ActivityProfile and old BandPassActivityProfile --- .../utils/activeregion/ActivityProfile.java | 243 ------------------ .../activeregion/BandPassActivityProfile.java | 84 ------ .../activeregion/ActivityProfileUnitTest.java | 166 ------------ 3 files changed, 493 deletions(-) delete mode 100644 public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java delete mode 100644 public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java delete mode 100644 public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java deleted file mode 100644 index 8d6012fac..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Copyright (c) 2012 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -package org.broadinstitute.sting.utils.activeregion; - -import com.google.java.contract.Ensures; -import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Class holding information about per-base activity scores for the - * active region traversal - * - * @author Mark DePristo - * @since Date created - */ -public class ActivityProfile { - private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author - - protected final List isActiveList; - protected final GenomeLocParser parser; - - protected GenomeLoc regionStartLoc = null; - protected GenomeLoc regionStopLoc = null; - - public ActivityProfile(final GenomeLocParser parser) { - this(parser, new ArrayList(), null); - } - - protected ActivityProfile(final GenomeLocParser parser, final List isActiveList, final GenomeLoc regionStartLoc) { - this.parser = parser; - this.isActiveList = isActiveList; - this.regionStartLoc = regionStartLoc; - } - - /** - * Create a profile of the same class as this object containing just the provided stateList - * - * Used by clients to create derived activity profiles (such as ones without the starting X - * sites because they've been removed in an ActiveRegion) of the same class. 
- * - * @param isActiveList the active results list to use in the derived instance - * @return a freshly allocated data set - */ - protected ActivityProfile createDerivedProfile(final List isActiveList) { - return new ActivityProfile(parser, isActiveList, regionStartLoc); - } - - @Override - public String toString() { - return "ActivityProfile{" + - "start=" + regionStartLoc + - ", stop=" + regionStopLoc + - '}'; - } - - /** - * Add the next ActivityProfileState to this profile. - * - * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown - * - * @param result a well-formed ActivityProfileState result to incorporate into this profile - */ - @Requires("result != null") - public void add(final ActivityProfileState result) { - final GenomeLoc loc = result.getLoc(); - - if ( regionStartLoc == null ) { - regionStartLoc = loc; - regionStopLoc = loc; - } else { - if ( regionStopLoc.getStart() != loc.getStart() - 1 ) - throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc ); - regionStopLoc = loc; - } - - isActiveList.add(result); - } - - /** - * How many profile results are in this profile? - * @return the number of profile results - */ - @Ensures("result >= 0") - public int size() { - return isActiveList.size(); - } - - /** - * Is this profile empty? - * @return true if the profile is empty - */ - @Ensures("isEmpty() == (size() == 0)") - public boolean isEmpty() { - return isActiveList.isEmpty(); - } - - /** - * Get the list of active profile results in this object - * @return a non-null, ordered list of active profile results - */ - @Ensures("result != null") - protected List getActiveList() { - return isActiveList; - } - - /** - * Finalize the probabilities in this activity profile, preparing it for a future - * call to createActiveRegions. This function returns a new profile with cleaned - * up activity estimates. 
- * - * This code looks at the current list of states, cleans them up, and then returns - * a newly allocated ActivityProfile - * - * @return a newly allocated ActivityProfile based on the current state of this - * profile, but that has been "finalized" as required by the profile implementation - */ - public ActivityProfile finalizeProfile() { - int iii = 0; - for( final double prob : finalizeProbabilities() ) { - final ActivityProfileState result = isActiveList.get(iii++); - result.isActiveProb = prob; - result.resultState = ActivityProfileState.Type.NONE; - result.resultValue = null; - } - - return createDerivedProfile(isActiveList); - } - - public double[] finalizeProbabilities() { - final double[] activeProbArray = new double[isActiveList.size()]; - - int iii = 0; - for( final ActivityProfileState result : isActiveList ) { - activeProbArray[iii++] = result.isActiveProb; - } - - iii = 0; - for( final ActivityProfileState result : isActiveList ) { - if( result.resultState.equals(ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups - final int numHQClips = result.resultValue.intValue(); - for( int jjj = Math.max(0, iii - numHQClips); jjj < Math.min(activeProbArray.length, iii+numHQClips); jjj++ ) { - activeProbArray[jjj] = Math.max(activeProbArray[jjj], activeProbArray[iii]); - } - } - iii++; - } - - return activeProbArray; - } - - /** - * Partition this profile into active regions - * @param activeRegionExtension the amount of margin overlap in the active region - * @return the list of active regions - */ - public List createActiveRegions( final int activeRegionExtension, final int maxRegionSize ) { - final ArrayList returnList = new ArrayList(); - - if( isActiveList.size() == 0 ) { - // no elements in the active list, just return an empty one - return Collections.emptyList(); - } else if( isActiveList.size() == 1 ) { - // there's a single element, it's either 
active or inactive - boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; - returnList.addAll(createActiveRegion(isActive, 0, 0, activeRegionExtension, maxRegionSize)); - } else { - // there are 2+ elements, divide these up into regions - boolean isActive = isActiveList.get(0).isActiveProb > ACTIVE_PROB_THRESHOLD; - int curStart = 0; - for(int iii = 1; iii < isActiveList.size(); iii++ ) { - final boolean thisStatus = isActiveList.get(iii).isActiveProb > ACTIVE_PROB_THRESHOLD; - if( isActive != thisStatus ) { - returnList.addAll(createActiveRegion(isActive, curStart, iii - 1, activeRegionExtension, maxRegionSize)); - isActive = thisStatus; - curStart = iii; - } - } - returnList.addAll(createActiveRegion(isActive, curStart, isActiveList.size() - 1, activeRegionExtension, maxRegionSize)); // close out the current active region - } - return returnList; - } - - /** - * Helper routine to create an active region based on our current start and end offsets - * @param isActive should the region be active? 
- * @param curStart offset (0-based) from the start of this region - * @param curEnd offset (0-based) from the start of this region - * @param activeRegionExtension the amount of margin overlap in the active region - * @return a fully initialized ActiveRegion with the above properties - */ - private List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) { - return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); - } - - private List createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List returnList) { - if( !isActive || curEnd - curStart < maxRegionSize ) { - final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd); - returnList.add(new ActiveRegion(loc, isActive, parser, activeRegionExtension)); - return returnList; - } - // find the best place to break up the large active region - Double minProb = Double.MAX_VALUE; - int cutPoint = -1; - - final int size = curEnd - curStart + 1; - for( int iii = curStart + (int)(size*0.15); iii < curEnd - (int)(size*0.15); iii++ ) { - if( isActiveList.get(iii).isActiveProb < minProb ) { minProb = isActiveList.get(iii).isActiveProb; cutPoint = iii; } - } - final List leftList = createActiveRegion(isActive, curStart, cutPoint, activeRegionExtension, maxRegionSize, new ArrayList()); - final List rightList = createActiveRegion(isActive, cutPoint+1, curEnd, activeRegionExtension, maxRegionSize, new ArrayList()); - returnList.addAll( leftList ); - returnList.addAll( rightList ); - return returnList; - } -} diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java deleted file mode 
100644 index cef700419..000000000 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java +++ /dev/null @@ -1,84 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-*/ - -package org.broadinstitute.sting.utils.activeregion; - -import org.apache.commons.lang.ArrayUtils; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.MathUtils; - -import java.util.ArrayList; -import java.util.List; - -/** - * - * - * @author Mark DePristo - * @since 2011 - */ -public class BandPassActivityProfile extends ActivityProfile { - private static final int FILTER_SIZE = 80; - private static final double[] GaussianKernel; - - static { - GaussianKernel = new double[2*FILTER_SIZE + 1]; - for( int iii = 0; iii < 2*FILTER_SIZE + 1; iii++ ) { - GaussianKernel[iii] = MathUtils.NormalDistribution(FILTER_SIZE, 55.0, iii); - } - } - - public BandPassActivityProfile(final GenomeLocParser parser) { - this(parser, new ArrayList(), null); - } - - public BandPassActivityProfile(final GenomeLocParser parser, final List isActiveList, final GenomeLoc regionStartLoc) { - super(parser, isActiveList, regionStartLoc); - } - - @Override - protected ActivityProfile createDerivedProfile(List isActiveList) { - return new BandPassActivityProfile(parser, isActiveList, regionStartLoc); - } - - /** - * Band pass the probabilities in the ActivityProfile, producing a new profile that's band pass filtered - * @return a new double[] that's the band-pass filtered version of this profile - */ - @Override - public double[] finalizeProbabilities() { - final double[] activeProbArray = super.finalizeProbabilities(); - final double[] bandPassProbArray = new double[activeProbArray.length]; - - // apply the band pass filter for activeProbArray into filteredProbArray - for( int iii = 0; iii < activeProbArray.length; iii++ ) { - final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii)); - final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), 
Math.min(activeProbArray.length,iii + FILTER_SIZE + 1)); - bandPassProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel); - } - - return bandPassProbArray; - } -} diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java deleted file mode 100644 index 430e0b5c6..000000000 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ /dev/null @@ -1,166 +0,0 @@ -/* -* Copyright (c) 2012 The Broad Institute -* -* Permission is hereby granted, free of charge, to any person -* obtaining a copy of this software and associated documentation -* files (the "Software"), to deal in the Software without -* restriction, including without limitation the rights to use, -* copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the -* Software is furnished to do so, subject to the following -* conditions: -* -* The above copyright notice and this permission notice shall be -* included in all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR -* THE USE OR OTHER DEALINGS IN THE SOFTWARE. -*/ - -package org.broadinstitute.sting.utils.activeregion; - - -// the imports for unit testing. 
- - -import net.sf.picard.reference.ReferenceSequenceFile; -import org.broadinstitute.sting.BaseTest; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; -import org.testng.Assert; -import org.testng.annotations.BeforeClass; -import org.testng.annotations.DataProvider; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.FileNotFoundException; -import java.util.*; - - -public class ActivityProfileUnitTest extends BaseTest { - private GenomeLocParser genomeLocParser; - private GenomeLoc startLoc; - - @BeforeClass - public void init() throws FileNotFoundException { - // sequence - ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); - genomeLocParser = new GenomeLocParser(seq); - startLoc = genomeLocParser.createGenomeLoc("chr1", 1, 1, 100); - } - - // -------------------------------------------------------------------------------- - // - // Basic tests Provider - // - // -------------------------------------------------------------------------------- - - private class BasicActivityProfileTestProvider extends TestDataProvider { - List probs; - List expectedRegions; - int extension = 0; - GenomeLoc regionStart = startLoc; - final ProfileType type; - - public BasicActivityProfileTestProvider(final ProfileType type, final List probs, boolean startActive, int ... 
startsAndStops) { - super(BasicActivityProfileTestProvider.class); - this.type = type; - this.probs = probs; - this.expectedRegions = toRegions(startActive, startsAndStops); - setName(getName()); - } - - private String getName() { - return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions)); - } - - public ActivityProfile makeProfile() { - switch ( type ) { - case Base: return new ActivityProfile(genomeLocParser); - case BandPass: return new BandPassActivityProfile(genomeLocParser); - default: throw new IllegalStateException(type.toString()); - } - } - - private List toRegions(boolean isActive, int[] startsAndStops) { - List l = new ArrayList(); - for ( int i = 0; i < startsAndStops.length - 1; i++) { - int start = regionStart.getStart() + startsAndStops[i]; - int end = regionStart.getStart() + startsAndStops[i+1] - 1; - GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); - ActiveRegion r = new ActiveRegion(activeLoc, isActive, genomeLocParser, extension); - l.add(r); - isActive = ! 
isActive; - } - return l; - } - } - - private enum ProfileType { - Base, BandPass - } - - @DataProvider(name = "BasicActivityProfileTestProvider") - public Object[][] makeQualIntervalTestProvider() { - for ( final ProfileType type : ProfileType.values() ) { - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0), true, 0, 1); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0), true, 0, 1, 2); - new BasicActivityProfileTestProvider(type, Arrays.asList(0.0, 1.0), false, 0, 1, 2); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 0.0, 1.0), true, 0, 1, 2, 3); - new BasicActivityProfileTestProvider(type, Arrays.asList(1.0, 1.0, 1.0), true, 0, 3); - } - - return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); - } - - @Test(dataProvider = "BasicActivityProfileTestProvider") - public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { - ActivityProfile profile = cfg.makeProfile(); - - Assert.assertTrue(profile.isEmpty()); - - Assert.assertEquals(profile.parser, genomeLocParser); - - for ( int i = 0; i < cfg.probs.size(); i++ ) { - double p = cfg.probs.get(i); - GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i); - profile.add(new ActivityProfileState(loc, p)); - Assert.assertFalse(profile.isEmpty()); - } - Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() )); - - Assert.assertEquals(profile.size(), cfg.probs.size()); - assertProbsAreEqual(profile.isActiveList, cfg.probs); - - assertRegionsAreEqual(profile.createActiveRegions(0, 100), cfg.expectedRegions); - - Assert.assertEquals(profile.createDerivedProfile(profile.isActiveList).getClass(), profile.getClass()); - - final List empty = new LinkedList(); - Assert.assertEquals(profile.createDerivedProfile(empty).size(), 0); - } - - 
private void assertRegionsAreEqual(List actual, List expected) { - Assert.assertEquals(actual.size(), expected.size()); - for ( int i = 0; i < actual.size(); i++ ) { - Assert.assertTrue(actual.get(i).equalExceptReads(expected.get(i))); - } - } - - private void assertProbsAreEqual(List actual, List expected) { - Assert.assertEquals(actual.size(), expected.size()); - for ( int i = 0; i < actual.size(); i++ ) { - Assert.assertEquals(actual.get(i).isActiveProb, expected.get(i)); - } - } - - // todo -- test extensions -} \ No newline at end of file From 8e8126506b81fccf34a06aec8dcad873f2f28e09 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 09:44:46 -0500 Subject: [PATCH 19/46] Renaming IncrementalActivityProfile to ActivityProfile -- Also adding a work in progress functionality to make it easy to visualize activity profiles and active regions in IGV --- .../traversals/TraverseActiveRegions.java | 12 +++++++++--- .../gatk/walkers/ActiveRegionWalker.java | 2 ++ .../activeregion/ActiveRegionReadState.java | 5 ++--- ...ivityProfile.java => ActivityProfile.java} | 10 +++++----- .../activeregion/ActivityProfileState.java | 3 ++- ...file.java => BandPassActivityProfile.java} | 19 ++++++++++++++++--- ...Test.java => ActivityProfileUnitTest.java} | 18 +++++++++--------- ...a => BandPassActivityProfileUnitTest.java} | 11 +++++------ 8 files changed, 50 insertions(+), 30 deletions(-) rename public/java/src/org/broadinstitute/sting/utils/activeregion/{IncrementalActivityProfile.java => ActivityProfile.java} (98%) rename public/java/src/org/broadinstitute/sting/utils/activeregion/{BandPassIncrementalActivityProfile.java => BandPassActivityProfile.java} (87%) rename public/java/test/org/broadinstitute/sting/utils/activeregion/{IncrementalActivityProfileUnitTest.java => ActivityProfileUnitTest.java} (95%) rename public/java/test/org/broadinstitute/sting/utils/activeregion/{BandPassIncrementalActivityProfileUnitTest.java => BandPassActivityProfileUnitTest.java} 
(93%) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 436edbdf1..071b4d806 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -77,7 +77,7 @@ public class TraverseActiveRegions extends TraversalEngine myReads = new LinkedList(); private GenomeLoc spanOfLastReadSeen = null; - private IncrementalActivityProfile activityProfile = null; + private ActivityProfile activityProfile = null; int maxReadsInMemory = 0; @Override @@ -94,7 +94,7 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile); workQueue.addAll(activeRegions); - if ( logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." ); + if ( ! activeRegions.isEmpty() && logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." 
); } if ( walker.activeRegionOutStream != null ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 85d7c8293..e268bba0d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -61,6 +61,8 @@ import java.util.*; @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { + @Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results bed file", required = false) + public PrintStream activityProfileOutStream = null; @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) public PrintStream activeRegionOutStream = null; diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java index d9b458f51..5da88cb6d 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java @@ -26,12 +26,11 @@ package org.broadinstitute.sting.utils.activeregion; /** - * Created with IntelliJ IDEA. 
+ * Describes how a read relates to an assigned ActiveRegion + * * User: thibault * Date: 11/26/12 * Time: 2:35 PM - * - * Describes how a read relates to an assigned ActiveRegion */ public enum ActiveRegionReadState { PRIMARY, // This is the read's primary region diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java similarity index 98% rename from public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java rename to public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 1292b3176..a863d695e 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -39,7 +39,7 @@ import java.util.*; * @author Mark DePristo * @since Date created */ -public class IncrementalActivityProfile { +public class ActivityProfile { private final static int MAX_PROB_PROPOGATION_DISTANCE = 10; private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author @@ -50,10 +50,10 @@ public class IncrementalActivityProfile { protected GenomeLoc regionStopLoc = null; /** - * Create a new empty IncrementalActivityProfile + * Create a new empty ActivityProfile * @param parser the parser we can use to create genome locs, cannot be null */ - public IncrementalActivityProfile(final GenomeLocParser parser) { + public ActivityProfile(final GenomeLocParser parser) { if ( parser == null ) throw new IllegalArgumentException("parser cannot be null"); this.parser = parser; @@ -79,7 +79,7 @@ public class IncrementalActivityProfile { * @return a positive integer distance in bp */ @Ensures("result >= 0") - public int getMaxProbPropogationDistance() { + public int getMaxProbPropagationDistance() { return MAX_PROB_PROPOGATION_DISTANCE; } @@ 
-377,6 +377,6 @@ public class IncrementalActivityProfile { } // we're one past the end, so i must be decremented - return forceConversion || i + getMaxProbPropogationDistance() < stateList.size() ? i - 1 : -1; + return forceConversion || i + getMaxProbPropagationDistance() < stateList.size() ? i - 1 : -1; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java index df21672a9..272596be3 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java @@ -30,7 +30,8 @@ import com.google.java.contract.Requires; import org.broadinstitute.sting.utils.GenomeLoc; /** - * Created with IntelliJ IDEA. + * The state of an active region walker's isActive call at a specific locus in the genome + * * User: rpoplin * Date: 7/27/12 */ diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java similarity index 87% rename from public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java rename to public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java index 805a0b60a..1a8bac086 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java @@ -42,7 +42,7 @@ import java.util.LinkedList; * @author Mark DePristo * @since 2011 */ -public class BandPassIncrementalActivityProfile extends IncrementalActivityProfile { +public class BandPassActivityProfile extends ActivityProfile { public static final int DEFAULT_FILTER_SIZE = 80; private final int filterSize; @@ -52,7 +52,7 @@ 
public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi * Create a band pass activity profile with the default band size * @param parser our genome loc parser */ - public BandPassIncrementalActivityProfile(final GenomeLocParser parser) { + public BandPassActivityProfile(final GenomeLocParser parser) { this(parser, DEFAULT_FILTER_SIZE); } @@ -63,7 +63,7 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi * side that are included in the band. So a filter size of 1 implies that the actual band * is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc. */ - public BandPassIncrementalActivityProfile(final GenomeLocParser parser, final int filterSize) { + public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) { super(parser); if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize); @@ -77,6 +77,19 @@ public class BandPassIncrementalActivityProfile extends IncrementalActivityProfi this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel); } + /** + * Our maximize propagation distance is whatever our parent's is, plus our filter size + * + * Stops the profile from interpreting sites that aren't yet fully determined due to + * propagation of the probabilities. 
+ * + * @return the distance in bp we might move our probabilities around for some site i + */ + @Override + public int getMaxProbPropagationDistance() { + return super.getMaxProbPropagationDistance() + filterSize; + } + /** * Get the size (in bp) of the band pass filter * @return a positive integer diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java similarity index 95% rename from public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 64065029c..7cfc5ebb7 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/IncrementalActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -45,7 +45,7 @@ import java.io.FileNotFoundException; import java.util.*; -public class IncrementalActivityProfileUnitTest extends BaseTest { +public class ActivityProfileUnitTest extends BaseTest { private GenomeLocParser genomeLocParser; private GenomeLoc startLoc; @@ -82,12 +82,12 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { return String.format("type=%s probs=%s expectedRegions=%s", type, Utils.join(",", probs), Utils.join(",", expectedRegions)); } - public IncrementalActivityProfile makeProfile() { + public ActivityProfile makeProfile() { switch ( type ) { - case Base: return new IncrementalActivityProfile(genomeLocParser); + case Base: return new ActivityProfile(genomeLocParser); case BandPass: - // zero size => equivalent to IncrementalActivityProfile - return new BandPassIncrementalActivityProfile(genomeLocParser, 0); + // zero size => equivalent to ActivityProfile + return new BandPassActivityProfile(genomeLocParser, 0); default: throw new 
IllegalStateException(type.toString()); } } @@ -125,7 +125,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "BasicActivityProfileTestProvider") public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { - IncrementalActivityProfile profile = cfg.makeProfile(); + ActivityProfile profile = cfg.makeProfile(); Assert.assertTrue(profile.isEmpty()); @@ -228,7 +228,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { @Test(enabled = true, dataProvider = "RegionCreationTests") public void testRegionCreation(final int start, final List probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) { - final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser); + final ActivityProfile profile = new ActivityProfile(genomeLocParser); Assert.assertNotNull(profile.toString()); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); @@ -253,7 +253,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { } for ( int i = 0; i < probs.size(); i++ ) { - if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropogationDistance() < probs.size())) + if ( forceConversion || (i + maxRegionSize + profile.getMaxProbPropagationDistance() < probs.size())) // only require a site to be seen if we are forcing conversion or the site is more than maxRegionSize from the end Assert.assertTrue(seenSites.get(i), "Missed site " + i); } @@ -314,7 +314,7 @@ public class IncrementalActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "SoftClipsTest") public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) { - final IncrementalActivityProfile profile = new IncrementalActivityProfile(genomeLocParser); + final ActivityProfile profile = new ActivityProfile(genomeLocParser); final int contigLength = 
genomeLocParser.getContigs().getSequences().get(0).getSequenceLength(); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java similarity index 93% rename from public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java rename to public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java index be90353b3..a2a85f1d0 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassIncrementalActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -35,7 +35,6 @@ import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.MathUtils; -import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -47,7 +46,7 @@ import java.io.FileNotFoundException; import java.util.*; -public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { +public class BandPassActivityProfileUnitTest extends BaseTest { private GenomeLocParser genomeLocParser; @BeforeClass @@ -80,7 +79,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "BandPassBasicTest") public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) { - final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize); + final BandPassActivityProfile profile = new 
BandPassActivityProfile(genomeLocParser, bandPassSize); final int expectedBandSize = bandPassSize * 2 + 1; Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size"); @@ -103,7 +102,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { } } - private double[] bandPassInOnePass(final BandPassIncrementalActivityProfile profile, final double[] activeProbArray) { + private double[] bandPassInOnePass(final BandPassActivityProfile profile, final double[] activeProbArray) { final double[] bandPassProbArray = new double[activeProbArray.length]; // apply the band pass filter for activeProbArray into filteredProbArray @@ -121,7 +120,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { public Object[][] makeBandPassComposition() { final List tests = new LinkedList(); - for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassIncrementalActivityProfile.DEFAULT_FILTER_SIZE) ) { + for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) { for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) { tests.add(new Object[]{ bandPassSize, integrationLength }); } @@ -133,7 +132,7 @@ public class BandPassIncrementalActivityProfileUnitTest extends BaseTest { @Test( dataProvider = "BandPassComposition") public void testBandPassComposition(final int bandPassSize, final int integrationLength) { final int start = 1; - final BandPassIncrementalActivityProfile profile = new BandPassIncrementalActivityProfile(genomeLocParser, bandPassSize); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize); final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2]; // add a buffer so that we can get all of the band pass values From 8026199e4cfc2cb7dba2b8f9129b7060a7a86ef9 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 09:47:22 -0500 Subject: [PATCH 20/46] Updating md5s for 
CountReadsInActiveRegions and HaplotypeCaller to reflect new activity profile mechanics -- In this process I discovered a few missed sites in the old code. The new approach actually produces better HC results than the previous version. --- .../HaplotypeCallerIntegrationTest.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 939b9873c..41f9ab680 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "b8f7b741445ce6b6ea491c794ce75c17"); + HCTest(CEUTRIO_BAM, "", "c142bc73447c72286ca48f4a4966d9b6"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "a2c63f6e6e51a01019bdbd23125bdb15"); + HCTest(NA12878_BAM, "", "d172eb9447015ea50220c6947be145ea"); } @Test(enabled = false) @@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "c679ae7f04bdfda896b5c046d35e043c"); + "2c56ffc3b7fbbf154ae9ca355780a78f"); } private void HCTestComplexGGA(String bam, String args, String md5) { @@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 
20:119673-119823 -L 20:121408-121538", - "8730a9ebaeecae913dca2fb5a0d4e946"); + "66bd513d25b691a5b0c5084924b4a308"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "d590c8d6d5e58d685401b65a23846893"); + "d0fcbfa2ccce0ca4a2e81f31dc43d79d"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -113,7 +113,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "6c0c441b71848c2eea38ab5e2afe1120"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7e8a6ed62f866fc47c92af0e255ca180"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -135,7 +135,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "29f1125df5ab27cc937a144ae08ac735"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "602aabbbe710ac90b16e474c869e8a86"); } // That problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -146,14 +146,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("31db0a2d9eb07f86e0a89f0d97169072")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c23b589be3072027ff2da93067dbf549")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base 
= String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("add0f4f51969b7caeea99005a7ba1aa4")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a612fe84dd7f80c4ad2d20d27fc6744e")); executeTest("HCTestStructuralIndels: ", spec); } @@ -175,7 +175,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("8a400b0c46f41447fcc35a907e34f384")); + Arrays.asList("0cb9132967fa9811e04f528be9f686dc")); executeTest("HC calling on a ReducedRead BAM", spec); } @@ -183,7 +183,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void testReducedBamWithReadsNotFullySpanningDeletion() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "reduced.readNotFullySpanningDeletion.bam -o %s -L 1:167871297", 1, - Arrays.asList("4e8121dd9dc90478f237bd6ae4d19920")); + Arrays.asList("36a90309dde1a325c274388e302ffaa5")); executeTest("test calling on a ReducedRead BAM where the reads do not fully span a deletion", spec); } } From 09edc6baebe835cce54a9d77caac4406d9c43dd3 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 13:44:40 -0500 Subject: [PATCH 21/46] TraverseActiveRegions now writes out very nice active region and activity profile IGV formatted files --- .../traversals/TraverseActiveRegions.java | 169 +++++++++++++----- .../gatk/walkers/ActiveRegionWalker.java | 20 ++- .../utils/activeregion/ActiveRegion.java | 7 +- 
.../utils/activeregion/ActivityProfile.java | 6 +- 4 files changed, 156 insertions(+), 46 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 071b4d806..ac1c751ca 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -25,6 +25,7 @@ package org.broadinstitute.sting.gatk.traversals; +import com.google.java.contract.Ensures; import com.google.java.contract.Requires; import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; @@ -38,10 +39,12 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.utils.GenomeLoc; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.activeregion.*; import org.broadinstitute.sting.utils.progressmeter.ProgressMeter; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import java.io.PrintStream; import java.util.*; /** @@ -79,26 +82,35 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine 0" + }) + private void printIGVFormatHeader(final PrintStream out, final String graphType, final String ... 
columns ) { + out.printf("#track graphType=%s%n", graphType); + out.printf("Chromosome\tStart\tEnd\tFeature\t%s%n", Utils.join("\t", columns)); + + } + + /** + * Helper function to write out a IGV formatted line to out, at loc, with values + * + * http://www.broadinstitute.org/software/igv/IGV + * + * @param out a non-null PrintStream where we'll write our line + * @param loc the location of values + * @param featureName string name of this feature (see IGV format) + * @param values the floating point values to associate with loc and feature name in out + */ + @Requires({ + "out != null", + "loc != null", + "values.length > 0" + }) + private void printIGVFormatRow(final PrintStream out, final GenomeLoc loc, final String featureName, final double ... values) { + // note that start and stop are 0 based, but the stop is exclusive so we don't subtract 1 + out.printf("%s\t%d\t%d\t%s", loc.getContig(), loc.getStart() - 1, loc.getStop(), featureName); + for ( final double value : values ) + out.print(String.format("\t%.3f", value)); + out.println(); + } + + /** + * Write out activity profile information, if requested by the walker + * + * @param states the states in the current activity profile + */ + @Requires("states != null") + private void writeActivityProfile(final List states) { + if ( walker.activityProfileOutStream != null ) { + initializeOutputStreamsIfNecessary(); + for ( final ActivityProfileState state : states ) { + printIGVFormatRow(walker.activityProfileOutStream, state.getLoc(), "state", state.isActiveProb); + } + } + } + + /** + * Write out each active region to the walker activeRegionOutStream + * + * @param region the region we're currently operating on + */ + @Requires("region != null") + private void writeActiveRegion(final ActiveRegion region) { + if( walker.activeRegionOutStream != null ) { + initializeOutputStreamsIfNecessary(); + printIGVFormatRow(walker.activeRegionOutStream, region.getLocation().getStartLocation(), + "end-marker", 0.0); + 
printIGVFormatRow(walker.activeRegionOutStream, region.getLocation(), + "size=" + region.getLocation().size(), region.isActive ? 1.0 : -1.0); + } + } + + // ------------------------------------------------------------------------------------- // // Functions to process active regions that are ready for map / reduce calls @@ -349,26 +459,6 @@ public class TraverseActiveRegions extends TraversalEngine walker ) { - // Just want to output the active regions to a file, not actually process them - for( final ActiveRegion activeRegion : workQueue ) { - if( activeRegion.isActive ) { - walker.activeRegionOutStream.println( activeRegion.getLocation() ); - } - } } /** @@ -384,23 +474,20 @@ public class TraverseActiveRegions extends TraversalEngine walker) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index e268bba0d..24e512a7b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -61,10 +61,26 @@ import java.util.*; @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { - @Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results bed file", required = false) + /** + * If provided, this walker will write out its activity profile (per bp probabilities of being active) + * to this file in the IGV formatted TAB deliminated output: + * + * http://www.broadinstitute.org/software/igv/IGV + * + * Intended to make debugging the activity profile calculations easier + */ + @Output(fullName="activityProfileOut", shortName="APO", doc="Output the raw activity profile results in IGV format", required = 
false) public PrintStream activityProfileOutStream = null; - @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this interval list file", required = false) + /** + * If provided, this walker will write out its active and inactive regions + * to this file in the IGV formatted TAB deliminated output: + * + * http://www.broadinstitute.org/software/igv/IGV + * + * Intended to make debugging the active region calculations easier + */ + @Output(fullName="activeRegionOut", shortName="ARO", doc="Output the active region to this IGV formatted file", required = false) public PrintStream activeRegionOutStream = null; @Input(fullName="activeRegionIn", shortName="AR", doc="Use this interval list file as the active regions to process", required = false) diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 8f04c09cb..66485c8cf 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.ArrayList; +import java.util.List; /** * Created by IntelliJ IDEA. 
@@ -44,6 +45,7 @@ import java.util.ArrayList; public class ActiveRegion implements HasGenomeLocation { private final ArrayList reads = new ArrayList(); + private final List supportingStates; private final GenomeLoc activeRegionLoc; private final GenomeLoc extendedLoc; private final int extension; @@ -51,8 +53,9 @@ public class ActiveRegion implements HasGenomeLocation { private final GenomeLocParser genomeLocParser; public final boolean isActive; - public ActiveRegion( final GenomeLoc activeRegionLoc, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) { + public ActiveRegion( final GenomeLoc activeRegionLoc, final List supportingStates, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) { this.activeRegionLoc = activeRegionLoc; + this.supportingStates = new ArrayList(supportingStates); this.isActive = isActive; this.genomeLocParser = genomeLocParser; this.extension = extension; @@ -112,6 +115,8 @@ public class ActiveRegion implements HasGenomeLocation { public GenomeLoc getExtendedLoc() { return extendedLoc; } public GenomeLoc getReferenceLoc() { return fullExtentReferenceLoc; } + public List getSupportingStates() { return supportingStates; } + public int getExtension() { return extension; } public int size() { return reads.size(); } public void clearReads() { reads.clear(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index a863d695e..ab9095106 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -336,7 +336,9 @@ public class ActivityProfile { return null; // we need to create the active region, and clip out the states we're extracting from this profile - stateList.subList(0, offsetOfNextRegionEnd + 1).clear(); + final List sub = 
stateList.subList(0, offsetOfNextRegionEnd + 1); + final List supportingStates = new ArrayList(sub); + sub.clear(); // update the start and stop locations as necessary if ( stateList.isEmpty() ) { @@ -345,7 +347,7 @@ public class ActivityProfile { regionStartLoc = stateList.get(0).getLoc(); } final GenomeLoc regionLoc = parser.createGenomeLoc(first.getLoc().getContig(), first.getLoc().getStart(), first.getLoc().getStart() + offsetOfNextRegionEnd); - return new ActiveRegion(regionLoc, isActiveRegion, parser, activeRegionExtension); + return new ActiveRegion(regionLoc, supportingStates, isActiveRegion, parser, activeRegionExtension); } /** From ee8039bf2559d7257f8c9ddc6724aef8fffea058 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 13:51:58 -0500 Subject: [PATCH 22/46] Fix trivial call in unit test --- .../sting/utils/activeregion/ActivityProfileUnitTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 7cfc5ebb7..311d43206 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -98,7 +98,7 @@ public class ActivityProfileUnitTest extends BaseTest { int start = regionStart.getStart() + startsAndStops[i]; int end = regionStart.getStart() + startsAndStops[i+1] - 1; GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); - ActiveRegion r = new ActiveRegion(activeLoc, isActive, genomeLocParser, extension); + ActiveRegion r = new ActiveRegion(activeLoc, null, isActive, genomeLocParser, extension); l.add(r); isActive = ! 
isActive; } From 7fcfa4668cad4dd9d6ecf9b9c44b65850dfbfbf1 Mon Sep 17 00:00:00 2001 From: Chris Hartl Date: Wed, 23 Jan 2013 14:47:23 -0500 Subject: [PATCH 27/46] Since GenotypeConcordance is now a standalone walker, remove the old GenotypeConcordance evaluation module and the associated integration tests. --- .../evaluators/GenotypeConcordance.java | 273 ------------------ .../VariantEvalIntegrationTest.java | 16 - 2 files changed, 289 deletions(-) delete mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java deleted file mode 100644 index bc38a63f7..000000000 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypeConcordance.java +++ /dev/null @@ -1,273 +0,0 @@ -/* -* By downloading the PROGRAM you agree to the following terms of use: -* -* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY -* -* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). -* -* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and -* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. -* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: -* -* 1. 
DEFINITIONS -* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. -* -* 2. LICENSE -* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. -* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. -* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. -* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. -* -* 3. OWNERSHIP OF INTELLECTUAL PROPERTY -* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. -* Copyright 2012 Broad Institute, Inc. -* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. -* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. -* -* 4. INDEMNIFICATION -* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. -* -* 5. NO REPRESENTATIONS OR WARRANTIES -* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. -* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. -* -* 6. ASSIGNMENT -* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. -* -* 7. MISCELLANEOUS -* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. -* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. -* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. -* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. -* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. -* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. -* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
-*/ - -package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators; - -import org.apache.log4j.Logger; -import org.broadinstitute.sting.gatk.contexts.AlignmentContext; -import org.broadinstitute.sting.gatk.contexts.ReferenceContext; -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis; -import org.broadinstitute.sting.gatk.walkers.varianteval.util.Molten; -import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.variant.variantcontext.Genotype; -import org.broadinstitute.variant.variantcontext.GenotypeType; -import org.broadinstitute.variant.variantcontext.VariantContext; - -import java.util.*; - -/* - * Copyright (c) 2010 The Broad Institute - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, - * copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following - * conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR - * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -/** - * a table of sample names to genotype concordance figures - */ -@Analysis(name = "Genotype Concordance Detailed", description = "Determine the genotype concordance between the genotypes in difference tracks, and concordance statistics") -public class GenotypeConcordance extends VariantEvaluator { - protected final static Logger logger = Logger.getLogger(GenotypeConcordance.class); - - @Molten(variableFormat = "%s", valueFormat = "%s") - public final Map map = new TreeMap(); - - // concordance counts - private final long[][] truthByCalledGenotypeCounts; - - /** - * Initialize this object - */ - public GenotypeConcordance() { - final int nGenotypeTypes = GenotypeType.values().length; - truthByCalledGenotypeCounts = new long[nGenotypeTypes][nGenotypeTypes]; - } - - @Override - public int getComparisonOrder() { - return 2; - } - - @Override - public void update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { - // sanity check that we at least have either eval or validation data - if ( (validation != null && !validation.hasGenotypes()) || eval == null && !isValidVC(validation)) { - return; - } else { - final boolean validationIsValidVC = isValidVC(validation); - - // determine concordance for eval data - if (eval != null) { - for (final Genotype g : eval.getGenotypes() ) { - final String sample = g.getSampleName(); - final GenotypeType called = g.getType(); - final GenotypeType truth; - - if (!validationIsValidVC || !validation.hasGenotype(sample)) { - truth = GenotypeType.NO_CALL; - } else { - truth = validation.getGenotype(sample).getType(); - } - - incrValue(truth, called); - } - } - - // otherwise, mark no-calls for all samples - else { - final GenotypeType called = GenotypeType.NO_CALL; - - for (final Genotype g : validation.getGenotypes()) { - final GenotypeType truth = g.getType(); - incrValue(truth, called); - - // print out interesting sites - /* - if ( 
PRINT_INTERESTING_SITES && super.getVEWalker().gcLog != null ) { - if ( (truth == GenotypeType.HOM_VAR || truth == GenotypeType.HET) && called == GenotypeType.NO_CALL ) { - super.getVEWalker().gcLog.printf("%s FN %s%n", group, validation); - } - if ( (called == GenotypeType.HOM_VAR || called == GenotypeType.HET) && truth == GenotypeType.HOM_REF ) { - super.getVEWalker().gcLog.printf("%s FP %s%n", group, validation); - } - } - */ - } - } - } - } - - private static boolean isValidVC(final VariantContext vc) { - return (vc != null && !vc.isFiltered()); - } - - /** - * increment the specified value - * @param truth the truth type - * @param called the called type - */ - private void incrValue(final GenotypeType truth, final GenotypeType called) { - truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]++; - } - - private long count(final GenotypeType truth, final GenotypeType called) { - return truthByCalledGenotypeCounts[truth.ordinal()][called.ordinal()]; - } - - private long count(final EnumSet truth, final GenotypeType called) { - return count(truth, EnumSet.of(called)); - } - - private long count(final GenotypeType truth, final EnumSet called) { - return count(EnumSet.of(truth), called); - } - - private long count(final EnumSet truth, final EnumSet called) { - long sum = 0; - for ( final GenotypeType truth1 : truth ) { - for ( final GenotypeType called1 : called ) { - sum += count(truth1, called1); - } - } - return sum; - } - - private long countDiag( final EnumSet d1 ) { - long sum = 0; - - for(final GenotypeType e1 : d1 ) { - sum += truthByCalledGenotypeCounts[e1.ordinal()][e1.ordinal()]; - } - - return sum; - } - - @Override - public void finalizeEvaluation() { - final EnumSet allVariantGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET); - final EnumSet allCalledGenotypes = EnumSet.of(GenotypeType.HOM_VAR, GenotypeType.HET, GenotypeType.HOM_REF); - final EnumSet allGenotypes = EnumSet.allOf(GenotypeType.class); - - // exact values of the 
table - for ( final GenotypeType truth : GenotypeType.values() ) { - for ( final GenotypeType called : GenotypeType.values() ) { - final String field = String.format("n_true_%s_called_%s", truth, called); - final Long value = count(truth, called); - map.put(field, value.toString()); - } - } - - // counts of called genotypes - for ( final GenotypeType called : GenotypeType.values() ) { - final String field = String.format("total_called_%s", called); - final Long value = count(allGenotypes, called); - map.put(field, value.toString()); - } - - // counts of true genotypes - for ( final GenotypeType truth : GenotypeType.values() ) { - final String field = String.format("total_true_%s", truth); - final Long value = count(truth, allGenotypes); - map.put(field, value.toString()); - } - - for ( final GenotypeType genotype : GenotypeType.values() ) { - final String field = String.format("percent_%s_called_%s", genotype, genotype); - long numer = count(genotype, genotype); - long denom = count(EnumSet.of(genotype), allGenotypes); - map.put(field, Utils.formattedPercent(numer, denom)); - } - - { - // % non-ref called as non-ref - // MAD: this is known as the non-reference sensitivity (# non-ref according to comp found in eval / # non-ref in comp) - final String field = "percent_non_reference_sensitivity"; - long numer = count(allVariantGenotypes, allVariantGenotypes); - long denom = count(allVariantGenotypes, allGenotypes); - map.put(field, Utils.formattedPercent(numer, denom)); - } - - { - // overall genotype concordance of sites called in eval track - // MAD: this is the tradition genotype concordance - final String field = "percent_overall_genotype_concordance"; - long numer = countDiag(allCalledGenotypes); - long denom = count(allCalledGenotypes, allCalledGenotypes); - map.put(field, Utils.formattedPercent(numer, denom)); - } - - { - // overall genotype concordance of sites called non-ref in eval track - // MAD: this is the non-reference discrepancy rate - final String 
field = "percent_non_reference_discrepancy_rate"; - long homrefConcords = count(GenotypeType.HOM_REF, GenotypeType.HOM_REF); - long allNoHomRef = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords; - long numer = allNoHomRef - countDiag(allVariantGenotypes); - long denom = count(allCalledGenotypes, allCalledGenotypes) - homrefConcords; - map.put(field, Utils.formattedPercent(numer, denom)); - } - } -} - diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index 748a16db9..d695f2d13 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -334,16 +334,6 @@ public class VariantEvalIntegrationTest extends WalkerTest { executeTestParallel("testSelect1", spec); } - @Test - public void testVEGenotypeConcordance() { - String vcfFile = "GenotypeConcordanceEval.vcf"; - - WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -ST CpG --eval:VCF3 " + validationDataLocation + vcfFile + " --comp:VCF3 " + validationDataLocation + "GenotypeConcordanceComp.vcf -noEV -EV GenotypeConcordance -o %s", - 1, - Arrays.asList("810d55b67de592f6375d9dfb282145ef")); - executeTestParallel("testVEGenotypeConcordance" + vcfFile, spec); - } - @Test public void testVEMendelianViolationEvaluator() { String vcfFile = "/MendelianViolationEval.vcf"; @@ -355,12 +345,6 @@ public class VariantEvalIntegrationTest extends WalkerTest { executeTestParallel("testVEMendelianViolationEvaluator" + vcfFile, spec); } - @Test - public void testCompVsEvalAC() { - String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + 
validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("659a15cc842f0310106fa595a26da71d")); - executeTestParallel("testCompVsEvalAC",spec); - } private static String withSelect(String cmd, String select, String name) { return String.format("%s -select '%s' -selectName %s", cmd, select, name); From 643a508564fc8345a79bf49df42e7fc04661182b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Wed, 23 Jan 2013 20:53:36 -0700 Subject: [PATCH 29/46] Added atlassian intellij plugin file to .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 927caf98d..6e2c2f224 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ dist/ dump/ lib/ out/ +/atlassian-ide-plugin.xml From 6790e103e00704640effc7710e1c0f819a2e8104 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 24 Jan 2013 11:42:49 -0500 Subject: [PATCH 35/46] Moving lots of walkers back from protected to public (along with several of the VA annotations). Let's see whether Mauricio's automatic git hook really works! 
--- .../sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java | 0 .../sting/gatk/walkers/annotator/AlleleBalance.java | 0 .../sting/gatk/walkers/annotator/AlleleBalanceBySample.java | 0 .../sting/gatk/walkers/annotator/AverageAltAlleleLength.java | 0 .../broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java | 0 .../org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java | 0 .../sting/gatk/walkers/annotator/MappingQualityZeroBySample.java | 0 .../sting/gatk/walkers/annotator/MappingQualityZeroFraction.java | 0 .../broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java | 0 .../org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java | 0 .../sting/gatk/walkers/annotator/TechnologyComposition.java | 0 .../sting/gatk/walkers/annotator/VariantAnnotator.java | 0 .../sting/gatk/walkers/annotator/VariantAnnotatorEngine.java | 0 .../walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java | 0 .../walkers/annotator/interfaces/AnnotationInterfaceManager.java | 0 .../sting/gatk/walkers/annotator/interfaces/AnnotationType.java | 0 .../gatk/walkers/annotator/interfaces/AnnotatorCompatible.java | 0 .../gatk/walkers/annotator/interfaces/ExperimentalAnnotation.java | 0 .../gatk/walkers/annotator/interfaces/GenotypeAnnotation.java | 0 .../gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java | 0 .../gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java | 0 .../gatk/walkers/annotator/interfaces/StandardAnnotation.java | 0 .../walkers/annotator/interfaces/VariantAnnotatorAnnotation.java | 0 .../walkers/annotator/interfaces/WorkInProgressAnnotation.java | 0 .../sting/gatk/walkers/beagle/BeagleOutputToVCF.java | 0 .../sting/gatk/walkers/beagle/ProduceBeagleInput.java | 0 .../sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java | 0 .../sting/gatk/walkers/coverage/DepthOfCoverage.java | 0 .../sting/gatk/walkers/coverage/DepthOfCoverageStats.java | 0 .../sting/gatk/walkers/diffengine/BAMDiffableReader.java | 0 
.../sting/gatk/walkers/diffengine/GATKReportDiffableReader.java | 0 .../sting/gatk/walkers/diffengine/VCFDiffableReader.java | 0 .../sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java | 0 .../sting/gatk/walkers/fasta/FastaReferenceMaker.java | 0 .../broadinstitute/sting/gatk/walkers/fasta/FastaSequence.java | 0 .../broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java | 0 .../sting/gatk/walkers/filters/FiltrationContext.java | 0 .../sting/gatk/walkers/filters/FiltrationContextWindow.java | 0 .../sting/gatk/walkers/filters/VariantFiltration.java | 0 .../sting/gatk/walkers/validation/ValidationAmplicons.java | 0 .../sting/gatk/walkers/varianteval/VariantEval.java | 0 .../sting/gatk/walkers/varianteval/VariantEvalReportWriter.java | 0 .../sting/gatk/walkers/varianteval/evaluators/CompOverlap.java | 0 .../sting/gatk/walkers/varianteval/evaluators/CountVariants.java | 0 .../gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java | 0 .../sting/gatk/walkers/varianteval/evaluators/IndelSummary.java | 0 .../varianteval/evaluators/MendelianViolationEvaluator.java | 0 .../gatk/walkers/varianteval/evaluators/MultiallelicSummary.java | 0 .../gatk/walkers/varianteval/evaluators/PrintMissingComp.java | 0 .../sting/gatk/walkers/varianteval/evaluators/StandardEval.java | 0 .../walkers/varianteval/evaluators/ThetaVariantEvaluator.java | 0 .../gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java | 0 .../gatk/walkers/varianteval/evaluators/ValidationReport.java | 0 .../gatk/walkers/varianteval/evaluators/VariantEvaluator.java | 0 .../sting/gatk/walkers/varianteval/evaluators/VariantSummary.java | 0 .../gatk/walkers/varianteval/stratifications/AlleleCount.java | 0 .../gatk/walkers/varianteval/stratifications/AlleleFrequency.java | 0 .../sting/gatk/walkers/varianteval/stratifications/CompRod.java | 0 .../sting/gatk/walkers/varianteval/stratifications/Contig.java | 0 .../sting/gatk/walkers/varianteval/stratifications/CpG.java | 0 
.../gatk/walkers/varianteval/stratifications/Degeneracy.java | 0 .../varianteval/stratifications/DynamicStratification.java | 0 .../sting/gatk/walkers/varianteval/stratifications/EvalRod.java | 0 .../sting/gatk/walkers/varianteval/stratifications/Filter.java | 0 .../gatk/walkers/varianteval/stratifications/FunctionalClass.java | 0 .../sting/gatk/walkers/varianteval/stratifications/IndelSize.java | 0 .../varianteval/stratifications/IntervalStratification.java | 0 .../gatk/walkers/varianteval/stratifications/JexlExpression.java | 0 .../sting/gatk/walkers/varianteval/stratifications/Novelty.java | 0 .../gatk/walkers/varianteval/stratifications/OneBPIndel.java | 0 .../varianteval/stratifications/RequiredStratification.java | 0 .../sting/gatk/walkers/varianteval/stratifications/Sample.java | 0 .../varianteval/stratifications/StandardStratification.java | 0 .../gatk/walkers/varianteval/stratifications/TandemRepeat.java | 0 .../walkers/varianteval/stratifications/VariantStratifier.java | 0 .../gatk/walkers/varianteval/stratifications/VariantType.java | 0 .../walkers/varianteval/stratifications/manager/StratNode.java | 0 .../varianteval/stratifications/manager/StratNodeIterator.java | 0 .../stratifications/manager/StratificationManager.java | 0 .../walkers/varianteval/stratifications/manager/Stratifier.java | 0 .../sting/gatk/walkers/varianteval/util/Analysis.java | 0 .../gatk/walkers/varianteval/util/AnalysisModuleScanner.java | 0 .../sting/gatk/walkers/varianteval/util/DataPoint.java | 0 .../sting/gatk/walkers/varianteval/util/EvaluationContext.java | 0 .../sting/gatk/walkers/varianteval/util/Molten.java | 0 .../gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java | 0 .../sting/gatk/walkers/varianteval/util/VariantEvalUtils.java | 0 .../gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java | 0 .../sting/gatk/walkers/variantutils/ConcordanceMetrics.java | 0 .../sting/gatk/walkers/variantutils/FilterLiftedVariants.java | 0 
.../sting/gatk/walkers/variantutils/GenotypeConcordance.java | 0 .../sting/gatk/walkers/variantutils/LeftAlignVariants.java | 0 .../sting/gatk/walkers/variantutils/LiftoverVariants.java | 0 .../sting/gatk/walkers/variantutils/RandomlySplitVariants.java | 0 .../sting/gatk/walkers/variantutils/SelectHeaders.java | 0 .../sting/gatk/walkers/variantutils/ValidateVariants.java | 0 .../gatk/walkers/variantutils/VariantValidationAssessor.java | 0 .../sting/gatk/walkers/variantutils/VariantsToBinaryPed.java | 0 .../sting/gatk/walkers/variantutils/VariantsToTable.java | 0 .../sting/gatk/walkers/variantutils/VariantsToVCF.java | 0 .../sting/utils/genotyper/PerReadAlleleLikelihoodMap.java | 0 101 files changed, 0 insertions(+), 0 deletions(-) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AverageAltAlleleLength.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java (100%) rename {protected => 
public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationInterfaceManager.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationType.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ExperimentalAnnotation.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/StandardAnnotation.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/WorkInProgressAnnotation.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java 
(100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaSequence.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java (100%) 
rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/StandardEval.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java (100%) rename {protected => 
public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/RequiredStratification.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StandardStratification.java (100%) rename {protected => 
public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNode.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNodeIterator.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/Stratifier.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Molten.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java (100%) rename {protected => 
public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java (100%) rename {protected => public}/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java (100%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java rename to public/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AverageAltAlleleLength.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AverageAltAlleleLength.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AverageAltAlleleLength.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AverageAltAlleleLength.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java 
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java rename to 
public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ActiveRegionBasedAnnotation.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationInterfaceManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationInterfaceManager.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationInterfaceManager.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationInterfaceManager.java diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationType.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationType.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotationType.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatible.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ExperimentalAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ExperimentalAnnotation.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ExperimentalAnnotation.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/ExperimentalAnnotation.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/GenotypeAnnotation.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/InfoFieldAnnotation.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/RodRequiringAnnotation.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/StandardAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/StandardAnnotation.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/StandardAnnotation.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/StandardAnnotation.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/WorkInProgressAnnotation.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/WorkInProgressAnnotation.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/WorkInProgressAnnotation.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/WorkInProgressAnnotation.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCF.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/ProduceBeagleInput.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/VariantsToBeagleUnphased.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java 
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageStats.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/BAMDiffableReader.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/GATKReportDiffableReader.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/diffengine/VCFDiffableReader.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java similarity index 100% rename from 
protected/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceMaker.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaSequence.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaSequence.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaSequence.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaSequence.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/filters/ClusteredSnps.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContext.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java similarity index 
100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/filters/FiltrationContextWindow.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/filters/VariantFiltration.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalReportWriter.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelSummary.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java similarity index 100% rename from 
protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MendelianViolationEvaluator.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/MultiallelicSummary.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/PrintMissingComp.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/StandardEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/StandardEval.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/StandardEval.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/StandardEval.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java rename to 
public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantEvaluator.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantSummary.java diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CompRod.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Contig.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/CpG.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/DynamicStratification.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/EvalRod.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java similarity index 100% rename from 
protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Filter.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IndelSize.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/IntervalStratification.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java 
rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/JexlExpression.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Novelty.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/OneBPIndel.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/RequiredStratification.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/RequiredStratification.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/RequiredStratification.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/RequiredStratification.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Sample.java diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StandardStratification.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StandardStratification.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StandardStratification.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/StandardStratification.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/TandemRepeat.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantStratifier.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/VariantType.java diff --git 
a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNode.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNode.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNode.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNode.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNodeIterator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNodeIterator.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNodeIterator.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratNodeIterator.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/StratificationManager.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/Stratifier.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/Stratifier.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/Stratifier.java rename to 
public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/manager/Stratifier.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Analysis.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/AnalysisModuleScanner.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/DataPoint.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/EvaluationContext.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Molten.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Molten.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Molten.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/Molten.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/SortableJexlVCMatchExp.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VQSRCalibrationCurve.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java rename 
to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ConcordanceMetrics.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/FilterLiftedVariants.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/GenotypeConcordance.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LeftAlignVariants.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/LiftoverVariants.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java 
b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RandomlySplitVariants.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectHeaders.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/ValidateVariants.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantValidationAssessor.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java rename to 
public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToBinaryPed.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToTable.java diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/VariantsToVCF.java diff --git a/protected/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java similarity index 100% rename from protected/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java rename to public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java From cd91e365f4a4f0dfb75a2455e80dc3447f676e22 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 14:13:21 -0500 Subject: [PATCH 37/46] Optimize getCurrentContigLength and getLocForOffset in ActivityProfile --- .../sting/utils/activeregion/ActivityProfile.java | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index ab9095106..80484e12f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -49,6 +49,12 @@ public class ActivityProfile { protected GenomeLoc regionStartLoc = null; protected GenomeLoc regionStopLoc = null; + /** + * A cached value of the regionStartLoc contig length, to make calls to + * getCurrentContigLength efficient + */ + protected int contigLength = -1; + /** * Create a new empty ActivityProfile * @param parser the parser we can use to create genome locs, cannot be null @@ -155,7 +161,7 @@ public class ActivityProfile { if ( start < 0 || start > getCurrentContigLength() ) { return null; } else { - return parser.createGenomeLoc(regionStartLoc.getContig(), start); + return parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getContigIndex(), start, start); } } @@ -166,8 +172,7 @@ public class ActivityProfile { @Requires("regionStartLoc != null") @Ensures("result > 0") private int getCurrentContigLength() { - // TODO -- fix performance problem with getContigInfo - return parser.getContigInfo(regionStartLoc.getContig()).getSequenceLength(); + return contigLength; } // -------------------------------------------------------------------------------- @@ -190,6 +195,7 @@ public class ActivityProfile { if ( regionStartLoc == null ) { regionStartLoc = loc; regionStopLoc = loc; + contigLength = parser.getContigInfo(regionStartLoc.getContig()).getSequenceLength(); } else { // TODO -- need to figure out where to add loc as the regions will be popping off the front if ( regionStopLoc.getStart() != loc.getStart() - 1 ) From 9e43a2028d3f172782b648ebf56b6a4bf14d4025 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 14:50:14 -0500 Subject: [PATCH 38/46] Making band pass filter size, sigma, active region max size and extension all accessible from the command line --- .../traversals/TraverseActiveRegions.java | 9 ++++++--- .../gatk/walkers/ActiveRegionExtension.java | 16 +++++++++++++++ .../gatk/walkers/ActiveRegionWalker.java | 20 
++++++++++++++++--- .../broadinstitute/sting/utils/MathUtils.java | 2 +- .../activeregion/BandPassActivityProfile.java | 19 +++++++++++++++--- .../BandPassActivityProfileUnitTest.java | 12 +++++++---- 6 files changed, 64 insertions(+), 14 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index ac1c751ca..21971e189 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -102,11 +102,14 @@ public class TraverseActiveRegions extends TraversalEngine extends Walker> activeRegionBindings = null; + @Advanced + @Argument(fullName="activeRegionExtension", shortName="activeRegionExtension", doc="The active region extension; if not provided defaults to Walker annotated default", required = false) + public Integer activeRegionExtension = null; + + @Advanced + @Argument(fullName="activeRegionMaxSize", shortName="activeRegionMaxSize", doc="The active region maximum size; if not provided defaults to Walker annotated default", required = false) + public Integer activeRegionMaxSize = null; + + @Advanced + @Argument(fullName="bandPassFilterSize", shortName="bandPassFilterSize", doc="The filter size of band pass filter; if not provided defaults to Walker annotated default", required = false) + public Integer bandPassFilterSize = null; + + @Advanced + @Argument(fullName="bandPassSigma", shortName="bandPassSigma", doc="The sigma of the band pass filter Gaussian kernel; if not provided defaults to Walker annotated default", required = false) + public Double bandPassSigma = null; + private GenomeLocSortedSet presetActiveRegions = null; @Override diff --git a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java index f1f0ab9b1..0c3ed87c0 
100644 --- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java +++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java @@ -651,7 +651,7 @@ public class MathUtils { final double sum = sum(array); final double[] normalized = new double[array.length]; - if ( sum < 0.0 || sum > 1.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum); + if ( sum < 0.0 ) throw new IllegalArgumentException("Values in probability array sum to a negative number " + sum); for ( int i = 0; i < array.length; i++ ) { normalized[i] = array[i] / sum; } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java index 1a8bac086..5c6389c26 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfile.java @@ -44,8 +44,10 @@ import java.util.LinkedList; */ public class BandPassActivityProfile extends ActivityProfile { public static final int DEFAULT_FILTER_SIZE = 80; + public static final double DEFAULT_SIGMA = 55.0; private final int filterSize; + private final double sigma; private final double[] GaussianKernel; /** @@ -53,7 +55,7 @@ public class BandPassActivityProfile extends ActivityProfile { * @param parser our genome loc parser */ public BandPassActivityProfile(final GenomeLocParser parser) { - this(parser, DEFAULT_FILTER_SIZE); + this(parser, DEFAULT_FILTER_SIZE, DEFAULT_SIGMA); } /** @@ -63,16 +65,18 @@ public class BandPassActivityProfile extends ActivityProfile { * side that are included in the band. So a filter size of 1 implies that the actual band * is 3 bp, 1 for the center site and 1 on each size. 2 => 5, etc. 
*/ - public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize) { + public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize, final double sigma) { super(parser); if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize); + if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma); // setup the Gaussian kernel for the band pass filter this.filterSize = filterSize; + this.sigma = sigma; final double[] kernel = new double[getBandSize()]; for( int iii = 0; iii < 2* filterSize + 1; iii++ ) { - kernel[iii] = MathUtils.NormalDistribution(filterSize, 55.0, iii); + kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii); } this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel); } @@ -108,6 +112,15 @@ public class BandPassActivityProfile extends ActivityProfile { return filterSize; } + /** + * Get the Gaussian kernel sigma value + * @return a positive double + */ + @Ensures("result >= 0") + public double getSigma() { + return sigma; + } + /** * Get the kernel of this band pass filter. 
Do not modify returned result * @return the kernel used in this band pass filter diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java index a2a85f1d0..0a71bad14 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -64,11 +64,13 @@ public class BandPassActivityProfileUnitTest extends BaseTest { for ( boolean precedingIsActive : Arrays.asList(true, false) ) { for ( int precedingSites: Arrays.asList(0, 1, 10, 100) ) { for ( int bandPassSize : Arrays.asList(0, 1, 10, 100) ) { + for ( double sigma : Arrays.asList(1.0, 2.0, BandPassActivityProfile.DEFAULT_SIGMA) ) { // for ( int start : Arrays.asList(10) ) { // for ( boolean precedingIsActive : Arrays.asList(false) ) { // for ( int precedingSites: Arrays.asList(0) ) { // for ( int bandPassSize : Arrays.asList(1) ) { - tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize }); + tests.add(new Object[]{ start, precedingIsActive, precedingSites, bandPassSize, sigma }); + } } } } @@ -78,10 +80,12 @@ public class BandPassActivityProfileUnitTest extends BaseTest { } @Test(dataProvider = "BandPassBasicTest") - public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize) { - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize); + public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) { + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma); final int expectedBandSize = bandPassSize * 2 + 1; + Assert.assertEquals(profile.getFilteredSize(), 
bandPassSize, "Wrong filter size"); + Assert.assertEquals(profile.getSigma(), sigma, "Wrong sigma"); Assert.assertEquals(profile.getBandSize(), expectedBandSize, "Wrong expected band size"); final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); @@ -132,7 +136,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test( dataProvider = "BandPassComposition") public void testBandPassComposition(final int bandPassSize, final int integrationLength) { final int start = 1; - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, BandPassActivityProfile.DEFAULT_SIGMA); final double[] rawActiveProbs = new double[integrationLength + bandPassSize * 2]; // add a buffer so that we can get all of the band pass values From 0c94e3d96e0a6c1251bb814fbbd18e9a93511d57 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 15:41:56 -0500 Subject: [PATCH 39/46] Adaptively compute the band pass filter from the sigma, up to a maximum size of 50 bp -- Previously we allowed band pass filter size to be specified along with the sigma. But now that sigma is controllable from walkers and from the command line, we instead compute the filter size given the kernel from the sigma, including all kernel points with p > 1e-5 in the kernel. 
This means that if you use a smaller kernel you get a small band size and therefore faster ART -- Update, as discussed with Ryan, the sigma and band size to 17 bp for HC (default ART wide) and max band size of 50 bp --- .../traversals/TraverseActiveRegions.java | 3 +- .../gatk/walkers/ActiveRegionExtension.java | 8 --- .../gatk/walkers/ActiveRegionWalker.java | 4 -- .../activeregion/BandPassActivityProfile.java | 52 +++++++++++------- .../activeregion/ActivityProfileUnitTest.java | 4 +- .../BandPassActivityProfileUnitTest.java | 53 ++++++++++++++++++- 6 files changed, 87 insertions(+), 37 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 21971e189..52ac783a9 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -105,11 +105,10 @@ public class TraverseActiveRegions extends TraversalEngine extends Walker 5, etc. 
+ * @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma + * @param sigma the standard deviation of the Gaussian kernel for this band pass filter */ - public BandPassActivityProfile(final GenomeLocParser parser, final int filterSize, final double sigma) { + public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma) { + this(parser, maxFilterSize, sigma, true); + } + + public BandPassActivityProfile(final GenomeLocParser parser, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize) { super(parser); - if ( filterSize < 0 ) throw new IllegalArgumentException("Filter size must be greater than or equal to 0 but got " + filterSize); if ( sigma < 0 ) throw new IllegalArgumentException("Sigma must be greater than or equal to 0 but got " + sigma); // setup the Gaussian kernel for the band pass filter - this.filterSize = filterSize; this.sigma = sigma; - final double[] kernel = new double[getBandSize()]; - for( int iii = 0; iii < 2* filterSize + 1; iii++ ) { + final double[] fullKernel = makeKernel(maxFilterSize, sigma); + this.filterSize = adaptiveFilterSize ? 
determineFilterSize(fullKernel, MIN_PROB_TO_KEEP_IN_FILTER) : maxFilterSize; + this.GaussianKernel = makeKernel(this.filterSize, sigma); + } + + protected static int determineFilterSize(final double[] kernel, final double minProbToKeepInFilter) { + final int middle = (kernel.length - 1) / 2; + int filterEnd = middle; + while ( filterEnd > 0 ) { + if ( kernel[filterEnd - 1] < minProbToKeepInFilter ) { + break; + } + filterEnd--; + } + return middle - filterEnd; + } + + protected static double[] makeKernel(final int filterSize, final double sigma) { + final int bandSize = 2 * filterSize + 1; + final double[] kernel = new double[bandSize]; + for( int iii = 0; iii < bandSize; iii++ ) { kernel[iii] = MathUtils.NormalDistribution(filterSize, sigma, iii); } - this.GaussianKernel = MathUtils.normalizeFromRealSpace(kernel); + return MathUtils.normalizeFromRealSpace(kernel); } /** diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index 311d43206..bce1722cd 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -87,7 +87,7 @@ public class ActivityProfileUnitTest extends BaseTest { case Base: return new ActivityProfile(genomeLocParser); case BandPass: // zero size => equivalent to ActivityProfile - return new BandPassActivityProfile(genomeLocParser, 0); + return new BandPassActivityProfile(genomeLocParser, 0, 0.01, false); default: throw new IllegalStateException(type.toString()); } } @@ -98,7 +98,7 @@ public class ActivityProfileUnitTest extends BaseTest { int start = regionStart.getStart() + startsAndStops[i]; int end = regionStart.getStart() + startsAndStops[i+1] - 1; GenomeLoc activeLoc = genomeLocParser.createGenomeLoc(regionStart.getContig(), start, end); - ActiveRegion r = new 
ActiveRegion(activeLoc, null, isActive, genomeLocParser, extension); + ActiveRegion r = new ActiveRegion(activeLoc, Collections.emptyList(), isActive, genomeLocParser, extension); l.add(r); isActive = ! isActive; } diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java index 0a71bad14..ff1c9bdef 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/BandPassActivityProfileUnitTest.java @@ -81,7 +81,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { @Test(dataProvider = "BandPassBasicTest") public void testBandPass(final int start, final boolean precedingIsActive, final int nPrecedingSites, final int bandPassSize, final double sigma) { - final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma); + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, bandPassSize, sigma, false); final int expectedBandSize = bandPassSize * 2 + 1; Assert.assertEquals(profile.getFilteredSize(), bandPassSize, "Wrong filter size"); @@ -124,7 +124,7 @@ public class BandPassActivityProfileUnitTest extends BaseTest { public Object[][] makeBandPassComposition() { final List tests = new LinkedList(); - for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.DEFAULT_FILTER_SIZE) ) { + for ( int bandPassSize : Arrays.asList(0, 1, 10, 100, BandPassActivityProfile.MAX_FILTER_SIZE) ) { for ( int integrationLength : Arrays.asList(1, 10, 100, 1000) ) { tests.add(new Object[]{ bandPassSize, integrationLength }); } @@ -167,4 +167,53 @@ public class BandPassActivityProfileUnitTest extends BaseTest { Assert.assertEquals(profile.getStateList().get(j).isActiveProb, expectedProbs[j], "State probability not expected 
at " + j); } } + + // ------------------------------------------------------------------------------------ + // + // Code to test the creation of the kernels + // + // ------------------------------------------------------------------------------------ + + /** + + kernel <- function(sd, pThres) { + raw = dnorm(-80:81, mean=0, sd=sd) + norm = raw / sum(raw) + bad = norm < pThres + paste(norm[! bad], collapse=", ") + } + + print(kernel(0.01, 1e-5)) + print(kernel(1, 1e-5)) + print(kernel(5, 1e-5)) + print(kernel(17, 1e-5)) + + * @return + */ + + @DataProvider(name = "KernelCreation") + public Object[][] makeKernelCreation() { + final List tests = new LinkedList(); + + tests.add(new Object[]{ 0.01, 1000, new double[]{1.0}}); + tests.add(new Object[]{ 1.0, 1000, new double[]{0.0001338302, 0.004431848, 0.053990966, 0.241970723, 0.398942278, 0.241970723, 0.053990966, 0.004431848, 0.0001338302}}); + tests.add(new Object[]{ 1.0, 0, new double[]{1.0}}); + tests.add(new Object[]{ 1.0, 1, new double[]{0.2740686, 0.4518628, 0.2740686}}); + tests.add(new Object[]{ 1.0, 2, new double[]{0.05448868, 0.24420134, 0.40261995, 0.24420134, 0.05448868}}); + tests.add(new Object[]{ 1.0, 1000, new double[]{0.0001338302, 0.004431848, 0.053990966, 0.241970723, 0.398942278, 0.241970723, 0.053990966, 0.004431848, 0.0001338302}}); + tests.add(new Object[]{ 5.0, 1000, new double[]{1.1788613551308e-05, 2.67660451529771e-05, 5.83893851582921e-05, 0.000122380386022754, 0.000246443833694604, 0.000476817640292968, 0.000886369682387602, 0.00158309031659599, 0.00271659384673712, 0.00447890605896858, 0.00709491856924629, 0.0107981933026376, 0.0157900316601788, 0.0221841669358911, 0.029945493127149, 0.0388372109966426, 0.0483941449038287, 0.0579383105522965, 0.0666449205783599, 0.0736540280606647, 0.0782085387950912, 0.0797884560802865, 0.0782085387950912, 0.0736540280606647, 0.0666449205783599, 0.0579383105522965, 0.0483941449038287, 0.0388372109966426, 0.029945493127149, 0.0221841669358911, 
0.0157900316601788, 0.0107981933026376, 0.00709491856924629, 0.00447890605896858, 0.00271659384673712, 0.00158309031659599, 0.000886369682387602, 0.000476817640292968, 0.000246443833694604, 0.000122380386022754, 5.83893851582921e-05, 2.67660451529771e-05, 1.1788613551308e-05}}); + tests.add(new Object[]{17.0, 1000, new double[]{1.25162575710745e-05, 1.57001772728555e-05, 1.96260034693739e-05, 2.44487374842009e-05, 3.03513668801384e-05, 3.75489089511911e-05, 4.62928204154855e-05, 5.68757597480354e-05, 6.96366758708924e-05, 8.49661819944029e-05, 0.000103312156275406, 0.000125185491708561, 0.000151165896477646, 0.000181907623161359, 0.000218144981137171, 0.000260697461819069, 0.000310474281706066, 0.000368478124457557, 0.000435807841336874, 0.00051365985048857, 0.000603327960854364, 0.000706201337376934, 0.000823760321812988, 0.000957569829285965, 0.00110927005589186, 0.00128056425833231, 0.00147320340358764, 0.00168896753568649, 0.00192964376796036, 0.00219700088266432, 0.00249276060490197, 0.00281856571330067, 0.00317594525418154, 0.00356627723683793, 0.00399074930220799, 0.00445031797242299, 0.00494566720070898, 0.00547716704583487, 0.00604483338842317, 0.00664828968356621, 0.00728673180099395, 0.00795889703644795, 0.00866303838230695, 0.00939690511889675, 0.0101577307281371, 0.010942229037054, 0.0117465993701676, 0.0125665413280325, 0.0133972796167302, 0.0142335991336574, 0.0150698902735454, 0.0159002041614507, 0.0167183172536454, 0.0175178044808441, 0.0182921198494897, 0.0190346831745763, 0.0197389714002676, 0.020398612780527, 0.0210074820484496, 0.0215597946062309, 0.0220501977225941, 0.022473856734247, 0.0228265343139947, 0.0231046609899767, 0.0233053952756892, 0.0234266719946158, 0.0234672376502799, 0.0234266719946158, 0.0233053952756892, 0.0231046609899767, 0.0228265343139947, 0.022473856734247, 0.0220501977225941, 0.0215597946062309, 0.0210074820484496, 0.020398612780527, 0.0197389714002676, 0.0190346831745763, 0.0182921198494897, 0.0175178044808441, 
0.0167183172536454, 0.0159002041614507, 0.0150698902735454, 0.0142335991336574, 0.0133972796167302, 0.0125665413280325, 0.0117465993701676, 0.010942229037054, 0.0101577307281371, 0.00939690511889675, 0.00866303838230695, 0.00795889703644795, 0.00728673180099395, 0.00664828968356621, 0.00604483338842317, 0.00547716704583487, 0.00494566720070898, 0.00445031797242299, 0.00399074930220799, 0.00356627723683793, 0.00317594525418154, 0.00281856571330067, 0.00249276060490197, 0.00219700088266432, 0.00192964376796036, 0.00168896753568649, 0.00147320340358764, 0.00128056425833231, 0.00110927005589186, 0.000957569829285965, 0.000823760321812988, 0.000706201337376934, 0.000603327960854364, 0.00051365985048857, 0.000435807841336874, 0.000368478124457557, 0.000310474281706066, 0.000260697461819069, 0.000218144981137171, 0.000181907623161359, 0.000151165896477646, 0.000125185491708561, 0.000103312156275406, 8.49661819944029e-05, 6.96366758708924e-05, 5.68757597480354e-05, 4.62928204154855e-05, 3.75489089511911e-05, 3.03513668801384e-05, 2.44487374842009e-05, 1.96260034693739e-05, 1.57001772728555e-05, 1.25162575710745e-05}}); + + return tests.toArray(new Object[][]{}); + } + + @Test( dataProvider = "KernelCreation") + public void testKernelCreation(final double sigma, final int maxSize, final double[] expectedKernel) { + final BandPassActivityProfile profile = new BandPassActivityProfile(genomeLocParser, maxSize, sigma, true); + + final double[] kernel = profile.getKernel(); + Assert.assertEquals(kernel.length, expectedKernel.length); + for ( int i = 0; i < kernel.length; i++ ) + Assert.assertEquals(kernel[i], expectedKernel[i], 1e-3, "Kernels not equal at " + i); + } } From c96b64973ab7b943fb489ac7513ad35bae4a2bbf Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 23 Jan 2013 15:52:00 -0500 Subject: [PATCH 40/46] Soft clip probability propagation is capped by the MAX_PROB_PROPAGATION_DISTANCE, which is 50 bp --- .../sting/utils/activeregion/ActivityProfile.java | 5 +++-- 1 
file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 80484e12f..16cb2fd84 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -40,7 +40,7 @@ import java.util.*; * @since Date created */ public class ActivityProfile { - private final static int MAX_PROB_PROPOGATION_DISTANCE = 10; + private final static int MAX_PROB_PROPOGATION_DISTANCE = 50; private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author protected final List stateList; @@ -259,7 +259,8 @@ public class ActivityProfile { if ( justAddedState.resultState.equals(ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS) ) { // special code to deal with the problem that high quality soft clipped bases aren't added to pileups final List states = new LinkedList(); - final int numHQClips = justAddedState.resultValue.intValue(); + // add no more than the max prob propagation distance num HQ clips + final int numHQClips = Math.min(justAddedState.resultValue.intValue(), getMaxProbPropagationDistance()); for( int jjj = - numHQClips; jjj <= numHQClips; jjj++ ) { final GenomeLoc loc = getLocForOffset(justAddedState.getLoc(), jjj); if ( loc != null ) From 592f90aaefdb46315862db069e57e4397bb558cc Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 24 Jan 2013 11:28:02 -0500 Subject: [PATCH 41/46] ActivityProfile now cuts intelligently at the best local minimum when in a larger than max size active region -- This new algorithm is essential to properly handle activity profiles that have many large active regions generated from lots of dense variant events. The new algorithm passes unit tests and passes visualize visual inspection of both running on 1000G and NA12878 -- Misc. 
commenting of the code -- Updated ActiveRegionExtension to include a min active region size -- Renamed ActiveRegionExtension to ActiveRegionTraversalParameters, as it carries more than just the traversal extension now --- .../targets/FindCoveredIntervals.java | 4 +- .../haplotypecaller/HaplotypeCaller.java | 2 +- .../traversals/TraverseActiveRegions.java | 14 +- ...a => ActiveRegionTraversalParameters.java} | 36 +++- .../gatk/walkers/ActiveRegionWalker.java | 4 +- .../utils/activeregion/ActivityProfile.java | 82 +++++++-- .../activeregion/ActivityProfileState.java | 5 +- .../activeregion/ActivityProfileUnitTest.java | 170 +++++++++++++++++- 8 files changed, 283 insertions(+), 34 deletions(-) rename public/java/src/org/broadinstitute/sting/gatk/walkers/{ActiveRegionExtension.java => ActiveRegionTraversalParameters.java} (54%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java index 74ff77e4b..3712a8e51 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java @@ -52,7 +52,7 @@ import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionTraversalParameters; import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.PartitionBy; import org.broadinstitute.sting.gatk.walkers.PartitionType; @@ -64,7 +64,7 @@ import java.io.PrintStream; @DocumentedGATKFeature( groupName = "BAM 
Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.CONTIG) -@ActiveRegionExtension(extension = 0, maxRegion = 50000) +@ActiveRegionTraversalParameters(extension = 0, maxRegion = 50000) public class FindCoveredIntervals extends ActiveRegionWalker { @Output(required = true) private PrintStream out; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 9bb04421c..a3d764141 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -131,7 +131,7 @@ import java.util.*; @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} ) @PartitionBy(PartitionType.LOCUS) @BAQMode(ApplicationTime = ReadTransformer.ApplicationTime.FORBIDDEN) -@ActiveRegionExtension(extension=65, maxRegion=300) +@ActiveRegionTraversalParameters(extension=65, maxRegion=300) //@Downsample(by= DownsampleType.BY_SAMPLE, toCoverage=5) public class HaplotypeCaller extends ActiveRegionWalker implements AnnotatorCompatible { diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java index 52ac783a9..bff696f13 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java +++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java @@ -34,7 +34,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.datasources.providers.*; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; -import 
org.broadinstitute.sting.gatk.walkers.ActiveRegionExtension; +import org.broadinstitute.sting.gatk.walkers.ActiveRegionTraversalParameters; import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker; import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.Walker; @@ -75,6 +75,7 @@ public class TraverseActiveRegions extends TraversalEngine workQueue = new LinkedList(); @@ -102,9 +103,10 @@ public class TraverseActiveRegions extends TraversalEngine extends TraversalEngine extends TraversalEngine extends TraversalEngine walker, T sum, final boolean flushActivityProfile, final boolean forceAllRegionsToBeActive) { if ( ! walkerHasPresetRegions ) { // We don't have preset regions, so we get our regions from the activity profile - final Collection activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMaxRegionSize(), flushActivityProfile); + final Collection activeRegions = activityProfile.popReadyActiveRegions(getActiveRegionExtension(), getMinRegionSize(), getMaxRegionSize(), flushActivityProfile); workQueue.addAll(activeRegions); if ( ! activeRegions.isEmpty() && logger.isDebugEnabled() ) logger.debug("Integrated " + activityProfile.size() + " isActive calls into " + activeRegions.size() + " regions." 
); } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionExtension.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionTraversalParameters.java similarity index 54% rename from public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionExtension.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionTraversalParameters.java index 72c409f62..cdb45db7b 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionExtension.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionTraversalParameters.java @@ -33,7 +33,8 @@ import java.lang.annotation.Retention; import java.lang.annotation.RetentionPolicy; /** - * Describes the size of the buffer region that is added to each active region when pulling in covered reads. + * Describes the parameters that this walker requires of the active region traversal + * * User: rpoplin * Date: 1/18/12 */ @@ -41,13 +42,40 @@ import java.lang.annotation.RetentionPolicy; @Inherited @Retention(RetentionPolicy.RUNTIME) -public @interface ActiveRegionExtension { +public @interface ActiveRegionTraversalParameters { + /** + * How far to either side of the active region itself should we include reads? + * + * That is, if the active region is 10 bp wide, and extension is 5, ART will provide + * the walker with active regions 10 bp, with 5 bp of extension on either side, and + * all reads that cover the 20 bp of the region + extension. + * + * @return the size of the active region extension we'd like + */ public int extension() default 0; + + /** + * The minimum number of bp for an active region, when we need to chop it up into pieces because + * it's become too big. This only comes into effect when there's literally no good place to chop + * that does make the region smaller than this value. 
+ * + * @return the min size in bp of regions + */ + public int minRegion() default 50; + + /** + * The maximum size in bp of active regions wanted by this walker + * + * Active regions larger than this value are automatically cut up by ART into smaller + * regions of size <= this value. + * + * @return the max size in bp of regions + */ public int maxRegion() default 1500; /** - * The sigma value for the Gaussian kernel of the band pass filter - * @return + * The standard deviation (sigma) of the Gaussian kernel of the band pass filter employed by ART + * @return the breadth of the band pass gaussian kernel we want for our traversal */ public double bandPassSigma() default BandPassActivityProfile.DEFAULT_SIGMA; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java index 92504e3ba..e14e50b1a 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java @@ -55,7 +55,7 @@ import java.util.*; @By(DataSource.READS) @Requires({DataSource.READS, DataSource.REFERENCE}) @PartitionBy(PartitionType.READ) -@ActiveRegionExtension(extension=50,maxRegion=1500) +@ActiveRegionTraversalParameters(extension=50,maxRegion=1500) @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, MappingQualityUnavailableFilter.class}) @RemoveProgramRecords public abstract class ActiveRegionWalker extends Walker { @@ -160,7 +160,7 @@ public abstract class ActiveRegionWalker extends Walker allIntervals = new ArrayList(); for( final GenomeLoc interval : intervals.toList() ) { final int start = Math.max( 1, interval.getStart() - activeRegionExtension ); diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java 
b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java index 16cb2fd84..f265f9d60 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java @@ -41,7 +41,7 @@ import java.util.*; */ public class ActivityProfile { private final static int MAX_PROB_PROPOGATION_DISTANCE = 50; - private final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author + protected final static double ACTIVE_PROB_THRESHOLD = 0.002; // TODO: needs to be set-able by the walker author protected final List stateList; protected final GenomeLocParser parser; @@ -197,7 +197,6 @@ public class ActivityProfile { regionStopLoc = loc; contigLength = parser.getContigInfo(regionStartLoc.getContig()).getSequenceLength(); } else { - // TODO -- need to figure out where to add loc as the regions will be popping off the front if ( regionStopLoc.getStart() != loc.getStart() - 1 ) throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc ); regionStopLoc = loc; @@ -294,6 +293,7 @@ public class ActivityProfile { * No returned region will be larger than maxRegionSize. * * @param activeRegionExtension the extension value to provide to the constructed regions + * @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large * @param maxRegionSize the maximize size of the returned region * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the * stateList. 
Used to close out the active region when we've hit some kind of end (such @@ -301,14 +301,15 @@ public class ActivityProfile { * @return a non-null list of active regions */ @Ensures("result != null") - public List popReadyActiveRegions(final int activeRegionExtension, final int maxRegionSize, final boolean forceConversion) { + public List popReadyActiveRegions(final int activeRegionExtension, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) { if ( activeRegionExtension < 0 ) throw new IllegalArgumentException("activeRegionExtension must be >= 0 but got " + activeRegionExtension); + if ( minRegionSize < 1 ) throw new IllegalArgumentException("minRegionSize must be >= 1 but got " + minRegionSize); if ( maxRegionSize < 1 ) throw new IllegalArgumentException("maxRegionSize must be >= 1 but got " + maxRegionSize); final LinkedList regions = new LinkedList(); while ( true ) { - final ActiveRegion nextRegion = popNextReadyActiveRegion(activeRegionExtension, maxRegionSize, forceConversion); + final ActiveRegion nextRegion = popNextReadyActiveRegion(activeRegionExtension, minRegionSize, maxRegionSize, forceConversion); if ( nextRegion == null ) return regions; else { @@ -325,19 +326,20 @@ public class ActivityProfile { * are also updated. * * @param activeRegionExtension the extension value to provide to the constructed regions + * @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large * @param maxRegionSize the maximize size of the returned region * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the * stateList. 
Used to close out the active region when we've hit some kind of end (such * as the end of the contig) * @return a fully formed active region, or null if none can be made */ - private ActiveRegion popNextReadyActiveRegion(final int activeRegionExtension, final int maxRegionSize, final boolean forceConversion) { + private ActiveRegion popNextReadyActiveRegion(final int activeRegionExtension, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) { if ( stateList.isEmpty() ) return null; final ActivityProfileState first = stateList.get(0); final boolean isActiveRegion = first.isActiveProb > ACTIVE_PROB_THRESHOLD; - final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, maxRegionSize, forceConversion); + final int offsetOfNextRegionEnd = findEndOfRegion(isActiveRegion, minRegionSize, maxRegionSize, forceConversion); if ( offsetOfNextRegionEnd == -1 ) // couldn't find a valid ending offset, so we return null return null; @@ -363,9 +365,14 @@ public class ActivityProfile { * The current region is defined from the start of the stateList, looking for elements that have the same isActiveRegion * flag (i.e., if isActiveRegion is true we are looking for states with isActiveProb > threshold, or alternatively * for states < threshold). The maximize size of the returned region is maxRegionSize. If forceConversion is - * true, then we'll return the region end even if this isn't safely beyond the max prob propogation distance. + * true, then we'll return the region end even if this isn't safely beyond the max prob propagation distance. + * + * Note that if isActiveRegion is true, and we can construct a active region > maxRegionSize in bp, we + * find the further local minimum within that max region, and cut the region there, under the constraint + * that the resulting region must be at least minRegionSize in bp. * * @param isActiveRegion is the region we're looking for an active region or inactive region? 
+ * @param minRegionSize the minimum region size, in the case where we have to cut up regions that are too large * @param maxRegionSize the maximize size of the returned region * @param forceConversion if true, we'll return a region whose end isn't sufficiently far from the end of the * stateList. Used to close out the active region when we've hit some kind of end (such @@ -376,16 +383,65 @@ public class ActivityProfile { "result >= -1", "result == -1 || result < maxRegionSize", "! (result == -1 && forceConversion)"}) - private int findEndOfRegion(final boolean isActiveRegion, final int maxRegionSize, final boolean forceConversion) { - int i = 0; - while ( i < stateList.size() && i < maxRegionSize ) { - if ( stateList.get(i).isActiveProb > ACTIVE_PROB_THRESHOLD != isActiveRegion ) { + private int findEndOfRegion(final boolean isActiveRegion, final int minRegionSize, final int maxRegionSize, final boolean forceConversion) { + final int nStates = stateList.size(); + int endOfActiveRegion = 0; + while ( endOfActiveRegion < nStates && endOfActiveRegion < maxRegionSize ) { + if ( getProb(endOfActiveRegion) > ACTIVE_PROB_THRESHOLD != isActiveRegion ) { break; } - i++; + endOfActiveRegion++; + } + + if ( isActiveRegion && endOfActiveRegion == maxRegionSize ) { + // we've run to the end of the region, let's find a good place to cut + int minI = endOfActiveRegion - 1; + double minP = Double.MAX_VALUE; + for ( int i = minI; i >= minRegionSize - 1; i-- ) { + double cur = getProb(i); + if ( cur < minP && isMinimum(i) ) { + minP = cur; + minI = i; + } + } + + endOfActiveRegion = minI + 1; } // we're one past the end, so i must be decremented - return forceConversion || i + getMaxProbPropagationDistance() < stateList.size() ? i - 1 : -1; + return forceConversion || endOfActiveRegion + getMaxProbPropagationDistance() < stateList.size() ? 
endOfActiveRegion - 1 : -1; + } + + /** + * Helper function to get the probability of the state at offset index + * @param index a valid offset into the state list + * @return the isActiveProb of the state at index + */ + @Requires({"index >= 0", "index < stateList.size()"}) + private double getProb(final int index) { + return stateList.get(index).isActiveProb; + } + + /** + * Is the probability at index in a local minimum? + * + * Checks that the probability at index is <= the probability of the next state and strictly < that of the previous state. + * Returns false if index is at the end or the start of the state list. + * + * @param index the index of the state we want to test + * @return true if prob at state is a minimum, false otherwise + */ + @Requires({"index >= 0", "index < stateList.size()"}) + private boolean isMinimum(final int index) { + if ( index == stateList.size() - 1 ) + // we cannot be at a minimum if the current position is the last in the state list + return false; + else if ( index < 1 ) + // we cannot be at a minimum if the current position is the first in the state list + return false; + else { + final double indexP = getProb(index); + return indexP <= getProb(index+1) && indexP < getProb(index-1); + } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java index 272596be3..5bba7db17 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java +++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileState.java @@ -59,7 +59,10 @@ public class ActivityProfileState { /** * Create a new ActivityProfileState at loc with probability of being active of isActiveProb that maintains some - * information about the result state and value (TODO RYAN -- what do these mean?) 
+ * information about the result state and value + * + * The only state value in use is HIGH_QUALITY_SOFT_CLIPS, and here the value is interpreted as the number + * of bp affected by the soft clips. * * @param loc the position of the result profile (for debugging purposes) * @param isActiveProb the probability of being active (between 0 and 1) diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java index bce1722cd..f6246b137 100644 --- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java @@ -33,6 +33,7 @@ import net.sf.picard.reference.ReferenceSequenceFile; import org.broadinstitute.sting.BaseTest; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.testng.Assert; @@ -46,6 +47,7 @@ import java.util.*; public class ActivityProfileUnitTest extends BaseTest { + private final static boolean DEBUG = false; private GenomeLocParser genomeLocParser; private GenomeLoc startLoc; @@ -123,7 +125,7 @@ public class ActivityProfileUnitTest extends BaseTest { return BasicActivityProfileTestProvider.getTests(BasicActivityProfileTestProvider.class); } - @Test(dataProvider = "BasicActivityProfileTestProvider") + @Test(enabled = ! 
DEBUG, dataProvider = "BasicActivityProfileTestProvider") public void testBasicActivityProfile(BasicActivityProfileTestProvider cfg) { ActivityProfile profile = cfg.makeProfile(); @@ -226,7 +228,7 @@ public class ActivityProfileUnitTest extends BaseTest { } - @Test(enabled = true, dataProvider = "RegionCreationTests") + @Test(enabled = !DEBUG, dataProvider = "RegionCreationTests") public void testRegionCreation(final int start, final List probs, int maxRegionSize, final int nParts, final boolean forceConversion, final boolean waitUntilEnd) { final ActivityProfile profile = new ActivityProfile(genomeLocParser); Assert.assertNotNull(profile.toString()); @@ -242,13 +244,13 @@ public class ActivityProfileUnitTest extends BaseTest { Assert.assertNotNull(profile.toString()); if ( ! waitUntilEnd ) { - final List regions = profile.popReadyActiveRegions(0, maxRegionSize, false); + final List regions = profile.popReadyActiveRegions(0, 1, maxRegionSize, false); lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites); } } if ( waitUntilEnd || forceConversion ) { - final List regions = profile.popReadyActiveRegions(0, maxRegionSize, forceConversion); + final List regions = profile.popReadyActiveRegions(0, 1, maxRegionSize, forceConversion); lastRegion = assertGoodRegions(start, regions, maxRegionSize, lastRegion, probs, seenSites); } @@ -312,7 +314,7 @@ public class ActivityProfileUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "SoftClipsTest") + @Test(enabled = ! 
DEBUG, dataProvider = "SoftClipsTest") public void testSoftClips(final int start, int nPrecedingSites, final int softClipSize) { final ActivityProfile profile = new ActivityProfile(genomeLocParser); @@ -327,14 +329,15 @@ public class ActivityProfileUnitTest extends BaseTest { final GenomeLoc softClipLoc = genomeLocParser.createGenomeLoc(contig, nPrecedingSites + start); profile.add(new ActivityProfileState(softClipLoc, 1.0, ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS, softClipSize)); + final int actualNumOfSoftClips = Math.min(softClipSize, profile.getMaxProbPropagationDistance()); if ( nPrecedingSites == 0 ) { - final int profileSize = Math.min(start + softClipSize, contigLength) - start + 1; + final int profileSize = Math.min(start + actualNumOfSoftClips, contigLength) - start + 1; Assert.assertEquals(profile.size(), profileSize, "Wrong number of states in the profile"); } for ( int i = 0; i < profile.size(); i++ ) { final ActivityProfileState state = profile.getStateList().get(i); - final boolean withinSCRange = state.getLoc().distance(softClipLoc) <= softClipSize; + final boolean withinSCRange = state.getLoc().distance(softClipLoc) <= actualNumOfSoftClips; if ( withinSCRange ) { Assert.assertTrue(state.isActiveProb > 0.0, "active prob should be changed within soft clip size"); } else { @@ -342,4 +345,157 @@ public class ActivityProfileUnitTest extends BaseTest { } } } + + // ------------------------------------------------------------------------------------- + // + // Tests to ensure we cut large active regions in the right place + // + // ------------------------------------------------------------------------------------- + + private void addProb(final List l, final double v) { + l.add(v); + } + + @DataProvider(name = "ActiveRegionCutTests") + public Object[][] makeActiveRegionCutTests() { + final List tests = new LinkedList(); + +// for ( final int activeRegionSize : Arrays.asList(30) ) { +// for ( final int minRegionSize : Arrays.asList(5) ) { + for 
( final int activeRegionSize : Arrays.asList(10, 12, 20, 30, 40) ) { + for ( final int minRegionSize : Arrays.asList(1, 5, 10) ) { + final int maxRegionSize = activeRegionSize * 2 / 3; + if ( minRegionSize >= maxRegionSize ) continue; + { // test flat activity profile + final List probs = Collections.nCopies(activeRegionSize, 1.0); + tests.add(new Object[]{minRegionSize, maxRegionSize, maxRegionSize, probs}); + } + + { // test point profile is properly handled + for ( int end = 1; end < activeRegionSize; end++ ) { + final List probs = Collections.nCopies(end, 1.0); + tests.add(new Object[]{minRegionSize, maxRegionSize, Math.min(end, maxRegionSize), probs}); + } + } + + { // test increasing activity profile + final List probs = new ArrayList(activeRegionSize); + for ( int i = 0; i < activeRegionSize; i++ ) { + addProb(probs, (1.0*(i+1))/ activeRegionSize); + } + tests.add(new Object[]{minRegionSize, maxRegionSize, maxRegionSize, probs}); + } + + { // test decreasing activity profile + final List probs = new ArrayList(activeRegionSize); + for ( int i = 0; i < activeRegionSize; i++ ) { + addProb(probs, 1 - (1.0*(i+1))/ activeRegionSize); + } + tests.add(new Object[]{minRegionSize, maxRegionSize, maxRegionSize, probs}); + } + + { // test two peaks +// for ( final double rootSigma : Arrays.asList(2.0) ) { +// int maxPeak1 = 9; { +// int maxPeak2 = 16; { + for ( final double rootSigma : Arrays.asList(1.0, 2.0, 3.0) ) { + for ( int maxPeak1 = 0; maxPeak1 < activeRegionSize / 2; maxPeak1++ ) { + for ( int maxPeak2 = activeRegionSize / 2 + 1; maxPeak2 < activeRegionSize; maxPeak2++ ) { + final double[] gauss1 = makeGaussian(maxPeak1, activeRegionSize, rootSigma); + final double[] gauss2 = makeGaussian(maxPeak2, activeRegionSize, rootSigma+1); + final List probs = new ArrayList(activeRegionSize); + for ( int i = 0; i < activeRegionSize; i++ ) { + addProb(probs, gauss1[i] + gauss2[i]); + } + final int cutSite = findCutSiteForTwoMaxPeaks(probs, minRegionSize); + if ( cutSite 
!= -1 && cutSite < maxRegionSize ) + tests.add(new Object[]{minRegionSize, maxRegionSize, Math.max(cutSite, minRegionSize), probs}); + } + } + } + } + + { // test that the lowest of two minima is taken + // looks like a bunch of 1s, 0.5, some 1.0s, 0.75, some more 1s +// int firstMin = 0; { +// int secondMin = 4; { + for ( int firstMin = 1; firstMin < activeRegionSize; firstMin++ ) { + for ( int secondMin = firstMin + 1; secondMin < activeRegionSize; secondMin++ ) { + final List probs = new ArrayList(Collections.nCopies(activeRegionSize, 1.0)); + probs.set(firstMin, 0.5); + probs.set(secondMin, 0.75); + final int expectedCut; + if ( firstMin + 1 < minRegionSize ) { + if ( firstMin == secondMin - 1 ) // edge case for non-min at minRegionSize + expectedCut = maxRegionSize; + else + expectedCut = secondMin + 1 > maxRegionSize ? maxRegionSize : ( secondMin + 1 < minRegionSize ? maxRegionSize : secondMin + 1); + } else if ( firstMin + 1 > maxRegionSize ) + expectedCut = maxRegionSize; + else { + expectedCut = firstMin + 1; + } + + Math.min(firstMin + 1, maxRegionSize); + tests.add(new Object[]{minRegionSize, maxRegionSize, expectedCut, probs}); + } + } + } + } + } + + return tests.toArray(new Object[][]{}); + } + + private double[] makeGaussian(final int mean, final int range, final double sigma) { + final double[] gauss = new double[range]; + for( int iii = 0; iii < range; iii++ ) { + gauss[iii] = MathUtils.NormalDistribution(mean, sigma, iii) + ActivityProfile.ACTIVE_PROB_THRESHOLD; + } + return gauss; + } + + private int findCutSiteForTwoMaxPeaks(final List probs, final int minRegionSize) { + for ( int i = probs.size() - 2; i > minRegionSize; i-- ) { + double prev = probs.get(i - 1); + double next = probs.get(i + 1); + double cur = probs.get(i); + if ( cur < next && cur < prev ) + return i + 1; + } + + return -1; + } + + +// private int findCutSite(final List probs) { +// for ( int i = probs.size() - 2; i > 0; i-- ) { +// double prev = probs.get(i + 1); +// double 
next = probs.get(i-1); +// double cur = probs.get(i); +// if ( cur < next && cur < prev ) +// return i + 1; +// } +// +// return -1; +// } + + @Test(dataProvider = "ActiveRegionCutTests") + public void testActiveRegionCutTests(final int minRegionSize, final int maxRegionSize, final int expectedRegionSize, final List probs) { + final ActivityProfile profile = new ActivityProfile(genomeLocParser); + + final String contig = genomeLocParser.getContigs().getSequences().get(0).getSequenceName(); + for ( int i = 0; i <= maxRegionSize + profile.getMaxProbPropagationDistance(); i++ ) { + final GenomeLoc loc = genomeLocParser.createGenomeLoc(contig, i + 1); + final double prob = i < probs.size() ? probs.get(i) : 0.0; + final ActivityProfileState state = new ActivityProfileState(loc, prob); + profile.add(state); + } + + final List regions = profile.popReadyActiveRegions(0, minRegionSize, maxRegionSize, false); + Assert.assertTrue(regions.size() >= 1, "Should only be one regions for this test"); + final ActiveRegion region = regions.get(0); + Assert.assertEquals(region.getLocation().getStart(), 1, "Region should start at 1"); + Assert.assertEquals(region.getLocation().size(), expectedRegionSize, "Incorrect region size; cut must have been incorrect"); + } } \ No newline at end of file From c7a29b1d390d690899198526c0205411a91c54d7 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 24 Jan 2013 13:47:16 -0500 Subject: [PATCH 42/46] Fixed NPE in ActiveRegionUnitTest by allowing null supporting states in ActiveRegion --- .../sting/utils/activeregion/ActiveRegion.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java index 66485c8cf..575e94b99 100644 --- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java @@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; import java.util.ArrayList; +import java.util.Collections; import java.util.List; /** @@ -54,8 +55,12 @@ public class ActiveRegion implements HasGenomeLocation { public final boolean isActive; public ActiveRegion( final GenomeLoc activeRegionLoc, final List supportingStates, final boolean isActive, final GenomeLocParser genomeLocParser, final int extension ) { + if ( activeRegionLoc == null ) throw new IllegalArgumentException("activeRegionLoc cannot be null"); + if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null"); + if ( extension < 0 ) throw new IllegalArgumentException("extension cannot be < 0 but got " + extension); + this.activeRegionLoc = activeRegionLoc; - this.supportingStates = new ArrayList(supportingStates); + this.supportingStates = supportingStates == null ? Collections.emptyList() : new ArrayList(supportingStates); this.isActive = isActive; this.genomeLocParser = genomeLocParser; this.extension = extension; From 6dd0e1ddd6afb559d892d211fdca9acfa3909201 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 25 Jan 2013 09:42:04 -0500 Subject: [PATCH 43/46] Pulled out the --regenotype functionality from SelectVariants into its own tool: RegenotypeVariants. This allows us to move SelectVariants into the public suite of tools now. 
--- .../variantutils/RegenotypeVariants.java | 185 ++++++++++++++++++ .../RegenotypeVariantsIntegrationTest.java | 69 +++++++ .../SelectVariantsIntegrationTest.java | 26 --- .../walkers/variantutils/SelectVariants.java | 44 +---- 4 files changed, 255 insertions(+), 69 deletions(-) create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java create mode 100644 protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariantsIntegrationTest.java rename {protected => public}/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java (93%) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java new file mode 100644 index 000000000..c8fc27e6a --- /dev/null +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariants.java @@ -0,0 +1,185 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. 
DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.commandline.*; +import org.broadinstitute.sting.gatk.CommandLineGATK; +import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.RodWalker; +import org.broadinstitute.sting.gatk.walkers.TreeReducible; +import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper; +import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; +import org.broadinstitute.sting.utils.SampleUtils; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.variant.GATKVCFUtils; +import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; +import org.broadinstitute.variant.variantcontext.*; +import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter; +import org.broadinstitute.variant.vcf.*; + +import java.util.*; + +/** + * Regenotypes the variants from a VCF. VCF records must contain PLs or GLs. + * + *

<p>
+ * This tool triggers re-genotyping of the samples through the Exact Allele Frequency calculation model. Note that this is truly the
+ * mathematically correct way to select samples from a larger set (especially when calls were generated from low coverage sequencing data);
+ * using the hard genotypes to select (i.e. the default mode of SelectVariants) can lead to false positives when errors are confused for
+ * variants in the original genotyping. This functionality used to comprise the --regenotype option in SelectVariants but we pulled it out
+ * into its own tool for technical purposes.
+ * <h2>Input</h2>
+ * <p>
+ * A variant set to regenotype.
+ * </p>
+ *
+ * <h2>Output</h2>
+ * <p>
+ * A re-genotyped VCF.
+ * </p>
+ *
+ * <h2>Examples</h2>
+ * <pre>
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ *   -R ref.fasta \
+ *   -T RegenotypeVariants \
+ *   --variant input.vcf \
+ *   -o output.vcf
+ * </pre>
+ * + */ +@DocumentedGATKFeature( groupName = "Variant Evaluation and Manipulation Tools", extraDocs = {CommandLineGATK.class} ) +public class RegenotypeVariants extends RodWalker implements TreeReducible { + + @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection(); + + @Output(doc="File to which variants should be written",required=true) + protected VariantContextWriter vcfWriter = null; + + private UnifiedGenotyperEngine UG_engine = null; + + public void initialize() { + final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); + UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; + UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; + UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; + + String trackName = variantCollection.variants.getName(); + Set samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)); + UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); + + final Set hInfo = new HashSet(); + hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName))); + hInfo.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null)); + + vcfWriter.writeHeader(new VCFHeader(hInfo, samples)); + } + + /** + * Regenotype every record at this site that is polymorphic in the samples and has genotype likelihoods, then write all records (regenotyped or not) to the output + * + * @param tracker the ROD tracker + * @param ref reference information + * @param context alignment info + * @return 1 if at least one record was found at this site, 0 if the tracker is null or holds no records here + */ + @Override + public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { + if ( tracker == null ) + return 0; + + Collection vcs = tracker.getValues(variantCollection.variants, context.getLocation()); + + if ( vcs == null || vcs.size() == 0) 
{ + return 0; + } + + for (VariantContext vc : vcs) { + + if ( vc.isPolymorphicInSamples() && hasPLs(vc) ) { + synchronized (UG_engine) { + final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(vc)).filters(vc.getFiltersMaybeNull()); + VariantContextUtils.calculateChromosomeCounts(builder, false); + vc = builder.make(); + } + } + + vcfWriter.add(vc); + } + + return 1; + } + + private boolean hasPLs(final VariantContext vc) { + for ( Genotype g : vc.getGenotypes() ) { + if ( g.hasLikelihoods() ) + return true; + } + return false; + } + + @Override + public Integer reduceInit() { return 0; } + + @Override + public Integer reduce(Integer value, Integer sum) { return value + sum; } + + @Override + public Integer treeReduce(Integer lhs, Integer rhs) { + return lhs + rhs; + } + + public void onTraversalDone(Integer result) { + logger.info(result + " records processed."); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariantsIntegrationTest.java new file mode 100644 index 000000000..2fe50ff68 --- /dev/null +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/RegenotypeVariantsIntegrationTest.java @@ -0,0 +1,69 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.variantutils; + +import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; +import org.testng.annotations.Test; + +import java.util.Arrays; + +public class RegenotypeVariantsIntegrationTest extends WalkerTest { + + @Test + public void testRegenotype() { + String testFile = privateTestDir + "combine.3.NA12892.vcf"; + + WalkerTestSpec spec = new WalkerTestSpec( + "-T RegenotypeVariants -R " + b36KGReference + " --variant " + testFile + " -o %s --no_cmdline_in_header", + 1, + Arrays.asList("46ff472fc7ef6734ad01170028d5924a") + ); + + executeTest("testRegenotype--" + testFile, spec); + } +} diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index aeb8d9318..c97f0bf02 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -229,19 +229,6 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testUsingDbsnpName--" 
+ testFile, spec); } - @Test - public void testRegenotype() { - String testFile = privateTestDir + "combine.3.vcf"; - - WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", - 1, - Arrays.asList("46ff472fc7ef6734ad01170028d5924a") - ); - - executeTest("testRegenotype--" + testFile, spec); - } - @Test public void testRemoveMLE() { String testFile = privateTestDir + "vcfexample.withMLE.vcf"; @@ -255,19 +242,6 @@ public class SelectVariantsIntegrationTest extends WalkerTest { executeTest("testRemoveMLE--" + testFile, spec); } - @Test - public void testRemoveMLEAndRegenotype() { - String testFile = privateTestDir + "vcfexample.withMLE.vcf"; - - WalkerTestSpec spec = new WalkerTestSpec( - "-T SelectVariants -R " + b36KGReference + " -regenotype -sn NA12892 --variant " + testFile + " -o %s --no_cmdline_in_header", - 1, - Arrays.asList("46ff472fc7ef6734ad01170028d5924a") - ); - - executeTest("testRemoveMLEAndRegenotype--" + testFile, spec); - } - @Test public void testMultipleRecordsAtOnePosition() { String testFile = privateTestDir + "selectVariants.onePosition.vcf"; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java similarity index 93% rename from protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java rename to public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java index ebad1e2ed..f79946a2e 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java @@ -56,15 +56,9 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.RodWalker; import 
org.broadinstitute.sting.gatk.walkers.TreeReducible; import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCountConstants; -import org.broadinstitute.sting.gatk.walkers.annotator.ChromosomeCounts; -import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyper; -import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.utils.MendelianViolation; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.Utils; -import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.variant.vcf.*; import org.broadinstitute.sting.utils.exceptions.UserException; @@ -260,17 +254,6 @@ public class SelectVariants extends RodWalker implements TreeR @Argument(fullName="excludeFiltered", shortName="ef", doc="Don't include filtered loci in the analysis", required=false) protected boolean EXCLUDE_FILTERED = false; - /** - * This argument triggers re-genotyping of the selected samples through the Exact calculation model. Note that this is truly the - * mathematically correct way to select samples (especially when calls were generated from low coverage sequencing data); using the - * hard genotypes to select (i.e. the default mode of SelectVariants) can lead to false positives when errors are confused for variants - * in the original genotyping. We decided not to set the --regenotype option as the default though as the output can be unexpected if - * a user is strictly comparing against the original genotypes (GTs) in the file. 
- */ - @Argument(fullName="regenotype", shortName="regenotype", doc="re-genotype the selected samples based on their GLs (or PLs)", required=false) - protected Boolean REGENOTYPE = false; - private UnifiedGenotyperEngine UG_engine = null; - /** * When this argument is used, we can choose to include only multiallelic or biallelic sites, depending on how many alleles are listed in the ALT column of a vcf. * For example, a multiallelic record such as: @@ -471,15 +454,6 @@ public class SelectVariants extends RodWalker implements TreeR SELECT_RANDOM_FRACTION = fractionRandom > 0; if (SELECT_RANDOM_FRACTION) logger.info("Selecting approximately " + 100.0*fractionRandom + "% of the variants at random from the variant track"); - if ( REGENOTYPE ) { - final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection(); - UAC.GLmodel = GenotypeLikelihoodsCalculationModel.Model.BOTH; - UAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES; - UAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; - UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); - headerLines.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null)); - } - /** load in the IDs file to a hashset for matching */ if ( rsIDFile != null ) { IDsToKeep = new HashSet(); @@ -557,14 +531,6 @@ public class SelectVariants extends RodWalker implements TreeR VariantContext sub = subsetRecord(vc, EXCLUDE_NON_VARIANTS); - if ( REGENOTYPE && sub.isPolymorphicInSamples() && hasPLs(sub) ) { - synchronized (UG_engine) { - final VariantContextBuilder builder = new VariantContextBuilder(UG_engine.calculateGenotypes(sub)).filters(sub.getFiltersMaybeNull()); - addAnnotations(builder, sub); - sub = builder.make(); - } - } - if ( (!EXCLUDE_NON_VARIANTS || sub.isPolymorphicInSamples()) && (!EXCLUDE_FILTERED || !sub.isFiltered()) ) { boolean failedJexlMatch = false; for ( VariantContextUtils.JexlVCMatchExp 
jexl : jexls ) { @@ -596,19 +562,11 @@ public class SelectVariants extends RodWalker implements TreeR return false; } - private boolean hasPLs(final VariantContext vc) { - for ( Genotype g : vc.getGenotypes() ) { - if ( g.hasLikelihoods() ) - return true; - } - return false; - } - /** * Checks if vc has a variant call for (at least one of) the samples. * @param vc the variant rod VariantContext. Here, the variant is the dataset you're looking for discordances to (e.g. HapMap) * @param compVCs the comparison VariantContext (discordance - * @return + * @return true if is discordant */ private boolean isDiscordant (VariantContext vc, Collection compVCs) { if (vc == null) From f7b80116d6ea5c17d134a17972385709d9237ec2 Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Fri, 25 Jan 2013 10:52:02 -0500 Subject: [PATCH 44/46] Don't let users play with the different exact model implementations. --- .../StandardCallerArgumentCollection.java | 14 +++++++------- .../walkers/genotyper/afcalc/AFCalcFactory.java | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java index bb31b410e..3a1532bb1 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/arguments/StandardCallerArgumentCollection.java @@ -109,13 +109,6 @@ public class StandardCallerArgumentCollection { @Argument(fullName = "max_alternate_alleles", shortName = "maxAltAlleles", doc = "Maximum number of alternate alleles to genotype", required = false) public int MAX_ALTERNATE_ALLELES = 6; - /** - * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. 
- */ - @Advanced - @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false) - public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel(); - /** * If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads. * Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we @@ -125,6 +118,13 @@ public class StandardCallerArgumentCollection { public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION; public static final double DEFAULT_CONTAMINATION_FRACTION = 0.05; + /** + * Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus. + */ + @Hidden + @Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false) + public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel(); + @Hidden @Argument(fullName = "logRemovedReadsFromContaminationFiltering", shortName="contaminationLog", required=false) public PrintStream contaminationLog = null; diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java index 131c1284f..d4bb3cab3 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcFactory.java @@ -70,7 +70,7 @@ public class AFCalcFactory { * the needs of the request (i.e., considering ploidy). */ public enum Calculation { - /** expt. 
implementation -- for testing only */ + /** default implementation */ EXACT_INDEPENDENT(IndependentAllelesDiploidExactAFCalc.class, 2, -1), /** reference implementation of multi-allelic EXACT model. Extremely slow for many alternate alleles */ From 008b6175770074289bb27dd7cb50d5bdca8e3a98 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 25 Jan 2013 10:13:12 -0500 Subject: [PATCH 45/46] Cleanup the getLIBS function in LocusIterator -- Now throws an UnsupportedOperationException in the base class. Only LocusView implements this function and actually returns the LIBS --- .../gatk/datasources/providers/LocusView.java | 8 +++++++- .../sting/utils/locusiterator/LocusIterator.java | 15 ++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java index f77819426..3ff6e34fb 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java +++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java @@ -214,7 +214,13 @@ public abstract class LocusView extends LocusIterator implements View { return locus.containsP(location); } - // TODO -- remove me + /** + * {@inheritDoc} + * + * Since this class has an actual LIBS, this function will never throw an exception + * + * @return the LocusIteratorByState used by this view to get pileups + */ @Override public LocusIteratorByState getLIBS() { return loci.getLIBS(); diff --git a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java index fc114b4f0..1243b2893 100644 --- a/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java +++ b/public/java/src/org/broadinstitute/sting/utils/locusiterator/LocusIterator.java @@ -34,11 +34,6 @@ import
java.util.Iterator; * Iterator that traverses a SAM File, accumulating information on a per-locus basis */ public abstract class LocusIterator implements Iterable, CloseableIterator { - // ----------------------------------------------------------------------------------------------------------------- - // - // constructors and other basic operations - // - // ----------------------------------------------------------------------------------------------------------------- public Iterator iterator() { return this; } @@ -50,9 +45,15 @@ public abstract class LocusIterator implements Iterable, Close public abstract boolean hasNext(); public abstract AlignmentContext next(); - // TODO -- remove me when ART testing is done + /** + * Get, if possible, the underlying LocusIteratorByState from this LocusIterator. + * + * @throws UnsupportedOperationException if we don't support this operation + * + * @return a non-null locus iterator by state + */ public LocusIteratorByState getLIBS() { - return null; + throw new UnsupportedOperationException("This locus iterator does not support getting the underlying LocusIteratorByState"); } public void remove() { From 3f95f39be379e20019e360c6076ee4da3342848e Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Fri, 25 Jan 2013 11:06:47 -0500 Subject: [PATCH 46/46] Updating HC md5s for new cutting algorithm and default band pass filter parameters --- .../HaplotypeCallerIntegrationTest.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 41f9ab680..97b9ce746 100644 --- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ 
b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -68,12 +68,12 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSample() { - HCTest(CEUTRIO_BAM, "", "c142bc73447c72286ca48f4a4966d9b6"); + HCTest(CEUTRIO_BAM, "", "11290b619bc79b629cf29b8f428254ce"); } @Test public void testHaplotypeCallerSingleSample() { - HCTest(NA12878_BAM, "", "d172eb9447015ea50220c6947be145ea"); + HCTest(NA12878_BAM, "", "897abb2b4f98e9e460f373f9e0db5033"); } @Test(enabled = false) @@ -84,7 +84,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGA() { HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", - "2c56ffc3b7fbbf154ae9ca355780a78f"); + "efc2cae94069a1d6ee5fdcc7afeaa0ed"); } private void HCTestComplexGGA(String bam, String args, String md5) { @@ -96,13 +96,13 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleGGAComplex() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538", - "66bd513d25b691a5b0c5084924b4a308"); + "01f42c311fc3ce4f07ef86f8c01facfb"); } @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "d0fcbfa2ccce0ca4a2e81f31dc43d79d"); + "4c117c84d1abeade1dee3f7b52a4a585"); } private void HCTestComplexVariants(String bam, String args, String md5) { @@ -113,7 +113,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerMultiSampleComplex() { - HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "7e8a6ed62f866fc47c92af0e255ca180"); + HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", 
"939847eb7bbafc798916acffdb1b5697"); } private void HCTestSymbolicVariants(String bam, String args, String md5) { @@ -124,7 +124,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleSymbolic() { - HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "0761ff5cbf279be467833fa6708bf360"); + HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "25806874242973f00fb6f2a320ed4d9c"); } private void HCTestIndelQualityScores(String bam, String args, String md5) { @@ -135,7 +135,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void testHaplotypeCallerSingleSampleIndelQualityScores() { - HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "602aabbbe710ac90b16e474c869e8a86"); + HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "c50b06d56cf3d0ef53e73a4973207949"); } // That problem bam came from a user on the forum and it spotted a problem where the ReadClipper @@ -146,14 +146,14 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { @Test public void HCTestProblematicReadsModifiedInActiveRegions() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c23b589be3072027ff2da93067dbf549")); + final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ae2470e294d99ff2b825281b84730c72")); executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec); } @Test public void HCTestStructuralIndels() { final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730"; - final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("a612fe84dd7f80c4ad2d20d27fc6744e")); + final WalkerTestSpec spec = new 
WalkerTestSpec(base, Arrays.asList("6f18ae64bf466476d780a083dcb5fc43")); executeTest("HCTestStructuralIndels: ", spec); } @@ -175,7 +175,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestReducedBam() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1, - Arrays.asList("0cb9132967fa9811e04f528be9f686dc")); + Arrays.asList("ecdb8e30ec5dd91efc179ab6732499f9")); executeTest("HC calling on a ReducedRead BAM", spec); }