diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/genotyping/GenotypeLikelihoodCalculator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/genotyping/GenotypeLikelihoodCalculator.java index ff824ac26..b07334f04 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/genotyping/GenotypeLikelihoodCalculator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/genotyping/GenotypeLikelihoodCalculator.java @@ -89,7 +89,6 @@ public class GenotypeLikelihoodCalculator { */ private final GenotypeAlleleCounts[] genotypeAlleleCounts; - /** * Number of genotypes given this calculator {@link #ploidy} and {@link #alleleCount}. */ diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/AlleleListUnitTester.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/AlleleListUnitTester.java new file mode 100644 index 000000000..2eefe64aa --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/AlleleListUnitTester.java @@ -0,0 +1,171 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. 
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. 
NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ +package org.broadinstitute.gatk.genotyping; + +import htsjdk.variant.variantcontext.Allele; +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.utils.RandomDNA; +import org.testng.Assert; +import org.testng.SkipException; + +import java.util.HashSet; +import java.util.List; +import java.util.Random; +import java.util.Set; + +/** + * Helper class for those unit-test classes that test implementations of {@link AlleleList}. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class AlleleListUnitTester { + + /** Random number source obtained from {@link GenomeAnalysisEngine#getRandomGenerator()}. */ private static final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + /** Random base generator constructed from {@link #rnd}; supplies the bases of the generated alleles. */ private static final RandomDNA rndDNA = new RandomDNA(rnd); + + /** + * Test that the contents of an allele-list are the ones expected. + *

+ *

+ * This method performs various consistency checks involving the {@link org.broadinstitute.gatk.genotyping.AlleleList} interface methods + * ({@code alleleCount}, {@code alleleAt} and {@code alleleIndex}). Therefore calling this method is equivalent to a thorough check of the {@link org.broadinstitute.gatk.genotyping.AlleleList} aspect of + * the {@code actual} argument. + *

+ * + * @param actual the allele-list to assess. + * @param expected the expected alleles, in the expected order. + * @throws IllegalArgumentException if {@code expected} is {@code null}, or contains {@code null}s, + * repeated alleles or the reserved {@code NEVER_USE_ALLELE}; any of these is an indication of a bug in the testing code. + * @throws RuntimeException if there is some testing assertion exception which + * is an indication of an actual bug in the code that is being tested. + * NOTE(review): TestNG assertion failures are normally raised as {@code AssertionError}, which is not a {@code RuntimeException} -- confirm and adjust this tag. + */ + public static void assertAlleleList(final AlleleList actual, final List expected) { + if (expected == null) + throw new IllegalArgumentException("the expected list cannot be null"); + final Set expectedAlleleSet = new HashSet<>(expected.size()); + Assert.assertNotNull(actual); + Assert.assertEquals(actual.alleleCount(), expected.size()); + for (int i = 0; i < expected.size(); i++) { + final A expectedAllele = expected.get(i); + if (expectedAllele == null) + throw new IllegalArgumentException("the expected sample cannot be null"); + if (expectedAllele.equals(NEVER_USE_ALLELE)) + throw new IllegalArgumentException("you cannot use the forbidden sample name"); + if (expectedAlleleSet.contains(expected.get(i))) + throw new IllegalArgumentException("repeated allele in the expected list, this is a test bug"); + final A actualAllele = actual.alleleAt(i); + Assert.assertNotNull(actualAllele, "allele cannot be null"); + Assert.assertFalse(expectedAlleleSet.contains(actualAllele), "repeated allele: " + actualAllele); + Assert.assertEquals(actualAllele, expectedAllele, "wrong allele order; index = " + i); + Assert.assertEquals(actual.alleleIndex(actualAllele), i, "allele index mismatch"); + expectedAlleleSet.add(actualAllele); + } + + /* the reserved allele was rejected from the expected list above, so the actual list must report no index (-1) for it */ Assert.assertEquals(actual.alleleIndex((A) NEVER_USE_ALLELE), -1); + } + + /** + * Safe to assume that this allele will never be used; it serves as a sentinel that {@code assertAlleleList} rejects in the expected list + * and expects to be absent (index -1) from the actual list. + */ + private static final Allele NEVER_USE_ALLELE = Allele.create(new String("ACTGACTGACTGACTGACTGACTGACTGACTGGTCAGTCAGTCAGTCAGTCAGTCA").getBytes(), false); + + /** + * Generate testing alleles. + * + *

+ * All alleles have random bases and a random length of at most the maximum allele length. + *

+ * + *

+ * So with a low max-allele-length and high allele-count you can force repeats. + *

+ * + * @param alleleCount number of alleles to generate. + * @param maxAlleleLength the maximum length of an allele in bases; each allele gets a random length between 1 and this value. + * + * @throws IllegalArgumentException if {@code maxAlleleLength} is less than 1. + * @throws NegativeArraySizeException if {@code alleleCount} is negative (raised when the result array is allocated). + * @return an array of {@code alleleCount} alleles, never {@code null}. + */ + public static Allele[] generateRandomAlleles(final int alleleCount, final int maxAlleleLength) { + if (maxAlleleLength < 1) + throw new IllegalArgumentException("the max allele length cannot be less than 1"); + final Allele[] result = new Allele[alleleCount]; + for (int i = 0; i < alleleCount; i++) { + /* nextInt(max) yields a value in [0, max), so adding 1 gives a length in [1, maxAlleleLength] */ final int alleleLength = rnd.nextInt(maxAlleleLength) + 1; + result[i] = Allele.create(rndDNA.nextBases(alleleLength)); + } + return result; + } + + /** + * Generate a testing allele-list. + * + *

+ * All alleles are random given the maximum allele length; when at least one allele is requested, the first one is re-created with {@code Allele.create(bases, true)}, i.e. flagged as reference. + *

+ * + *

+ * So with a low max-allele-length and high allele-count you can force repeats. + *

+ * + * @param alleleCount number of alleles to generate. + * @param maxAlleleLength the maximum length of the allele in bases. + * @param skipIfRepeats throw a test-skip exception {@link SkipException} if the resulting allele-list + * has repeats, thus its size is less than {@code alleleCount} + * + * @throws RuntimeException if {@code alleleCount} is negative or {@code maxAlleleLength} is less than 1. + * @throws SkipException if {@code skipIfRepeats} is {@code true} and the resulting allele-list has fewer alleles than requested. + * @return never {@code null}. + */ + static AlleleList alleleList(final int alleleCount, final int maxAlleleLength, final boolean skipIfRepeats) { + final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount,maxAlleleLength); + if (alleleCount > 0) + alleles[0] = Allele.create(alleles[0].getBases(),true); + final AlleleList alleleList = new IndexedAlleleList<>(alleles); + if (skipIfRepeats && alleleList.alleleCount() != alleles.length) + throw new SkipException("repeated alleles, should be infrequent"); + return alleleList; + } +} \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/AlleleListUtilsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/AlleleListUtilsUnitTest.java new file mode 100644 index 000000000..a7e2dce88 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/AlleleListUtilsUnitTest.java @@ -0,0 +1,226 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.genotyping; + +import htsjdk.variant.variantcontext.Allele; +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.testng.Assert; +import org.testng.SkipException; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +/** + * Test {@link org.broadinstitute.gatk.genotyping.AlleleListUtils}. 
+ * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class AlleleListUtilsUnitTest { + + /** Asserts that {@code AlleleListUtils.asList} on an {@code IndexedAlleleList} built from the input equals the input with repeats removed (first occurrence kept). */ @Test(dataProvider = "singleAlleleListData") + public void testAsList(final List alleles1) { + final Allele[] uniqueAlleles = new LinkedHashSet<>(alleles1).toArray(new Allele[0]); + final AlleleList alleleList = new IndexedAlleleList<>(alleles1); + final List asList = AlleleListUtils.asList(alleleList); + final Allele[] asListArray = asList.toArray(new Allele[asList.size()]); + Assert.assertTrue(Arrays.equals(uniqueAlleles,asListArray)); + } + + /** Asserts that {@code AlleleListUtils.indexOfReference} returns the position of the one allele re-created with the {@code true} flag, and -1 when no allele has been re-created that way. */ @Test(dataProvider = "singleAlleleListData") + public void testIndexOfReference(final List alleles1) { + final Allele[] uniqueAlleles = new LinkedHashSet<>(alleles1).toArray(new Allele[0]); + for (int i = 0; i < uniqueAlleles.length; i++) { + final Allele[] actualAlleles = uniqueAlleles.clone(); + actualAlleles[i] = Allele.create(actualAlleles[i].getBases(),true); + final AlleleList alleleList = new IndexedAlleleList<>(actualAlleles); + Assert.assertEquals(AlleleListUtils.indexOfReference(alleleList),i); + } + final AlleleList alleleList = new IndexedAlleleList<>(uniqueAlleles); + Assert.assertEquals(AlleleListUtils.indexOfReference(alleleList),-1); + } + + /** Asserts that {@code AlleleListUtils.equals} holds for a list against itself, gives the same answer in both argument orders, and agrees with array equality of the {@code asList} contents. */ @Test(dataProvider = "twoAlleleListData", dependsOnMethods={"testAsList"}) + public void testEquals(final List alleles1, final List alleles2) { + final AlleleList alleleList1 = new IndexedAlleleList(alleles1); + final AlleleList alleleList2 = new IndexedAlleleList(alleles2); + Assert.assertTrue(AlleleListUtils.equals(alleleList1,alleleList1)); + Assert.assertTrue(AlleleListUtils.equals(alleleList2,alleleList2)); + Assert.assertEquals(AlleleListUtils.equals(alleleList1, alleleList2), + Arrays.equals(AlleleListUtils.asList(alleleList1).toArray(new Allele[alleleList1.alleleCount()]), + AlleleListUtils.asList(alleleList2).toArray(new Allele[alleleList2.alleleCount()])) + ); + Assert.assertEquals(AlleleListUtils.equals(alleleList1,alleleList2), +
AlleleListUtils.equals(alleleList2,alleleList1)); + } + + /** Asserts that the permutation of an allele list onto itself is complete, non-permuted and maps every index to itself. */ @Test(dataProvider = "singleAlleleListData", dependsOnMethods= "testEquals" ) + public void testSelfPermutation(final List alleles1) { + final AlleleList originalAlleleList = new IndexedAlleleList<>(alleles1); + final AlleleListPermutation selfPermutation = AlleleListUtils.permutation(originalAlleleList,originalAlleleList); + Assert.assertEquals(selfPermutation.fromSize(),originalAlleleList.alleleCount()); + Assert.assertEquals(selfPermutation.toSize(),originalAlleleList.alleleCount()); + Assert.assertTrue(selfPermutation.isNonPermuted()); + Assert.assertFalse(selfPermutation.isPartial()); + for (int i = 0; i < originalAlleleList.alleleCount(); i++) { + Assert.assertEquals(selfPermutation.fromIndex(i), i); + Assert.assertEquals(selfPermutation.toIndex(i),i); + Assert.assertEquals(selfPermutation.fromList(),selfPermutation.toList()); + AlleleListUnitTester.assertAlleleList(originalAlleleList, selfPermutation.fromList()); + } + Assert.assertTrue(AlleleListUtils.equals(selfPermutation,originalAlleleList)); + } + + /** Random source used to pick subsets and shuffles; obtained from {@link GenomeAnalysisEngine#getRandomGenerator()}. */ private final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + + /** Asserts that the permutation from an allele list onto a randomly chosen strict subset is partial and that its index mappings agree with both lists; skipped when the random subset keeps every allele. */ @Test(dataProvider = "singleAlleleListData", dependsOnMethods = "testEquals") + public void testSubsetPermutation(final List alleles1) { + final List subsetAlleles = new ArrayList<>(alleles1.size()); + /* keep or drop each allele on a random coin flip */ for (final Allele allele : alleles1) + if (rnd.nextBoolean()) subsetAlleles.add(allele); + final AlleleList originalAlleleList = new IndexedAlleleList<>(alleles1); + final AlleleList targetAlleleList = new IndexedAlleleList<>(subsetAlleles); + final AlleleListPermutation subset = AlleleListUtils.permutation(originalAlleleList,targetAlleleList); + if (originalAlleleList.alleleCount() == targetAlleleList.alleleCount()) + throw new SkipException("no real subset"); + Assert.assertTrue(subset.isPartial()); + Assert.assertFalse(subset.isNonPermuted()); + Assert.assertEquals(subset.fromSize(),originalAlleleList.alleleCount()); +
Assert.assertEquals(subset.toSize(),targetAlleleList.alleleCount()); + AlleleListUnitTester.assertAlleleList(originalAlleleList,subset.fromList()); + AlleleListUnitTester.assertAlleleList(targetAlleleList,subset.toList()); + + for (int i = 0; i < targetAlleleList.alleleCount(); i++) + Assert.assertEquals(subset.fromIndex(i), originalAlleleList.alleleIndex(targetAlleleList.alleleAt(i))); + + for (int j = 0; j < originalAlleleList.alleleCount(); j++) { + final Allele allele = originalAlleleList.alleleAt(j); + Assert.assertEquals(subset.toIndex(j),targetAlleleList.alleleIndex(allele)); + } + + Assert.assertTrue(AlleleListUtils.equals(subset,targetAlleleList)); + } + + /** Asserts that the permutation from an allele list onto a shuffled copy of itself is complete, is reported as permuted, and maps indices as recorded while shuffling; skipped for lists with at most one allele. */ @Test(dataProvider = "singleAlleleListData", dependsOnMethods = {"testAsList","testEquals"}) + public void testShufflePermutation(final List alleles1) { + final AlleleList originalAlleleList = new IndexedAlleleList<>(alleles1); + if (originalAlleleList.alleleCount() <= 1) + throw new SkipException("non-shuffle allele-list"); + + final Allele[] targetAlleleArray = AlleleListUtils.asList(originalAlleleList).toArray(new Allele[originalAlleleList.alleleCount()]); + final int[] fromIndex = new int[targetAlleleArray.length]; + for (int i = 0; i < fromIndex.length; i++) + fromIndex[i] = i; + + /* swap position i with a randomly chosen strictly later position (never with itself), mirroring each swap in fromIndex so it records the original index of every target position */ for (int i = 0; i < targetAlleleArray.length - 1; i++) { + final int swapIndex = rnd.nextInt(targetAlleleArray.length - i - 1); + final int otherIndex = fromIndex[swapIndex + i + 1]; + final Allele other = targetAlleleArray[swapIndex + i + 1]; + fromIndex[swapIndex + i + 1] = fromIndex[i]; + fromIndex[i] = otherIndex; + targetAlleleArray[swapIndex + i + 1] = targetAlleleArray[i]; + targetAlleleArray[i] = other; + } + final AlleleList targetAlleleList = new IndexedAlleleList<>(targetAlleleArray); + + final AlleleListPermutation permutation = AlleleListUtils.permutation(originalAlleleList,targetAlleleList); + Assert.assertFalse(permutation.isNonPermuted()); +
AlleleListUnitTester.assertAlleleList(originalAlleleList,permutation.fromList()); + AlleleListUnitTester.assertAlleleList(targetAlleleList,permutation.toList()); + Assert.assertFalse(permutation.isPartial()); + Assert.assertEquals(permutation.fromSize(),originalAlleleList.alleleCount()); + Assert.assertEquals(permutation.toSize(),targetAlleleList.alleleCount()); + for (int i = 0; i < permutation.fromSize(); i++) { + Assert.assertEquals(permutation.toIndex(i),targetAlleleList.alleleIndex(originalAlleleList.alleleAt(i))); + Assert.assertEquals(permutation.fromIndex(i),originalAlleleList.alleleIndex(targetAlleleList.alleleAt(i))); + Assert.assertEquals(permutation.fromIndex(i),fromIndex[i]); + } + Assert.assertTrue(AlleleListUtils.equals(permutation,targetAlleleList)); + + } + + + /** Randomly generated allele lists shared by the data providers; filled in by {@link #setUp()}. */ private List[] alleleLists; + + /** Generates one random allele list per ({@code ALLELE_COUNT}, {@code MAX_ALLELE_LENGTH}) combination before the tests of this class run. */ @BeforeClass + public void setUp() { + alleleLists = new List[ALLELE_COUNT.length * MAX_ALLELE_LENGTH.length]; + int nextIndex = 0; + for (int i = 0; i < ALLELE_COUNT.length; i++) + for (int j = 0; j < MAX_ALLELE_LENGTH.length; j++) + alleleLists[nextIndex++] = Arrays.asList(AlleleListUnitTester.generateRandomAlleles(ALLELE_COUNT[i], MAX_ALLELE_LENGTH[j])); + } + + /** Allele counts to generate test lists for; includes the empty and single-allele cases. */ private static final int[] ALLELE_COUNT = { 0, 1, 5, 10, 20}; + + /** Maximum allele lengths, in bases, to generate test lists for. */ private static final int[] MAX_ALLELE_LENGTH = { 1, 2, 3, 10 }; + + /** Provides one test case per generated allele list. */ @DataProvider(name="singleAlleleListData") + public Object[][] singleAlleleListData() { + final Object[][] result = new Object[alleleLists.length][]; + for (int i = 0; i < alleleLists.length; i++) + result[i] = new Object[] { alleleLists[i]}; + return result; + } + + /** Provides one test case per ordered pair of generated allele lists, including each list paired with itself. */ @DataProvider(name="twoAlleleListData") + public Object[][] twoAlleleListData() { + final Object[][] result = new Object[alleleLists.length * alleleLists.length][]; + int index = 0; + for (int i = 0; i < alleleLists.length; i++) + for (int j = 0; j < alleleLists.length; j++) + result[index++] = new Object[] { alleleLists[i], alleleLists[j]}; + return result; + } + + + + + + + +} diff --git 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypeAlleleCountsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypeAlleleCountsUnitTest.java new file mode 100644 index 000000000..8506b96e9 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypeAlleleCountsUnitTest.java @@ -0,0 +1,328 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. 
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. 
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ +package org.broadinstitute.gatk.genotyping; + +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Test {@link GenotypeAlleleCounts} + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class GenotypeAlleleCountsUnitTest { + + @Test(dataProvider="ploidyData") + public void testFirst(final int ploidy) { + final GenotypeAlleleCounts subject = GenotypeAlleleCounts.first(ploidy); + Assert.assertNotNull(subject); + Assert.assertEquals(subject.ploidy(), ploidy); + Assert.assertEquals(subject.distinctAlleleCount(),1); + Assert.assertEquals(subject.alleleCountAt(0),ploidy); + Assert.assertEquals(subject.alleleCountFor(0),ploidy); + Assert.assertEquals(subject.alleleRankFor(0),0); + Assert.assertEquals(subject.alleleRankFor(1),-2); + Assert.assertTrue(subject.containsAllele(0)); + Assert.assertFalse(subject.containsAllele(1)); + Assert.assertEquals(subject.alleleIndexAt(0),0); + Assert.assertEquals(subject.maximumAlleleIndex(),0); + Assert.assertEquals(subject.minimumAlleleIndex(),0); + Assert.assertTrue(subject.compareTo(subject) == 0); + Assert.assertTrue(subject.equals(subject)); + Assert.assertEquals(subject.index(),0); + for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) { + final int[] expected = new int[maximumAlleleIndex + 1]; + expected[0] = ploidy; + Assert.assertEquals(subject.alleleCountsByIndex(maximumAlleleIndex),expected); + } + } + + @Test(dataProvider = "ploidyData",dependsOnMethods = "testFirst") + public void testNext(final int ploidy) { + if (ploidy == 0) + testNextZeroPloidy(); + else if (ploidy == 1) + testNextOnePloidy(); + else + testPloidyTwoOrMore(ploidy); + } + + @Test(dataProvider = "ploidyData",dependsOnMethods = "testNext") + public void testIncrease(final int ploidy) { + if (ploidy == 0) + testNextZeroPloidyIncrease(); + else if (ploidy == 1) + 
testNextOnePloidyIncrease(); + else + testPloidyTwoOrMoreIncrease(ploidy); + } + + private void testNextZeroPloidy() { + final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(0); + final GenotypeAlleleCounts next = first.next(); + Assert.assertEquals(first,next); + Assert.assertEquals(first.compareTo(next),0); + Assert.assertEquals(next.compareTo(first), 0); + Assert.assertEquals(next.distinctAlleleCount(),0); + Assert.assertEquals(next.ploidy(),0); + Assert.assertEquals(next.index(),0); + for (int maximumAlleleIndex = 0; maximumAlleleIndex <= 10; maximumAlleleIndex++) { + final int[] expected = new int[maximumAlleleIndex + 1]; + Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex),expected); + } + } + + private void testNextOnePloidy() { + final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(1); + GenotypeAlleleCounts current = first; + + while (!current.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) { + final GenotypeAlleleCounts next = current.next(); + Assert.assertEquals(next.minimumAlleleIndex(),next.maximumAlleleIndex()); + Assert.assertEquals(next.minimumAlleleIndex(),current.minimumAlleleIndex() + 1); + Assert.assertEquals(next.alleleCountAt(0),1); + Assert.assertEquals(next.alleleIndexAt(0),next.minimumAlleleIndex()); + Assert.assertEquals(next.alleleRankFor(next.minimumAlleleIndex()),0); + Assert.assertEquals(next.alleleRankFor(next.minimumAlleleIndex() + 1),-2); + Assert.assertEquals(next.alleleCountFor(next.minimumAlleleIndex()),1); + Assert.assertEquals(next.alleleCountFor(next.minimumAlleleIndex()+1),0); + Assert.assertEquals(next.ploidy(),1); + + Assert.assertTrue(next.compareTo(current) > 0); + Assert.assertTrue(current.compareTo(next) < 0); + Assert.assertTrue(next.compareTo(next) == 0); + Assert.assertTrue(next.equals(next)); + Assert.assertFalse(next.equals(current)); + Assert.assertFalse(current.equals(next)); + + Assert.assertEquals(next.index(), current.index() + 1); + 
Assert.assertEquals(next.ploidy(),current.ploidy()); + + for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) { + final int[] expected = new int[maximumAlleleIndex + 1]; + if (maximumAlleleIndex >= current.minimumAlleleIndex() + 1) expected[current.minimumAlleleIndex() + 1] = 1; + Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex),expected); + } + current = next; + } + } + + private void testPloidyTwoOrMore(final int ploidy) { + if (ploidy < 2) + throw new IllegalArgumentException(); + + GenotypeAlleleCounts current = GenotypeAlleleCounts.first(ploidy); + + while (!current.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) { + final GenotypeAlleleCounts next = current.next(); + if (current.distinctAlleleCount() == 1) { + Assert.assertEquals(next.maximumAlleleIndex(),current.maximumAlleleIndex() + 1); + Assert.assertEquals(next.distinctAlleleCount(), 2 ); + Assert.assertEquals(next.minimumAlleleIndex(), 0 ); + } else { + Assert.assertEquals(next.maximumAlleleIndex(),current.maximumAlleleIndex()); + Assert.assertEquals(next.minimumAlleleIndex(),current.alleleCountAt(0) > 1 ? 0 + : current.alleleCountAt(0) == 1 ? current.minimumAlleleIndex() + 1 : current.minimumAlleleIndex()); + } + + // Checking on 0's new count and current.minAllele + 1 alleles. + Assert.assertEquals(next.alleleCountFor(0),current.alleleCountFor(current.minimumAlleleIndex()) - 1); + Assert.assertEquals(next.alleleCountFor(current.minimumAlleleIndex() + 1), + current.alleleCountFor(current.minimumAlleleIndex() + 1) + 1); + + // Checks current.minAllele count + Assert.assertEquals(next.alleleCountFor(current.minimumAlleleIndex()), + current.minimumAlleleIndex() == 0 ? 
current.alleleCountAt(0) - 1 : 0); + + int totalCountSum = 0; + final int[] expectedAlleleCountsByIndex = new int[Math.max(MAXIMUM_ALLELE_INDEX,next.maximumAlleleIndex()) + 1]; + for (int i = 0; i < next.distinctAlleleCount(); i++) { + final int count = next.alleleCountAt(i); + final int index = next.alleleIndexAt(i); + expectedAlleleCountsByIndex[index] = count; + // Check consistency of alleleCountAt(x) and alleleCountFor(alleleIndexAt(x)) + Assert.assertEquals(next.alleleCountFor(index),count); + totalCountSum += count; + // Check on counts of, in theory, unaffected allele counts. + if (index > current.minimumAlleleIndex() + 1) + Assert.assertEquals(next.alleleCountFor(index),current.alleleCountFor(index)); + } + Assert.assertTrue(Arrays.equals(next.alleleCountsByIndex(Math.max(MAXIMUM_ALLELE_INDEX,next.maximumAlleleIndex())),expectedAlleleCountsByIndex)); + Assert.assertEquals(totalCountSum,ploidy); + + Assert.assertTrue(next.compareTo(current) > 0); + Assert.assertTrue(current.compareTo(next) < 0); + Assert.assertTrue(next.compareTo(next) == 0); + Assert.assertTrue(next.equals(next)); + Assert.assertFalse(next.equals(current)); + Assert.assertFalse(current.equals(next)); + Assert.assertEquals(next.index(),current.index() + 1); + Assert.assertEquals(next.ploidy(),ploidy); + current = next; + } + } + + private void testNextZeroPloidyIncrease() { + final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(0); + final GenotypeAlleleCounts next = first.clone(); + next.increase(); + Assert.assertEquals(first,next); + Assert.assertEquals(first.compareTo(next),0); + Assert.assertEquals(next.compareTo(first), 0); + Assert.assertEquals(next.distinctAlleleCount(),0); + Assert.assertEquals(next.ploidy(),0); + Assert.assertEquals(next.index(),0); + for (int maximumAlleleIndex = 0; maximumAlleleIndex <= 10; maximumAlleleIndex++) { + final int[] expected = new int[maximumAlleleIndex + 1]; + Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex),expected); + } + 
} + + private void testNextOnePloidyIncrease() { + final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(1); + GenotypeAlleleCounts next = first; + + while (!next.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) { + final GenotypeAlleleCounts current = next.clone(); + next.increase(); + Assert.assertEquals(next.minimumAlleleIndex(),next.maximumAlleleIndex()); + Assert.assertEquals(next.minimumAlleleIndex(),current.minimumAlleleIndex() + 1); + Assert.assertEquals(next.alleleCountAt(0),1); + Assert.assertEquals(next.alleleIndexAt(0),next.minimumAlleleIndex()); + Assert.assertEquals(next.alleleRankFor(next.minimumAlleleIndex()),0); + Assert.assertEquals(next.alleleRankFor(next.minimumAlleleIndex() + 1),-2); + Assert.assertEquals(next.alleleCountFor(next.minimumAlleleIndex()),1); + Assert.assertEquals(next.alleleCountFor(next.minimumAlleleIndex()+1),0); + Assert.assertEquals(next.ploidy(),1); + + Assert.assertTrue(next.compareTo(current) > 0); + Assert.assertTrue(current.compareTo(next) < 0); + Assert.assertTrue(next.compareTo(next) == 0); + Assert.assertTrue(next.equals(next)); + Assert.assertFalse(next.equals(current)); + Assert.assertFalse(current.equals(next)); + + Assert.assertEquals(next.index(), current.index() + 1); + Assert.assertEquals(next.ploidy(),current.ploidy()); + + for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) { + final int[] expected = new int[maximumAlleleIndex + 1]; + if (maximumAlleleIndex >= current.minimumAlleleIndex() + 1) expected[current.minimumAlleleIndex() + 1] = 1; + Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex),expected); + } + } + } + + private void testPloidyTwoOrMoreIncrease(final int ploidy) { + if (ploidy < 2) + throw new IllegalArgumentException(); + + GenotypeAlleleCounts next = GenotypeAlleleCounts.first(ploidy); + + while (!next.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) { + final GenotypeAlleleCounts current = next.clone(); + next.increase(); + if 
(current.distinctAlleleCount() == 1) { + Assert.assertEquals(next.maximumAlleleIndex(),current.maximumAlleleIndex() + 1); + Assert.assertEquals(next.distinctAlleleCount(), 2 ); + Assert.assertEquals(next.minimumAlleleIndex(), 0 ); + } else { + Assert.assertEquals(next.maximumAlleleIndex(),current.maximumAlleleIndex()); + Assert.assertEquals(next.minimumAlleleIndex(),current.alleleCountAt(0) > 1 ? 0 + : current.alleleCountAt(0) == 1 ? current.minimumAlleleIndex() + 1 : current.minimumAlleleIndex()); + } + + // Checking on 0's new count and current.minAllele + 1 alleles. + Assert.assertEquals(next.alleleCountFor(0),current.alleleCountFor(current.minimumAlleleIndex()) - 1); + Assert.assertEquals(next.alleleCountFor(current.minimumAlleleIndex() + 1), + current.alleleCountFor(current.minimumAlleleIndex() + 1) + 1); + + // Checks current.minAllele count + Assert.assertEquals(next.alleleCountFor(current.minimumAlleleIndex()), + current.minimumAlleleIndex() == 0 ? current.alleleCountAt(0) - 1 : 0); + + int totalCountSum = 0; + final int[] expectedAlleleCountsByIndex = new int[Math.max(MAXIMUM_ALLELE_INDEX,next.maximumAlleleIndex()) + 1]; + for (int i = 0; i < next.distinctAlleleCount(); i++) { + final int count = next.alleleCountAt(i); + final int index = next.alleleIndexAt(i); + expectedAlleleCountsByIndex[index] = count; + // Check consistency of alleleCountAt(x) and alleleCountFor(alleleIndexAt(x)) + Assert.assertEquals(next.alleleCountFor(index),count); + totalCountSum += count; + // Check on counts of, in theory, unaffected allele counts. 
+ if (index > current.minimumAlleleIndex() + 1) + Assert.assertEquals(next.alleleCountFor(index),current.alleleCountFor(index)); + } + Assert.assertTrue(Arrays.equals(next.alleleCountsByIndex(Math.max(MAXIMUM_ALLELE_INDEX,next.maximumAlleleIndex())),expectedAlleleCountsByIndex)); + Assert.assertEquals(totalCountSum,ploidy); + + Assert.assertTrue(next.compareTo(current) > 0); + Assert.assertTrue(current.compareTo(next) < 0); + Assert.assertTrue(next.compareTo(next) == 0); + Assert.assertTrue(next.equals(next)); + Assert.assertFalse(next.equals(current)); + Assert.assertFalse(current.equals(next)); + Assert.assertEquals(next.index(),current.index() + 1); + Assert.assertEquals(next.ploidy(),ploidy); + } + } + + private static final int MAXIMUM_ALLELE_INDEX = 10; + + private static final int[] PLOIDY = new int[] { 1, 2, 3, 7, 10}; + + @DataProvider(name="ploidyData") + public Object[][] ploidyData() { + final Object[][] result = new Object[PLOIDY.length][]; + for (int i = 0; i < PLOIDY.length; i++) + result[i] = new Object[] { PLOIDY[i ]}; + return result; + } +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypeLikelihoodCalculatorUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypeLikelihoodCalculatorUnitTest.java new file mode 100644 index 000000000..99f4c0422 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypeLikelihoodCalculatorUnitTest.java @@ -0,0 +1,172 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.genotyping; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.GenotypeLikelihoods; +import org.broadinstitute.gatk.utils.MathUtils; +import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.Arrays; + +/** + * Tests {@link GenotypeLikelihoodCalculators} and {@link GenotypeLikelihoodCalculator}. 
+ *
+ * @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
+ */
+public class GenotypeLikelihoodCalculatorUnitTest {
+
+    /**
+     * Checks the bookkeeping of the calculator obtained for a ploidy and allele count:
+     * the reported ploidy, allele count and genotype count, and then, for at most the first
+     * 30000 genotypes, that consecutive genotype allele counts are strictly increasing under
+     * {@code compareTo} and that both {@code allelesToIndex} and {@code alleleCountsToIndex}
+     * map a genotype back to the index it was retrieved from.
+     *
+     * @param ploidy the ploidy to request from {@link GenotypeLikelihoodCalculators}.
+     * @param alleleCount the allele count to request from {@link GenotypeLikelihoodCalculators}.
+     */
+    @Test(dataProvider = "ploidyAndMaximumAlleleData")
+    public void testPloidyAndMaximumAllele(final int ploidy, final int alleleCount) {
+        final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, alleleCount);
+        Assert.assertNotNull(calculator);
+        Assert.assertEquals(calculator.ploidy(),ploidy);
+        Assert.assertEquals(calculator.alleleCount(), alleleCount);
+        Assert.assertEquals(calculator.genotypeCount(),calculateGenotypeCount(ploidy, alleleCount)," ploidy = " + ploidy + " alleleCount = " + alleleCount);
+        final int genotypeCount = calculator.genotypeCount();
+        // Only the first 30000 genotypes are visited (presumably to bound the running time).
+        final int testGenotypeCount = Math.min(30000,genotypeCount);
+        for (int i = 0; i < testGenotypeCount; i++) {
+            final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(i);
+            Assert.assertNotNull(alleleCounts);
+            if (i > 0)
+                Assert.assertTrue(calculator.genotypeAlleleCountsAt(i - 1).compareTo(alleleCounts) < 0);
+            // Expand the per-allele counts into an array holding one allele index per copy.
+            final int[] alleleArray = new int[ploidy];
+            int index = 0;
+            for (int j = 0; j < alleleCounts.distinctAlleleCount(); j++) {
+                final int end = index + alleleCounts.alleleCountAt(j);
+                Arrays.fill(alleleArray, index, end, alleleCounts.alleleIndexAt(j));
+                index = end;
+            }
+            // Two slots per distinct allele; NOTE(review): presumably (allele index, count) pairs - confirm
+            // against GenotypeAlleleCounts.copyAlleleCounts.
+            final int[] alleleCountArray = new int[alleleCounts.distinctAlleleCount() << 1];
+            alleleCounts.copyAlleleCounts(alleleCountArray,0);
+            Assert.assertEquals(index,ploidy);
+            Assert.assertEquals(calculator.allelesToIndex(alleleArray),i);
+            Assert.assertEquals(calculator.alleleCountsToIndex(alleleCountArray),i);
+        }
+    }
+
+    /**
+     * Compares the genotype likelihoods produced by the calculator for every sample against a value
+     * recomputed here: for each read, the log10-sum over the genotype's distinct alleles of
+     * (read likelihood + log10(allele copy count)) minus log10(ploidy); these per-read values are
+     * then summed. Both numbers must agree within 0.0001. At most the first 30000 genotypes are checked.
+     *
+     * @param ploidy the ploidy to request from {@link GenotypeLikelihoodCalculators}.
+     * @param alleleCount the allele count to request from {@link GenotypeLikelihoodCalculators}.
+     * @param readCount number of reads per sample; its length is used as the sample count.
+     */
+    @Test(dataProvider = "ploidyAndMaximumAlleleAndReadCountsData", dependsOnMethods = "testPloidyAndMaximumAllele")
+    public void testLikelihoodCalculation(final int ploidy, final int alleleCount, final int[] readCount) {
+        final ReadLikelihoods readLikelihoods = ReadLikelihoodsUnitTester.readLikelihoods(alleleCount,readCount);
+        final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, alleleCount);
+        final int genotypeCount = calculator.genotypeCount();
+        final int testGenotypeCount = Math.min(30000,genotypeCount);
+        final int sampleCount = readCount.length;
+        for (int s = 0; s < sampleCount ; s++) {
+            final ReadLikelihoods.Matrix sampleLikelihoods = readLikelihoods.sampleMatrix(s);
+            final GenotypeLikelihoods genotypeLikelihoods = calculator.genotypeLikelihoods(sampleLikelihoods);
+            // Checked before it is dereferenced on the next line.
+            Assert.assertNotNull(genotypeLikelihoods);
+            final double[] genotypeLikelihoodsDoubles = genotypeLikelihoods.getAsVector();
+            Assert.assertEquals(genotypeLikelihoodsDoubles.length,genotypeCount);
+            for (int i = 0; i < testGenotypeCount; i++) {
+                final GenotypeAlleleCounts genotypeAlleleCounts = calculator.genotypeAlleleCountsAt(i);
+                // The object fetched in this iteration is the one that needs the null check.
+                Assert.assertNotNull(genotypeAlleleCounts);
+                final double[] readGenotypeLikelihoods = new double[sampleLikelihoods.readCount()];
+                for (int r = 0; r < sampleLikelihoods.readCount(); r++) {
+                    final double[] components = new double[genotypeAlleleCounts.distinctAlleleCount()];
+                    for (int ar = 0; ar < genotypeAlleleCounts.distinctAlleleCount(); ar++) {
+                        final int a = genotypeAlleleCounts.alleleIndexAt(ar);
+                        final int aCount = genotypeAlleleCounts.alleleCountAt(ar);
+                        final double readLk = sampleLikelihoods.get(a, r);
+                        components[ar] = readLk + Math.log10(aCount);
+                    }
+                    readGenotypeLikelihoods[r] = MathUtils.approximateLog10SumLog10(components) - Math.log10(ploidy);
+                }
+                final double genotypeLikelihood = MathUtils.sum(readGenotypeLikelihoods);
+                Assert.assertEquals(genotypeLikelihoodsDoubles[i], genotypeLikelihood, 0.0001);
+            }
+        }
+
+    }
+
+
+    // Simple inefficient calculation of the genotype count given the ploidy.
+    // Ploidies 0, 1 and 2 and allele count 0 are answered directly; anything else recurses as
+    // count(ploidy, alleles) = count(ploidy - 1, alleles) + count(ploidy, alleles - 1).
+    private int calculateGenotypeCount(final int ploidy, final int alleleCount) {
+        if (ploidy == 0)
+            return 0;
+        else if (ploidy == 1)
+            return alleleCount;
+        else if (ploidy == 2)
+            return ((alleleCount) * (alleleCount + 1)) >> 1;
+        else if (alleleCount == 0)
+            return 0;
+        else {
+            return calculateGenotypeCount(ploidy - 1, alleleCount) +
+                    calculateGenotypeCount(ploidy, alleleCount - 1);
+        }
+    }
+
+    // Allele counts combined with every ploidy by the data providers below.
+    private static final int[] MAXIMUM_ALLELE = new int[] { 1, 2, 5, 6 };
+
+    // Ploidies combined with every allele count by the data providers below.
+    private static final int[] PLOIDY = new int[] { 1, 2, 3, 20 };
+
+    // Each row is one scenario: the number of reads for each sample (row length = sample count).
+    private static final int[][] READ_COUNTS = new int[][] {
+            { 10 , 100, 50 },
+            { 0, 100, 10, 1 , 50 },
+            { 1, 2, 3, 4, 20 },
+            { 10, 0 },
+    };
+
+    /**
+     * Provides every (ploidy, allele count, read counts) combination of {@link #PLOIDY},
+     * {@link #MAXIMUM_ALLELE} and {@link #READ_COUNTS}.
+     */
+    @DataProvider(name="ploidyAndMaximumAlleleAndReadCountsData")
+    public Object[][] ploidyAndMaximumAlleleAndReadCountsData() {
+        final Object[][] result = new Object[PLOIDY.length * MAXIMUM_ALLELE.length * READ_COUNTS.length][];
+        int index = 0;
+        for (final int i : PLOIDY)
+            for (final int j : MAXIMUM_ALLELE)
+                for (final int[] k : READ_COUNTS)
+                    result[index++] = new Object[] { i, j, k };
+        return result;
+    }
+
+    /**
+     * Provides every (ploidy, allele count) combination of {@link #PLOIDY} and {@link #MAXIMUM_ALLELE}.
+     */
+    @DataProvider(name="ploidyAndMaximumAlleleData")
+    public Object[][] ploidyAndMaximumAlleleData() {
+        final Object[][] result = new Object[PLOIDY.length * MAXIMUM_ALLELE.length][];
+        int index = 0;
+        for (final int i : PLOIDY)
+            for (final int j : MAXIMUM_ALLELE)
+                result[index++] = new Object[] { i, j };
+        return result;
+    }
+}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypingDataUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypingDataUnitTest.java
new file mode 100644
index 000000000..59e14e14c
--- /dev/null
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/GenotypingDataUnitTest.java
@@ -0,0 +1,103 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR
ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+package org.broadinstitute.gatk.genotyping;
+
+import htsjdk.variant.variantcontext.Allele;
+import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Tests {@link GenotypingData}.
+ */
+public class GenotypingDataUnitTest {
+
+    /**
+     * Builds a {@link GenotypingData} from a {@link HeterogeneousPloidyModel} and a set of read
+     * likelihoods, then checks that it presents the same alleles and samples as the likelihoods and
+     * returns the likelihoods and ploidy model it was constructed with.
+     *
+     * @param ploidies ploidy of each sample.
+     * @param readCounts number of reads of each sample; paired by position with {@code ploidies}.
+     */
+    @Test(dataProvider="ploidyAndMaximumAlleleAndReadCountsData")
+    public void testInstantiation(final int[] ploidies, final int[] readCounts) {
+        // NOTE(review): ReadLikelihoods and GenotypingData are used as raw types here; the otherwise
+        // unused Allele import suggests they were meant to be parameterized with Allele - confirm.
+        final ReadLikelihoods likelihoods = ReadLikelihoodsUnitTester.readLikelihoods(2,readCounts);
+        final SampleList sampleList = likelihoods;
+        final PloidyModel ploidyModel = new HeterogeneousPloidyModel(sampleList,ploidies);
+        final GenotypingData data = new GenotypingData<>(ploidyModel,likelihoods);
+        Assert.assertTrue(AlleleListUtils.equals(data,likelihoods));
+        Assert.assertTrue(SampleListUtils.equals(data,likelihoods));
+        Assert.assertEquals(data.readLikelihoods(),likelihoods);
+        Assert.assertEquals(data.ploidyModel(),ploidyModel);
+    }
+
+    // Row i holds the per-sample ploidies of scenario i; it is paired with READ_COUNTS[i].
+    private static final int[][] PLOIDIES = new int[][]{
+            {1, 1, 1, 1},
+            {1, 2, 3, 4},
+            {2, 2, 2, 2},
+            {2, 1, 2, 1},
+            {1},
+            {2},
+            {},
+    };
+
+
+    // Row i holds the per-sample read counts of scenario i; it must be as long as PLOIDIES[i].
+    private static final int[][] READ_COUNTS = new int[][] {
+            { 10 , 100, 50, 20 },
+            { 0, 100, 10, 1 },
+            { 1, 2, 3, 4 },
+            { 10, 20, 50, 40 },
+            { 10 },
+            { 20 },
+            { }
+    };
+
+    /**
+     * Provides one (ploidies, read counts) pair per row of {@link #PLOIDIES}, taking the read counts
+     * from the row with the same index in {@link #READ_COUNTS}.
+     */
+    @DataProvider(name="ploidyAndMaximumAlleleAndReadCountsData")
+    public Object[][] ploidyAndMaximumAlleleAndReadCountsData() {
+        // Typed list: with a raw List, toArray(new Object[0][]) would be typed Object[] and could not
+        // be returned as Object[][].
+        final List<Object[]> result = new ArrayList<>(PLOIDIES.length);
+        for (int i = 0; i < PLOIDIES.length; i++)
+            result.add(new Object[] {PLOIDIES[i], READ_COUNTS[i]});
+        return result.toArray(new Object[0][]);
+    }
+
+}
\ No newline at end of file
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/HeterogeneousPloidyModel.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/HeterogeneousPloidyModel.java
new file mode 100644
index 000000000..865668093
--- /dev/null
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/HeterogeneousPloidyModel.java
@@ -0,0 +1,119 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1.
DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.genotyping; + + +/** + * General heterogeneous ploidy model. + * + *

+ * Currently only available for testing but will be promoted at some point and have its own unit test. + *

+ */
+public class HeterogeneousPloidyModel implements PloidyModel {
+
+    // Source of the sample names; sample look-ups are delegated to it.
+    private final SampleList sampleList;
+
+    // Ploidy of each sample, by sample index (private copy of the constructor argument).
+    private final int[] ploidies;
+
+    // Sum of all sample ploidies.
+    private final int ploidySum;
+
+    // True when every sample has the same ploidy (or there are no samples).
+    private final boolean isHomogeneous;
+
+    /**
+     * Creates a ploidy model from a sample list and one ploidy per sample.
+     *
+     * @param sampleList the samples; its {@code sampleCount()} must equal {@code ploidies.length}.
+     * @param ploidies the ploidy of each sample, by sample index; the array is copied.
+     * @throws IllegalArgumentException if either argument is {@code null}, if their sizes differ
+     *         or if any ploidy is negative.
+     */
+    public HeterogeneousPloidyModel(final SampleList sampleList, final int[] ploidies) {
+        if (sampleList == null)
+            throw new IllegalArgumentException("the sample list cannot be null");
+        if (ploidies == null)
+            throw new IllegalArgumentException("the ploidies cannot be null");
+        if (sampleList.sampleCount() != ploidies.length)
+            throw new IllegalArgumentException("sample-list and ploidy array length must match");
+
+        this.ploidies = ploidies.clone();
+
+        int sum = 0;
+        boolean homogeneous = true;
+        for (final int p : this.ploidies) {
+            if (p < 0)
+                throw new IllegalArgumentException("no ploidy can be less than 0");
+            // Compare element by element: a test such as "length * first == sum" would wrongly
+            // report mixed ploidies like {2, 1, 3} as homogeneous.
+            homogeneous &= p == this.ploidies[0];
+            sum += p;
+        }
+        this.ploidySum = sum;
+        this.isHomogeneous = homogeneous;
+        this.sampleList = sampleList;
+    }
+
+    /**
+     * Returns the ploidy of a sample.
+     *
+     * @param sampleIndex the sample index, from 0 to {@code sampleCount() - 1}.
+     * @return the ploidy given for that sample at construction.
+     * @throws IllegalArgumentException if {@code sampleIndex} is outside that range.
+     */
+    @Override
+    public int samplePloidy(final int sampleIndex) {
+        // ">=" rather than ">": an index equal to the length is already out of range.
+        if (sampleIndex < 0 || sampleIndex >= ploidies.length)
+            throw new IllegalArgumentException("invalid sample index: " + sampleIndex);
+        return ploidies[sampleIndex];
+    }
+
+    /**
+     * Returns {@code true} if all samples have the same ploidy or there are no samples.
+     */
+    @Override
+    public boolean isHomogeneous() {
+        return isHomogeneous;
+    }
+
+    /**
+     * Returns the sum of the ploidies of all samples.
+     */
+    @Override
+    public int totalPloidy() {
+        return ploidySum;
+    }
+
+    /**
+     * Returns the number of samples, i.e. the length of the ploidy array.
+     */
+    @Override
+    public int sampleCount() {
+        return ploidies.length;
+    }
+
+    /**
+     * Returns whatever the underlying sample list reports as the index of {@code sample}.
+     */
+    @Override
+    public int sampleIndex(final String sample) {
+        return sampleList.sampleIndex(sample);
+    }
+
+    /**
+     * Returns whatever the underlying sample list reports as the sample at {@code sampleIndex}.
+     */
+    @Override
+    public String sampleAt(final int sampleIndex) {
+        return sampleList.sampleAt(sampleIndex);
+    }
+}
\ No newline at end of file
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/HomogeneousPloidyModelUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/HomogeneousPloidyModelUnitTest.java
new file mode
100644 index 000000000..030ebf21b --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/HomogeneousPloidyModelUnitTest.java @@ -0,0 +1,92 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. 
For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. 
ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+package org.broadinstitute.gatk.genotyping;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Tests {@link HomogeneousPloidyModel}
+ *
+ * @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
+ */
+public class HomogeneousPloidyModelUnitTest {
+    // Ploidies combined with every sample count by the data provider.
+    private static final int[] PLOIDY = new int[] { 1, 2, 3, 7, 10};
+
+    // Sample counts combined with every ploidy by the data provider (includes the empty list).
+    private static final int[] SAMPLE_COUNT = new int[] { 0, 1, 3, 4, 5, 6, 10, 101};
+
+
+    /**
+     * Builds a {@link HomogeneousPloidyModel} over {@code sampleCount} samples named
+     * {@code SAMPLE_0}, {@code SAMPLE_1}, ... and checks that it is homogeneous, that its sample
+     * count and total ploidy match, that every sample reports {@code ploidy}, and that it behaves
+     * as a sample list with those names.
+     *
+     * @param ploidy the ploidy shared by all samples.
+     * @param sampleCount the number of samples to create.
+     */
+    @Test(dataProvider = "ploidyAndSampleListData")
+    public void testPloidyAndSampleList(final int ploidy, final int sampleCount) {
+        final List<String> sampleNames = new ArrayList<>(sampleCount);
+        for (int i = 0; i < sampleCount; i++)
+            sampleNames.add("SAMPLE_" + i);
+        final IndexedSampleList sampleList = new IndexedSampleList(sampleNames);
+
+        final HomogeneousPloidyModel ploidyModel = new HomogeneousPloidyModel(sampleList,ploidy);
+        Assert.assertTrue(ploidyModel.isHomogeneous());
+        Assert.assertEquals(ploidyModel.sampleCount(),sampleCount);
+        Assert.assertEquals(ploidyModel.totalPloidy(),sampleCount * ploidy);
+
+        for (int i = 0; i < sampleCount; i++)
+            Assert.assertEquals(ploidyModel.samplePloidy(i),ploidy);
+
+        SampleListUnitTester.assertSampleList(ploidyModel,sampleNames);
+    }
+
+    /**
+     * Provides every (ploidy, sample count) combination of {@link #PLOIDY} and {@link #SAMPLE_COUNT}.
+     */
+    @DataProvider(name="ploidyAndSampleListData")
+    public Object[][] ploidyAndSampleListData() {
+        final Object[][] result = new Object[PLOIDY.length * SAMPLE_COUNT.length][];
+        int index = 0;
+        for (int i = 0; i < PLOIDY.length; i++)
+            for (int j = 0; j < SAMPLE_COUNT.length; j++ )
+                result[index++] = new Object[] { PLOIDY[i], SAMPLE_COUNT[j]};
+        return result;
+    }
+}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/IndexedAlleleListUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/IndexedAlleleListUnitTest.java
new file mode 100644
index 000000000..61bd9f8a7
--- /dev/null
+++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/IndexedAlleleListUnitTest.java
@@ -0,0 +1,102 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. 
NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ +package org.broadinstitute.gatk.genotyping; + + +import htsjdk.variant.variantcontext.Allele; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +import static org.broadinstitute.gatk.genotyping.AlleleListUnitTester.assertAlleleList; + +/** + * Tests {@link org.broadinstitute.gatk.genotyping.IndexedAlleleList}. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class IndexedAlleleListUnitTest { + + @Test + public void testEmptyConstructor() { + final IndexedAlleleList subject = new IndexedAlleleList<>(); + assertAlleleList(subject, Collections.EMPTY_LIST); + } + + @Test(dataProvider= "alleleCountMaxAlleleLengthData") + public void testArrayConstructor(final int alleleCount, final int maxAlleleLength) { + final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount, maxAlleleLength); + + final LinkedHashSet nonRepeatedAlleles = new LinkedHashSet<>(Arrays.asList(alleles)); + final IndexedAlleleList subject = new IndexedAlleleList<>(alleles); + assertAlleleList(subject, Arrays.asList(nonRepeatedAlleles.toArray(new Allele[nonRepeatedAlleles.size()]))); + } + + @Test(dataProvider= "alleleCountMaxAlleleLengthData") + public void testCollectionConstructor(final int alleleCount, final int maxAlleleLength) { + final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount, maxAlleleLength); + + final List alleleList = Arrays.asList(alleles); + final LinkedHashSet nonRepeatedAlleles = new LinkedHashSet<>(Arrays.asList(alleles)); + final IndexedAlleleList subject = new IndexedAlleleList<>(alleleList); + assertAlleleList(subject, Arrays.asList(nonRepeatedAlleles.toArray(new Allele[nonRepeatedAlleles.size()]))); + } + + private static final int[] SAMPLE_COUNT = { 0, 1, 5, 10, 20}; + + private static final int[] MAX_ALLELE_LENGTH = { 1, 2, 3, 10 }; + + @DataProvider(name="alleleCountMaxAlleleLengthData") + public Object[][]
alleleCountMaxAlleleLengthData() { + final Object[][] result = new Object[SAMPLE_COUNT.length * MAX_ALLELE_LENGTH.length][]; + int nextIndex = 0; + for (int i = 0; i < SAMPLE_COUNT.length; i++) + for (int j = 0; j < MAX_ALLELE_LENGTH.length; j++) + result[nextIndex++] = new Object[] { SAMPLE_COUNT[i], MAX_ALLELE_LENGTH[j]}; + return result; + } +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/IndexedSampleListUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/IndexedSampleListUnitTest.java new file mode 100644 index 000000000..138ec0e31 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/IndexedSampleListUnitTest.java @@ -0,0 +1,133 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. 
+* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. 
The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.genotyping; + + +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +import static org.broadinstitute.gatk.genotyping.SampleListUnitTester.assertSampleList; + +/** + * Tests {@link IndexedSampleList}. 
+ * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class IndexedSampleListUnitTest { + + @Test + public void testEmptyConstructor() { + final IndexedSampleList subject = new IndexedSampleList(); + assertSampleList(subject, Collections.EMPTY_LIST); + } + + @Test(dataProvider="sampleCountMaxSampleIndexData") + public void testArrayConstructor(final int sampleCount, final int maxSampleIndex) { + final String[] sampleNames = generateSampleNames(sampleCount,maxSampleIndex); + + final LinkedHashSet nonRepeatedNames = new LinkedHashSet<>(Arrays.asList(sampleNames)); + final IndexedSampleList subject = new IndexedSampleList(sampleNames); + assertSampleList(subject, Arrays.asList(nonRepeatedNames.toArray(new String[nonRepeatedNames.size()]))); + } + + @Test(dataProvider="sampleCountMaxSampleIndexData") + public void testCollectionConstructor(final int sampleCount, final int maxSampleIndex) { + final String[] sampleNames = generateSampleNames(sampleCount,maxSampleIndex); + + final List sampleNameList = Arrays.asList(sampleNames); + final LinkedHashSet nonRepeatedNames = new LinkedHashSet<>(Arrays.asList(sampleNames)); + final IndexedSampleList subject = new IndexedSampleList(sampleNameList); + assertSampleList(subject, Arrays.asList(nonRepeatedNames.toArray(new String[nonRepeatedNames.size()]))); + } + + /** + * Generate testing sample names. + * + *

+ * Basically all have a common prefix "SAMPLE_" followed by a numeric index. + *

+ * + *

+ * With {@code maxSampleIndex} you can force some sample names to be repeated + * (guaranteed when {@code sampleCount > maxSampleIndex + 1}). + *

+ * + * @param sampleCount number of sample names to generate. + * @param maxSampleIndex the maximum sample numeric index. + * + * @throws RuntimeException if {@code sampleCount} or {@code maxSampleIndex} are negative. + * @return never {@code null}. + */ + private String[] generateSampleNames(final int sampleCount, final int maxSampleIndex) { + final String[] result = new String[sampleCount]; + for (int i = 0; i < sampleCount; i++) + result[i] = "SAMPLE_" + rnd.nextInt(maxSampleIndex + 1); + return result; + } + + private static final int[] SAMPLE_COUNT = { 0, 1, 5, 10, 20}; + + private static final int[] MAX_SAMPLE_INDEX = { 0, 1, 4, 9, 10000}; + + private static final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + + + @DataProvider(name="sampleCountMaxSampleIndexData") + public Object[][] sampleCountMaxSampleIndexData() { + final Object[][] result = new Object[SAMPLE_COUNT.length * MAX_SAMPLE_INDEX.length][]; + int nextIndex = 0; + for (int i = 0; i < SAMPLE_COUNT.length; i++) + for (int j = 0; j < MAX_SAMPLE_INDEX.length; j++) + result[nextIndex++] = new Object[] { SAMPLE_COUNT[i], MAX_SAMPLE_INDEX[j]}; + return result; + } + + + +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/InfiniteRandomMatingPopulationModelUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/InfiniteRandomMatingPopulationModelUnitTest.java new file mode 100644 index 000000000..af4b37b18 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/InfiniteRandomMatingPopulationModelUnitTest.java @@ -0,0 +1,145 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. 
with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. 
LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.genotyping; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.GenotypeLikelihoods; +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.List; +import java.util.Random; + +/** + * Test {@link InfiniteRandomMatingPopulationModel} + */ +public class InfiniteRandomMatingPopulationModelUnitTest { + + @Test(dataProvider="ploidyAndMaximumAlleleAndReadCountsData") + public void testCalculateLikelihoods(final int[] ploidies, final int alleleCount, final int discardAlleleCount, final int[] readCounts) { + final ReadLikelihoods likelihoods = ReadLikelihoodsUnitTester.readLikelihoods(alleleCount,readCounts); + final AlleleList genotypingAlleleList = discardAlleleCount == 0 ? 
likelihoods : discardAllelesAtRandom(likelihoods,discardAlleleCount); + final SampleList sampleList = SampleListUnitTester.sampleList(ploidies.length); + final PloidyModel ploidyModel = new HeterogeneousPloidyModel(sampleList,ploidies); + final GenotypingData data = new GenotypingData<>(ploidyModel,likelihoods); + final InfiniteRandomMatingPopulationModel model = new InfiniteRandomMatingPopulationModel(); + final GenotypingLikelihoods gLikelihoods = model.calculateLikelihoods(genotypingAlleleList,data); + Assert.assertNotNull(gLikelihoods); + AlleleListUnitTester.assertAlleleList(gLikelihoods, AlleleListUtils.asList(genotypingAlleleList)); + SampleListUnitTester.assertSampleList(gLikelihoods,SampleListUtils.asList(sampleList)); + final int sampleCount = gLikelihoods.sampleCount(); + for (int i = 0; i < sampleCount; i++) { + final GenotypeLikelihoods sampleLikelihoods = gLikelihoods.sampleLikelihoods(i); + Assert.assertNotNull(sampleLikelihoods); + final double[] values = sampleLikelihoods.getAsVector(); + Assert.assertNotNull(values); + Assert.assertEquals(values.length, GenotypeLikelihoodCalculators.getInstance(ploidies[i], genotypingAlleleList.alleleCount()).genotypeCount()); + for (int j = 0; j < values.length; j++) + Assert.assertTrue(values[j] <= 0); + } + } + + private AlleleList discardAllelesAtRandom(final AlleleList likelihoods, final int discardAlleleCount) { + final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); + final ArrayList subset = new ArrayList<>(AlleleListUtils.asList(likelihoods)); + for (int i = 0; i < discardAlleleCount; i++) { + subset.remove(rnd.nextInt(subset.size())); + } + return new IndexedAlleleList<>(subset); + } + + /** + * Each entry contains two values, where the first is the total number of alleles and the second + * is the number of alleles to discard at random for genotyping in {@link #testCalculateLikelihoods}.
+ */ + private static final int[][] ALLELE_COUNTS = new int[][] { + {1, 0}, + {2, 1}, + {5, 2}, + {10, 4}, + {1, 0}, + {2, 1}, + {10, 7} + }; + + private static final int[][] PLOIDIES = new int[][]{ + {1, 1, 1, 1}, + {1, 2, 3, 4}, + {2, 2, 2, 2}, + {2, 1, 2, 1}, + {1}, + {2}, + {}, + }; + + + private static final int[][] READ_COUNTS = new int[][] { + { 10 , 100, 50, 20 }, + { 0, 100, 10, 1 }, + { 1, 2, 3, 4 }, + { 10, 20, 50, 40 }, + { 10 }, + { 20 }, + { } + }; + + @DataProvider(name="ploidyAndMaximumAlleleAndReadCountsData") + public Object[][] ploidyAndMaximumAlleleAndReadCountsData() { + final List result = new ArrayList<>(PLOIDIES.length * 2); + for (int i = 0; i < PLOIDIES.length; i++) { + result.add(new Object[] {PLOIDIES[i], ALLELE_COUNTS[i][0], 0, READ_COUNTS[i]}); + final int discardAlleleCount = ALLELE_COUNTS[i][1]; + if (discardAlleleCount == 0) continue; + result.add(new Object[] { PLOIDIES[i], ALLELE_COUNTS[i][0], ALLELE_COUNTS[i][1], READ_COUNTS[i]}); + } + return result.toArray(new Object[0][]); + } + +} \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/ReadLikelihoodsUnitTester.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/ReadLikelihoodsUnitTester.java new file mode 100644 index 000000000..08bfd9f60 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/ReadLikelihoodsUnitTester.java @@ -0,0 +1,124 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). 
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.genotyping; + + +import htsjdk.samtools.SAMFileHeader; +import htsjdk.variant.variantcontext.Allele; +import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods; +import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; +import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Contains utilities for tests that need to create read-likelihoods.
+ */ +public class ReadLikelihoodsUnitTester { + + + static ReadLikelihoods readLikelihoods(final int alleleCount, final int[] readCount) { + final int sampleCount = readCount.length; + final AlleleList alleleList = AlleleListUnitTester.alleleList(alleleCount,100,true); + final SampleList sampleList = SampleListUnitTester.sampleList(sampleCount); + final Map> sampleToReads = new HashMap<>(sampleCount); + for (int i = 0; i < sampleCount; i++) { + sampleToReads.put(sampleList.sampleAt(i),readList(i,readCount[i])); + } + final ReadLikelihoods likelihoods = new ReadLikelihoods<>(sampleList,alleleList, sampleToReads); + for (int s = 0; s < sampleCount; s++) { + final ReadLikelihoods.Matrix sampleLikelihoods = likelihoods.sampleMatrix(s); + for (int a = 0; a < alleleCount; a++) + for (int r = 0; r < readCount[s]; r++) + sampleLikelihoods.set(a, r, testLikelihood(s, a, r)); + } + return likelihoods; + } + + /** + * produces a test likelihood depending on the sample, read and allele index. + */ + private static double testLikelihood(final int sampleIndex, final int alleleIndex, final int readIndex) { + return - Math.abs(3 * (sampleIndex + 1) + 7 * (alleleIndex + 1) + 11 * (readIndex + 1)); + } + + + private static SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader(10, 0, 1000); + + + static List readList(final int sampleIndex, final int readCount) { + final List reads = new ArrayList<>(readCount); + int readIndex = 0; + for (int j = 0; j < readCount; j++) + reads.add(ArtificialSAMUtils.createArtificialRead(SAM_HEADER, "READ_" + sampleIndex + "_" + (readIndex++), 1, 1, 100)); + return reads; + } + + + /** + * Creates a sampleToReads map given the sample list and the required read counts. + * @param sampleList the target sample-list. + * @param readCounts the target read-counts. + * @return never {@code null}. 
+ */ + public static Map> sampleToReads(final SampleList sampleList, final int[] readCounts) { + final Map> result = new HashMap<>(sampleList.sampleCount()); + int readIndex = 0; + for (int i = 0; i < sampleList.sampleCount(); i++) { + final int readCount = readCounts[i]; + final String sample = sampleList.sampleAt(i); + final List records = new ArrayList<>(readCount); + for (int j = 0; j < readCount; j++) + records.add(ArtificialSAMUtils.createArtificialRead(SAM_HEADER,"READ_" + (readIndex++),1,1,100)); + result.put(sample,records); + } + return result; + } + +} \ No newline at end of file diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/SampleListUnitTester.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/SampleListUnitTester.java new file mode 100644 index 000000000..9bca352d2 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/SampleListUnitTester.java @@ -0,0 +1,120 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. 
DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ +package org.broadinstitute.gatk.genotyping; + +import org.testng.Assert; + +import java.util.*; + +/** + * Helper class for those unit-test classes that test on implementations of SampleList. + * + * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> + */ +public class SampleListUnitTester { + + /** + * Test that the contents of a sample-list are the ones expected. + * + *

+ * This method performs various consistency checks involving all the {@link SampleList} interface methods. + * Therefore calling this method is equivalent to a thorough check of the {@link SampleList} aspect of + * the {@code actual} argument. + *

+ * + * @param actual the sample-list to assess. + * @param expected the expected sample-list. + * + * @throws IllegalArgumentException if {@code expected} is {@code null} or contains + * {@code null}s which is an indication of a bug in the testing code. + * + * @throws java.lang.RuntimeException if there is some testing assertion exception which + * is an indication of an actual bug in the code that is being tested. + */ + public static void assertSampleList(final SampleList actual, final List expected) { + if (expected == null) + throw new IllegalArgumentException("the expected list cannot be null"); + final Set expectedNames = new HashSet<>(expected.size()); + Assert.assertNotNull(actual); + Assert.assertEquals(actual.sampleCount(),expected.size()); + for (int i = 0; i < expected.size(); i++) { + final String expectedSample = expected.get(i); + if (expectedSample == null) + throw new IllegalArgumentException("the expected sample cannot be null"); + if (expectedSample.equals(NEVER_USE_SAMPLE_NAME)) + throw new IllegalArgumentException("you cannot use the forbidden sample name"); + if (expectedNames.contains(expected.get(i))) + throw new IllegalArgumentException("repeated names in the expected list, this is a test bug"); + final String actualSample = actual.sampleAt(i); + Assert.assertNotNull(actualSample,"sample name cannot be null"); + Assert.assertFalse(expectedNames.contains(actualSample),"repeated sample name: " + actualSample); + Assert.assertEquals(actualSample,expectedSample,"wrong sample name order; index = " + i); + Assert.assertEquals(actual.sampleIndex(actualSample),i,"sample index mismatch"); + expectedNames.add(actualSample); + } + + Assert.assertEquals(actual.sampleIndex(NEVER_USE_SAMPLE_NAME),-1); + } + + /** + * Creates a sample list for testing given the number of samples in it. + * @param sampleCount the required sample count. + * @return never {@code null}.
+ */ + static SampleList sampleList(final int sampleCount) { + if (sampleCount < 0) + throw new IllegalArgumentException("the number of sample cannot be negative"); + final List result = new ArrayList<>(sampleCount); + for (int i =0; i < sampleCount; i++) + result.add("SAMPLE_" + i); + return new IndexedSampleList(result); + } + + /** + * Safe to assume that this sample name will never be used. + */ + private static final String NEVER_USE_SAMPLE_NAME = "WHY_WOULD_YOU_CALL_A_SAMPLE_LIKE_THIS? ArE yOu Crazzzzy? " + new Date().toString(); +} diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/SampleListUtilsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/SampleListUtilsUnitTest.java new file mode 100644 index 000000000..71da45838 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/genotyping/SampleListUtilsUnitTest.java @@ -0,0 +1,126 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1.
DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. 
LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. 
BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/
+package org.broadinstitute.gatk.genotyping;
+
+import htsjdk.variant.variantcontext.Allele;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Tests {@link SampleListUtils}.
+ *
+ * <p>Fixtures are plain sample-name lists whose sizes are given by {@link #SAMPLE_COUNT}; each
+ * test wraps them in an {@link IndexedSampleList} before calling the utility under test.</p>
+ *
+ * @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
+ */
+public class SampleListUtilsUnitTest {
+
+    /**
+     * Checks that {@link SampleListUtils#asList} returns the same names, in the same order,
+     * as the list the {@link IndexedSampleList} was built from.
+     *
+     * @param samples the sample names used to build the sample list.
+     */
+    @Test(dataProvider = "singleSampleListData")
+    public void testAsList(final List<String> samples) {
+        final SampleList sampleList = new IndexedSampleList(samples);
+        final List<String> asList = SampleListUtils.asList(sampleList);
+        // TestNG argument order is (actual, expected).
+        Assert.assertEquals(asList, samples);
+    }
+
+    /**
+     * Checks that {@link SampleListUtils#equals} is reflexive, symmetric and agrees with an
+     * element-by-element comparison of the two lists' contents.
+     *
+     * @param samples1 names for the first sample list.
+     * @param samples2 names for the second sample list.
+     */
+    @Test(dataProvider = "twoSampleListData", dependsOnMethods={"testAsList"})
+    public void testEquals(final List<String> samples1, final List<String> samples2) {
+        final SampleList sampleList1 = new IndexedSampleList(samples1);
+        final SampleList sampleList2 = new IndexedSampleList(samples2);
+
+        // Reflexivity.
+        Assert.assertTrue(SampleListUtils.equals(sampleList1, sampleList1));
+        Assert.assertTrue(SampleListUtils.equals(sampleList2, sampleList2));
+
+        // Must agree with a plain array comparison of the sample names.
+        final String[] names1 = SampleListUtils.asList(sampleList1).toArray(new String[sampleList1.sampleCount()]);
+        final String[] names2 = SampleListUtils.asList(sampleList2).toArray(new String[sampleList2.sampleCount()]);
+        Assert.assertEquals(SampleListUtils.equals(sampleList1, sampleList2), Arrays.equals(names1, names2));
+
+        // Symmetry.
+        Assert.assertEquals(SampleListUtils.equals(sampleList1, sampleList2),
+                SampleListUtils.equals(sampleList2, sampleList1));
+    }
+
+    /** Fixture sample-name lists, one per entry of {@link #SAMPLE_COUNT}. */
+    private List<String>[] sampleLists;
+
+    /**
+     * Builds one fixture list per entry of {@link #SAMPLE_COUNT}, holding exactly that many
+     * names ({@code SAMPLE_0}, {@code SAMPLE_1}, ...), so the empty and multi-sample cases are
+     * actually exercised.
+     */
+    @BeforeClass
+    @SuppressWarnings("unchecked") // generic array creation; only List<String> instances are stored.
+    public void setUp() {
+        sampleLists = new List[SAMPLE_COUNT.length];
+        for (int i = 0; i < SAMPLE_COUNT.length; i++) {
+            final List<String> sampleList = new ArrayList<>(SAMPLE_COUNT[i]);
+            for (int j = 0; j < SAMPLE_COUNT[i]; j++) {
+                sampleList.add("SAMPLE_" + j);
+            }
+            sampleLists[i] = sampleList;
+        }
+    }
+
+    /** Number of samples in each fixture list. */
+    private static final int[] SAMPLE_COUNT = { 0, 1, 5, 10, 20};
+
+    /**
+     * Provides every fixture list on its own.
+     *
+     * @return one single-element row per fixture list.
+     */
+    @DataProvider(name="singleSampleListData")
+    public Object[][] singleSampleListData() {
+        final Object[][] result = new Object[sampleLists.length][];
+        for (int i = 0; i < sampleLists.length; i++) {
+            result[i] = new Object[] { sampleLists[i]};
+        }
+        return result;
+    }
+
+    /**
+     * Provides every ordered pair of fixture lists, including each list paired with itself.
+     *
+     * @return one two-element row per ordered pair.
+     */
+    @DataProvider(name="twoSampleListData")
+    public Object[][] twoAlleleListData() {
+        final Object[][] result = new Object[sampleLists.length * sampleLists.length][];
+        int index = 0;
+        for (int i = 0; i < sampleLists.length; i++) {
+            for (int j = 0; j < sampleLists.length; j++) {
+                result[index++] = new Object[] { sampleLists[i], sampleLists[j]};
+            }
+        }
+        return result;
+    }
+}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java index 51f1e04a2..335c355d2 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperEngineUnitTest.java @@ -80,7 +80,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest { final UnifiedArgumentCollection args = new UnifiedArgumentCollection(); final SampleList fakeSamples = SampleListUtils.singletonList("fake"); - ugEngine = new UnifiedGenotypingEngine(engine, args,fakeSamples); + ugEngine = new UnifiedGenotypingEngine(args,fakeSamples,engine.getGenomeLocParser(),engine.getArguments().BAQMode); } private UnifiedGenotypingEngine getEngine() { @@ -89,7 +89,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest { @DataProvider(name = "ReferenceQualityCalculation") public Object[][] makeReferenceQualityCalculation() { - List tests = new ArrayList(); + final List tests = new ArrayList<>(); // this functionality can be adapted to provide input
data for whatever you might want in your data final double p = Math.log10(0.5); @@ -116,7 +116,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest { for ( Integer numAltAlleles = 0; numAltAlleles < 100; numAltAlleles++ ) { - Set alleles = new HashSet(); + final Set alleles = new HashSet<>(); alleles.add(Allele.create("A", true)); // ref allele for (int len = 1; len <=numAltAlleles; len++) { diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java index 7ac0c86df..0437eb375 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite1IntegrationTest.java @@ -69,12 +69,12 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe @Test(enabled = true) public void testBOTH_GGA_Pools() { - executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "05b8af0db7b009721df209eea96bdf1a"); + executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "4b646b6fc9c5c2ef88433a5b350310fe"); } @Test(enabled = true) public void testINDEL_GGA_Pools() { - executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "1ac510860b295d66e1da7b27ba7cafb8"); + executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode 
GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "171355e4d0648fdd50d7d56de950d338"); } @Test(enabled = true) diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java index 6d95098fe..bd1daa714 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/genotyper/UnifiedGenotyperGeneralPloidySuite2IntegrationTest.java @@ -68,6 +68,6 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe @Test(enabled = true) public void testMT_SNP_GGA_sp10() { - executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "654059dda19cb2cf546097e44753ea14"); + executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "0f6fdf60d7f93b2db8c8cb92c1fd3e00"); } } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 14924bdc3..b0c157d82 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ 
b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -47,18 +47,36 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller; import org.broadinstitute.gatk.engine.walkers.WalkerTest; -import org.broadinstitute.gatk.utils.collections.Pair; import org.broadinstitute.gatk.utils.exceptions.UserException; import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.File; import java.util.ArrayList; import java.util.Arrays; import java.util.List; public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { + + @DataProvider(name = "MyDataProviderHaploid") + public Object[][] makeMyDataProviderHaploid() { + List tests = new ArrayList<>(); + + final String PCRFreeIntervals = "-L 20:10,000,000-10,010,000"; + final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; + + // this functionality can be adapted to provide input data for whatever you might want in your data + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "5cc1858896aca6683282f53054bb7a61"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "010a747f5c41ddb7889168e499eb40bb"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "d7dbc1c8e11a277e9db857eb766fd2c6"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "799752d88c4e15e19a953add764d2239"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "fa057b35d6fe9588c2653b6560d6e3c2"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "d10e8907594414890cbf80d282426812"}); + + return tests.toArray(new Object[][]{}); + } + + @DataProvider(name = "MyDataProvider") public Object[][] 
makeMyDataProvider() { List tests = new ArrayList<>(); @@ -77,6 +95,24 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { return tests.toArray(new Object[][]{}); } + @DataProvider(name = "MyDataProviderTetraploid") + public Object[][] makeMyDataProviderTetraploid() { + List tests = new ArrayList<>(); + + final String PCRFreeIntervals = "-L 20:10,000,000-10,010,000"; + final String WExIntervals = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals; + + // this functionality can be adapted to provide input data for whatever you might want in your data + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.NONE, PCRFreeIntervals, "6e157b6fdf4071fcb7da74f40146a611"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "354b84dbfaf55947aea40865e74ce66b"}); + tests.add(new Object[]{NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "fc4b7e6528747cb20e0c92699a0787cb"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.NONE, WExIntervals, "6e0f5d82b77ea79a639d43b2db70e751"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "a3daf472f7ab16667e5f6dab1af392ff"}); + tests.add(new Object[]{NA12878_WEx, ReferenceConfidenceMode.GVCF, WExIntervals, "af9230fa56752b732572ce956101a2be"}); + + return tests.toArray(new Object[][]{}); + } + /** * Example testng test using MyDataProvider */ @@ -86,7 +122,31 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); final String name = "testHCWithGVCF bam=" + bam + " intervals= " + intervals + " gvcf= " + mode; final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5)); - final Pair,List> executionOutput = executeTest(name, spec); + executeTest(name, spec); + } + + /** + * Example testng test using 
MyDataProviderHaploid (runs HaplotypeCaller with -ploidy 1; currently disabled) + */ + @Test(dataProvider = "MyDataProviderHaploid", enabled=false) + public void testHCWithGVCFHaploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) { + final String commandLine = String.format("-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", + b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + final String name = "testHCWithGVCFHaploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode; + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5)); + executeTest(name, spec); + } + + /** + * Example testng test using MyDataProviderTetraploid (runs HaplotypeCaller with -ploidy 4; currently disabled) + */ + @Test(dataProvider = "MyDataProviderTetraploid", enabled=false) + public void testHCWithGVCFTetraploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) { + final String commandLine = String.format("-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d", + b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + final String name = "testHCWithGVCFTetraploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode; + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5)); + executeTest(name, spec); } @Test @@ -144,6 +204,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { private static final String NOCALL_GVCF_BUGFIX_INTERVALS = privateTestDir + "gvcf_nocall_bug.interval_list"; private static final String NOCALL_GVCF_BUGFIX_BAM = privateTestDir + "gvcf_nocall_bug.bam"; + private static final String GENERAL_PLOIDY_BUGFIX1_BAM = privateTestDir +
"general-ploidy-arrayindex-bug-1.bam"; + private static final String GENERAL_PLOIDY_BUGFIX1_INTERVALS = privateTestDir + "general-ploidy-arrayindex-bug-1.intervals"; + private static final String GENERAL_PLOIDY_BUGFIX2_BAM = privateTestDir + "general-ploidy-arrayindex-bug-2.bam"; + private static final String GENERAL_PLOIDY_BUGFIX2_INTERVALS = privateTestDir + "general-ploidy-arrayindex-bug-2.intervals"; + @Test public void testNoCallGVCFMissingPLsBugFix() { @@ -153,4 +218,23 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { spec.disableShadowBCF(); executeTest("testNoCallGVCFMissingPLsBugFix", spec); } + + @Test(enabled=false) + public void testGeneralPloidyArrayIndexBug1Fix() { + final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189", + b37KGReference, GENERAL_PLOIDY_BUGFIX1_BAM, GENERAL_PLOIDY_BUGFIX1_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce")); + spec.disableShadowBCF(); + executeTest(" testGeneralPloidyArrayIndexBug1Fix", spec); + } + + @Test(enabled=false) + public void testGeneralPloidyArrayIndexBug2Fix() { + final String commandLine = String.format("-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937", + b37KGReference, GENERAL_PLOIDY_BUGFIX2_BAM, GENERAL_PLOIDY_BUGFIX2_INTERVALS, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", 
Arrays.asList("7c263d77bf831551366c6e36233b46ce")); + spec.disableShadowBCF(); + executeTest(" testGeneralPloidyArrayIndexBug2Fix", spec); + } + } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 84c67130c..37c9cbe02 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -93,24 +93,55 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { HCTest(NA12878_BAM, "", "42de756c08b028be70287ada1022526e"); } + @Test + public void testHaplotypeCallerMultiSampleHaploid() { + HCTest(CEUTRIO_BAM, + "-ploidy 1", "b9e43506af628768fc9fd1ced49822b1"); + } + + @Test + public void testHaplotypeCallerSingleSampleHaploid() { + HCTest(NA12878_BAM, "-ploidy 1", "fb584b8c3f371ee2e438a3fc2335b26f"); + } + + @Test + public void testHaplotypeCallerSingleSampleTetraploid() { + HCTest(NA12878_BAM, "-ploidy 4", "d450b486c76520f9c803c603f25563e4"); + } + @Test public void testHaplotypeCallerMinBaseQuality() { HCTest(NA12878_BAM, "-mbq 15", "d063c0e5af1fd413be0500609ae36d46"); } + @Test + public void testHaplotypeCallerMinBaseQualityHaploid() { + HCTest(NA12878_BAM, "-mbq 15 -ploidy 1", "40259040f6febd8ea5931132cf5d8958"); + } + + @Test + public void testHaplotypeCallerMinBaseQualityTetraploid() { + HCTest(NA12878_BAM, "-mbq 15 -ploidy 4", "ca11eae5def67ca9717d129348e4cda7"); + } + @Test public void testHaplotypeCallerGraphBasedSingleSample() { HCTest(NA12878_BAM, "-likelihoodEngine GraphBased", "6cf15ddbfa4a3738e891fd9a09da8d07"); } + @Test + public void testHaplotypeCallerGraphBasedMultiSampleHaploid() { + 
HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased -ploidy 1", "f0677e5a2882f947f437e8d2049172cb"); + } + @Test public void testHaplotypeCallerGraphBasedMultiSample() { HCTest(CEUTRIO_BAM, "-likelihoodEngine GraphBased", "4c2a2dad6379b13fee4c7faca17441f5"); } - @Test(enabled = false) // can't annotate the rsID's yet + @Test public void testHaplotypeCallerSingleSampleWithDbsnp() { - HCTest(NA12878_BAM, "-D " + b37dbSNP132, ""); + HCTest(NA12878_BAM, "-D " + b37dbSNP132, "9d7067648561aa35b04d355184a5dea2"); } @Test @@ -120,6 +151,18 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { "669aac2aa9c22881eda86ee53b13351a"); } + @Test + public void testHaplotypeCallerMultiSampleGGAHaploid() { + HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", + "e50c55c65db3fa55c75ba03b4dd2f1a8"); + } + + @Test + public void testHaplotypeCallerMultiSampleGGATetraploid() { + HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", + "374d6db6e5f3f4fdb5ede26a529caa8b"); + } + @Test public void testHaplotypeCallerInsertionOnEdgeOfContig() { HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae"); @@ -265,7 +308,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + hg19Reference + " --no_cmdline_in_header -I " + NA12878_PCRFREE250_ADAPTER_TRIMMED + " -o %s -L 20:10,024,000-10,024,500 " , 1, Arrays.asList("")); - executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec); + executeTest("HCTestGraphBasedPCRFreePositiveLogLkFix", spec); } // -------------------------------------------------------------------------------------------------------------- @@ -346,5 +389,4 @@ public class 
HaplotypeCallerIntegrationTest extends WalkerTest { executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec); } - } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java index 7c2cd8727..f268ce535 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/haplotypecaller/ReferenceConfidenceModelUnitTest.java @@ -300,6 +300,9 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest { @Test public void testRefConfidencePartialReads() { + + final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2); + final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel(); final String ref = "ACGTAACCGGTT"; for ( int readLen = 3; readLen < ref.length(); readLen++ ) { for ( int start = 0; start < ref.length() - readLen; start++ ) { @@ -307,8 +310,6 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest { final List haplotypes = Arrays.asList(data.getRefHap()); final List calls = Collections.emptyList(); - final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2); - final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel(); data.getActiveRegion().add(data.makeRead(start, readLen)); final ReadLikelihoods likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion()); @@ -326,6 +327,9 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest { final int start = xxxdata.getStart(); final int stop = xxxdata.getEnd(); + final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2); + final 
GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel(); + for ( int nReads = 0; nReads < 2; nReads++ ) { final VariantContext vcStart = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start, Arrays.asList("A", "C")); @@ -347,8 +351,6 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest { final ReadLikelihoods likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion()); - final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,HomoSapiensConstants.DEFAULT_PLOIDY); - final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel(); final List expectedDPs = Collections.nCopies(data.getActiveRegion().getLocation().size(), nReads); final List contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls); checkReferenceModelResult(data, contexts, expectedDPs, calls); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/RandomDNA.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/RandomDNA.java index cdcd63427..5b58fbcbe 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/RandomDNA.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/RandomDNA.java @@ -56,7 +56,8 @@ import java.util.Random; * * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> */ -public class RandomDNA { +public class + RandomDNA { private Random random; @@ -73,6 +74,19 @@ public class RandomDNA { random = new Random(); } + + /** + * Creates a new random DNA generator given a random number generator. + * @param rnd the underlying random number generator. + * + * @throws IllegalArgumentException if {@code rnd} is {@code null}. 
+ */ + public RandomDNA(final Random rnd) { + if (rnd == null) + throw new IllegalArgumentException("the random number generator cannot be null"); + random = rnd; + } + /** * Constructs a new random DNA generator providing a seed. * diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/collections/IndexedSetUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/collections/IndexedSetUnitTest.java new file mode 100644 index 000000000..99d4855d0 --- /dev/null +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/collections/IndexedSetUnitTest.java @@ -0,0 +1,281 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. 
Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. 
LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/
+
+package org.broadinstitute.gatk.utils.collections;
+
+import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+/**
+ * Tests the working of {@link IndexedSet}
+ *
+ * <p>Every test mirrors the operations applied to the indexed-set on a {@link LinkedHashSet}
+ * and then checks that both hold the same elements in the same (insertion) order.</p>
+ *
+ * @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
+ */
+public class IndexedSetUnitTest {
+
+    /**
+     * Adds random elements one at a time and checks that {@code add}, {@code size} and
+     * {@code indexOf} agree with a {@link LinkedHashSet} fed the same values.
+     */
+    @Test(dataProvider = "initialCapacityElementCountMaxElementData")
+    public void testCompositionBySingleElementAddition(final int initialCapacity,
+                                                       final int elementCount, final int maxElement) {
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+        final IndexedSet<Integer> subject = new IndexedSet<>(initialCapacity);
+
+        final Set<Integer> elementSet = new LinkedHashSet<>();
+
+        for (int i = 0; i < elementCount; i++) {
+            final int nextElement = rnd.nextInt(maxElement + 1);
+            final boolean isNewElement = ! elementSet.contains(nextElement);
+            Assert.assertEquals(subject.add(nextElement), elementSet.add(nextElement));
+            Assert.assertEquals(subject.size(),elementSet.size());
+            // A brand-new element must have been appended at the last index.
+            if (isNewElement)
+                Assert.assertEquals(subject.indexOf(nextElement),elementSet.size() - 1);
+        }
+        assertEquals(subject, elementSet);
+    }
+
+    /**
+     * Checks that {@code addAll} on an empty indexed-set reports a change exactly when the
+     * collection is non-empty, and that the resulting composition is the expected one.
+     */
+    @Test(dataProvider = "initialCapacityElementCountMaxElementData")
+    public void testCompositionByCollectionAddition(final int initialCapacity,
+                                                    final int elementCount, final int maxElement) {
+        final IndexedSet<Integer> subject = new IndexedSet<>(initialCapacity);
+        final List<Integer> elementList = generateElementCollection(elementCount,maxElement);
+
+
+        Assert.assertEquals(subject.addAll(elementList), !elementList.isEmpty());
+
+        final Set<Integer> elementSet = new LinkedHashSet<>(elementCount);
+        elementSet.addAll(elementList);
+
+        assertEquals(subject,elementSet);
+    }
+
+    /**
+     * Checks the collection constructor, and that re-adding the same elements afterwards
+     * reports no change.
+     */
+    @Test(dataProvider = "elementCountMaxElementData")
+    public void testCompositionByCollectionConstructor(final int elementCount, final int maxElement) {
+        final List<Integer> elementList = generateElementCollection(elementCount, maxElement);
+
+        final IndexedSet<Integer> subject = new IndexedSet<>(elementList);
+
+        final Set<Integer> elementSet = new LinkedHashSet<>(elementList);
+        assertEquals(subject,elementSet);
+        Assert.assertFalse(subject.addAll(elementList));
+    }
+
+    /**
+     * Generates a list of random integers, possibly with repeats.
+     *
+     * @param elementCount number of values to generate.
+     * @param maxElement largest value that may be generated (inclusive); the smallest is 0.
+     * @return never {@code null}, a list with {@code elementCount} values.
+     */
+    private List<Integer> generateElementCollection(final int elementCount, final int maxElement) {
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+
+        final List<Integer> elementList = new ArrayList<>(elementCount);
+        for (int i = 0; i < elementCount; i++)
+            elementList.add(rnd.nextInt(maxElement + 1));
+        return elementList;
+    }
+
+    /**
+     * Checks that {@code get(i)} and {@code asList().get(i)} return the elements in insertion order.
+     */
+    @Test(dataProvider = "elementCountMaxElementData",
+            dependsOnMethods = {"testCompositionByCollectionConstructor"})
+    public void testLookupByIndex(final int elementCount, final int maxElement) {
+        final List<Integer> elementList = generateElementCollection(elementCount, maxElement);
+        final IndexedSet<Integer> subject = new IndexedSet<>(elementList);
+        final Set<Integer> elementSet = new LinkedHashSet<>(elementList);
+        final Integer[] elementArray = elementSet.toArray(new Integer[elementSet.size()]);
+
+        final List<Integer> subjectList = subject.asList();
+        for (int i = 0; i < subject.size(); i++) {
+            final int element = elementArray[i];
+            final int subjectElement = subject.get(i);
+            final int subjectListElement = subjectList.get(i);
+            Assert.assertEquals(subjectElement,element);
+            Assert.assertEquals(subjectListElement,element);
+        }
+    }
+
+    /**
+     * Checks that {@code indexOf} returns the insertion position of present elements and
+     * {@code -1} for values that were never added (negative numbers here).
+     */
+    @Test(dataProvider = "elementCountMaxElementData",
+            dependsOnMethods = {"testCompositionByCollectionConstructor"})
+    public void testIndexOf(final int elementCount, final int maxElement) {
+        final List<Integer> elementList = generateElementCollection(elementCount, maxElement);
+        final IndexedSet<Integer> subject = new IndexedSet<>(elementList);
+        final Set<Integer> elementSet = new LinkedHashSet<>(elementList);
+        final Integer[] elementArray = elementSet.toArray(new Integer[elementSet.size()]);
+
+        final List<Integer> subjectList = subject.asList();
+        for (int i = 0; i < subject.size(); i++) {
+            final int element = elementArray[i];
+            final int listElement = subjectList.get(i);
+            final int subjectIndex = subject.indexOf(element);
+            Assert.assertEquals(listElement,element);
+            Assert.assertEquals(subjectIndex,i);
+            Assert.assertEquals(subject.indexOf(-element - 1),-1);
+        }
+    }
+
+    /**
+     * Removes half of the elements (rounded up), chosen at random, and checks what is left.
+     */
+    @Test(dataProvider = "elementCountMaxElementData",
+            dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
+    public void testRemoveHalf(final int elementCount, final int maxElement) {
+        final List<Integer> elementList = generateElementCollection(elementCount, maxElement);
+        final IndexedSet<Integer> subject = new IndexedSet<>(elementList);
+        final Set<Integer> elementSet = new LinkedHashSet<>(elementList);
+        final int removeCount = (subject.size() + 1) / 2;
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+        for (int i = 0; i < removeCount; i++) {
+            final int removeIndex = rnd.nextInt(subject.size());
+            final int removeElement = subject.get(removeIndex);
+            subject.remove(removeElement);
+            elementSet.remove(removeElement);
+        }
+
+        assertEquals(subject,elementSet);
+    }
+
+    /**
+     * Removes every element, one at a time and in random order, and checks the set ends up empty.
+     */
+    @Test(dataProvider = "elementCountMaxElementData",
+            dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
+    public void testRemoveAll(final int elementCount, final int maxElement) {
+        final List<Integer> elementList = generateElementCollection(elementCount, maxElement);
+        final IndexedSet<Integer> subject = new IndexedSet<>(elementList);
+        final Set<Integer> elementSet = new LinkedHashSet<>(elementList);
+        final int removeCount = subject.size();
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+        for (int i = 0; i < removeCount; i++) {
+            final int removeIndex = rnd.nextInt(subject.size());
+            final int removeElement = subject.get(removeIndex);
+            subject.remove(removeElement);
+            elementSet.remove(removeElement);
+        }
+
+        assertEquals(subject,elementSet);
+    }
+
+    /**
+     * Checks that {@code clear} leaves the indexed-set empty.
+     */
+    @Test(dataProvider = "elementCountMaxElementData",
+            dependsOnMethods = {"testCompositionByCollectionConstructor"})
+    public void testClear(final int elementCount, final int maxElement) {
+        final List<Integer> elementList = generateElementCollection(elementCount, maxElement);
+        final IndexedSet<Integer> subject = new IndexedSet<>(elementList);
+        final Set<Integer> elementSet = new LinkedHashSet<>(elementList);
+        subject.clear();
+        elementSet.clear();
+
+        assertEquals(subject, elementSet);
+    }
+
+    /**
+     * Empties the set by random removals and then adds the original elements back.
+     */
+    @Test(dataProvider = "elementCountMaxElementData",
+            dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
+    public void testRemoveAndAdd(final int elementCount, final int maxElement) {
+        final List<Integer> elementList = generateElementCollection(elementCount, maxElement);
+        final IndexedSet<Integer> subject = new IndexedSet<>(elementList);
+        final Set<Integer> elementSet = new LinkedHashSet<>(elementList);
+        final int removeCount = subject.size();
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+        for (int i = 0; i < removeCount; i++) {
+            final int removeIndex = rnd.nextInt(subject.size());
+            final int removeElement = subject.get(removeIndex);
+            subject.remove(removeElement);
+            elementSet.remove(removeElement);
+        }
+        subject.addAll(elementList);
+        elementSet.addAll(elementList);
+
+        assertEquals(subject, elementSet);
+    }
+
+    private final int[] INITIAL_CAPACITY = { 0, 10, 100 };
+
+    private final int[] ELEMENT_COUNT = { 0, 1, 10, 100 , 1000 };
+
+    private final int[] MAX_ELEMENT = { 0, 1, 5, 10, 50, 100, 500 };
+
+    /**
+     * All combinations of {@link #INITIAL_CAPACITY}, {@link #ELEMENT_COUNT} and {@link #MAX_ELEMENT}.
+     */
+    @DataProvider(name="initialCapacityElementCountMaxElementData")
+    public Object[][] initialCapacityElementCountMaxElementData() {
+        final Object[][] result = new Object[INITIAL_CAPACITY.length * ELEMENT_COUNT.length * MAX_ELEMENT.length][];
+
+        int nextIndex = 0;
+
+        for (int i = 0; i < INITIAL_CAPACITY.length; i++)
+            for (int j = 0; j < ELEMENT_COUNT.length; j++)
+                for (int k = 0; k < MAX_ELEMENT.length; k++)
+                    result[nextIndex++] = new Object[] { INITIAL_CAPACITY[i], ELEMENT_COUNT[j], MAX_ELEMENT[k]};
+
+        return result;
+    }
+
+    /**
+     * All combinations of {@link #ELEMENT_COUNT} and {@link #MAX_ELEMENT}.
+     */
+    @DataProvider(name="elementCountMaxElementData")
+    public Object[][] elementCountMaxElementData() {
+        final Object[][] result = new Object[ELEMENT_COUNT.length * MAX_ELEMENT.length][];
+
+        int nextIndex = 0;
+
+        for (int j = 0; j < ELEMENT_COUNT.length; j++)
+            for (int k = 0; k < MAX_ELEMENT.length; k++)
+                result[nextIndex++] = new Object[] { ELEMENT_COUNT[j], MAX_ELEMENT[k]};
+
+        return result;
+    }
+
+    /**
+     * Asserts that an indexed-set is equivalent to an insertion-ordered set provided.
+     *
+     * <p>Sizes, iteration order, the {@code asList()} view and {@code indexOf} are all checked.</p>
+     *
+     * @param subject the indexed-set to test.
+     * @param elementSet the insertion-ordered set holding the expected elements.
+     */
+    private void assertEquals(final IndexedSet<Integer> subject, final Set<Integer> elementSet) {
+        Assert.assertEquals(subject.size(), elementSet.size());
+        final List<Integer> subjectList = subject.asList();
+        Assert.assertEquals(subjectList.size(),elementSet.size());
+        final Iterator<Integer> subjectIterator = subject.iterator();
+        // Must iterate the expected set; iterating the subject twice would compare it with itself.
+        final Iterator<Integer> elementSetIterator = elementSet.iterator();
+
+        final ListIterator<Integer> subjectListIterator = subjectList.listIterator();
+
+        while (subjectIterator.hasNext()) {
+            Assert.assertTrue(elementSetIterator.hasNext(),"more elements in indexed-set than in the equivalent linked hash-set");
+            Assert.assertTrue(subjectListIterator.hasNext());
+
+            final Integer nextElement;
+            Assert.assertEquals(nextElement = subjectIterator.next(),elementSetIterator.next(),"elements in indexed-set do not follow the same order as equivalent linked hash-set's");
+            Assert.assertEquals(subjectListIterator.next(),nextElement);
+            Assert.assertEquals(subject.indexOf(nextElement),subjectListIterator.previousIndex());
+        }
+        Assert.assertFalse(elementSetIterator.hasNext());
+        Assert.assertFalse(subjectListIterator.hasNext());
+    }
+}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/collections/IntMaxHeapUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/collections/IntMaxHeapUnitTest.java
new file mode 100644
index 000000000..986df35e0
--- /dev/null
+++ 
b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/collections/IntMaxHeapUnitTest.java @@ -0,0 +1,171 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. 
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. 
ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ +package org.broadinstitute.gatk.utils.collections; + +import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Random; + +/** + * Tests {@link IntMaxHeap}. 
+ *
+ * @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
+ */
+public class IntMaxHeapUnitTest {
+
+    /**
+     * Checks that a heap accepts more elements than its initial capacity without failing.
+     */
+    @Test(dataProvider = "capacityData")
+    public void testCapacity(final int initialCapacity, final int elementCount) {
+
+        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
+
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+
+        for (int i = 0; i < elementCount; i++) {
+            final int v = rnd.nextInt();
+            heap.add(v);
+        }
+    }
+
+    /**
+     * Checks {@code size} and {@code isEmpty} on a new heap and after each addition.
+     */
+    @Test(dataProvider = "capacityData",dependsOnMethods = {"testCapacity"})
+    public void testEmptynessAndSize(final int initialCapacity, final int elementCount) {
+        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
+
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+
+        Assert.assertEquals(heap.size(),0);
+        Assert.assertTrue(heap.isEmpty());
+        for (int i = 0; i < elementCount; i++) {
+            final int v = rnd.nextInt();
+            heap.add(v);
+            Assert.assertEquals(heap.size(),i+1);
+            Assert.assertFalse(heap.isEmpty());
+        }
+    }
+
+    /**
+     * Checks that {@code clear} leaves a populated heap empty.
+     */
+    @Test(dataProvider = "capacityData", dependsOnMethods = {"testEmptynessAndSize"})
+    public void testClear(final int initialCapacity, final int elementCount) {
+        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
+
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+
+        for (int i = 0; i < elementCount; i++) {
+            final int v = rnd.nextInt();
+            heap.add(v);
+        }
+        heap.clear();
+        Assert.assertEquals(heap.size(),0);
+        Assert.assertTrue(heap.isEmpty());
+    }
+
+    /**
+     * Checks that adding a whole array yields the same removal sequence as adding its
+     * values one by one.
+     */
+    @Test(dataProvider = "capacityData", dependsOnMethods = {"testCapacity"})
+    public void testAddArray(final int initialCapacity, final int elementCount) {
+
+        final IntMaxHeap addHeap = new IntMaxHeap(initialCapacity);
+        final IntMaxHeap arrayAddHeap = new IntMaxHeap(initialCapacity);
+
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+        final int[] values = new int[elementCount];
+        for (int i = 0; i < elementCount; i++) {
+            final int v = rnd.nextInt();
+            values[i] = v;
+            addHeap.add(v);
+        }
+        arrayAddHeap.add(values);
+        Assert.assertEquals(arrayAddHeap.size(),addHeap.size());
+        while (!arrayAddHeap.isEmpty())
+            Assert.assertEquals(arrayAddHeap.remove(),addHeap.remove());
+    }
+
+    /**
+     * Checks that {@code remove} returns the values in descending order and that the
+     * size shrinks by one on every removal.
+     */
+    @Test(dataProvider = "capacityData", dependsOnMethods = {"testEmptynessAndSize"})
+    public void testRemove(final int initialCapacity, final int elementCount) {
+        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
+
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+        final List<Integer> values = new ArrayList<>(elementCount);
+        for (int i = 0; i < elementCount; i++) {
+            final int v = rnd.nextInt();
+            values.add(v);
+            heap.add(v);
+        }
+
+        // Expected removal order: largest value first.
+        Collections.sort(values, Collections.reverseOrder());
+        for (int i = 0; i < elementCount; i++) {
+            Assert.assertEquals(heap.remove(),(int)values.get(i), "element-count = " + elementCount + ", initial-capacity = " + initialCapacity);
+            Assert.assertEquals(heap.size(),elementCount - i - 1);
+        }
+    }
+
+    /**
+     * Checks that {@code peek} always returns the largest value added so far.
+     */
+    @Test(dataProvider = "capacityData", dependsOnMethods = {"testCapacity"})
+    public void testPeek(final int initialCapacity, final int elementCount) {
+        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
+
+        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+
+        int top = rnd.nextInt();
+        heap.add(top);
+        Assert.assertEquals(heap.peek(),top);
+        for (int i = 1; i < elementCount; i++) {
+            final int v = rnd.nextInt();
+            if (v > top) top = v;
+            heap.add(v);
+            Assert.assertEquals(heap.peek(),top);
+        }
+    }
+
+    /**
+     * Pairs of (initial capacity, number of elements to add); every element count
+     * exceeds its initial capacity.
+     */
+    @DataProvider(name="capacityData")
+    public Object[][] capacityData() {
+        return new Object[][] {
+                {0,100}, {1,113}, {20,301}
+        };
+    }
+
+}
diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java
index 33e1a4758..65fc43579 100644
--- 
a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/genotyper/ReadLikelihoodsUnitTest.java @@ -48,14 +48,15 @@ package org.broadinstitute.gatk.utils.genotyper; import htsjdk.samtools.SAMFileHeader; import htsjdk.variant.variantcontext.Allele; import org.broadinstitute.gatk.engine.GenomeAnalysisEngine; -import org.broadinstitute.gatk.genotyping.IndexedAlleleList; -import org.broadinstitute.gatk.genotyping.IndexedSampleList; +import org.broadinstitute.gatk.genotyping.*; import org.broadinstitute.gatk.utils.GenomeLoc; import org.broadinstitute.gatk.utils.GenomeLocParser; +import org.broadinstitute.gatk.utils.MathUtils; import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils; import org.broadinstitute.gatk.utils.sam.GATKSAMRecord; import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils; import org.testng.Assert; +import org.testng.SkipException; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -410,7 +411,7 @@ public class ReadLikelihoodsUnitTest // We add a single missing. 
result.addMissingAlleles(Arrays.asList(newOne = Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-12345.6); - Assert.assertEquals(original.alleleCount() + 1, result.alleleCount()); + Assert.assertEquals(result.alleleCount(), original.alleleCount() + 1); // We add too more amongst exisisting alleles: result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.alleleAt(1), @@ -479,9 +480,9 @@ public class ReadLikelihoodsUnitTest final int alleleCount = result.alleleCount(); Assert.assertEquals(result.alleleCount(), alleleCount); for (int a = 0; a < alleleCount; a++) { - Assert.assertEquals(result.sampleReadCount(0),sampleReadCount); + Assert.assertEquals(result.sampleReadCount(sampleIndex),sampleReadCount); for (int r = 0; r < sampleReadCount; r++) - Assert.assertEquals(result.sampleMatrix(0).get(a,r), + Assert.assertEquals(result.sampleMatrix(sampleIndex).get(a,r), likelihoods == null ? 0.0 : likelihoods[sampleIndex][a][r], EPSILON); } } @@ -541,7 +542,7 @@ public class ReadLikelihoodsUnitTest final Random rnd = GenomeAnalysisEngine.getRandomGenerator(); final Object[][] result = new Object[SAMPLE_SETS.length * ALLELE_SETS.length * ALLELE_SETS.length][]; int nextIndex = 0; - for (int s = 0; s < SAMPLE_SETS.length; s++) + for (int s = 0; s < SAMPLE_SETS.length; s++) { for (int a = 0; a < ALLELE_SETS.length; a++) { for (int b = 0; b < ALLELE_SETS.length; b++) { if (ALLELE_SETS[b].length < ALLELE_SETS[a].length) @@ -550,6 +551,7 @@ public class ReadLikelihoodsUnitTest }; } } + } return Arrays.copyOf(result,nextIndex); }catch (final Throwable e) { throw new RuntimeException(e); @@ -590,9 +592,6 @@ public class ReadLikelihoodsUnitTest } } - final SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader(); - final GenomeLocParser locParser = new GenomeLocParser(SAM_HEADER.getSequenceDictionary()); - private Map> dataSetReads(final String[] samples, final Random rnd) { final Map> result = new 
HashMap<>(samples.length);
@@ -608,4 +607,284 @@ public class ReadLikelihoodsUnitTest
         }
         return result;
     }
+
+    /**
+     * Checks that a newly constructed {@link ReadLikelihoods} reports the same samples, alleles,
+     * reference-allele index and reads as the lists it was built from.
+     */
+    @Test(dataProvider="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
+    public void testInstantiationAndBasicQueries(final int[] readCounts, final int alleleCount, final boolean hasReference) {
+        final SampleList sampleList = sampleList(readCounts);
+
+        final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
+        final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList, readCounts);
+        final ReadLikelihoods<Allele> subject = new ReadLikelihoods<>(sampleList,alleleList,sampleToReads);
+
+        AlleleListUnitTester.assertAlleleList(subject,AlleleListUtils.asList(alleleList));
+        SampleListUnitTester.assertSampleList(subject,SampleListUtils.asList(sampleList));
+
+        if (hasReference) {
+            final int referenceIndex = AlleleListUtils.indexOfReference(alleleList);
+            Assert.assertTrue(referenceIndex >= 0);
+            // Query the subject: comparing the allele list with itself would always pass.
+            Assert.assertEquals(AlleleListUtils.indexOfReference(subject),referenceIndex);
+        } else {
+            Assert.assertEquals(AlleleListUtils.indexOfReference(subject), -1);
+        }
+
+        testLikelihoodMatrixQueries(alleleList, sampleList, sampleToReads, subject);
+        testAlleleQueries(alleleList, subject);
+        testSampleQueries(sampleList, sampleToReads, subject);
+    }
+
+    /**
+     * Sets every cell of every sample matrix to a non-zero value derived from its indices and
+     * reads it back, then checks the number of cells written against the read and allele counts.
+     */
+    @Test(dataProvider="readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference")
+    public void testLikelihoodWriting(final int[] readCounts, final int alleleCount, final boolean hasReference) {
+        final SampleList sampleList = sampleList(readCounts);
+
+        final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
+        final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList,readCounts);
+        final ReadLikelihoods<Allele> subject = new ReadLikelihoods<>(sampleList,alleleList,sampleToReads);
+
+        final int sampleCount = readCounts.length;
+        int totalLikelihoodsSet = 0;
+        int expectedLikelihoodsSet = 0;
+        for (int s = 0; s < sampleCount; s++) {
+            expectedLikelihoodsSet += readCounts[s] * alleleCount;
+            final ReadLikelihoods.Matrix<Allele> matrix = subject.sampleMatrix(s);
+            final int readCount = matrix.readCount();
+            for (int a = 0; a < alleleCount; a++)
+                for (int r = 0; r < readCount; r++) {
+                    final double likelihood = testLikelihood(s, a, r);
+                    Assert.assertNotEquals(likelihood,0); //Paranoia
+                    totalLikelihoodsSet++;
+                    matrix.set(a,r,likelihood);
+                    Assert.assertEquals(matrix.get(a, r),likelihood);
+                }
+
+        }
+        Assert.assertEquals(totalLikelihoodsSet,expectedLikelihoodsSet);
+    }
+
+    /**
+     * Builds per-sample {@link PerReadAlleleLikelihoodMap}s, converts them with
+     * {@code ReadLikelihoods.fromPerAlleleReadLikelihoodsMap} and checks indices and values survive.
+     */
+    @Test(dependsOnMethods={"testLikelihoodWriting","testInstantiationAndBasicQueries"},
+            dataProvider="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
+    public void testMapConversion(final int[] readCounts, final int alleleCount, final boolean hasReference) {
+        final SampleList sampleList = sampleList(readCounts);
+
+        final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
+        final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList,readCounts);
+
+        final Set<Allele> alleleWithLikelihoodsSet = new HashSet<>();
+        final Set<GATKSAMRecord> readsWithLikelihoodsSet = new HashSet<>();
+        final Map<String,PerReadAlleleLikelihoodMap> map = new HashMap<>(sampleList.sampleCount());
+        final int sampleCount = sampleList.sampleCount();
+        for (int s = 0; s < sampleCount; s++) {
+            final String sample = sampleList.sampleAt(s);
+            final PerReadAlleleLikelihoodMap perSampleMap = new PerReadAlleleLikelihoodMap();
+            final List<GATKSAMRecord> reads = sampleToReads.get(sample);
+            for (int a = 0; a < alleleCount; a++)
+                for (int r = 0; r < reads.size(); r++) {
+                    perSampleMap.add(reads.get(r), alleleList.alleleAt(a), testLikelihood(s, a, r));
+                    alleleWithLikelihoodsSet.add(alleleList.alleleAt(a));
+                    readsWithLikelihoodsSet.add(reads.get(r));
+                }
+            map.put(sample,perSampleMap);
+
+        }
+
+        ReadLikelihoods<Allele> subject = ReadLikelihoods.fromPerAlleleReadLikelihoodsMap(map);
+
+        for (int s = 0; s < sampleCount; s++) {
+            final String sample = sampleList.sampleAt(s);
+            final int sIndex = subject.sampleIndex(sample);
+            Assert.assertTrue(sIndex >= 0);
+            Assert.assertTrue(sIndex < sampleCount);
+            final int sampleReadCount = sampleToReads.get(sample).size();
+            final ReadLikelihoods.Matrix<Allele> sampleLikelihoods = subject.sampleMatrix(sIndex);
+            for (int a = 0; a < alleleCount; a++) {
+                final Allele allele = alleleList.alleleAt(a);
+                final int aIndex = subject.alleleIndex(allele);
+                Assert.assertEquals(aIndex >= 0,alleleWithLikelihoodsSet.contains(allele));
+                Assert.assertTrue(aIndex < alleleCount);
+                if (aIndex == -1) continue;
+                for (int r = 0; r < sampleReadCount; r++) {
+                    final GATKSAMRecord read = sampleToReads.get(sample).get(r);
+                    final int rIndex = subject.readIndex(sIndex,read);
+                    final int rIndex2 = sampleLikelihoods.readIndex(read);
+                    Assert.assertEquals(rIndex,rIndex2);
+                    Assert.assertEquals(rIndex >= 0,readsWithLikelihoodsSet.contains(read));
+                    Assert.assertTrue(rIndex < sampleReadCount);
+                    if (rIndex == -1)
+                        continue;
+                    final double likelihood = sampleLikelihoods.get(aIndex,rIndex);
+                    Assert.assertEquals(likelihood,testLikelihood(s,a,r));
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns a deterministic test likelihood; strictly negative for non-negative indices.
+     */
+    private double testLikelihood(final int sampleIndex, final int alleleIndex, final int readIndex) {
+        return - Math.abs(31 * (sampleIndex + 1) + 101 * alleleIndex + 1009 * readIndex);
+    }
+
+
+    private final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
+
+    /**
+     * Checks each sample matrix has the expected alleles and reads, in order, and that
+     * every likelihood still holds the initial value {@code 0.0}.
+     */
+    private void testLikelihoodMatrixQueries(final AlleleList<Allele> alleles, final SampleList samples,
+                                             final Map<String,List<GATKSAMRecord>> sampleToReads, ReadLikelihoods<Allele> result) {
+        for (final String sample : SampleListUtils.asList(samples)) {
+            final int sampleIndex = result.sampleIndex(sample);
+            final ReadLikelihoods.Matrix<Allele> likelihoodMatrix = result.sampleMatrix(sampleIndex);
+            final int sampleReadCount = sampleToReads.get(sample).size();
+            final List<GATKSAMRecord> reads = sampleToReads.get(sample);
+            Assert.assertEquals(likelihoodMatrix.alleleCount(), alleles.alleleCount());
+            Assert.assertEquals(likelihoodMatrix.readCount(), sampleReadCount);
+            for (int a = 0; a < likelihoodMatrix.alleleCount(); a++) {
+                Assert.assertEquals(likelihoodMatrix.alleleAt(a),alleles.alleleAt(a));
+                for (int r = 0; r < sampleReadCount; r++) {
+                    Assert.assertEquals(likelihoodMatrix.readAt(r),reads.get(r));
+                    Assert.assertEquals(likelihoodMatrix.get(a, r), 0.0);
+                }
+            }
+        }
+    }
+
+    /**
+     * Checks that every allele maps to a distinct, non-negative index in {@code result}.
+     */
+    private void testAlleleQueries(final AlleleList<Allele> alleles, ReadLikelihoods<Allele> result) {
+        final Set<Integer> alleleIndices = new HashSet<>();
+        for (final Allele allele : AlleleListUtils.asList(alleles)) {
+            final int alleleIndex = result.alleleIndex(allele);
+            Assert.assertTrue(alleleIndex >= 0);
+            Assert.assertFalse(alleleIndices.contains(alleleIndex));
+            alleleIndices.add(alleleIndex);
+            Assert.assertSame(allele,alleles.alleleAt(alleleIndex));
+        }
+    }
+
+    /**
+     * Checks that every sample maps to a distinct, non-negative index and that its reads
+     * are returned in the same order as in {@code reads}.
+     */
+    private void testSampleQueries(final SampleList samples, Map<String,List<GATKSAMRecord>> reads,
+                                   final ReadLikelihoods<Allele> result) {
+        final Set<Integer> sampleIds = new HashSet<>(samples.sampleCount());
+        for (final String sample : SampleListUtils.asList(samples)) {
+            final int sampleIndex = result.sampleIndex(sample);
+            Assert.assertTrue(sampleIndex >= 0);
+            Assert.assertFalse(sampleIds.contains(sampleIndex));
+            sampleIds.add(sampleIndex);
+
+            final List<GATKSAMRecord> sampleReads = result.sampleReads(sampleIndex);
+            final Set<GATKSAMRecord> sampleReadsSet = new HashSet<>(sampleReads);
+            final List<GATKSAMRecord> expectedSampleReadArray = reads.get(sample);
+            final Set<GATKSAMRecord> expectedSampleReadsSet = new HashSet<>(expectedSampleReadArray);
+            Assert.assertEquals(sampleReadsSet,expectedSampleReadsSet);
+
+            final int sampleReadCount = sampleReads.size();
+            for (int r = 0; r < sampleReadCount; r++) {
+                Assert.assertSame(sampleReads.get(r), expectedSampleReadArray.get(r));
+                final int readIndex = result.readIndex(sampleIndex, sampleReads.get(r));
+                Assert.assertEquals(readIndex,r);
+            }
+        }
+    }
+
+    /**
+     * Creates a random allele list, optionally turning one randomly chosen allele into the reference.
+     *
+     * @throws SkipException if the random alleles contain repeats.
+     */
+    private AlleleList<Allele> alleleList(final int alleleCount, final boolean hasReference) {
+        final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount,100);
+        if (hasReference) {
+            final int referenceIndex = rnd.nextInt(alleleCount);
+            alleles[referenceIndex] = Allele.create(alleles[referenceIndex].getBases(),true);
+        }
+        final AlleleList<Allele> alleleList = new IndexedAlleleList<>(alleles);
+        if (alleleList.alleleCount() != alleles.length)
+            throw new SkipException("repeated alleles, should be infrequent");
+        return alleleList;
+    }
+
+    private SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader(10, 0, 1000);
+    final GenomeLocParser locParser = new GenomeLocParser(SAM_HEADER.getSequenceDictionary());
+
+
+    private int[][] READ_COUNTS = new int[][] {
+            {},
+            { 100 },
+            { 0 },
+            { 0, 0, 0 },
+            { 1, 0, 1 },
+            { 100, 10 , 100},
+            { 1000, 10, 100, 20, 23 }
+    };
+
+    private int[] ALLELE_COUNTS = new int[] { 0, 1, 2, 3, 10, 20 };
+
+    /**
+     * All combinations of {@link #READ_COUNTS}, {@link #ALLELE_COUNTS} and the has-reference flag.
+     */
+    @DataProvider(name="readCountsAndAlleleCountData")
+    public Object[][] readCountsAndAlleleCountData() {
+        final Object[][] result = new Object[READ_COUNTS.length * ALLELE_COUNTS.length * 2][];
+        int index = 0;
+        for (final int[] readCounts : READ_COUNTS)
+            for (final int alleleCount : ALLELE_COUNTS) {
+                result[index++] = new Object[]{ readCounts, alleleCount, false};
+                result[index++] = new Object[]{ readCounts, alleleCount, true};
+            }
+        return result;
+    }
+
+    /**
+     * Same as {@link #readCountsAndAlleleCountData()} minus the impossible cases that ask
+     * for a reference allele when the allele count is zero.
+     */
+    @DataProvider(name="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
+    public Object[][] readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference() {
+        final Object[][] raw = readCountsAndAlleleCountData();
+        final List<Object[]> result = new ArrayList<>(raw.length);
+        for (final Object[] paramSet : raw)
+            if (!paramSet[2].equals(true) || !paramSet[1].equals(0))
+                result.add(paramSet);
+        return result.toArray(new Object[result.size()][]);
+    }
+
+    @DataProvider(name="readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference")
+    public Object[][] readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference() {
+        final Object[][] raw = readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference();
+        final List result = new 
ArrayList<>(raw.length); + for (final Object[] paramSet : raw) { + final int[] readCounts = (int[]) paramSet[0]; + final long totalReadCount = MathUtils.sum(readCounts); + if (totalReadCount > 0) + result.add(paramSet); + } + return result.toArray(new Object[result.size()][]); + } + + private SampleList sampleList(final int[] readCounts) { + final List samples = new ArrayList<>(readCounts.length); + for (int i = 0; i < readCounts.length; i++) + samples.add("SAMPLE_" + i); + return new IndexedSampleList(samples); + } + } diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriterUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriterUnitTest.java index 1f0280c82..354103eae 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriterUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/GVCFWriterUnitTest.java @@ -347,7 +347,7 @@ public class GVCFWriterUnitTest extends BaseTest { @Test public void testHomRefAlt() { - final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, 2); + final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY); writer.add(makeHomRef("20", 1, 0)); writer.add(makeHomRef("20", 2, 0)); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlockUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlockUnitTest.java index 00d5d6984..779cc588c 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlockUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/utils/gvcf/HomRefBlockUnitTest.java @@ -51,6 +51,7 @@ import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.variantcontext.VariantContext; import 
htsjdk.variant.variantcontext.VariantContextBuilder; import org.broadinstitute.gatk.utils.BaseTest; +import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants; import org.testng.Assert; import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; @@ -70,7 +71,7 @@ public class HomRefBlockUnitTest extends BaseTest { @Test public void testBasicConstruction() { - final HomRefBlock band = new HomRefBlock(vc, 10, 20, 2); + final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY); Assert.assertSame(band.getStartingVC(), vc); Assert.assertEquals(band.getRef(), vc.getReference()); Assert.assertEquals(band.getGQLowerBound(), 10); @@ -85,8 +86,9 @@ public class HomRefBlockUnitTest extends BaseTest { @Test public void testMinMedian() { //TODO - might be better to make this test use a data provider? - final HomRefBlock band = new HomRefBlock(vc, 10, 20,2); + final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY); final GenotypeBuilder gb = new GenotypeBuilder("NA12878"); + gb.alleles(vc.getAlleles()); int pos = vc.getStart(); band.add(pos++, gb.DP(10).GQ(11).PL(new int[]{0,11,100}).make()); @@ -116,8 +118,9 @@ public class HomRefBlockUnitTest extends BaseTest { @Test public void testBigGQIsCapped() { - final HomRefBlock band = new HomRefBlock(vc, 10, 20,2); + final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY); final GenotypeBuilder gb = new GenotypeBuilder("NA12878"); + gb.alleles(vc.getAlleles()); band.add(vc.getStart(), gb.DP(1000).GQ(1000).PL(new int[]{0,10,100}).make()); assertValues(band, 1000, 1000, 99, 99); @@ -125,7 +128,7 @@ public class HomRefBlockUnitTest extends BaseTest { @Test(expectedExceptions = IllegalArgumentException.class) public void testBadAdd() { - final HomRefBlock band = new HomRefBlock(vc, 10, 20,2); + final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY); final 
GenotypeBuilder gb = new GenotypeBuilder("NA12878"); band.add(vc.getStart() + 10, gb.DP(10).GQ(11).PL(new int[]{0,10,100}).make()); @@ -155,7 +158,7 @@ public class HomRefBlockUnitTest extends BaseTest { @Test(dataProvider = "ContiguousData") public void testIsContiguous(final String contig, final int pos, final boolean expected) { - final HomRefBlock band = new HomRefBlock(vc, 10, 20,2); + final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY); final VariantContext testVC = new VariantContextBuilder(vc).chr(contig).start(pos).stop(pos).make(); Assert.assertEquals(band.isContiguous(testVC), expected); } diff --git a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/PerReadAlleleLikelihoodMap.java index 553823e33..1dd8a8a1f 100644 --- a/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/PerReadAlleleLikelihoodMap.java +++ b/public/gatk-tools-public/src/main/java/org/broadinstitute/gatk/utils/genotyper/PerReadAlleleLikelihoodMap.java @@ -45,9 +45,12 @@ import java.util.*; */ public class PerReadAlleleLikelihoodMap { /** A set of all of the allele, so we can efficiently determine if an allele is already present */ - private final Set allelesSet = new HashSet<>(); + private final Map allelesSet = new HashMap<>(); /** A list of the unique allele, as an ArrayList so we can call get(i) efficiently */ protected final List alleles = new ArrayList<>(); + + + protected final Map> likelihoodReadMap = new LinkedHashMap<>(); public PerReadAlleleLikelihoodMap() { } @@ -64,6 +67,10 @@ public class PerReadAlleleLikelihoodMap { if ( likelihood == null ) throw new IllegalArgumentException("Likelihood cannot be null"); if ( likelihood > 0.0 ) throw new IllegalArgumentException("Likelihood must be negative (L = log(p))"); + if (!allelesSet.containsKey(a)) { + 
allelesSet.put(a,alleles.size()); + alleles.add(a); + } Map likelihoodMap = likelihoodReadMap.get(read); if (likelihoodMap == null){ // LinkedHashMap will ensure iterating through alleles will be in consistent order @@ -73,10 +80,7 @@ public class PerReadAlleleLikelihoodMap { likelihoodMap.put(a,likelihood); - if (!allelesSet.contains(a)) { - allelesSet.add(a); - alleles.add(a); - } + } public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) { @@ -198,7 +202,7 @@ public class PerReadAlleleLikelihoodMap { * @return the log10 likelihood that this read matches this allele */ public double getLikelihoodAssociatedWithReadAndAllele(final GATKSAMRecord read, final Allele allele){ - if (!allelesSet.contains(allele) || !likelihoodReadMap.containsKey(read)) + if (!allelesSet.containsKey(allele) || !likelihoodReadMap.containsKey(read)) return 0.0; return likelihoodReadMap.get(read).get(allele); @@ -381,7 +385,7 @@ public class PerReadAlleleLikelihoodMap { * @return a non-null unmodifiable map */ public Set getAllelesSet() { - return Collections.unmodifiableSet(allelesSet); + return Collections.unmodifiableSet(allelesSet.keySet()); } /** diff --git a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/BaseTest.java b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/BaseTest.java index b8d60cf53..16e566230 100644 --- a/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/BaseTest.java +++ b/public/gatk-tools-public/src/test/java/org/broadinstitute/gatk/utils/BaseTest.java @@ -26,20 +26,9 @@ package org.broadinstitute.gatk.utils; import htsjdk.tribble.Tribble; -import htsjdk.tribble.util.TabixUtils; -import org.apache.log4j.AppenderSkeleton; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; -import org.apache.log4j.PatternLayout; -import org.apache.log4j.spi.LoggingEvent; import htsjdk.tribble.readers.LineIterator; import 
htsjdk.tribble.readers.PositionalBufferedStream; -import org.broadinstitute.gatk.utils.commandline.CommandLineUtils; -import org.broadinstitute.gatk.utils.collections.Pair; -import org.broadinstitute.gatk.utils.crypt.CryptUtils; -import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; -import org.broadinstitute.gatk.utils.io.IOUtils; -import org.broadinstitute.gatk.utils.variant.GATKVCFUtils; +import htsjdk.tribble.util.TabixUtils; import htsjdk.variant.bcf2.BCF2Codec; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; @@ -47,6 +36,17 @@ import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFConstants; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; +import org.apache.log4j.AppenderSkeleton; +import org.apache.log4j.Level; +import org.apache.log4j.Logger; +import org.apache.log4j.PatternLayout; +import org.apache.log4j.spi.LoggingEvent; +import org.broadinstitute.gatk.utils.collections.Pair; +import org.broadinstitute.gatk.utils.commandline.CommandLineUtils; +import org.broadinstitute.gatk.utils.crypt.CryptUtils; +import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException; +import org.broadinstitute.gatk.utils.io.IOUtils; +import org.broadinstitute.gatk.utils.variant.GATKVCFUtils; import org.testng.Assert; import org.testng.Reporter; import org.testng.SkipException; @@ -132,6 +132,7 @@ public abstract class BaseTest { protected static final String publicTestDirRoot = publicTestDir.replace(publicTestDirRelative, ""); public static final String keysDataLocation = validationDataLocation + "keys/"; + public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key"; public static final String exampleFASTA = publicTestDir + "exampleFASTA.fasta";