Adds unit tests and integration tests for the new omniploidy likelihood calculation components

Added md5 to HaplotypeCallerIntegrationTest.testHaplotypeCallerSingleSampleWithDbsnp
This commit is contained in:
Valentin Ruano-Rubio 2014-08-13 14:06:52 -04:00
parent 9ee9da36bb
commit 611b7f25ea
28 changed files with 2855 additions and 53 deletions

View File

@ -89,7 +89,6 @@ public class GenotypeLikelihoodCalculator {
*/
private final GenotypeAlleleCounts[] genotypeAlleleCounts;
/**
* Number of genotypes given this calculator {@link #ploidy} and {@link #alleleCount}.
*/

View File

@ -0,0 +1,171 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.RandomDNA;
import org.testng.Assert;
import org.testng.SkipException;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
/**
 * Helper class for those unit-test classes that test implementations of
 * {@link org.broadinstitute.gatk.genotyping.AlleleList}.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class AlleleListUnitTester {

    /** Shared random generator used to pick the length of each generated allele. */
    private static final Random rnd = GenomeAnalysisEngine.getRandomGenerator();

    /** Source of random base sequences, driven by {@link #rnd}. */
    private static final RandomDNA rndDNA = new RandomDNA(rnd);

    /**
     * Safe to assume that this allele will never be used.
     * <p>
     * It is used to verify that looking up an absent allele yields index {@code -1}, therefore it is
     * forbidden as a member of any expected list passed to {@link #assertAlleleList}.
     * </p>
     */
    private static final Allele NEVER_USE_ALLELE =
            Allele.create("ACTGACTGACTGACTGACTGACTGACTGACTGGTCAGTCAGTCAGTCAGTCAGTCA".getBytes(), false);

    /**
     * Test that the contents of an allele-list are the ones expected.
     *
     * <p>
     * This method performs various consistency checks involving the
     * {@link org.broadinstitute.gatk.genotyping.AlleleList} methods {@code alleleCount}, {@code alleleAt}
     * and {@code alleleIndex}: same size, same alleles in the same order, no repeats, consistent
     * allele-to-index look-up and a {@code -1} index for an allele that is not in the list.
     * </p>
     *
     * @param actual the allele-list to assess.
     * @param expected the expected alleles, in the expected order.
     * @param <A> the allele type.
     * @throws IllegalArgumentException if {@code expected} is {@code null}, contains {@code null}s,
     *     contains repeats or contains the forbidden never-used allele; all these are an indication
     *     of a bug in the testing code.
     * @throws AssertionError if any of the checks on {@code actual} fails, which is an indication
     *     of an actual bug in the code that is being tested.
     */
    public static <A extends Allele> void assertAlleleList(final AlleleList<A> actual, final List<A> expected) {
        if (expected == null) {
            throw new IllegalArgumentException("the expected list cannot be null");
        }
        final Set<A> expectedAlleleSet = new HashSet<>(expected.size());
        Assert.assertNotNull(actual);
        Assert.assertEquals(actual.alleleCount(), expected.size());
        for (int i = 0; i < expected.size(); i++) {
            final A expectedAllele = expected.get(i);
            // Problems with the expected list are test-code bugs, hence reported as illegal arguments
            // rather than as assertion failures.
            if (expectedAllele == null) {
                throw new IllegalArgumentException("the expected allele cannot be null");
            }
            if (expectedAllele.equals(NEVER_USE_ALLELE)) {
                throw new IllegalArgumentException("you cannot use the forbidden allele");
            }
            if (expectedAlleleSet.contains(expectedAllele)) {
                throw new IllegalArgumentException("repeated allele in the expected list, this is a test bug");
            }
            final A actualAllele = actual.alleleAt(i);
            Assert.assertNotNull(actualAllele, "allele cannot be null");
            Assert.assertFalse(expectedAlleleSet.contains(actualAllele), "repeated allele: " + actualAllele);
            Assert.assertEquals(actualAllele, expectedAllele, "wrong allele order; index = " + i);
            Assert.assertEquals(actual.alleleIndex(actualAllele), i, "allele index mismatch");
            expectedAlleleSet.add(actualAllele);
        }
        // The cast is only needed to satisfy the generic signature; A erases to Allele so it cannot
        // fail at run-time here.
        @SuppressWarnings("unchecked")
        final A neverUsedAllele = (A) NEVER_USE_ALLELE;
        Assert.assertEquals(actual.alleleIndex(neverUsedAllele), -1);
    }

    /**
     * Generate testing alleles.
     *
     * <p>
     * Each allele is a random base sequence whose length is drawn between 1 and
     * {@code maxAlleleLength}, both inclusive.
     * </p>
     *
     * <p>
     * So with a low max-allele-length and high allele-count you can force repeats.
     * </p>
     *
     * @param alleleCount number of alleles to generate.
     * @param maxAlleleLength the maximum length of the allele in bases.
     *
     * @throws IllegalArgumentException if {@code alleleCount} is negative or {@code maxAlleleLength}
     *     is less than 1.
     * @return never {@code null}; an array with exactly {@code alleleCount} elements.
     */
    public static Allele[] generateRandomAlleles(final int alleleCount, final int maxAlleleLength) {
        if (maxAlleleLength < 1) {
            throw new IllegalArgumentException("the max allele length cannot be less than 1");
        }
        if (alleleCount < 0) {
            throw new IllegalArgumentException("the allele count cannot be negative");
        }
        final Allele[] result = new Allele[alleleCount];
        for (int i = 0; i < alleleCount; i++) {
            final int alleleLength = rnd.nextInt(maxAlleleLength) + 1;
            result[i] = Allele.create(rndDNA.nextBases(alleleLength));
        }
        return result;
    }

    /**
     * Generate a testing allele-list.
     *
     * <p>
     * The alleles come from {@link #generateRandomAlleles(int, int)}; the first one, if any, is
     * re-created as a reference allele before the list is built.
     * </p>
     *
     * <p>
     * So with a low max-allele-length and high allele-count you can force repeats.
     * </p>
     *
     * @param alleleCount number of alleles to generate.
     * @param maxAlleleLength the maximum length of the allele in bases.
     * @param skipIfRepeats whether to throw a test-skip exception ({@link SkipException}) when the
     *     resulting allele-list size differs from {@code alleleCount}, presumably because repeated
     *     alleles were collapsed by the list implementation.
     *
     * @throws IllegalArgumentException if {@code alleleCount} is negative or {@code maxAlleleLength}
     *     is less than 1.
     * @throws SkipException if {@code skipIfRepeats} is {@code true} and the resulting list has a
     *     different number of alleles than the ones generated.
     * @return never {@code null}.
     */
    static AlleleList<Allele> alleleList(final int alleleCount, final int maxAlleleLength, final boolean skipIfRepeats) {
        final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount, maxAlleleLength);
        if (alleleCount > 0) {
            alleles[0] = Allele.create(alleles[0].getBases(), true);
        }
        final AlleleList<Allele> alleleList = new IndexedAlleleList<>(alleles);
        if (skipIfRepeats && alleleList.alleleCount() != alleles.length) {
            throw new SkipException("repeated alleles, should be infrequent");
        }
        return alleleList;
    }
}

View File

@ -0,0 +1,226 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.SkipException;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Test {@link org.broadinstitute.gatk.genotyping.AlleleListUtils}.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class AlleleListUtilsUnitTest {
@Test(dataProvider = "singleAlleleListData")
public void testAsList(final List<Allele> alleles1) {
final Allele[] uniqueAlleles = new LinkedHashSet<>(alleles1).toArray(new Allele[0]);
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(alleles1);
final List<Allele> asList = AlleleListUtils.asList(alleleList);
final Allele[] asListArray = asList.toArray(new Allele[asList.size()]);
Assert.assertTrue(Arrays.equals(uniqueAlleles,asListArray));
}
@Test(dataProvider = "singleAlleleListData")
public void testIndexOfReference(final List<Allele> alleles1) {
final Allele[] uniqueAlleles = new LinkedHashSet<>(alleles1).toArray(new Allele[0]);
for (int i = 0; i < uniqueAlleles.length; i++) {
final Allele[] actualAlleles = uniqueAlleles.clone();
actualAlleles[i] = Allele.create(actualAlleles[i].getBases(),true);
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(actualAlleles);
Assert.assertEquals(AlleleListUtils.indexOfReference(alleleList),i);
}
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(uniqueAlleles);
Assert.assertEquals(AlleleListUtils.indexOfReference(alleleList),-1);
}
@Test(dataProvider = "twoAlleleListData", dependsOnMethods={"testAsList"})
public void testEquals(final List<Allele> alleles1, final List<Allele> alleles2) {
final AlleleList<Allele> alleleList1 = new IndexedAlleleList<Allele>(alleles1);
final AlleleList<Allele> alleleList2 = new IndexedAlleleList<Allele>(alleles2);
Assert.assertTrue(AlleleListUtils.equals(alleleList1,alleleList1));
Assert.assertTrue(AlleleListUtils.equals(alleleList2,alleleList2));
Assert.assertEquals(AlleleListUtils.equals(alleleList1, alleleList2),
Arrays.equals(AlleleListUtils.asList(alleleList1).toArray(new Allele[alleleList1.alleleCount()]),
AlleleListUtils.asList(alleleList2).toArray(new Allele[alleleList2.alleleCount()]))
);
Assert.assertEquals(AlleleListUtils.equals(alleleList1,alleleList2),
AlleleListUtils.equals(alleleList2,alleleList1));
}
@Test(dataProvider = "singleAlleleListData", dependsOnMethods= "testEquals" )
public void testSelfPermutation(final List<Allele> alleles1) {
final AlleleList<Allele> originalAlleleList = new IndexedAlleleList<>(alleles1);
final AlleleListPermutation<Allele> selfPermutation = AlleleListUtils.permutation(originalAlleleList,originalAlleleList);
Assert.assertEquals(selfPermutation.fromSize(),originalAlleleList.alleleCount());
Assert.assertEquals(selfPermutation.toSize(),originalAlleleList.alleleCount());
Assert.assertTrue(selfPermutation.isNonPermuted());
Assert.assertFalse(selfPermutation.isPartial());
for (int i = 0; i < originalAlleleList.alleleCount(); i++) {
Assert.assertEquals(selfPermutation.fromIndex(i), i);
Assert.assertEquals(selfPermutation.toIndex(i),i);
Assert.assertEquals(selfPermutation.fromList(),selfPermutation.toList());
AlleleListUnitTester.assertAlleleList(originalAlleleList, selfPermutation.fromList());
}
Assert.assertTrue(AlleleListUtils.equals(selfPermutation,originalAlleleList));
}
private final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
@Test(dataProvider = "singleAlleleListData", dependsOnMethods = "testEquals")
public void testSubsetPermutation(final List<Allele> alleles1) {
final List<Allele> subsetAlleles = new ArrayList<>(alleles1.size());
for (final Allele allele : alleles1)
if (rnd.nextBoolean()) subsetAlleles.add(allele);
final AlleleList<Allele> originalAlleleList = new IndexedAlleleList<>(alleles1);
final AlleleList<Allele> targetAlleleList = new IndexedAlleleList<>(subsetAlleles);
final AlleleListPermutation<Allele> subset = AlleleListUtils.permutation(originalAlleleList,targetAlleleList);
if (originalAlleleList.alleleCount() == targetAlleleList.alleleCount())
throw new SkipException("no real subset");
Assert.assertTrue(subset.isPartial());
Assert.assertFalse(subset.isNonPermuted());
Assert.assertEquals(subset.fromSize(),originalAlleleList.alleleCount());
Assert.assertEquals(subset.toSize(),targetAlleleList.alleleCount());
AlleleListUnitTester.assertAlleleList(originalAlleleList,subset.fromList());
AlleleListUnitTester.assertAlleleList(targetAlleleList,subset.toList());
for (int i = 0; i < targetAlleleList.alleleCount(); i++)
Assert.assertEquals(subset.fromIndex(i), originalAlleleList.alleleIndex(targetAlleleList.alleleAt(i)));
for (int j = 0; j < originalAlleleList.alleleCount(); j++) {
final Allele allele = originalAlleleList.alleleAt(j);
Assert.assertEquals(subset.toIndex(j),targetAlleleList.alleleIndex(allele));
}
Assert.assertTrue(AlleleListUtils.equals(subset,targetAlleleList));
}
@Test(dataProvider = "singleAlleleListData", dependsOnMethods = {"testAsList","testEquals"})
public void testShufflePermutation(final List<Allele> alleles1) {
final AlleleList<Allele> originalAlleleList = new IndexedAlleleList<>(alleles1);
if (originalAlleleList.alleleCount() <= 1)
throw new SkipException("non-shuffle allele-list");
final Allele[] targetAlleleArray = AlleleListUtils.asList(originalAlleleList).toArray(new Allele[originalAlleleList.alleleCount()]);
final int[] fromIndex = new int[targetAlleleArray.length];
for (int i = 0; i < fromIndex.length; i++)
fromIndex[i] = i;
for (int i = 0; i < targetAlleleArray.length - 1; i++) {
final int swapIndex = rnd.nextInt(targetAlleleArray.length - i - 1);
final int otherIndex = fromIndex[swapIndex + i + 1];
final Allele other = targetAlleleArray[swapIndex + i + 1];
fromIndex[swapIndex + i + 1] = fromIndex[i];
fromIndex[i] = otherIndex;
targetAlleleArray[swapIndex + i + 1] = targetAlleleArray[i];
targetAlleleArray[i] = other;
}
final AlleleList<Allele> targetAlleleList = new IndexedAlleleList<>(targetAlleleArray);
final AlleleListPermutation<Allele> permutation = AlleleListUtils.permutation(originalAlleleList,targetAlleleList);
Assert.assertFalse(permutation.isNonPermuted());
AlleleListUnitTester.assertAlleleList(originalAlleleList,permutation.fromList());
AlleleListUnitTester.assertAlleleList(targetAlleleList,permutation.toList());
Assert.assertFalse(permutation.isPartial());
Assert.assertEquals(permutation.fromSize(),originalAlleleList.alleleCount());
Assert.assertEquals(permutation.toSize(),targetAlleleList.alleleCount());
for (int i = 0; i < permutation.fromSize(); i++) {
Assert.assertEquals(permutation.toIndex(i),targetAlleleList.alleleIndex(originalAlleleList.alleleAt(i)));
Assert.assertEquals(permutation.fromIndex(i),originalAlleleList.alleleIndex(targetAlleleList.alleleAt(i)));
Assert.assertEquals(permutation.fromIndex(i),fromIndex[i]);
}
Assert.assertTrue(AlleleListUtils.equals(permutation,targetAlleleList));
}
private List<Allele>[] alleleLists;
@BeforeClass
public void setUp() {
alleleLists = new List[ALLELE_COUNT.length * MAX_ALLELE_LENGTH.length];
int nextIndex = 0;
for (int i = 0; i < ALLELE_COUNT.length; i++)
for (int j = 0; j < MAX_ALLELE_LENGTH.length; j++)
alleleLists[nextIndex++] = Arrays.asList(AlleleListUnitTester.generateRandomAlleles(ALLELE_COUNT[i], MAX_ALLELE_LENGTH[j]));
}
private static final int[] ALLELE_COUNT = { 0, 1, 5, 10, 20};
private static final int[] MAX_ALLELE_LENGTH = { 1, 2, 3, 10 };
@DataProvider(name="singleAlleleListData")
public Object[][] singleAlleleListData() {
final Object[][] result = new Object[alleleLists.length][];
for (int i = 0; i < alleleLists.length; i++)
result[i] = new Object[] { alleleLists[i]};
return result;
}
@DataProvider(name="twoAlleleListData")
public Object[][] twoAlleleListData() {
final Object[][] result = new Object[alleleLists.length * alleleLists.length][];
int index = 0;
for (int i = 0; i < alleleLists.length; i++)
for (int j = 0; j < alleleLists.length; j++)
result[index++] = new Object[] { alleleLists[i], alleleLists[j]};
return result;
}
}

View File

@ -0,0 +1,328 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* Test {@link GenotypeAlleleCounts}
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class GenotypeAlleleCountsUnitTest {
/**
 * Checks the genotype returned by {@link GenotypeAlleleCounts#first(int)}: it must sit at
 * genotype index 0 and contain a single distinct allele (allele index 0) whose count
 * equals the ploidy.
 *
 * @param ploidy the ploidy to request the first genotype for.
 */
@Test(dataProvider="ploidyData")
public void testFirst(final int ploidy) {
    final GenotypeAlleleCounts subject = GenotypeAlleleCounts.first(ploidy);
    Assert.assertNotNull(subject);

    // Composition: allele 0 only, repeated ploidy times.
    Assert.assertEquals(subject.ploidy(), ploidy);
    Assert.assertEquals(subject.distinctAlleleCount(), 1);
    Assert.assertEquals(subject.alleleCountAt(0), ploidy);
    Assert.assertEquals(subject.alleleCountFor(0), ploidy);

    // Look-ups by allele index; allele 1 is absent and its rank is expected to be -2.
    Assert.assertEquals(subject.alleleRankFor(0), 0);
    Assert.assertEquals(subject.alleleRankFor(1), -2);
    Assert.assertTrue(subject.containsAllele(0));
    Assert.assertFalse(subject.containsAllele(1));
    Assert.assertEquals(subject.alleleIndexAt(0), 0);
    Assert.assertEquals(subject.maximumAlleleIndex(), 0);
    Assert.assertEquals(subject.minimumAlleleIndex(), 0);

    // Identity comparisons and position in the genotype enumeration.
    Assert.assertTrue(subject.compareTo(subject) == 0);
    Assert.assertTrue(subject.equals(subject));
    Assert.assertEquals(subject.index(), 0);

    // For every requested array length only position 0 carries a non-zero count.
    for (int length = 1; length <= MAXIMUM_ALLELE_INDEX + 1; length++) {
        final int[] expected = new int[length];
        expected[0] = ploidy;
        Assert.assertEquals(subject.alleleCountsByIndex(length - 1), expected);
    }
}
/**
 * Exercises {@code GenotypeAlleleCounts.next()} by dispatching to the helper that
 * matches the requested ploidy (zero, one, or two and above).
 *
 * @param ploidy the ploidy under test.
 */
@Test(dataProvider = "ploidyData",dependsOnMethods = "testFirst")
public void testNext(final int ploidy) {
    switch (ploidy) {
        case 0:
            testNextZeroPloidy();
            break;
        case 1:
            testNextOnePloidy();
            break;
        default:
            testPloidyTwoOrMore(ploidy);
            break;
    }
}
/**
 * Exercises the in-place {@code GenotypeAlleleCounts.increase()} by dispatching to the
 * helper that matches the requested ploidy (zero, one, or two and above).
 *
 * @param ploidy the ploidy under test.
 */
@Test(dataProvider = "ploidyData",dependsOnMethods = "testNext")
public void testIncrease(final int ploidy) {
    switch (ploidy) {
        case 0:
            testNextZeroPloidyIncrease();
            break;
        case 1:
            testNextOnePloidyIncrease();
            break;
        default:
            testPloidyTwoOrMoreIncrease(ploidy);
            break;
    }
}
/**
 * Checks {@code next()} on the zero-ploidy genotype: the result must be equal to the
 * original, have no distinct alleles, ploidy 0 and genotype index 0, and report all-zero
 * counts for every requested maximum allele index.
 * <p>
 * Called by {@link #testNext(int)} when the ploidy is 0.
 * </p>
 */
private void testNextZeroPloidy() {
    final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(0);
    final GenotypeAlleleCounts next = first.next();
    Assert.assertEquals(first,next);
    Assert.assertEquals(first.compareTo(next),0);
    Assert.assertEquals(next.compareTo(first), 0);
    Assert.assertEquals(next.distinctAlleleCount(),0);
    Assert.assertEquals(next.ploidy(),0);
    Assert.assertEquals(next.index(),0);
    // Use the shared bound rather than a literal so this loop stays in step with the other tests.
    for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) {
        final int[] expected = new int[maximumAlleleIndex + 1];
        Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex),expected);
    }
}
/**
 * Walks the haploid genotypes with {@code next()} until allele
 * {@code MAXIMUM_ALLELE_INDEX + 1} shows up, checking at each step that the new genotype
 * holds exactly one copy of the allele whose index is one above the previous genotype's.
 */
private void testNextOnePloidy() {
    GenotypeAlleleCounts current = GenotypeAlleleCounts.first(1);
    while (!current.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
        final GenotypeAlleleCounts next = current.next();
        final int expectedAllele = current.minimumAlleleIndex() + 1;
        final int nextAllele = next.minimumAlleleIndex();

        // The single allele present must be the one right after the previous genotype's.
        Assert.assertEquals(nextAllele, next.maximumAlleleIndex());
        Assert.assertEquals(nextAllele, expectedAllele);
        Assert.assertEquals(next.alleleCountAt(0), 1);
        Assert.assertEquals(next.alleleIndexAt(0), nextAllele);
        Assert.assertEquals(next.alleleRankFor(nextAllele), 0);
        Assert.assertEquals(next.alleleRankFor(nextAllele + 1), -2);
        Assert.assertEquals(next.alleleCountFor(nextAllele), 1);
        Assert.assertEquals(next.alleleCountFor(nextAllele + 1), 0);
        Assert.assertEquals(next.ploidy(), 1);

        // Ordering and equality relative to the previous genotype.
        Assert.assertTrue(next.compareTo(current) > 0);
        Assert.assertTrue(current.compareTo(next) < 0);
        Assert.assertTrue(next.compareTo(next) == 0);
        Assert.assertTrue(next.equals(next));
        Assert.assertFalse(next.equals(current));
        Assert.assertFalse(current.equals(next));
        Assert.assertEquals(next.index(), current.index() + 1);
        Assert.assertEquals(next.ploidy(), current.ploidy());

        // The by-index view shows the single copy only when the array is long enough to hold it.
        for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) {
            final int[] expected = new int[maximumAlleleIndex + 1];
            if (expectedAllele <= maximumAlleleIndex) {
                expected[expectedAllele] = 1;
            }
            Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex), expected);
        }
        current = next;
    }
}
/**
 * Walks the genotypes of the given ploidy with {@code next()} until allele
 * {@code MAXIMUM_ALLELE_INDEX + 1} shows up, checking each step against the previous
 * genotype: allele index range, the counts of the alleles affected by the step, the
 * total count, ordering, equality and genotype index.
 *
 * @param ploidy the ploidy under test; must be at least 2.
 * @throws IllegalArgumentException if {@code ploidy} is less than 2.
 */
private void testPloidyTwoOrMore(final int ploidy) {
    if (ploidy < 2) {
        throw new IllegalArgumentException();
    }
    GenotypeAlleleCounts current = GenotypeAlleleCounts.first(ploidy);
    while (!current.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
        final GenotypeAlleleCounts next = current.next();
        final int currentMinimum = current.minimumAlleleIndex();

        if (current.distinctAlleleCount() == 1) {
            // A single-allele genotype is followed by one that adds the next allele index up.
            Assert.assertEquals(next.maximumAlleleIndex(), current.maximumAlleleIndex() + 1);
            Assert.assertEquals(next.distinctAlleleCount(), 2);
            Assert.assertEquals(next.minimumAlleleIndex(), 0);
        } else {
            Assert.assertEquals(next.maximumAlleleIndex(), current.maximumAlleleIndex());
            final int lowestRankCount = current.alleleCountAt(0);
            final int expectedMinimum;
            if (lowestRankCount > 1) {
                expectedMinimum = 0;
            } else if (lowestRankCount == 1) {
                expectedMinimum = currentMinimum + 1;
            } else {
                expectedMinimum = currentMinimum;
            }
            Assert.assertEquals(next.minimumAlleleIndex(), expectedMinimum);
        }

        // Allele 0 must hold one less than the previous minimum allele's count, and the
        // allele just above the previous minimum must have gained one copy.
        Assert.assertEquals(next.alleleCountFor(0), current.alleleCountFor(currentMinimum) - 1);
        Assert.assertEquals(next.alleleCountFor(currentMinimum + 1),
                current.alleleCountFor(currentMinimum + 1) + 1);

        // The previous minimum allele keeps count - 1 copies if it was allele 0, none otherwise.
        final int expectedPreviousMinimumCount = currentMinimum == 0 ? current.alleleCountAt(0) - 1 : 0;
        Assert.assertEquals(next.alleleCountFor(currentMinimum), expectedPreviousMinimumCount);

        final int highestIndex = Math.max(MAXIMUM_ALLELE_INDEX, next.maximumAlleleIndex());
        final int[] expectedAlleleCountsByIndex = new int[highestIndex + 1];
        int totalCountSum = 0;
        for (int rank = 0; rank < next.distinctAlleleCount(); rank++) {
            final int count = next.alleleCountAt(rank);
            final int index = next.alleleIndexAt(rank);
            expectedAlleleCountsByIndex[index] = count;
            // alleleCountAt(rank) and alleleCountFor(alleleIndexAt(rank)) must agree.
            Assert.assertEquals(next.alleleCountFor(index), count);
            totalCountSum += count;
            // Alleles above the two touched by the step are expected to be unchanged.
            if (index > currentMinimum + 1) {
                Assert.assertEquals(next.alleleCountFor(index), current.alleleCountFor(index));
            }
        }
        Assert.assertTrue(Arrays.equals(next.alleleCountsByIndex(highestIndex), expectedAlleleCountsByIndex));
        Assert.assertEquals(totalCountSum, ploidy);

        Assert.assertTrue(next.compareTo(current) > 0);
        Assert.assertTrue(current.compareTo(next) < 0);
        Assert.assertTrue(next.compareTo(next) == 0);
        Assert.assertTrue(next.equals(next));
        Assert.assertFalse(next.equals(current));
        Assert.assertFalse(current.equals(next));
        Assert.assertEquals(next.index(), current.index() + 1);
        Assert.assertEquals(next.ploidy(), ploidy);
        current = next;
    }
}
/**
 * Checks {@code increase()} on a clone of the zero-ploidy genotype: the clone must remain
 * equal to the original, have no distinct alleles, ploidy 0 and genotype index 0, and
 * report all-zero counts for every requested maximum allele index.
 * <p>
 * Called by {@link #testIncrease(int)} when the ploidy is 0.
 * </p>
 */
private void testNextZeroPloidyIncrease() {
    final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(0);
    final GenotypeAlleleCounts next = first.clone();
    next.increase();
    Assert.assertEquals(first,next);
    Assert.assertEquals(first.compareTo(next),0);
    Assert.assertEquals(next.compareTo(first), 0);
    Assert.assertEquals(next.distinctAlleleCount(),0);
    Assert.assertEquals(next.ploidy(),0);
    Assert.assertEquals(next.index(),0);
    // Use the shared bound rather than a literal so this loop stays in step with the other tests.
    for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) {
        final int[] expected = new int[maximumAlleleIndex + 1];
        Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex),expected);
    }
}
/**
 * Advances a haploid genotype in place with {@code increase()} until allele
 * {@code MAXIMUM_ALLELE_INDEX + 1} shows up. Before each step a clone is taken, and after
 * the step the advanced genotype is checked against that clone: it must hold exactly one
 * copy of the allele whose index is one above the clone's.
 */
private void testNextOnePloidyIncrease() {
    final GenotypeAlleleCounts next = GenotypeAlleleCounts.first(1);
    while (!next.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
        // Snapshot of the state before the step.
        final GenotypeAlleleCounts current = next.clone();
        next.increase();
        final int expectedAllele = current.minimumAlleleIndex() + 1;
        final int nextAllele = next.minimumAlleleIndex();

        // The single allele present must be the one right after the snapshot's.
        Assert.assertEquals(nextAllele, next.maximumAlleleIndex());
        Assert.assertEquals(nextAllele, expectedAllele);
        Assert.assertEquals(next.alleleCountAt(0), 1);
        Assert.assertEquals(next.alleleIndexAt(0), nextAllele);
        Assert.assertEquals(next.alleleRankFor(nextAllele), 0);
        Assert.assertEquals(next.alleleRankFor(nextAllele + 1), -2);
        Assert.assertEquals(next.alleleCountFor(nextAllele), 1);
        Assert.assertEquals(next.alleleCountFor(nextAllele + 1), 0);
        Assert.assertEquals(next.ploidy(), 1);

        // Ordering and equality relative to the snapshot.
        Assert.assertTrue(next.compareTo(current) > 0);
        Assert.assertTrue(current.compareTo(next) < 0);
        Assert.assertTrue(next.compareTo(next) == 0);
        Assert.assertTrue(next.equals(next));
        Assert.assertFalse(next.equals(current));
        Assert.assertFalse(current.equals(next));
        Assert.assertEquals(next.index(), current.index() + 1);
        Assert.assertEquals(next.ploidy(), current.ploidy());

        // The by-index view shows the single copy only when the array is long enough to hold it.
        for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) {
            final int[] expected = new int[maximumAlleleIndex + 1];
            if (expectedAllele <= maximumAlleleIndex) {
                expected[expectedAllele] = 1;
            }
            Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex), expected);
        }
    }
}
/**
 * Advances a genotype of the given ploidy in place with {@code increase()} until allele
 * {@code MAXIMUM_ALLELE_INDEX + 1} shows up. Before each step a clone is taken, and after
 * the step the advanced genotype is checked against that clone: allele index range, the
 * counts of the alleles affected by the step, the total count, ordering, equality and
 * genotype index.
 *
 * @param ploidy the ploidy under test; must be at least 2.
 * @throws IllegalArgumentException if {@code ploidy} is less than 2.
 */
private void testPloidyTwoOrMoreIncrease(final int ploidy) {
    if (ploidy < 2) {
        throw new IllegalArgumentException();
    }
    final GenotypeAlleleCounts next = GenotypeAlleleCounts.first(ploidy);
    while (!next.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
        // Snapshot of the state before the step.
        final GenotypeAlleleCounts current = next.clone();
        next.increase();
        final int currentMinimum = current.minimumAlleleIndex();

        if (current.distinctAlleleCount() == 1) {
            // A single-allele genotype is followed by one that adds the next allele index up.
            Assert.assertEquals(next.maximumAlleleIndex(), current.maximumAlleleIndex() + 1);
            Assert.assertEquals(next.distinctAlleleCount(), 2);
            Assert.assertEquals(next.minimumAlleleIndex(), 0);
        } else {
            Assert.assertEquals(next.maximumAlleleIndex(), current.maximumAlleleIndex());
            final int lowestRankCount = current.alleleCountAt(0);
            final int expectedMinimum;
            if (lowestRankCount > 1) {
                expectedMinimum = 0;
            } else if (lowestRankCount == 1) {
                expectedMinimum = currentMinimum + 1;
            } else {
                expectedMinimum = currentMinimum;
            }
            Assert.assertEquals(next.minimumAlleleIndex(), expectedMinimum);
        }

        // Allele 0 must hold one less than the snapshot's minimum allele count, and the
        // allele just above the snapshot's minimum must have gained one copy.
        Assert.assertEquals(next.alleleCountFor(0), current.alleleCountFor(currentMinimum) - 1);
        Assert.assertEquals(next.alleleCountFor(currentMinimum + 1),
                current.alleleCountFor(currentMinimum + 1) + 1);

        // The snapshot's minimum allele keeps count - 1 copies if it was allele 0, none otherwise.
        final int expectedPreviousMinimumCount = currentMinimum == 0 ? current.alleleCountAt(0) - 1 : 0;
        Assert.assertEquals(next.alleleCountFor(currentMinimum), expectedPreviousMinimumCount);

        final int highestIndex = Math.max(MAXIMUM_ALLELE_INDEX, next.maximumAlleleIndex());
        final int[] expectedAlleleCountsByIndex = new int[highestIndex + 1];
        int totalCountSum = 0;
        for (int rank = 0; rank < next.distinctAlleleCount(); rank++) {
            final int count = next.alleleCountAt(rank);
            final int index = next.alleleIndexAt(rank);
            expectedAlleleCountsByIndex[index] = count;
            // alleleCountAt(rank) and alleleCountFor(alleleIndexAt(rank)) must agree.
            Assert.assertEquals(next.alleleCountFor(index), count);
            totalCountSum += count;
            // Alleles above the two touched by the step are expected to be unchanged.
            if (index > currentMinimum + 1) {
                Assert.assertEquals(next.alleleCountFor(index), current.alleleCountFor(index));
            }
        }
        Assert.assertTrue(Arrays.equals(next.alleleCountsByIndex(highestIndex), expectedAlleleCountsByIndex));
        Assert.assertEquals(totalCountSum, ploidy);

        Assert.assertTrue(next.compareTo(current) > 0);
        Assert.assertTrue(current.compareTo(next) < 0);
        Assert.assertTrue(next.compareTo(next) == 0);
        Assert.assertTrue(next.equals(next));
        Assert.assertFalse(next.equals(current));
        Assert.assertFalse(current.equals(next));
        Assert.assertEquals(next.index(), current.index() + 1);
        Assert.assertEquals(next.ploidy(), ploidy);
    }
}
// Upper bound used when requesting alleleCountsByIndex(...); the next()/increase() walks
// stop as soon as allele MAXIMUM_ALLELE_INDEX + 1 appears in the genotype.
private static final int MAXIMUM_ALLELE_INDEX = 10;
// Ploidies served by the "ploidyData" provider.
// NOTE(review): 0 is not listed, so the zero-ploidy branches of testNext/testIncrease are
// never reached -- confirm whether that is intended.
private static final int[] PLOIDY = new int[] { 1, 2, 3, 7, 10};
/**
 * Supplies each value of {@link #PLOIDY} as a single test argument.
 *
 * @return one single-element row per ploidy, in declaration order.
 */
@DataProvider(name="ploidyData")
public Object[][] ploidyData() {
    final Object[][] rows = new Object[PLOIDY.length][];
    int row = 0;
    for (final int ploidy : PLOIDY) {
        rows[row++] = new Object[] { ploidy };
    }
    return rows;
}
}

View File

@ -0,0 +1,172 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
* Tests {@link GenotypeLikelihoodCalculators} and {@link GenotypeLikelihoodCalculator}.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class GenotypeLikelihoodCalculatorUnitTest {
/**
 * Checks the calculator obtained for a ploidy / allele-count pair: its reported ploidy,
 * allele count and genotype count, and, for up to the first 30000 genotypes, that the
 * genotypes are strictly increasing and that both index look-ups
 * ({@code allelesToIndex} and {@code alleleCountsToIndex}) map back to the genotype index.
 *
 * @param ploidy the ploidy to request a calculator for.
 * @param alleleCount the number of alleles to request a calculator for.
 */
@Test(dataProvider = "ploidyAndMaximumAlleleData")
public void testPloidyAndMaximumAllele(final int ploidy, final int alleleCount) {
    final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, alleleCount);
    Assert.assertNotNull(calculator);
    Assert.assertEquals(calculator.ploidy(), ploidy);
    Assert.assertEquals(calculator.alleleCount(), alleleCount);
    Assert.assertEquals(calculator.genotypeCount(), calculateGenotypeCount(ploidy, alleleCount), " ploidy = " + ploidy + " alleleCount = " + alleleCount);

    final int testGenotypeCount = Math.min(30000, calculator.genotypeCount());
    for (int genotype = 0; genotype < testGenotypeCount; genotype++) {
        final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(genotype);
        Assert.assertNotNull(alleleCounts);
        if (genotype > 0) {
            Assert.assertTrue(calculator.genotypeAlleleCountsAt(genotype - 1).compareTo(alleleCounts) < 0);
        }

        // Expand the genotype into a flat list of allele indices, one entry per copy.
        final int[] alleleArray = new int[ploidy];
        int filled = 0;
        for (int rank = 0; rank < alleleCounts.distinctAlleleCount(); rank++) {
            final int end = filled + alleleCounts.alleleCountAt(rank);
            Arrays.fill(alleleArray, filled, end, alleleCounts.alleleIndexAt(rank));
            filled = end;
        }

        // Two slots per distinct allele.
        // NOTE(review): presumably (allele index, count) pairs -- confirm against copyAlleleCounts.
        final int[] alleleCountArray = new int[alleleCounts.distinctAlleleCount() * 2];
        alleleCounts.copyAlleleCounts(alleleCountArray, 0);

        Assert.assertEquals(filled, ploidy);
        Assert.assertEquals(calculator.allelesToIndex(alleleArray), genotype);
        Assert.assertEquals(calculator.alleleCountsToIndex(alleleCountArray), genotype);
    }
}
/**
 * Compares the genotype likelihoods produced by the calculator against a direct
 * re-computation from the per-read allele likelihoods.
 * <p>
 * For each sample and each of (at most) the first 30000 genotypes, the expected value is
 * the sum over reads of
 * {@code approximateLog10SumLog10(readLikelihood(allele) + log10(alleleCount)) - log10(ploidy)},
 * where the inner sum runs over the genotype's distinct alleles. The calculator's value
 * must match within 0.0001.
 * </p>
 *
 * @param ploidy the ploidy to request a calculator for.
 * @param alleleCount the number of alleles to request a calculator for.
 * @param readCount passed to {@code ReadLikelihoodsUnitTester.readLikelihoods}; its length
 *                  is used as the number of samples.
 */
@Test(dataProvider = "ploidyAndMaximumAlleleAndReadCountsData", dependsOnMethods = "testPloidyAndMaximumAllele")
public void testLikelihoodCalculation(final int ploidy, final int alleleCount, final int[] readCount) {
    final ReadLikelihoods<Allele> readLikelihoods = ReadLikelihoodsUnitTester.readLikelihoods(alleleCount,readCount);
    final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, alleleCount);
    final int genotypeCount = calculator.genotypeCount();
    final int testGenotypeCount = Math.min(30000,genotypeCount);
    final int sampleCount = readCount.length;
    for (int s = 0; s < sampleCount ; s++) {
        final ReadLikelihoods.Matrix<Allele> sampleLikelihoods = readLikelihoods.sampleMatrix(s);
        final GenotypeLikelihoods genotypeLikelihoods = calculator.genotypeLikelihoods(sampleLikelihoods);
        // Check for null before the result is dereferenced; checking later could never fail.
        Assert.assertNotNull(genotypeLikelihoods);
        final double[] genotypeLikelihoodsDoubles = genotypeLikelihoods.getAsVector();
        Assert.assertEquals(genotypeLikelihoodsDoubles.length,genotypeCount);
        for (int i = 0; i < testGenotypeCount; i++) {
            final GenotypeAlleleCounts genotypeAlleleCounts = calculator.genotypeAlleleCountsAt(i);
            Assert.assertNotNull(genotypeAlleleCounts);
            final double[] readGenotypeLikelihoods = new double[sampleLikelihoods.readCount()];
            for (int r = 0; r < sampleLikelihoods.readCount(); r++) {
                // One term per distinct allele: the read's likelihood weighted by the allele's copy number.
                final double[] components = new double[genotypeAlleleCounts.distinctAlleleCount()];
                for (int ar = 0; ar < genotypeAlleleCounts.distinctAlleleCount(); ar++) {
                    final int a = genotypeAlleleCounts.alleleIndexAt(ar);
                    final int aCount = genotypeAlleleCounts.alleleCountAt(ar);
                    final double readLk = sampleLikelihoods.get(a, r);
                    components[ar] = readLk + Math.log10(aCount);
                }
                readGenotypeLikelihoods[r] = MathUtils.approximateLog10SumLog10(components) - Math.log10(ploidy);
            }
            final double genotypeLikelihood = MathUtils.sum(readGenotypeLikelihoods);
            Assert.assertEquals(genotypeLikelihoodsDoubles[i], genotypeLikelihood, 0.0001);
        }
    }
}
// Naive (deliberately unoptimized) recursive computation of the number of genotypes for a given
// ploidy and allele count. Ploidies 0, 1 and 2 are answered directly; any other ploidy is resolved
// through count(ploidy, alleles) = count(ploidy - 1, alleles) + count(ploidy, alleles - 1),
// with zero alleles yielding zero genotypes.
private int calculateGenotypeCount(final int ploidy, final int alleleCount) {
    switch (ploidy) {
        case 0:
            return 0;
        case 1:
            return alleleCount;
        case 2:
            return (alleleCount * (alleleCount + 1)) >> 1;
        default:
            if (alleleCount == 0) {
                return 0;
            }
            final int withOneFewerCopy = calculateGenotypeCount(ploidy - 1, alleleCount);
            final int withOneFewerAllele = calculateGenotypeCount(ploidy, alleleCount - 1);
            return withOneFewerCopy + withOneFewerAllele;
    }
}
// Allele counts combined with every ploidy by the data providers of this test class.
private static final int[] MAXIMUM_ALLELE = new int[] { 1, 2, 5, 6 };
// Ploidies combined with every allele count by the data providers of this test class.
private static final int[] PLOIDY = new int[] { 1, 2, 3, 20 };
// Read-count arrays handed to the likelihood test; each row's length is used there as the sample count.
// NOTE(review): presumably each entry is the number of reads for one sample -- confirm against
// ReadLikelihoodsUnitTester.readLikelihoods.
private static final int[][] READ_COUNTS = new int[][] {
{ 10 , 100, 50 },
{ 0, 100, 10, 1 , 50 },
{ 1, 2, 3, 4, 20 },
{ 10, 0 },
};
/**
 * Supplies every combination of a ploidy, an allele count and a read-count array, drawn from
 * {@code PLOIDY}, {@code MAXIMUM_ALLELE} and {@code READ_COUNTS} respectively.
 *
 * @return one {@code {ploidy, alleleCount, readCounts}} row per combination, ploidy varying slowest.
 */
@DataProvider(name="ploidyAndMaximumAlleleAndReadCountsData")
public Object[][] ploidyAndMaximumAlleleAndReadCountsData() {
    final int rowCount = PLOIDY.length * MAXIMUM_ALLELE.length * READ_COUNTS.length;
    final Object[][] rows = new Object[rowCount][];
    int nextRow = 0;
    for (int p = 0; p < PLOIDY.length; p++) {
        for (int a = 0; a < MAXIMUM_ALLELE.length; a++) {
            for (int c = 0; c < READ_COUNTS.length; c++) {
                rows[nextRow] = new Object[] { PLOIDY[p], MAXIMUM_ALLELE[a], READ_COUNTS[c] };
                nextRow++;
            }
        }
    }
    return rows;
}
/**
 * Supplies every combination of a ploidy and an allele count, drawn from {@code PLOIDY} and
 * {@code MAXIMUM_ALLELE} respectively.
 *
 * @return one {@code {ploidy, alleleCount}} row per combination, ploidy varying slowest.
 */
@DataProvider(name="ploidyAndMaximumAlleleData")
public Object[][] ploidyAndMaximumAlleleData() {
    final int rowCount = PLOIDY.length * MAXIMUM_ALLELE.length;
    final Object[][] rows = new Object[rowCount][];
    int nextRow = 0;
    for (int p = 0; p < PLOIDY.length; p++) {
        for (int a = 0; a < MAXIMUM_ALLELE.length; a++) {
            rows[nextRow] = new Object[] { PLOIDY[p], MAXIMUM_ALLELE[a] };
            nextRow++;
        }
    }
    return rows;
}
}

View File

@ -0,0 +1,103 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Tests the construction of {@link org.broadinstitute.gatk.genotyping.GenotypingData} instances.
 */
public class GenotypingDataUnitTest {

    /** Per-sample ploidies; row {@code i} is paired with row {@code i} of {@link #READ_COUNTS}. */
    private static final int[][] PLOIDIES = {
            {1, 1, 1, 1},
            {1, 2, 3, 4},
            {2, 2, 2, 2},
            {2, 1, 2, 1},
            {1},
            {2},
            {},
    };

    /** Read counts handed to the read-likelihood test helper, one row per test case. */
    private static final int[][] READ_COUNTS = {
            { 10 , 100, 50, 20 },
            { 0, 100, 10, 1 },
            { 1, 2, 3, 4 },
            { 10, 20, 50, 40 },
            { 10 },
            { 20 },
            { }
    };

    /**
     * Builds a {@link GenotypingData} from a ploidy model and read likelihoods and checks that it
     * exposes the same alleles, samples, likelihoods and ploidy model it was constructed with.
     *
     * @param ploidies the ploidy of each sample.
     * @param readCounts read counts used to build the two-allele read likelihoods under test.
     */
    @Test(dataProvider="ploidyAndMaximumAlleleAndReadCountsData")
    public void testInstantiation(final int[] ploidies, final int[] readCounts) {
        final ReadLikelihoods<Allele> likelihoods = ReadLikelihoodsUnitTester.readLikelihoods(2,readCounts);
        // The read likelihoods double as the sample list of the ploidy model.
        final SampleList samples = likelihoods;
        final PloidyModel ploidyModel = new HeterogeneousPloidyModel(samples,ploidies);
        final GenotypingData<Allele> data = new GenotypingData<>(ploidyModel,likelihoods);

        Assert.assertTrue(AlleleListUtils.equals(data,likelihoods));
        Assert.assertTrue(SampleListUtils.equals(data,likelihoods));
        Assert.assertEquals(data.readLikelihoods(),likelihoods);
        Assert.assertEquals(data.ploidyModel(),ploidyModel);
    }

    /**
     * Pairs each row of {@link #PLOIDIES} with the row of {@link #READ_COUNTS} at the same position.
     *
     * @return one {@code {ploidies, readCounts}} row per entry of {@link #PLOIDIES}.
     */
    @DataProvider(name="ploidyAndMaximumAlleleAndReadCountsData")
    public Object[][] ploidyAndMaximumAlleleAndReadCountsData() {
        final Object[][] cases = new Object[PLOIDIES.length][];
        for (int row = 0; row < PLOIDIES.length; row++) {
            cases[row] = new Object[] { PLOIDIES[row], READ_COUNTS[row] };
        }
        return cases;
    }
}

View File

@ -0,0 +1,119 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
/**
 * General heterogeneous ploidy model.
 *
 * <p>
 * Currently only available for testing but will be promoted at some point and have its own unit test.
 * </p>
 */
public class HeterogeneousPloidyModel implements PloidyModel {

    /** Sample list that sample name/index queries are delegated to. */
    private final SampleList sampleList;

    /** Private copy of the ploidy of each sample, indexed by sample index. */
    private final int[] ploidies;

    /** Sum of all the sample ploidies. */
    private final int ploidySum;

    /** Whether every sample has the same ploidy (trivially true when there are no samples). */
    private final boolean isHomogeneous;

    /**
     * Creates a ploidy model where each sample may have a different ploidy.
     *
     * @param sampleList the samples the model applies to.
     * @param ploidies the ploidy of each sample, in the same order as {@code sampleList}; the array is
     *                 copied, so later changes by the caller do not affect this model.
     *
     * @throws IllegalArgumentException if either argument is {@code null}, if their sizes differ or
     *   if any ploidy is negative.
     */
    public HeterogeneousPloidyModel(final SampleList sampleList, final int[] ploidies) {
        if (sampleList == null)
            throw new IllegalArgumentException("the sample list cannot be null");
        if (ploidies == null)
            throw new IllegalArgumentException("the ploidies cannot be null");
        if (sampleList.sampleCount() != ploidies.length)
            throw new IllegalArgumentException("sample-list and ploidy array length must match");
        this.ploidies = ploidies.clone();
        int sum = 0;
        boolean allEqual = true;
        for (final int p : this.ploidies) {
            if (p < 0)
                throw new IllegalArgumentException("no ploidy can be less than 0");
            // Compare element by element: checking "length * first == sum" would wrongly accept
            // mixed ploidies whose mean happens to equal the first one, e.g. {2, 1, 3, 2}.
            if (p != this.ploidies[0])
                allEqual = false;
            sum += p;
        }
        this.ploidySum = sum;
        this.isHomogeneous = allEqual;
        this.sampleList = sampleList;
    }

    /**
     * Returns the ploidy of a sample.
     *
     * @param sampleIndex the index of the requested sample.
     * @return the ploidy provided for that sample at construction.
     * @throws IllegalArgumentException if {@code sampleIndex} is negative or not less than the
     *   number of samples.
     */
    @Override
    public int samplePloidy(final int sampleIndex) {
        // Valid indices are [0, length): the upper bound must be rejected too.
        if (sampleIndex < 0 || sampleIndex >= ploidies.length)
            throw new IllegalArgumentException("invalid sample index: " + sampleIndex);
        return ploidies[sampleIndex];
    }

    /**
     * Indicates whether all samples share the same ploidy.
     *
     * @return {@code true} if every sample has the same ploidy or there are no samples.
     */
    @Override
    public boolean isHomogeneous() {
        return isHomogeneous;
    }

    /**
     * Returns the sum of the ploidies of all samples.
     *
     * @return 0 or greater.
     */
    @Override
    public int totalPloidy() {
        return ploidySum;
    }

    /**
     * Returns the number of samples in the model.
     *
     * @return the length of the ploidy array provided at construction.
     */
    @Override
    public int sampleCount() {
        return ploidies.length;
    }

    /**
     * Looks up the index of a sample by name, delegating to the underlying sample list.
     *
     * @param sample the sample name.
     * @return whatever the underlying sample list returns for that name.
     */
    @Override
    public int sampleIndex(final String sample) {
        return sampleList.sampleIndex(sample);
    }

    /**
     * Returns the name of the sample at a given index, delegating to the underlying sample list.
     *
     * @param sampleIndex the index of the requested sample.
     * @return whatever the underlying sample list returns for that index.
     */
    @Override
    public String sampleAt(int sampleIndex) {
        return sampleList.sampleAt(sampleIndex);
    }
}

View File

@ -0,0 +1,92 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Unit tests for {@link HomogeneousPloidyModel}.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class HomogeneousPloidyModelUnitTest {

    /** Ploidies to exercise. */
    private static final int[] PLOIDY = { 1, 2, 3, 7, 10};

    /** Sample-list sizes to exercise, including the empty list. */
    private static final int[] SAMPLE_COUNT = { 0, 1, 3, 4, 5, 6, 10, 101};

    /**
     * Builds a homogeneous model over {@code sampleCount} generated sample names and checks its
     * homogeneity flag, total ploidy, per-sample ploidy and sample-list behaviour.
     *
     * @param ploidy the ploidy shared by all samples.
     * @param sampleCount the number of samples to generate.
     */
    @Test(dataProvider = "ploidyAndSampleListData")
    public void testPloidyAndSampleList(final int ploidy, final int sampleCount) {
        final List<String> names = new ArrayList<>(sampleCount);
        for (int n = 0; n < sampleCount; n++) {
            names.add("SAMPLE_" + n);
        }
        final IndexedSampleList samples = new IndexedSampleList(names);
        final HomogeneousPloidyModel model = new HomogeneousPloidyModel(samples,ploidy);

        Assert.assertTrue(model.isHomogeneous());
        Assert.assertEquals(model.totalPloidy(),sampleCount * ploidy);
        for (int s = 0; s < sampleCount; s++) {
            Assert.assertEquals(model.samplePloidy(s),ploidy);
        }
        SampleListUnitTester.assertSampleList(model,names);
    }

    /**
     * Supplies every combination of a ploidy from {@link #PLOIDY} and a sample count from
     * {@link #SAMPLE_COUNT}.
     *
     * @return one {@code {ploidy, sampleCount}} row per combination, ploidy varying slowest.
     */
    @DataProvider(name="ploidyAndSampleListData")
    public Object[][] ploidyAndSampleListData() {
        final Object[][] rows = new Object[PLOIDY.length * SAMPLE_COUNT.length][];
        int nextRow = 0;
        for (final int ploidy : PLOIDY) {
            for (final int sampleCount : SAMPLE_COUNT) {
                rows[nextRow] = new Object[] { ploidy, sampleCount };
                nextRow++;
            }
        }
        return rows;
    }
}

View File

@ -0,0 +1,102 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import htsjdk.variant.variantcontext.Allele;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
import static org.broadinstitute.gatk.genotyping.AlleleListUnitTester.assertAlleleList;
/**
* Tests {@link org.broadinstitute.gatk.genotyping.IndexedAlleleList}.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class IndexedAlleleListUnitTest {

    /** Numbers of randomly generated alleles each list is built from; includes the empty case. */
    private static final int[] ALLELE_COUNT = { 0, 1, 5, 10, 20 };

    /** Maximum allele lengths handed to the random allele generator. */
    private static final int[] MAX_ALLELE_LENGTH = { 1, 2, 3, 10 };

    /**
     * Checks that the no-argument constructor yields an empty allele list.
     */
    @Test
    public void testEmptyConstructor() {
        final IndexedAlleleList<Allele> subject = new IndexedAlleleList<>();
        // Typed empty list rather than the raw Collections.EMPTY_LIST, which forces an unchecked conversion.
        assertAlleleList(subject, Collections.<Allele>emptyList());
    }

    /**
     * Checks the array constructor: the resulting list is expected to contain the input
     * alleles with repeats removed, in first-occurrence order.
     *
     * @param alleleCount number of random alleles to generate.
     * @param maxAlleleLength maximum length passed to the random allele generator.
     */
    @Test(dataProvider = "alleleCountMaxAlleleLengthData")
    public void testArrayConstructor(final int alleleCount, final int maxAlleleLength) {
        final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount, maxAlleleLength);
        final IndexedAlleleList<Allele> subject = new IndexedAlleleList<>(alleles);
        assertAlleleList(subject, withoutRepeats(alleles));
    }

    /**
     * Checks the collection constructor: the resulting list is expected to contain the input
     * alleles with repeats removed, in first-occurrence order.
     *
     * @param alleleCount number of random alleles to generate.
     * @param maxAlleleLength maximum length passed to the random allele generator.
     */
    @Test(dataProvider = "alleleCountMaxAlleleLengthData")
    public void testCollectionConstructor(final int alleleCount, final int maxAlleleLength) {
        final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount, maxAlleleLength);
        final List<Allele> alleleList = Arrays.asList(alleles);
        final IndexedAlleleList<Allele> subject = new IndexedAlleleList<>(alleleList);
        assertAlleleList(subject, withoutRepeats(alleles));
    }

    /**
     * Returns the given alleles with repeated elements dropped, keeping the position of the
     * first occurrence of each one.
     *
     * @param alleles the alleles to de-duplicate; not modified.
     * @return never {@code null}; a new list.
     */
    private static List<Allele> withoutRepeats(final Allele[] alleles) {
        // LinkedHashSet drops repeats while preserving insertion order.
        final Set<Allele> unique = new LinkedHashSet<>(Arrays.asList(alleles));
        return new ArrayList<>(unique);
    }

    /**
     * Provides every (allele count, maximum allele length) combination of
     * {@link #ALLELE_COUNT} and {@link #MAX_ALLELE_LENGTH}.
     *
     * @return never {@code null}; one two-element row per combination.
     */
    @DataProvider(name = "alleleCountMaxAlleleLengthData")
    public Object[][] alleleCountMaxAlleleLengthData() {
        final Object[][] result = new Object[ALLELE_COUNT.length * MAX_ALLELE_LENGTH.length][];
        int nextIndex = 0;
        for (final int alleleCount : ALLELE_COUNT) {
            for (final int maxAlleleLength : MAX_ALLELE_LENGTH) {
                result[nextIndex++] = new Object[] { alleleCount, maxAlleleLength };
            }
        }
        return result;
    }
}

View File

@ -0,0 +1,133 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
import static org.broadinstitute.gatk.genotyping.SampleListUnitTester.assertSampleList;
/**
* Tests {@link IndexedSampleList}.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class IndexedSampleListUnitTest {

    /** Numbers of sample names each list is built from; includes the empty case. */
    private static final int[] SAMPLE_COUNT = { 0, 1, 5, 10, 20 };

    /** Upper bounds (inclusive) of the numeric suffix used when generating sample names. */
    private static final int[] MAX_SAMPLE_INDEX = { 0, 1, 4, 9, 10000 };

    /** Random source used to generate sample names. */
    private static final Random rnd = GenomeAnalysisEngine.getRandomGenerator();

    /**
     * Checks that the no-argument constructor yields an empty sample list.
     */
    @Test
    public void testEmptyConstructor() {
        final IndexedSampleList subject = new IndexedSampleList();
        // Typed empty list rather than the raw Collections.EMPTY_LIST, which forces an unchecked conversion.
        assertSampleList(subject, Collections.<String>emptyList());
    }

    /**
     * Checks the array constructor: the resulting list is expected to contain the input
     * names with repeats removed, in first-occurrence order.
     *
     * @param sampleCount number of sample names to generate.
     * @param maxSampleIndex the maximum numeric index used in generated names.
     */
    @Test(dataProvider = "sampleCountMaxSampleIndexData")
    public void testArrayConstructor(final int sampleCount, final int maxSampleIndex) {
        final String[] sampleNames = generateSampleNames(sampleCount, maxSampleIndex);
        final IndexedSampleList subject = new IndexedSampleList(sampleNames);
        assertSampleList(subject, withoutRepeats(sampleNames));
    }

    /**
     * Checks the collection constructor: the resulting list is expected to contain the input
     * names with repeats removed, in first-occurrence order.
     *
     * @param sampleCount number of sample names to generate.
     * @param maxSampleIndex the maximum numeric index used in generated names.
     */
    @Test(dataProvider = "sampleCountMaxSampleIndexData")
    public void testCollectionConstructor(final int sampleCount, final int maxSampleIndex) {
        final String[] sampleNames = generateSampleNames(sampleCount, maxSampleIndex);
        final List<String> sampleNameList = Arrays.asList(sampleNames);
        final IndexedSampleList subject = new IndexedSampleList(sampleNameList);
        assertSampleList(subject, withoutRepeats(sampleNames));
    }

    /**
     * Returns the given names with repeated elements dropped, keeping the position of the
     * first occurrence of each one.
     *
     * @param names the names to de-duplicate; not modified.
     * @return never {@code null}; a new list.
     */
    private static List<String> withoutRepeats(final String[] names) {
        // LinkedHashSet drops repeats while preserving insertion order.
        final Set<String> unique = new LinkedHashSet<>(Arrays.asList(names));
        return new ArrayList<>(unique);
    }

    /**
     * Generates testing sample names.
     *
     * <p>
     * All names share the prefix {@code "SAMPLE_"} followed by a random numeric index in
     * {@code [0, maxSampleIndex]}.
     * </p>
     *
     * <p>
     * Since only {@code maxSampleIndex + 1} distinct names exist, repeated names are
     * guaranteed when {@code sampleCount > maxSampleIndex + 1}, and may also occur by
     * chance for smaller counts.
     * </p>
     *
     * @param sampleCount number of sample names to generate.
     * @param maxSampleIndex the maximum sample numeric index (inclusive).
     *
     * @throws NegativeArraySizeException if {@code sampleCount} is negative.
     * @throws IllegalArgumentException if {@code maxSampleIndex} is negative and
     *         {@code sampleCount} is greater than 0.
     * @return never {@code null}.
     */
    private static String[] generateSampleNames(final int sampleCount, final int maxSampleIndex) {
        final String[] result = new String[sampleCount];
        for (int i = 0; i < sampleCount; i++) {
            // nextInt's bound is exclusive, hence the + 1 to make maxSampleIndex reachable.
            result[i] = "SAMPLE_" + rnd.nextInt(maxSampleIndex + 1);
        }
        return result;
    }

    /**
     * Provides every (sample count, maximum sample index) combination of
     * {@link #SAMPLE_COUNT} and {@link #MAX_SAMPLE_INDEX}.
     *
     * @return never {@code null}; one two-element row per combination.
     */
    @DataProvider(name = "sampleCountMaxSampleIndexData")
    public Object[][] sampleCountMaxSampleIndexData() {
        final Object[][] result = new Object[SAMPLE_COUNT.length * MAX_SAMPLE_INDEX.length][];
        int nextIndex = 0;
        for (final int sampleCount : SAMPLE_COUNT) {
            for (final int maxSampleIndex : MAX_SAMPLE_INDEX) {
                result[nextIndex++] = new Object[] { sampleCount, maxSampleIndex };
            }
        }
        return result;
    }
}

View File

@ -0,0 +1,145 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
* Test {@link InfiniteRandomMatingPopulationModel}
*/
public class InfiniteRandomMatingPopulationModelUnitTest {

    /**
     * Allele-count scenarios, index-aligned with {@link #PLOIDIES} and {@link #READ_COUNTS}.
     *
     * <p>
     * Each entry contains two values: the first is the total number of alleles and the second
     * is the number of alleles to discard at random before genotyping in
     * {@link #testCalculateLikelihoods}. A second value of 0 means no discarding variant is
     * generated for that scenario.
     * </p>
     */
    private static final int[][] ALLELE_COUNTS = {
        { 1, 0 },
        { 2, 1 },
        { 5, 2 },
        { 10, 4 },
        { 1, 0 },
        { 2, 1 },
        { 10, 7 }
    };

    /** Per-sample ploidies for each scenario; the last scenario has no samples. */
    private static final int[][] PLOIDIES = {
        { 1, 1, 1, 1 },
        { 1, 2, 3, 4 },
        { 2, 2, 2, 2 },
        { 2, 1, 2, 1 },
        { 1 },
        { 2 },
        { },
    };

    /** Read counts handed to the read-likelihoods generator for each scenario. */
    private static final int[][] READ_COUNTS = {
        { 10, 100, 50, 20 },
        { 0, 100, 10, 1 },
        { 1, 2, 3, 4 },
        { 10, 20, 50, 40 },
        { 10 },
        { 20 },
        { }
    };

    /**
     * Runs {@link InfiniteRandomMatingPopulationModel#calculateLikelihoods} and checks that
     * the result is non-null, exposes the genotyping alleles and the samples, and that every
     * sample has one non-positive likelihood value per possible genotype.
     *
     * @param ploidies ploidy of each sample.
     * @param alleleCount total number of alleles in the generated read likelihoods.
     * @param discardAlleleCount number of alleles to drop at random from the genotyping
     *                           allele list; 0 to genotype all of them.
     * @param readCounts read counts passed to the read-likelihoods generator.
     */
    @Test(dataProvider = "ploidyAndMaximumAlleleAndReadCountsData")
    public void testCalculateLikelihoods(final int[] ploidies, final int alleleCount, final int discardAlleleCount, final int[] readCounts) {
        final ReadLikelihoods<Allele> readLikelihoods = ReadLikelihoodsUnitTester.readLikelihoods(alleleCount, readCounts);
        final AlleleList<Allele> genotypingAlleles;
        if (discardAlleleCount != 0) {
            genotypingAlleles = discardAllelesAtRandom(readLikelihoods, discardAlleleCount);
        } else {
            genotypingAlleles = readLikelihoods;
        }
        final SampleList samples = SampleListUnitTester.sampleList(ploidies.length);
        final PloidyModel ploidyModel = new HeterogeneousPloidyModel(samples, ploidies);
        final GenotypingData<Allele> data = new GenotypingData<>(ploidyModel, readLikelihoods);
        final InfiniteRandomMatingPopulationModel model = new InfiniteRandomMatingPopulationModel();

        final GenotypingLikelihoods<Allele> result = model.calculateLikelihoods(genotypingAlleles, data);

        Assert.assertNotNull(result);
        AlleleListUnitTester.assertAlleleList(result, AlleleListUtils.asList(genotypingAlleles));
        SampleListUnitTester.assertSampleList(result, SampleListUtils.asList(samples));

        final int resultSampleCount = result.sampleCount();
        for (int sample = 0; sample < resultSampleCount; sample++) {
            final GenotypeLikelihoods sampleLikelihoods = result.sampleLikelihoods(sample);
            Assert.assertNotNull(sampleLikelihoods);
            final double[] values = sampleLikelihoods.getAsVector();
            Assert.assertNotNull(values);
            final int expectedGenotypeCount =
                    GenotypeLikelihoodCalculators.getInstance(ploidies[sample], genotypingAlleles.alleleCount()).genotypeCount();
            Assert.assertEquals(values.length, expectedGenotypeCount);
            for (final double value : values) {
                Assert.assertTrue(value <= 0);
            }
        }
    }

    /**
     * Copies the given allele list and removes {@code discardAlleleCount} alleles from the
     * copy, each picked at a random position.
     *
     * @param likelihoods the allele list to take the subset from; not modified.
     * @param discardAlleleCount number of alleles to remove.
     * @return never {@code null}; a new allele list with the remaining alleles.
     */
    private AlleleList<Allele> discardAllelesAtRandom(final AlleleList<Allele> likelihoods, final int discardAlleleCount) {
        final Random random = GenomeAnalysisEngine.getRandomGenerator();
        final List<Allele> remaining = new ArrayList<>(AlleleListUtils.asList(likelihoods));
        int discarded = 0;
        while (discarded < discardAlleleCount) {
            final int victim = random.nextInt(remaining.size());
            remaining.remove(victim);
            discarded++;
        }
        return new IndexedAlleleList<>(remaining);
    }

    /**
     * Provides one row per scenario with no alleles discarded, followed by a second row with
     * the scenario's discard count whenever that count is not 0.
     *
     * @return never {@code null}; rows of (ploidies, allele count, discard count, read counts).
     */
    @DataProvider(name = "ploidyAndMaximumAlleleAndReadCountsData")
    public Object[][] ploidyAndMaximumAlleleAndReadCountsData() {
        final List<Object[]> rows = new ArrayList<>(PLOIDIES.length * 2);
        for (int scenario = 0; scenario < PLOIDIES.length; scenario++) {
            final int[] ploidies = PLOIDIES[scenario];
            final int[] readCounts = READ_COUNTS[scenario];
            final int totalAlleles = ALLELE_COUNTS[scenario][0];
            final int allelesToDiscard = ALLELE_COUNTS[scenario][1];
            rows.add(new Object[] { ploidies, totalAlleles, 0, readCounts });
            if (allelesToDiscard != 0) {
                rows.add(new Object[] { ploidies, totalAlleles, allelesToDiscard, readCounts });
            }
        }
        return rows.toArray(new Object[rows.size()][]);
    }
}

View File

@ -0,0 +1,124 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Contains utilities for tests that need to create read-likelihoods.
 */
public class ReadLikelihoodsUnitTester {

    /**
     * Artificial SAM header shared by every read created by this class. It is never reassigned,
     * hence declared {@code final}.
     */
    private static final SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader(10, 0, 1000);

    /**
     * Creates a read-likelihoods collection populated with deterministic test values.
     *
     * <p>
     * One sample is created per element in {@code readCount}; each sample receives as many artificial
     * reads as indicated by the corresponding element, and every (allele, read) cell is set to
     * the value returned by {@link #testLikelihood}.
     * </p>
     *
     * @param alleleCount the number of alleles requested from {@code AlleleListUnitTester.alleleList}.
     * @param readCount the number of reads per sample; its length determines the number of samples.
     * @return the populated read-likelihoods.
     */
    static ReadLikelihoods<Allele> readLikelihoods(final int alleleCount, final int[] readCount) {
        final int sampleCount = readCount.length;
        // NOTE(review): the meaning of the 100 and true arguments is defined by AlleleListUnitTester, which is not visible here.
        final AlleleList<Allele> alleleList = AlleleListUnitTester.alleleList(alleleCount,100,true);
        final SampleList sampleList = SampleListUnitTester.sampleList(sampleCount);
        final Map<String,List<GATKSAMRecord>> sampleToReads = new HashMap<>(sampleCount);
        for (int i = 0; i < sampleCount; i++) {
            sampleToReads.put(sampleList.sampleAt(i),readList(i,readCount[i]));
        }
        final ReadLikelihoods<Allele> likelihoods = new ReadLikelihoods<>(sampleList,alleleList, sampleToReads);
        for (int s = 0; s < sampleCount; s++) {
            final ReadLikelihoods.Matrix<Allele> sampleLikelihoods = likelihoods.sampleMatrix(s);
            for (int a = 0; a < alleleCount; a++)
                for (int r = 0; r < readCount[s]; r++)
                    sampleLikelihoods.set(a, r, testLikelihood(s, a, r));
        }
        return likelihoods;
    }

    /**
     * Produces a test likelihood depending on the sample, allele and read index.
     *
     * <p>
     * The result is {@code -|3 * (sampleIndex + 1) + 7 * (alleleIndex + 1) + 11 * (readIndex + 1)|},
     * so it is never positive.
     * </p>
     */
    private static double testLikelihood(final int sampleIndex, final int alleleIndex, final int readIndex) {
        return - Math.abs(3 * (sampleIndex + 1) + 7 * (alleleIndex + 1) + 11 * (readIndex + 1));
    }

    /**
     * Creates {@code readCount} artificial reads named {@code READ_<sampleIndex>_<n>}, where
     * {@code n} runs from 0 to {@code readCount - 1}.
     *
     * @param sampleIndex the index of the sample, used only to build the read names.
     * @param readCount the number of reads to create.
     * @return never {@code null}.
     */
    static List<GATKSAMRecord> readList(final int sampleIndex, final int readCount) {
        final List<GATKSAMRecord> reads = new ArrayList<>(readCount);
        // The loop index doubles as the per-sample read index; no separate counter is needed.
        for (int readIndex = 0; readIndex < readCount; readIndex++)
            reads.add(ArtificialSAMUtils.createArtificialRead(SAM_HEADER, "READ_" + sampleIndex + "_" + readIndex, 1, 1, 100));
        return reads;
    }

    /**
     * Creates a sampleToReads map given the sample list and the required read counts.
     *
     * <p>
     * Reads are named {@code READ_<n>} where {@code n} is a counter that keeps increasing across
     * samples, so that names are unique within the returned map.
     * </p>
     *
     * @param sampleList the target sample-list.
     * @param readCounts the target read-counts.
     * @return never {@code null}.
     */
    public static Map<String,List<GATKSAMRecord>> sampleToReads(final SampleList sampleList, final int[] readCounts) {
        final Map<String,List<GATKSAMRecord>> result = new HashMap<>(sampleList.sampleCount());
        int readIndex = 0; // global counter, deliberately not reset between samples.
        for (int i = 0; i < sampleList.sampleCount(); i++) {
            final int readCount = readCounts[i];
            final String sample = sampleList.sampleAt(i);
            final List<GATKSAMRecord> records = new ArrayList<>(readCount);
            for (int j = 0; j < readCount; j++)
                records.add(ArtificialSAMUtils.createArtificialRead(SAM_HEADER,"READ_" + (readIndex++),1,1,100));
            result.put(sample,records);
        }
        return result;
    }
}

View File

@ -0,0 +1,120 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import org.testng.Assert;
import java.util.*;
/**
 * Shared helper for unit-test classes that exercise implementations of SampleList.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class SampleListUnitTester {

    /**
     * Safe to assume that this sample name will never be used; it is used to probe the
     * behavior of {@link SampleList#sampleIndex} for an absent sample.
     */
    private static final String NEVER_USE_SAMPLE_NAME = "WHY_WOULD_YOU_CALL_A_SAMPLE_LIKE_THIS? ArE yOu Crazzzzy? " + new Date().toString();

    /**
     * Verifies that a sample-list holds exactly the expected samples, in the expected order.
     *
     * <p>
     * The check goes through the sample count, the sample at each index, the index of each
     * sample name and the index reported for a name that is not in the list (expected to be -1).
     * A failed check surfaces as a failure of one of the {@link Assert} calls, which indicates
     * a bug in the code under test.
     * </p>
     *
     * @param actual the sample-list to assess.
     * @param expected the expected sample names, in order.
     *
     * @throws IllegalArgumentException if {@code expected} is {@code null}, contains a {@code null},
     *  contains the reserved never-used name, or contains repeated names; any of these indicates
     *  a bug in the testing code.
     */
    public static void assertSampleList(final SampleList actual, final List<String> expected) {
        if (expected == null)
            throw new IllegalArgumentException("the expected list cannot be null");
        final Set<String> visited = new HashSet<>(expected.size());
        Assert.assertNotNull(actual);
        Assert.assertEquals(actual.sampleCount(),expected.size());
        for (int position = 0; position < expected.size(); position++) {
            final String expectedSample = expected.get(position);
            requireUsableExpectedSample(expectedSample, visited);
            final String actualSample = actual.sampleAt(position);
            Assert.assertNotNull(actualSample,"sample name cannot be null");
            Assert.assertFalse(visited.contains(actualSample),"repeated sample name: " + actualSample);
            Assert.assertEquals(actualSample,expectedSample,"wrong sample name order; index = " + position);
            Assert.assertEquals(actual.sampleIndex(actualSample),position,"sample index mismatch");
            visited.add(actualSample);
        }
        Assert.assertEquals(actual.sampleIndex(NEVER_USE_SAMPLE_NAME),-1);
    }

    /**
     * Rejects expected sample names that reveal a bug in the test itself: nulls, the reserved
     * never-used name, and names already visited.
     */
    private static void requireUsableExpectedSample(final String expectedSample, final Set<String> visited) {
        if (expectedSample == null)
            throw new IllegalArgumentException("the expected sample cannot be null");
        if (expectedSample.equals(NEVER_USE_SAMPLE_NAME))
            throw new IllegalArgumentException("you cannot use the forbidden sample name");
        if (visited.contains(expectedSample))
            throw new IllegalArgumentException("repeated names in the expected list, this is a test bug");
    }

    /**
     * Builds a test sample list with samples named {@code SAMPLE_0}, {@code SAMPLE_1}, and so on.
     *
     * @param sampleCount the required sample count.
     * @return never {@code null}.
     * @throws IllegalArgumentException if {@code sampleCount} is negative.
     */
    static SampleList sampleList(final int sampleCount) {
        if (sampleCount < 0)
            throw new IllegalArgumentException("the number of sample cannot be negative");
        final List<String> names = new ArrayList<>(sampleCount);
        int next = 0;
        while (next < sampleCount) {
            names.add("SAMPLE_" + next);
            next++;
        }
        return new IndexedSampleList(names);
    }
}

View File

@ -0,0 +1,126 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.genotyping;
import htsjdk.variant.variantcontext.Allele;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Test {@link SampleListUtils}.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class SampleListUtilsUnitTest {

    /** Number of samples in each fixture list; covers the empty, singleton and multi-sample cases. */
    private static final int[] SAMPLE_COUNT = { 0, 1, 5, 10, 20};

    /** One sample-name list per entry of {@link #SAMPLE_COUNT}; populated by {@link #setUp()}. */
    private List<String>[] sampleLists;

    /**
     * Builds the fixture lists.
     *
     * <p>The i-th list holds exactly {@code SAMPLE_COUNT[i]} distinct names
     * ({@code SAMPLE_0}, {@code SAMPLE_1}, ...), so the declared sizes are actually exercised.</p>
     */
    @BeforeClass
    @SuppressWarnings("unchecked") // generic array creation; only List<String> instances are ever stored
    public void setUp() {
        sampleLists = new List[SAMPLE_COUNT.length];
        for (int i = 0; i < SAMPLE_COUNT.length; i++) {
            final int count = SAMPLE_COUNT[i];
            final List<String> sampleList = new ArrayList<>(count);
            // Fill the list up to its declared size; names must be unique within a list.
            for (int j = 0; j < count; j++)
                sampleList.add("SAMPLE_" + j);
            sampleLists[i] = sampleList;
        }
    }

    /**
     * Checks that {@link SampleListUtils#asList} returns the same names, in the same order,
     * as the list the {@link IndexedSampleList} was built from.
     *
     * @param samples the sample names to wrap.
     */
    @Test(dataProvider = "singleSampleListData")
    public void testAsList(final List<String> samples) {
        final SampleList sampleList = new IndexedSampleList(samples);
        final List<String> asList = SampleListUtils.asList(sampleList);
        // TestNG convention: actual value first, expected value second.
        Assert.assertEquals(asList, samples);
    }

    /**
     * Checks that {@link SampleListUtils#equals} is reflexive, symmetric and agrees with an
     * element-by-element comparison of the two lists.
     *
     * @param samples1 names for the first sample list.
     * @param samples2 names for the second sample list.
     */
    @Test(dataProvider = "twoSampleListData", dependsOnMethods={"testAsList"})
    public void testEquals(final List<String> samples1, final List<String> samples2) {
        final SampleList sampleList1 = new IndexedSampleList(samples1);
        final SampleList sampleList2 = new IndexedSampleList(samples2);

        // Reflexivity.
        Assert.assertTrue(SampleListUtils.equals(sampleList1, sampleList1));
        Assert.assertTrue(SampleListUtils.equals(sampleList2, sampleList2));

        // Agreement with a plain array comparison of the names.
        final String[] names1 = SampleListUtils.asList(sampleList1).toArray(new String[sampleList1.sampleCount()]);
        final String[] names2 = SampleListUtils.asList(sampleList2).toArray(new String[sampleList2.sampleCount()]);
        Assert.assertEquals(SampleListUtils.equals(sampleList1, sampleList2), Arrays.equals(names1, names2));

        // Symmetry.
        Assert.assertEquals(SampleListUtils.equals(sampleList1, sampleList2),
                SampleListUtils.equals(sampleList2, sampleList1));
    }

    /**
     * Provides every fixture list on its own.
     *
     * @return one row per fixture list, each row holding that single list.
     */
    @DataProvider(name="singleSampleListData")
    public Object[][] singleSampleListData() {
        final Object[][] result = new Object[sampleLists.length][];
        for (int i = 0; i < sampleLists.length; i++)
            result[i] = new Object[] { sampleLists[i]};
        return result;
    }

    /**
     * Provides every ordered pair of fixture lists, including each list paired with itself.
     *
     * @return one row per ordered pair.
     */
    @DataProvider(name="twoSampleListData")
    public Object[][] twoAlleleListData() {
        final Object[][] result = new Object[sampleLists.length * sampleLists.length][];
        int index = 0;
        for (int i = 0; i < sampleLists.length; i++)
            for (int j = 0; j < sampleLists.length; j++)
                result[index++] = new Object[] { sampleLists[i], sampleLists[j]};
        return result;
    }
}

View File

@ -80,7 +80,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
final UnifiedArgumentCollection args = new UnifiedArgumentCollection();
final SampleList fakeSamples = SampleListUtils.singletonList("fake");
ugEngine = new UnifiedGenotypingEngine(engine, args,fakeSamples);
ugEngine = new UnifiedGenotypingEngine(args,fakeSamples,engine.getGenomeLocParser(),engine.getArguments().BAQMode);
}
private UnifiedGenotypingEngine getEngine() {
@ -89,7 +89,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
@DataProvider(name = "ReferenceQualityCalculation")
public Object[][] makeReferenceQualityCalculation() {
List<Object[]> tests = new ArrayList<Object[]>();
final List<Object[]> tests = new ArrayList<>();
// this functionality can be adapted to provide input data for whatever you might want in your data
final double p = Math.log10(0.5);
@ -116,7 +116,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
for ( Integer numAltAlleles = 0; numAltAlleles < 100; numAltAlleles++ ) {
Set<Allele> alleles = new HashSet<Allele>();
final Set<Allele> alleles = new HashSet<>();
alleles.add(Allele.create("A", true)); // ref allele
for (int len = 1; len <=numAltAlleles; len++) {

View File

@ -69,12 +69,12 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "05b8af0db7b009721df209eea96bdf1a");
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "4b646b6fc9c5c2ef88433a5b350310fe");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "1ac510860b295d66e1da7b27ba7cafb8");
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "171355e4d0648fdd50d7d56de950d338");
}
@Test(enabled = true)

View File

@ -68,6 +68,6 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "654059dda19cb2cf546097e44753ea14");
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "0f6fdf60d7f93b2db8c8cb92c1fd3e00");
}
}

View File

@ -47,18 +47,36 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
/**
 * Supplies the rows for the haploid GVCF test: BAM, reference-confidence mode,
 * interval arguments and the md5 string associated with that combination.
 *
 * @return six rows, three modes for each of the two BAM/interval pairs.
 */
@DataProvider(name = "MyDataProviderHaploid")
public Object[][] makeMyDataProviderHaploid() {
    final String pcrFreeArgs = "-L 20:10,000,000-10,010,000";
    final String wexArgs = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;

    return new Object[][] {
        { NA12878_PCRFREE, ReferenceConfidenceMode.NONE, pcrFreeArgs, "5cc1858896aca6683282f53054bb7a61" },
        { NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, pcrFreeArgs, "010a747f5c41ddb7889168e499eb40bb" },
        { NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, pcrFreeArgs, "d7dbc1c8e11a277e9db857eb766fd2c6" },
        { NA12878_WEx, ReferenceConfidenceMode.NONE, wexArgs, "799752d88c4e15e19a953add764d2239" },
        { NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, wexArgs, "fa057b35d6fe9588c2653b6560d6e3c2" },
        { NA12878_WEx, ReferenceConfidenceMode.GVCF, wexArgs, "d10e8907594414890cbf80d282426812" },
    };
}
@DataProvider(name = "MyDataProvider")
public Object[][] makeMyDataProvider() {
List<Object[]> tests = new ArrayList<>();
@ -77,6 +95,24 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
return tests.toArray(new Object[][]{});
}
/**
 * Supplies the rows for the tetraploid GVCF test: BAM, reference-confidence mode,
 * interval arguments and the md5 string associated with that combination.
 *
 * @return six rows, three modes for each of the two BAM/interval pairs.
 */
@DataProvider(name = "MyDataProviderTetraploid")
public Object[][] makeMyDataProviderTetraploid() {
    final String pcrFreeArgs = "-L 20:10,000,000-10,010,000";
    final String wexArgs = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;

    return new Object[][] {
        { NA12878_PCRFREE, ReferenceConfidenceMode.NONE, pcrFreeArgs, "6e157b6fdf4071fcb7da74f40146a611" },
        { NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, pcrFreeArgs, "354b84dbfaf55947aea40865e74ce66b" },
        { NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, pcrFreeArgs, "fc4b7e6528747cb20e0c92699a0787cb" },
        { NA12878_WEx, ReferenceConfidenceMode.NONE, wexArgs, "6e0f5d82b77ea79a639d43b2db70e751" },
        { NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, wexArgs, "a3daf472f7ab16667e5f6dab1af392ff" },
        { NA12878_WEx, ReferenceConfidenceMode.GVCF, wexArgs, "af9230fa56752b732572ce956101a2be" },
    };
}
/**
* Example testng test using MyDataProvider
*/
@ -86,7 +122,31 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String name = "testHCWithGVCF bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
final Pair<List<File>,List<String>> executionOutput = executeTest(name, spec);
executeTest(name, spec);
}
/**
 * Runs HaplotypeCaller with {@code -ploidy 1} for every row of the
 * {@code MyDataProviderHaploid} data provider. Currently disabled.
 *
 * @param bam       input BAM passed with {@code -I}.
 * @param mode      value passed with {@code -ERC}.
 * @param intervals interval arguments inserted verbatim into the command line.
 * @param md5       md5 string handed to the {@code WalkerTestSpec}.
 */
@Test(dataProvider = "MyDataProviderHaploid", enabled=false)
public void testHCWithGVCFHaploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
    final String testName = "testHCWithGVCFHaploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
    final String arguments = String.format(
            "-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
            b37KGReference,
            bam,
            intervals,
            mode,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
    executeTest(testName, new WalkerTestSpec(arguments + " -o %s", Arrays.asList(md5)));
}
/**
 * Runs HaplotypeCaller with {@code -ploidy 4} for every row of the
 * {@code MyDataProviderTetraploid} data provider. Currently disabled.
 *
 * @param bam       input BAM passed with {@code -I}.
 * @param mode      value passed with {@code -ERC}.
 * @param intervals interval arguments inserted verbatim into the command line.
 * @param md5       md5 string handed to the {@code WalkerTestSpec}.
 */
@Test(dataProvider = "MyDataProviderTetraploid", enabled=false)
public void testHCWithGVCFTetraploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
    final String testName = "testHCWithGVCFTetraploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
    final String arguments = String.format(
            "-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
            b37KGReference,
            bam,
            intervals,
            mode,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
    executeTest(testName, new WalkerTestSpec(arguments + " -o %s", Arrays.asList(md5)));
}
@Test
@ -144,6 +204,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
private static final String NOCALL_GVCF_BUGFIX_INTERVALS = privateTestDir + "gvcf_nocall_bug.interval_list";
private static final String NOCALL_GVCF_BUGFIX_BAM = privateTestDir + "gvcf_nocall_bug.bam";
// Inputs (BAM + interval file, both under privateTestDir) for the two
// testGeneralPloidyArrayIndexBug*Fix regression tests.
private static final String GENERAL_PLOIDY_BUGFIX1_BAM = privateTestDir + "general-ploidy-arrayindex-bug-1.bam";
private static final String GENERAL_PLOIDY_BUGFIX1_INTERVALS = privateTestDir + "general-ploidy-arrayindex-bug-1.intervals";
private static final String GENERAL_PLOIDY_BUGFIX2_BAM = privateTestDir + "general-ploidy-arrayindex-bug-2.bam";
private static final String GENERAL_PLOIDY_BUGFIX2_INTERVALS = privateTestDir + "general-ploidy-arrayindex-bug-2.intervals";
@Test
public void testNoCallGVCFMissingPLsBugFix() {
@ -153,4 +218,23 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
spec.disableShadowBCF();
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
}
/**
 * Regression run of HaplotypeCaller in GVCF mode with {@code -ploidy 1 -maxAltAlleles 2}
 * over the first general-ploidy array-index bug inputs. Currently disabled.
 */
@Test(enabled=false)
public void testGeneralPloidyArrayIndexBug1Fix() {
    final String arguments = String.format(
            "-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189",
            b37KGReference,
            GENERAL_PLOIDY_BUGFIX1_BAM,
            GENERAL_PLOIDY_BUGFIX1_INTERVALS,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
    final WalkerTestSpec testSpec = new WalkerTestSpec(arguments + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce"));
    testSpec.disableShadowBCF();
    // The leading blank in the test name is kept as in the original.
    executeTest(" testGeneralPloidyArrayIndexBug1Fix", testSpec);
}
/**
 * Regression run of HaplotypeCaller in GVCF mode with {@code -ploidy 2 -maxAltAlleles 2}
 * and the DepthPerSampleHC / StrandBiasBySample annotations over the second
 * general-ploidy array-index bug inputs. Currently disabled.
 */
@Test(enabled=false)
public void testGeneralPloidyArrayIndexBug2Fix() {
    final String arguments = String.format(
            "-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937",
            b37KGReference,
            GENERAL_PLOIDY_BUGFIX2_BAM,
            GENERAL_PLOIDY_BUGFIX2_INTERVALS,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
    // NOTE(review): this md5 is identical to the one in testGeneralPloidyArrayIndexBug1Fix although the
    // inputs and arguments differ; it looks copy-pasted. Confirm the real digest before enabling the test.
    final WalkerTestSpec testSpec = new WalkerTestSpec(arguments + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce"));
    testSpec.disableShadowBCF();
    // The leading blank in the test name is kept as in the original.
    executeTest(" testGeneralPloidyArrayIndexBug2Fix", testSpec);
}
}

View File

@ -93,24 +93,55 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
HCTest(NA12878_BAM, "", "42de756c08b028be70287ada1022526e");
}
/** Runs {@code HCTest} on {@code CEUTRIO_BAM} with {@code -ploidy 1}. */
@Test
public void testHaplotypeCallerMultiSampleHaploid() {
    final String extraArgs = "-ploidy 1";
    final String expectedMd5 = "b9e43506af628768fc9fd1ced49822b1";
    HCTest(CEUTRIO_BAM, extraArgs, expectedMd5);
}
/** Runs {@code HCTest} on {@code NA12878_BAM} with {@code -ploidy 1}. */
@Test
public void testHaplotypeCallerSingleSampleHaploid() {
    final String extraArgs = "-ploidy 1";
    final String expectedMd5 = "fb584b8c3f371ee2e438a3fc2335b26f";
    HCTest(NA12878_BAM, extraArgs, expectedMd5);
}
/** Runs {@code HCTest} on {@code NA12878_BAM} with {@code -ploidy 4}. */
@Test
public void testHaplotypeCallerSingleSampleTetraploid() {
    final String extraArgs = "-ploidy 4";
    final String expectedMd5 = "d450b486c76520f9c803c603f25563e4";
    HCTest(NA12878_BAM, extraArgs, expectedMd5);
}
/** Runs {@code HCTest} on {@code NA12878_BAM} with {@code -mbq 15}. */
@Test
public void testHaplotypeCallerMinBaseQuality() {
    final String extraArgs = "-mbq 15";
    final String expectedMd5 = "d063c0e5af1fd413be0500609ae36d46";
    HCTest(NA12878_BAM, extraArgs, expectedMd5);
}
/** Runs {@code HCTest} on {@code NA12878_BAM} with {@code -mbq 15 -ploidy 1}. */
@Test
public void testHaplotypeCallerMinBaseQualityHaploid() {
    final String extraArgs = "-mbq 15 -ploidy 1";
    final String expectedMd5 = "40259040f6febd8ea5931132cf5d8958";
    HCTest(NA12878_BAM, extraArgs, expectedMd5);
}
/** Runs {@code HCTest} on {@code NA12878_BAM} with {@code -mbq 15 -ploidy 4}. */
@Test
public void testHaplotypeCallerMinBaseQualityTetraploid() {
    final String extraArgs = "-mbq 15 -ploidy 4";
    final String expectedMd5 = "ca11eae5def67ca9717d129348e4cda7";
    HCTest(NA12878_BAM, extraArgs, expectedMd5);
}
/** Runs {@code HCTest} on {@code NA12878_BAM} with {@code -likelihoodEngine GraphBased}. */
@Test
public void testHaplotypeCallerGraphBasedSingleSample() {
    final String extraArgs = "-likelihoodEngine GraphBased";
    final String expectedMd5 = "6cf15ddbfa4a3738e891fd9a09da8d07";
    HCTest(NA12878_BAM, extraArgs, expectedMd5);
}
/** Runs {@code HCTest} on {@code CEUTRIO_BAM} with {@code -likelihoodEngine GraphBased -ploidy 1}. */
@Test
public void testHaplotypeCallerGraphBasedMultiSampleHaploid() {
    final String extraArgs = "-likelihoodEngine GraphBased -ploidy 1";
    final String expectedMd5 = "f0677e5a2882f947f437e8d2049172cb";
    HCTest(CEUTRIO_BAM, extraArgs, expectedMd5);
}
/** Runs {@code HCTest} on {@code CEUTRIO_BAM} with {@code -likelihoodEngine GraphBased}. */
@Test
public void testHaplotypeCallerGraphBasedMultiSample() {
    final String extraArgs = "-likelihoodEngine GraphBased";
    final String expectedMd5 = "4c2a2dad6379b13fee4c7faca17441f5";
    HCTest(CEUTRIO_BAM, extraArgs, expectedMd5);
}
@Test(enabled = false) // can't annotate the rsID's yet
@Test
public void testHaplotypeCallerSingleSampleWithDbsnp() {
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "");
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "9d7067648561aa35b04d355184a5dea2");
}
@Test
@ -120,6 +151,18 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
"669aac2aa9c22881eda86ee53b13351a");
}
/**
 * Runs {@code HCTest} on {@code CEUTRIO_BAM} in GENOTYPE_GIVEN_ALLELES mode with {@code -ploidy 1},
 * taking the alleles from the combined phase1 chr20 indel sites VCF under {@code validationDataLocation}.
 */
@Test
public void testHaplotypeCallerMultiSampleGGAHaploid() {
    final String allelesFile = validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
    final String extraArgs = "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + allelesFile;
    final String expectedMd5 = "e50c55c65db3fa55c75ba03b4dd2f1a8";
    HCTest(CEUTRIO_BAM, extraArgs, expectedMd5);
}
/**
 * Runs {@code HCTest} on {@code CEUTRIO_BAM} in GENOTYPE_GIVEN_ALLELES mode with {@code -ploidy 4},
 * taking the alleles from the combined phase1 chr20 indel sites VCF under {@code validationDataLocation}.
 */
@Test
public void testHaplotypeCallerMultiSampleGGATetraploid() {
    final String allelesFile = validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
    final String extraArgs = "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + allelesFile;
    final String expectedMd5 = "374d6db6e5f3f4fdb5ede26a529caa8b";
    HCTest(CEUTRIO_BAM, extraArgs, expectedMd5);
}
@Test
public void testHaplotypeCallerInsertionOnEdgeOfContig() {
HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae");
@ -265,7 +308,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + hg19Reference + " --no_cmdline_in_header -I " + NA12878_PCRFREE250_ADAPTER_TRIMMED + " -o %s -L 20:10,024,000-10,024,500 "
, 1,
Arrays.asList(""));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
executeTest("HCTestGraphBasedPCRFreePositiveLogLkFix", spec);
}
// --------------------------------------------------------------------------------------------------------------
@ -346,5 +389,4 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
}
}

View File

@ -300,6 +300,9 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
@Test
public void testRefConfidencePartialReads() {
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final String ref = "ACGTAACCGGTT";
for ( int readLen = 3; readLen < ref.length(); readLen++ ) {
for ( int start = 0; start < ref.length() - readLen; start++ ) {
@ -307,8 +310,6 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final List<Haplotype> haplotypes = Arrays.asList(data.getRefHap());
final List<VariantContext> calls = Collections.emptyList();
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
data.getActiveRegion().add(data.makeRead(start, readLen));
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
@ -326,6 +327,9 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final int start = xxxdata.getStart();
final int stop = xxxdata.getEnd();
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
for ( int nReads = 0; nReads < 2; nReads++ ) {
final VariantContext vcStart = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start, Arrays.asList("A", "C"));
@ -347,8 +351,6 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,HomoSapiensConstants.DEFAULT_PLOIDY);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final List<Integer> expectedDPs = Collections.nCopies(data.getActiveRegion().getLocation().size(), nReads);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);

View File

@ -56,7 +56,8 @@ import java.util.Random;
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class RandomDNA {
public class
RandomDNA {
private Random random;
@ -73,6 +74,19 @@ public class RandomDNA {
random = new Random();
}
/**
 * Builds a random DNA generator on top of a caller-supplied random number generator.
 *
 * @param rnd the random number generator to draw from; stored as is, not copied.
 *
 * @throws IllegalArgumentException if {@code rnd} is {@code null}.
 */
public RandomDNA(final Random rnd) {
    if (rnd == null) {
        throw new IllegalArgumentException("the random number generator cannot be null");
    }
    this.random = rnd;
}
/**
* Constructs a new random DNA generator providing a seed.
*

View File

@ -0,0 +1,281 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.utils.collections;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Tests the working of {@link IndexedSet}
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class IndexedSetUnitTest {
/**
 * Adds random elements one by one and, after each insertion, compares the indexed set
 * with a {@link LinkedHashSet} fed the same values: same {@code add} result, same size,
 * and a new element must land at the last index.
 *
 * @param initialCapacity capacity passed to the {@link IndexedSet} constructor.
 * @param elementCount    number of insertions to attempt.
 * @param maxElement      largest value that may be drawn (inclusive).
 */
@Test(dataProvider = "initialCapacityElementCountMaxElementData")
public void testCompositionBySingleElementAddition(final int initialCapacity,
                                                   final int elementCount, final int maxElement) {
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    final IndexedSet<Integer> indexedSet = new IndexedSet<>(initialCapacity);
    final Set<Integer> expected = new LinkedHashSet<>();

    int remaining = elementCount;
    while (remaining-- > 0) {
        final int candidate = random.nextInt(maxElement + 1);
        final boolean seenBefore = expected.contains(candidate);
        Assert.assertEquals(indexedSet.add(candidate), expected.add(candidate));
        Assert.assertEquals(indexedSet.size(), expected.size());
        if (!seenBefore) {
            Assert.assertEquals(indexedSet.indexOf(candidate), expected.size() - 1);
        }
    }
    assertEquals(indexedSet, expected);
}
/**
 * Fills an empty indexed set with a single {@code addAll} call and compares it with a
 * {@link LinkedHashSet} built from the same list.
 *
 * @param initialCapacity capacity passed to the {@link IndexedSet} constructor.
 * @param elementCount    number of random elements to generate.
 * @param maxElement      largest value that may be drawn (inclusive).
 */
@Test(dataProvider = "initialCapacityElementCountMaxElementData")
public void testCompositionByCollectionAddition(final int initialCapacity,
                                                final int elementCount, final int maxElement) {
    final IndexedSet<Integer> indexedSet = new IndexedSet<>(initialCapacity);
    final List<Integer> source = generateElementCollection(elementCount, maxElement);

    // Starting from an empty set, addAll reports a change exactly when there is something to add.
    final boolean changed = indexedSet.addAll(source);
    Assert.assertEquals(changed, !source.isEmpty());

    final Set<Integer> expected = new LinkedHashSet<>(elementCount);
    expected.addAll(source);
    assertEquals(indexedSet, expected);
}
/**
 * Builds the indexed set through its collection constructor, compares it with a
 * {@link LinkedHashSet} of the same list, then checks that re-adding the list changes nothing.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement   largest value that may be drawn (inclusive).
 */
@Test(dataProvider = "elementCountMaxElementData")
public void testCompositionByCollectionConstructor(final int elementCount, final int maxElement) {
    final List<Integer> source = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> indexedSet = new IndexedSet<>(source);
    final Set<Integer> expected = new LinkedHashSet<>(source);

    assertEquals(indexedSet, expected);

    final boolean changedOnReAdd = indexedSet.addAll(source);
    Assert.assertFalse(changedOnReAdd);
}
/**
 * Draws {@code elementCount} integers in {@code [0, maxElement]} from the engine's random generator.
 * Duplicates are possible.
 *
 * @param elementCount number of values to draw.
 * @param maxElement   largest value that may be drawn (inclusive).
 * @return the drawn values, in drawing order.
 */
private List<Integer> generateElementCollection(final int elementCount, final int maxElement) {
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    final List<Integer> result = new ArrayList<>(elementCount);
    final int bound = maxElement + 1;
    while (result.size() < elementCount) {
        result.add(random.nextInt(bound));
    }
    return result;
}
/**
 * Checks that {@code get(i)} and {@code asList().get(i)} both return the i-th element of a
 * {@link LinkedHashSet} built from the same input list.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement   largest value that may be drawn (inclusive).
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor"})
public void testLookupByIndex(final int elementCount, final int maxElement) {
    final List<Integer> source = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> indexedSet = new IndexedSet<>(source);
    final Set<Integer> expectedSet = new LinkedHashSet<>(source);
    final Integer[] expectedOrder = expectedSet.toArray(new Integer[expectedSet.size()]);
    final List<Integer> listView = indexedSet.asList();

    int index = 0;
    while (index < indexedSet.size()) {
        final int expected = expectedOrder[index];
        final int viaGet = indexedSet.get(index);
        final int viaList = listView.get(index);
        Assert.assertEquals(viaGet, expected);
        Assert.assertEquals(viaList, expected);
        index++;
    }
}
/**
 * Checks that {@code indexOf} returns the position of every present element and {@code -1}
 * for a value that was never added.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement   largest value that may be drawn (inclusive).
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor"})
public void testIndexOf(final int elementCount, final int maxElement) {
    final List<Integer> source = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> indexedSet = new IndexedSet<>(source);
    final Set<Integer> expectedSet = new LinkedHashSet<>(source);
    final Integer[] expectedOrder = expectedSet.toArray(new Integer[expectedSet.size()]);
    final List<Integer> listView = indexedSet.asList();

    int position = 0;
    while (position < indexedSet.size()) {
        final int expected = expectedOrder[position];
        final int viaList = listView.get(position);
        final int foundAt = indexedSet.indexOf(expected);
        Assert.assertEquals(viaList, expected);
        Assert.assertEquals(foundAt, position);
        // Generated values are never negative, so -expected - 1 cannot be in the set.
        final int absentValue = -expected - 1;
        Assert.assertEquals(indexedSet.indexOf(absentValue), -1);
        position++;
    }
}
/**
 * Removes half of the elements (rounded up), picked at random positions, from both the
 * indexed-set and a {@link LinkedHashSet} with the same content, then checks that the two
 * still match.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value a generated element may take.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
public void testRemoveHalf(final int elementCount, final int maxElement) {
    final List<Integer> input = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(input);
    final Set<Integer> reference = new LinkedHashSet<>(input);
    int pendingRemovals = (subject.size() + 1) / 2;
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    while (pendingRemovals-- > 0) {
        // Kept as a primitive int so that the same remove(...) overloads are selected as before.
        final int victim = subject.get(random.nextInt(subject.size()));
        subject.remove(victim);
        reference.remove(victim);
    }
    assertEquals(subject, reference);
}
/**
 * Removes every element, one at a time and at random positions, from both the indexed-set and
 * a {@link LinkedHashSet} with the same content, then checks that the two still match.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value a generated element may take.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
public void testRemoveAll(final int elementCount, final int maxElement) {
    final List<Integer> input = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(input);
    final Set<Integer> reference = new LinkedHashSet<>(input);
    // Fixed up-front: the set shrinks while the loop runs.
    final int totalRemovals = subject.size();
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    for (int done = 0; done != totalRemovals; done++) {
        // Kept as a primitive int so that the same remove(...) overloads are selected as before.
        final int victim = subject.get(random.nextInt(subject.size()));
        subject.remove(victim);
        reference.remove(victim);
    }
    assertEquals(subject, reference);
}
/**
 * Clears both the indexed-set and a {@link LinkedHashSet} built from the same input and checks
 * that they match afterwards.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value a generated element may take.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor"})
public void testClear(final int elementCount, final int maxElement) {
    final List<Integer> input = generateElementCollection(elementCount, maxElement);
    final Set<Integer> reference = new LinkedHashSet<>(input);
    final IndexedSet<Integer> subject = new IndexedSet<>(input);
    reference.clear();
    subject.clear();
    assertEquals(subject, reference);
}
/**
 * Empties the indexed-set and a {@link LinkedHashSet} with the same content by removing
 * elements at random positions, adds the original input back to both, and checks that the two
 * match.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value a generated element may take.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
public void testRemoveAndAdd(final int elementCount, final int maxElement) {
    final List<Integer> input = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(input);
    final Set<Integer> reference = new LinkedHashSet<>(input);
    // Fixed up-front: the set shrinks while the loop runs.
    final int totalRemovals = subject.size();
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    int done = 0;
    while (done < totalRemovals) {
        // Kept as a primitive int so that the same remove(...) overloads are selected as before.
        final int victim = subject.get(random.nextInt(subject.size()));
        subject.remove(victim);
        reference.remove(victim);
        done++;
    }
    subject.addAll(input);
    reference.addAll(input);
    assertEquals(subject, reference);
}
/** Initial-capacity values combined by the "initialCapacityElementCountMaxElementData" data provider. */
private final int[] INITIAL_CAPACITY = { 0, 10, 100 };
/** Element-count values combined by the data providers of this class. */
private final int[] ELEMENT_COUNT = { 0, 1, 10, 100 , 1000 };
/** Max-element values combined by the data providers of this class. */
private final int[] MAX_ELEMENT = { 0, 1, 5, 10, 50, 100, 500 };
/**
 * Provides every combination of the values in {@code INITIAL_CAPACITY}, {@code ELEMENT_COUNT}
 * and {@code MAX_ELEMENT}.
 *
 * @return one row per combination, laid out as {initial-capacity, element-count, max-element};
 *         the max-element value varies fastest and the initial-capacity slowest.
 */
@DataProvider(name="initialCapacityElementCountMaxElementData")
public Object[][] initialCapacityElementCountMaxElementData() {
    final int rowCount = INITIAL_CAPACITY.length * ELEMENT_COUNT.length * MAX_ELEMENT.length;
    final Object[][] rows = new Object[rowCount][];
    int row = 0;
    for (final int initialCapacity : INITIAL_CAPACITY) {
        for (final int elementCount : ELEMENT_COUNT) {
            for (final int maxElement : MAX_ELEMENT) {
                rows[row] = new Object[] { initialCapacity, elementCount, maxElement };
                row++;
            }
        }
    }
    return rows;
}
/**
 * Provides every combination of the values in {@code ELEMENT_COUNT} and {@code MAX_ELEMENT}.
 *
 * @return one row per combination, laid out as {element-count, max-element}; the max-element
 *         value varies fastest.
 */
@DataProvider(name="elementCountMaxElementData")
public Object[][] elementCountMaxElementData() {
    final int rowCount = ELEMENT_COUNT.length * MAX_ELEMENT.length;
    final Object[][] rows = new Object[rowCount][];
    int row = 0;
    for (final int elementCount : ELEMENT_COUNT) {
        for (final int maxElement : MAX_ELEMENT) {
            rows[row] = new Object[] { elementCount, maxElement };
            row++;
        }
    }
    return rows;
}
/**
 * Asserts that an indexed-set is equivalent to the insertion-ordered set provided.
 * <p>
 * Both must have the same size and yield the same elements in the same iteration order. The
 * list returned by {@code subject.asList()} and the indices reported by
 * {@code subject.indexOf} must agree with that order as well.
 * </p>
 *
 * @param subject the indexed-set to test.
 * @param elementSet the insertion-ordered set holding the expected content.
 */
private void assertEquals(final IndexedSet<Integer> subject, final Set<Integer> elementSet) {
    Assert.assertEquals(subject.size(), elementSet.size());
    final List<Integer> subjectList = subject.asList();
    Assert.assertEquals(subjectList.size(), elementSet.size());
    final Iterator<Integer> subjectIterator = subject.iterator();
    // The expected order must come from elementSet: iterating subject twice would only
    // compare the indexed-set with itself.
    final Iterator<Integer> elementSetIterator = elementSet.iterator();
    final ListIterator<Integer> subjectListIterator = subjectList.listIterator();
    while (subjectIterator.hasNext()) {
        // Reaching this point with an exhausted expected iterator means the subject has extra elements.
        Assert.assertTrue(elementSetIterator.hasNext(), "more elements in indexed-set than in the equivalent hash-set");
        Assert.assertTrue(subjectListIterator.hasNext());
        final Integer nextElement = subjectIterator.next();
        Assert.assertEquals(nextElement, elementSetIterator.next(), "elements in indexed-set do not follow the same order as equivalent linked hash-set's");
        Assert.assertEquals(subjectListIterator.next(), nextElement);
        // previousIndex() is the position of the element just returned by the list iterator.
        Assert.assertEquals(subject.indexOf(nextElement), subjectListIterator.previousIndex());
    }
    Assert.assertFalse(elementSetIterator.hasNext());
    Assert.assertFalse(subjectListIterator.hasNext());
}
}

View File

@ -0,0 +1,171 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.utils.collections;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
/**
 * Unit tests for {@link IntMaxHeap}.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class IntMaxHeapUnitTest {

    /**
     * Adds random values to a heap created with the given initial capacity; the test passes as
     * long as no exception is thrown.
     */
    @Test(dataProvider = "capacityData")
    public void testCapacity(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random random = GenomeAnalysisEngine.getRandomGenerator();
        int pending = elementCount;
        while (pending-- > 0) {
            final int value = random.nextInt();
            heap.add(value);
        }
    }

    /**
     * Checks that a new heap is empty with size zero, and that after each addition the size
     * grows by one and the heap is no longer reported as empty.
     */
    @Test(dataProvider = "capacityData",dependsOnMethods = {"testCapacity"})
    public void testEmptynessAndSize(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random random = GenomeAnalysisEngine.getRandomGenerator();
        Assert.assertEquals(heap.size(),0);
        Assert.assertTrue(heap.isEmpty());
        int added = 0;
        while (added < elementCount) {
            final int value = random.nextInt();
            heap.add(value);
            added++;
            Assert.assertEquals(heap.size(),added);
            Assert.assertFalse(heap.isEmpty());
        }
    }

    /**
     * Checks that {@code clear()} leaves a populated heap empty with size zero.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testEmptynessAndSize"})
    public void testClear(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random random = GenomeAnalysisEngine.getRandomGenerator();
        int pending = elementCount;
        while (pending-- > 0) {
            final int value = random.nextInt();
            heap.add(value);
        }
        heap.clear();
        Assert.assertEquals(heap.size(),0);
        Assert.assertTrue(heap.isEmpty());
    }

    /**
     * Checks that adding an array of values in a single call yields a heap with the same size
     * and the same removal sequence as adding those values one by one.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testCapacity"})
    public void testAddArray(final int initialCapacity, final int elementCount) {
        final IntMaxHeap oneByOneHeap = new IntMaxHeap(initialCapacity);
        final IntMaxHeap bulkHeap = new IntMaxHeap(initialCapacity);
        final Random random = GenomeAnalysisEngine.getRandomGenerator();
        final int[] values = new int[elementCount];
        int next = 0;
        while (next < elementCount) {
            final int value = random.nextInt();
            values[next] = value;
            oneByOneHeap.add(value);
            next++;
        }
        bulkHeap.add(values);
        Assert.assertEquals(bulkHeap.size(),oneByOneHeap.size());
        while (!bulkHeap.isEmpty()) {
            Assert.assertEquals(bulkHeap.remove(),oneByOneHeap.remove());
        }
    }

    /**
     * Checks that {@code remove()} returns the values in descending order and that the size
     * shrinks by one after each removal.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testEmptynessAndSize"})
    public void testRemove(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random random = GenomeAnalysisEngine.getRandomGenerator();
        final List<Integer> expectedOrder = new ArrayList<>(elementCount);
        int pending = elementCount;
        while (pending-- > 0) {
            final int value = random.nextInt();
            expectedOrder.add(value);
            heap.add(value);
        }
        // Largest first: the order in which a max-heap is expected to hand values back.
        Collections.sort(expectedOrder, Collections.reverseOrder());
        int expectedSize = elementCount;
        for (final Integer expected : expectedOrder) {
            Assert.assertEquals(heap.remove(),(int)expected, "element-count = " + elementCount + ", initial-capacity = " + initialCapacity);
            Assert.assertEquals(heap.size(),--expectedSize);
        }
    }

    /**
     * Checks that {@code peek()} returns the largest value added so far after every addition.
     * One value is always added, even when {@code elementCount} is zero.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testCapacity"})
    public void testPeek(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random random = GenomeAnalysisEngine.getRandomGenerator();
        int expectedMax = random.nextInt();
        heap.add(expectedMax);
        Assert.assertEquals(heap.peek(),expectedMax);
        int added = 1;
        while (added < elementCount) {
            final int value = random.nextInt();
            expectedMax = Math.max(expectedMax, value);
            heap.add(value);
            Assert.assertEquals(heap.peek(),expectedMax);
            added++;
        }
    }

    /**
     * Provides the {initial-capacity, element-count} pairs used by every test in this class.
     */
    @DataProvider(name="capacityData")
    public Object[][] capacityData() {
        final Object[][] rows = new Object[3][];
        rows[0] = new Object[] { 0, 100 };
        rows[1] = new Object[] { 1, 113 };
        rows[2] = new Object[] { 20, 301 };
        return rows;
    }
}

View File

@ -48,14 +48,15 @@ package org.broadinstitute.gatk.utils.genotyper;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.genotyping.IndexedAlleleList;
import org.broadinstitute.gatk.genotyping.IndexedSampleList;
import org.broadinstitute.gatk.genotyping.*;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.testng.Assert;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@ -410,7 +411,7 @@ public class ReadLikelihoodsUnitTest
// We add a single missing.
result.addMissingAlleles(Arrays.asList(newOne = Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-12345.6);
Assert.assertEquals(original.alleleCount() + 1, result.alleleCount());
Assert.assertEquals(result.alleleCount(), original.alleleCount() + 1);
// We add two more amongst existing alleles:
result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.alleleAt(1),
@ -479,9 +480,9 @@ public class ReadLikelihoodsUnitTest
final int alleleCount = result.alleleCount();
Assert.assertEquals(result.alleleCount(), alleleCount);
for (int a = 0; a < alleleCount; a++) {
Assert.assertEquals(result.sampleReadCount(0),sampleReadCount);
Assert.assertEquals(result.sampleReadCount(sampleIndex),sampleReadCount);
for (int r = 0; r < sampleReadCount; r++)
Assert.assertEquals(result.sampleMatrix(0).get(a,r),
Assert.assertEquals(result.sampleMatrix(sampleIndex).get(a,r),
likelihoods == null ? 0.0 : likelihoods[sampleIndex][a][r], EPSILON);
}
}
@ -541,7 +542,7 @@ public class ReadLikelihoodsUnitTest
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
final Object[][] result = new Object[SAMPLE_SETS.length * ALLELE_SETS.length * ALLELE_SETS.length][];
int nextIndex = 0;
for (int s = 0; s < SAMPLE_SETS.length; s++)
for (int s = 0; s < SAMPLE_SETS.length; s++) {
for (int a = 0; a < ALLELE_SETS.length; a++) {
for (int b = 0; b < ALLELE_SETS.length; b++) {
if (ALLELE_SETS[b].length < ALLELE_SETS[a].length)
@ -550,6 +551,7 @@ public class ReadLikelihoodsUnitTest
};
}
}
}
return Arrays.copyOf(result,nextIndex);
}catch (final Throwable e) {
throw new RuntimeException(e);
@ -590,9 +592,6 @@ public class ReadLikelihoodsUnitTest
}
}
final SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader();
final GenomeLocParser locParser = new GenomeLocParser(SAM_HEADER.getSequenceDictionary());
private Map<String,List<GATKSAMRecord>> dataSetReads(final String[] samples,
final Random rnd) {
final Map<String,List<GATKSAMRecord>> result = new HashMap<>(samples.length);
@ -608,4 +607,245 @@ public class ReadLikelihoodsUnitTest
}
return result;
}
/**
 * Builds a {@link ReadLikelihoods} from generated samples, alleles and reads, and checks its
 * allele list, sample list, reference-allele index and basic per-sample queries.
 *
 * @param readCounts number of reads of each sample; its length is the number of samples.
 * @param alleleCount number of alleles to generate.
 * @param hasReference whether one of the generated alleles is turned into a reference allele.
 */
@Test(dataProvider="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
public void testInstantiationAndBasicQueries(final int[] readCounts, final int alleleCount, final boolean hasReference) {
    final SampleList sampleList = sampleList(readCounts);
    final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
    final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList, readCounts);
    final ReadLikelihoods<Allele> subject = new ReadLikelihoods<>(sampleList,alleleList,sampleToReads);
    AlleleListUnitTester.assertAlleleList(subject,AlleleListUtils.asList(alleleList));
    SampleListUnitTester.assertSampleList(subject,SampleListUtils.asList(sampleList));
    if (hasReference) {
        final int referenceIndex = AlleleListUtils.indexOfReference(alleleList);
        Assert.assertTrue(referenceIndex >= 0);
        // Query the subject here: asking the input allele list again would only compare the
        // expected index with itself.
        Assert.assertEquals(AlleleListUtils.indexOfReference(subject),referenceIndex);
    } else {
        Assert.assertEquals(AlleleListUtils.indexOfReference(subject), -1);
    }
    testLikelihoodMatrixQueries(alleleList, sampleList, sampleToReads, subject);
    testAlleleQueries(alleleList, subject);
    testSampleQueries(sampleList, sampleToReads, subject);
}
/**
 * Writes a distinct likelihood into every (allele, read) cell of every sample matrix and checks
 * that each value can be read back, and that the number of cells written matches the number
 * expected from the read and allele counts.
 *
 * @param readCounts number of reads of each sample; its length is the number of samples.
 * @param alleleCount number of alleles to generate.
 * @param hasReference whether one of the generated alleles is turned into a reference allele.
 */
@Test(dataProvider="readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference")
public void testLikelihoodWriting(final int[] readCounts, final int alleleCount, final boolean hasReference) {
    final SampleList sampleList = sampleList(readCounts);
    final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
    final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList,readCounts);
    final ReadLikelihoods<Allele> subject = new ReadLikelihoods<>(sampleList,alleleList,sampleToReads);
    final int sampleCount = readCounts.length;
    int totalLikelihoodsSet = 0;
    int expectedLikelihoodsSet = 0;
    for (int s = 0; s < sampleCount; s++) {
        expectedLikelihoodsSet += readCounts[s] * alleleCount;
        final ReadLikelihoods.Matrix<Allele> matrix = subject.sampleMatrix(s);
        final int readCount = matrix.readCount();
        for (int a = 0; a < alleleCount; a++) {
            for (int r = 0; r < readCount; r++) {
                final double likelihood = testLikelihood(s, a, r);
                // Paranoia: the test value must differ from 0. The literal is a double so that
                // both arguments have the same type; a (double, int) pair can end up boxed as
                // Double vs Integer, which are never equal, making the check unable to fail.
                Assert.assertNotEquals(likelihood,0.0);
                totalLikelihoodsSet++;
                matrix.set(a,r,likelihood);
                Assert.assertEquals(matrix.get(a, r),likelihood);
            }
        }
    }
    Assert.assertEquals(totalLikelihoodsSet,expectedLikelihoodsSet);
}
/**
 * Fills one {@link PerReadAlleleLikelihoodMap} per sample with test likelihoods, converts the
 * resulting map with {@code ReadLikelihoods.fromPerAlleleReadLikelihoodsMap}, and checks that
 * samples, alleles, reads and likelihoods can be found in the converted instance.
 *
 * @param readCounts number of reads of each sample; its length is the number of samples.
 * @param alleleCount number of alleles to generate.
 * @param hasReference whether one of the generated alleles is turned into a reference allele.
 */
@Test(dependsOnMethods={"testLikelihoodWriting","testInstantiationAndBasicQueries"},
      dataProvider="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
public void testMapConversion(final int[] readCounts, final int alleleCount, final boolean hasReference) {
    final SampleList sampleList = sampleList(readCounts);
    final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
    final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList,readCounts);

    // Alleles and reads for which at least one likelihood has been stored.
    final Set<Allele> allelesSeen = new HashSet<>();
    final Set<GATKSAMRecord> readsSeen = new HashSet<>();
    final Map<String,PerReadAlleleLikelihoodMap> perSampleMaps = new HashMap<>(sampleList.sampleCount());
    final int sampleCount = sampleList.sampleCount();

    for (int s = 0; s < sampleCount; s++) {
        final String sample = sampleList.sampleAt(s);
        final PerReadAlleleLikelihoodMap perSampleMap = new PerReadAlleleLikelihoodMap();
        final List<GATKSAMRecord> reads = sampleToReads.get(sample);
        for (int a = 0; a < alleleCount; a++) {
            for (int r = 0; r < reads.size(); r++) {
                final GATKSAMRecord read = reads.get(r);
                final Allele allele = alleleList.alleleAt(a);
                perSampleMap.add(read, allele, testLikelihood(s, a, r));
                allelesSeen.add(allele);
                readsSeen.add(read);
            }
        }
        perSampleMaps.put(sample,perSampleMap);
    }

    final ReadLikelihoods<Allele> subject = ReadLikelihoods.fromPerAlleleReadLikelihoodsMap(perSampleMaps);

    for (int s = 0; s < sampleCount; s++) {
        final String sample = sampleList.sampleAt(s);
        final int sIndex = subject.sampleIndex(sample);
        Assert.assertTrue(sIndex >= 0);
        Assert.assertTrue(sIndex < sampleCount);
        final List<GATKSAMRecord> expectedReads = sampleToReads.get(sample);
        final int sampleReadCount = expectedReads.size();
        final ReadLikelihoods.Matrix<Allele> sampleLikelihoods = subject.sampleMatrix(sIndex);
        for (int a = 0; a < alleleCount; a++) {
            final Allele allele = alleleList.alleleAt(a);
            final int aIndex = subject.alleleIndex(allele);
            // An allele must be indexed exactly when some likelihood was stored for it.
            Assert.assertEquals(aIndex >= 0,allelesSeen.contains(allele));
            Assert.assertTrue(aIndex < alleleCount);
            if (aIndex != -1) {
                for (int r = 0; r < sampleReadCount; r++) {
                    final GATKSAMRecord read = expectedReads.get(r);
                    final int rIndex = subject.readIndex(sIndex,read);
                    final int rIndexFromMatrix = sampleLikelihoods.readIndex(read);
                    Assert.assertEquals(rIndex,rIndexFromMatrix);
                    Assert.assertEquals(rIndex >= 0,readsSeen.contains(read));
                    Assert.assertTrue(rIndex < sampleReadCount);
                    if (rIndex != -1) {
                        final double likelihood = sampleLikelihoods.get(aIndex,rIndex);
                        Assert.assertEquals(likelihood,testLikelihood(s,a,r));
                    }
                }
            }
        }
    }
}
/**
 * Computes the deterministic test likelihood for a (sample, allele, read) index triplet.
 * <p>
 * The value is the negated absolute value of
 * {@code 31 * (sampleIndex + 1) + 101 * alleleIndex + 1009 * readIndex}.
 * </p>
 *
 * @param sampleIndex index of the sample.
 * @param alleleIndex index of the allele.
 * @param readIndex index of the read.
 * @return the test likelihood for that triplet.
 */
private double testLikelihood(final int sampleIndex, final int alleleIndex, final int readIndex) {
    final int combined = 31 * (sampleIndex + 1) + 101 * alleleIndex + 1009 * readIndex;
    return -Math.abs(combined);
}
/** Random generator obtained from {@link GenomeAnalysisEngine#getRandomGenerator()}. */
private final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
/**
 * Checks, for every sample, that its likelihood matrix has the expected dimensions, exposes the
 * expected alleles and reads by index, and holds {@code 0.0} in every cell.
 *
 * @param alleles the expected alleles.
 * @param samples the expected samples.
 * @param sampleToReads the expected reads of each sample.
 * @param result the read-likelihoods instance under test.
 */
private void testLikelihoodMatrixQueries(final AlleleList<Allele> alleles, final SampleList samples,
                                         final Map<String,List<GATKSAMRecord>> sampleToReads, ReadLikelihoods<Allele> result) {
    for (final String sample : SampleListUtils.asList(samples)) {
        final int sampleIndex = result.sampleIndex(sample);
        final ReadLikelihoods.Matrix<Allele> matrix = result.sampleMatrix(sampleIndex);
        final List<GATKSAMRecord> expectedReads = sampleToReads.get(sample);
        final int expectedReadCount = expectedReads.size();
        Assert.assertEquals(matrix.alleleCount(), alleles.alleleCount());
        Assert.assertEquals(matrix.readCount(), expectedReadCount);
        int a = 0;
        while (a < matrix.alleleCount()) {
            Assert.assertEquals(matrix.alleleAt(a),alleles.alleleAt(a));
            int r = 0;
            while (r < expectedReadCount) {
                Assert.assertEquals(matrix.readAt(r),expectedReads.get(r));
                Assert.assertEquals(matrix.get(a, r), 0.0);
                r++;
            }
            a++;
        }
    }
}
/**
 * Checks that every expected allele has a non-negative index in the instance under test, that
 * no two alleles share an index, and that the index points at the same allele object in the
 * expected list.
 *
 * @param alleles the expected alleles.
 * @param result the read-likelihoods instance under test.
 */
private void testAlleleQueries(final AlleleList<Allele> alleles, ReadLikelihoods<Allele> result) {
    final Set<Integer> usedIndices = new HashSet<>();
    for (final Allele allele : AlleleListUtils.asList(alleles)) {
        final int alleleIndex = result.alleleIndex(allele);
        Assert.assertTrue(alleleIndex >= 0);
        // add(...) returns false when the index is already present, i.e. when it is shared.
        Assert.assertTrue(usedIndices.add(alleleIndex));
        Assert.assertSame(allele,alleles.alleleAt(alleleIndex));
    }
}
/**
 * Checks that every expected sample has a distinct, non-negative index in the instance under
 * test, and that the reads reported for it are the expected objects, in the expected order,
 * each found at its own position by {@code readIndex}.
 *
 * @param samples the expected samples.
 * @param reads the expected reads of each sample.
 * @param result the read-likelihoods instance under test.
 */
private void testSampleQueries(final SampleList samples, Map<String, List<GATKSAMRecord>> reads,
                               final ReadLikelihoods<Allele> result) {
    final Set<Integer> usedIndices = new HashSet<>(samples.sampleCount());
    for (final String sample : SampleListUtils.asList(samples)) {
        final int sampleIndex = result.sampleIndex(sample);
        Assert.assertTrue(sampleIndex >= 0);
        // add(...) returns false when the index is already present, i.e. when it is shared.
        Assert.assertTrue(usedIndices.add(sampleIndex));

        final List<GATKSAMRecord> actualReads = result.sampleReads(sampleIndex);
        final Set<GATKSAMRecord> actualReadSet = new HashSet<>(actualReads);
        final List<GATKSAMRecord> expectedReads = reads.get(sample);
        final Set<GATKSAMRecord> expectedReadSet = new HashSet<>(expectedReads);
        Assert.assertEquals(actualReadSet,expectedReadSet);

        final int actualReadCount = actualReads.size();
        int r = 0;
        while (r < actualReadCount) {
            final GATKSAMRecord actualRead = actualReads.get(r);
            Assert.assertSame(actualRead, expectedReads.get(r));
            final int readIndex = result.readIndex(sampleIndex, actualRead);
            Assert.assertEquals(readIndex,r);
            r++;
        }
    }
}
/**
 * Generates a random allele list for a test case.
 * <p>
 * When a reference is requested, a randomly chosen allele is replaced by a reference allele
 * with the same bases. The test is skipped when the indexed list ends up with fewer alleles
 * than were generated.
 * </p>
 *
 * @param alleleCount number of alleles to generate.
 * @param hasReference whether one of the alleles must be a reference allele.
 * @return the generated allele list.
 * @throws SkipException if the indexed list's allele count differs from the number generated.
 */
private AlleleList<Allele> alleleList(final int alleleCount, final boolean hasReference) {
    final Allele[] generated = AlleleListUnitTester.generateRandomAlleles(alleleCount,100);
    if (hasReference) {
        final int chosen = rnd.nextInt(alleleCount);
        final byte[] bases = generated[chosen].getBases();
        generated[chosen] = Allele.create(bases,true);
    }
    final AlleleList<Allele> indexed = new IndexedAlleleList<>(generated);
    if (indexed.alleleCount() == generated.length) {
        return indexed;
    }
    throw new SkipException("repeated alleles, should be infrequent");
}
// Artificial SAM header whose sequence dictionary is used to build locParser.
// NOTE(review): the meaning of the (10, 0, 1000) arguments is defined by ArtificialSAMUtils - confirm there.
private SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader(10, 0, 1000);
// Genome-location parser built on the sequence dictionary of SAM_HEADER.
final GenomeLocParser locParser = new GenomeLocParser(SAM_HEADER.getSequenceDictionary());
// Read-count scenarios: each row holds one entry per sample, giving that sample's number of reads.
private int[][] READ_COUNTS = new int[][] {
{},
{ 100 },
{ 0 },
{ 0, 0, 0 },
{ 1, 0, 1 },
{ 100, 10 , 100},
{ 1000, 10, 100, 20, 23 }
};
// Allele-count scenarios combined with READ_COUNTS by the data providers.
private int[] ALLELE_COUNTS = new int[] { 0, 1, 2, 3, 10, 20 };
/**
 * Provides every combination of {@code READ_COUNTS} and {@code ALLELE_COUNTS}, each one first
 * without and then with a reference allele.
 *
 * @return rows laid out as {read-counts, allele-count, has-reference}.
 */
@DataProvider(name="readCountsAndAlleleCountData")
public Object[][] readCountsAndAlleleCountData() {
    final Object[][] rows = new Object[READ_COUNTS.length * ALLELE_COUNTS.length * 2][];
    int next = 0;
    for (int i = 0; i < READ_COUNTS.length; i++) {
        final int[] readCounts = READ_COUNTS[i];
        for (int j = 0; j < ALLELE_COUNTS.length; j++) {
            final int alleleCount = ALLELE_COUNTS[j];
            rows[next] = new Object[]{ readCounts, alleleCount, false};
            rows[next + 1] = new Object[]{ readCounts, alleleCount, true};
            next += 2;
        }
    }
    return rows;
}
/**
 * Provides the rows of {@link #readCountsAndAlleleCountData()} except those asking for a
 * reference allele when the allele count is zero.
 *
 * @return the filtered rows, in their original order.
 */
@DataProvider(name="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
public Object[][] readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference() {
    final Object[][] allRows = readCountsAndAlleleCountData();
    final List<Object[]> kept = new ArrayList<>(allRows.length);
    for (final Object[] row : allRows) {
        // A reference allele cannot be placed in an empty allele list.
        final boolean referenceWithoutAlleles = row[2].equals(true) && row[1].equals(0);
        if (referenceWithoutAlleles) {
            continue;
        }
        kept.add(row);
    }
    return kept.toArray(new Object[kept.size()][]);
}
/**
 * Provides the rows of {@link #readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference()}
 * whose read counts add up to more than zero.
 *
 * @return the filtered rows, in their original order.
 */
@DataProvider(name="readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference")
public Object[][] readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference() {
    final Object[][] candidates = readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference();
    final List<Object[]> kept = new ArrayList<>(candidates.length);
    for (final Object[] row : candidates) {
        final long totalReadCount = MathUtils.sum((int[]) row[0]);
        if (totalReadCount <= 0) {
            continue;
        }
        kept.add(row);
    }
    return kept.toArray(new Object[kept.size()][]);
}
/**
 * Creates a sample list with one sample per entry in the read-count array, named
 * {@code SAMPLE_0}, {@code SAMPLE_1}, and so on.
 *
 * @param readCounts per-sample read counts; only the array length is used here.
 * @return the sample list.
 */
private SampleList sampleList(final int[] readCounts) {
    final int sampleCount = readCounts.length;
    final List<String> names = new ArrayList<>(sampleCount);
    while (names.size() < sampleCount) {
        names.add("SAMPLE_" + names.size());
    }
    return new IndexedSampleList(names);
}
}

View File

@ -347,7 +347,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testHomRefAlt() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, 2);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));

View File

@ -51,6 +51,7 @@ import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
@ -70,7 +71,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testBasicConstruction() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20, 2);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
Assert.assertSame(band.getStartingVC(), vc);
Assert.assertEquals(band.getRef(), vc.getReference());
Assert.assertEquals(band.getGQLowerBound(), 10);
@ -85,8 +86,9 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testMinMedian() {
//TODO - might be better to make this test use a data provider?
final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
gb.alleles(vc.getAlleles());
int pos = vc.getStart();
band.add(pos++, gb.DP(10).GQ(11).PL(new int[]{0,11,100}).make());
@ -116,8 +118,9 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testBigGQIsCapped() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
gb.alleles(vc.getAlleles());
band.add(vc.getStart(), gb.DP(1000).GQ(1000).PL(new int[]{0,10,100}).make());
assertValues(band, 1000, 1000, 99, 99);
@ -125,7 +128,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBadAdd() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
band.add(vc.getStart() + 10, gb.DP(10).GQ(11).PL(new int[]{0,10,100}).make());
@ -155,7 +158,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test(dataProvider = "ContiguousData")
public void testIsContiguous(final String contig, final int pos, final boolean expected) {
final HomRefBlock band = new HomRefBlock(vc, 10, 20,2);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
final VariantContext testVC = new VariantContextBuilder(vc).chr(contig).start(pos).stop(pos).make();
Assert.assertEquals(band.isContiguous(testVC), expected);
}

View File

@ -45,9 +45,12 @@ import java.util.*;
*/
public class PerReadAlleleLikelihoodMap {
/** A set of all of the alleles, so we can efficiently determine if an allele is already present */
private final Set<Allele> allelesSet = new HashSet<>();
private final Map<Allele,Integer> allelesSet = new HashMap<>();
/** A list of the unique alleles, as an ArrayList so we can call get(i) efficiently */
protected final List<Allele> alleles = new ArrayList<>();
protected final Map<GATKSAMRecord, Map<Allele, Double>> likelihoodReadMap = new LinkedHashMap<>();
public PerReadAlleleLikelihoodMap() { }
@ -64,6 +67,10 @@ public class PerReadAlleleLikelihoodMap {
if ( likelihood == null ) throw new IllegalArgumentException("Likelihood cannot be null");
if ( likelihood > 0.0 ) throw new IllegalArgumentException("Likelihood must be negative (L = log(p))");
if (!allelesSet.containsKey(a)) {
allelesSet.put(a,alleles.size());
alleles.add(a);
}
Map<Allele,Double> likelihoodMap = likelihoodReadMap.get(read);
if (likelihoodMap == null){
// LinkedHashMap will ensure iterating through alleles will be in consistent order
@ -73,10 +80,7 @@ public class PerReadAlleleLikelihoodMap {
likelihoodMap.put(a,likelihood);
if (!allelesSet.contains(a)) {
allelesSet.add(a);
alleles.add(a);
}
}
public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) {
@ -198,7 +202,7 @@ public class PerReadAlleleLikelihoodMap {
* @return the log10 likelihood that this read matches this allele
*/
public double getLikelihoodAssociatedWithReadAndAllele(final GATKSAMRecord read, final Allele allele){
if (!allelesSet.contains(allele) || !likelihoodReadMap.containsKey(read))
if (!allelesSet.containsKey(allele) || !likelihoodReadMap.containsKey(read))
return 0.0;
return likelihoodReadMap.get(read).get(allele);
@ -381,7 +385,7 @@ public class PerReadAlleleLikelihoodMap {
* @return a non-null unmodifiable set
*/
public Set<Allele> getAllelesSet() {
return Collections.unmodifiableSet(allelesSet);
return Collections.unmodifiableSet(allelesSet.keySet());
}
/**

View File

@ -26,20 +26,9 @@
package org.broadinstitute.gatk.utils;
import htsjdk.tribble.Tribble;
import htsjdk.tribble.util.TabixUtils;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.spi.LoggingEvent;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.gatk.utils.commandline.CommandLineUtils;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.crypt.CryptUtils;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.io.IOUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
import htsjdk.tribble.util.TabixUtils;
import htsjdk.variant.bcf2.BCF2Codec;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
@ -47,6 +36,17 @@ import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.commandline.CommandLineUtils;
import org.broadinstitute.gatk.utils.crypt.CryptUtils;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.io.IOUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
import org.testng.Assert;
import org.testng.Reporter;
import org.testng.SkipException;
@ -132,6 +132,7 @@ public abstract class BaseTest {
protected static final String publicTestDirRoot = publicTestDir.replace(publicTestDirRelative, "");
public static final String keysDataLocation = validationDataLocation + "keys/";
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
public static final String exampleFASTA = publicTestDir + "exampleFASTA.fasta";