Merge pull request #675 from broadinstitute/vrr_hc_omniploidy_general_likelihood_calculation

HC omniploidy general likelihood calculation

Stories:

   https://www.pivotaltracker.com/story/show/72090992
   https://www.pivotaltracker.com/story/show/72091202
This commit is contained in:
Valentin Ruano Rubio 2014-08-19 14:43:49 -04:00
commit 86cb88e121
78 changed files with 7948 additions and 735 deletions

View File

@ -145,7 +145,7 @@ public class StandardCallerArgumentCollection implements Cloneable {
*/
@Hidden
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
public AFCalcFactory.Calculation AFmodel = AFCalcFactory.Calculation.getDefaultModel();
public AFCalcFactory.Calculation requestedAlleleFrequencyCalculationModel;
@Hidden
@Argument(shortName = "logExactCalls", doc="x", required=false)

View File

@ -0,0 +1,171 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import java.util.List;
/**
 * Factory of mappers that present a {@link org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods.Matrix}
 * through a permutation (typically a subset) of its original alleles.
 *
 * @param <A> the allele type.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public abstract class AlleleLikelihoodMatrixMapper<A extends Allele> {

    /**
     * Single shared identity mapper. It keeps no state, so the same instance is returned for every allele type.
     */
    @SuppressWarnings("unchecked")
    private static final AlleleLikelihoodMatrixMapper AS_IS = new AlleleLikelihoodMatrixMapper<Allele>() {
        @Override
        public ReadLikelihoods.Matrix<Allele> map(final ReadLikelihoods.Matrix original) {
            return original;
        }
    };

    /**
     * Maps the given matrix according to this mapper.
     *
     * @param original the matrix to map.
     * @return the mapped matrix; depending on the mapper this is either {@code original} itself or a view
     *         that delegates to it.
     */
    public abstract ReadLikelihoods.Matrix<A> map(final ReadLikelihoods.Matrix<A> original);

    /**
     * Creates a mapper for the given allele-list permutation.
     *
     * <p>When the permutation reports itself as non-permuted the trivial pass-through mapper is returned;
     * otherwise a general mapper that translates allele indices is built.</p>
     *
     * @param permutation the requested permutation.
     * @param <A> the allele type.
     *
     * @throws IllegalArgumentException if {@code permutation} is {@code null}.
     *
     * @return never {@code null}.
     */
    public static <A extends Allele> AlleleLikelihoodMatrixMapper<A> newInstance(final AlleleListPermutation<A> permutation) {
        if (permutation == null) {
            throw new IllegalArgumentException("the permutation must not be null");
        }
        if (permutation.isNonPermuted()) {
            return asIs();
        }
        return general(permutation);
    }

    /**
     * Returns the trivial mapper, whose {@code map} hands back the very matrix it was given.
     *
     * @param <A> the allele type.
     * @return never {@code null}.
     */
    @SuppressWarnings("unchecked")
    private static <A extends Allele> AlleleLikelihoodMatrixMapper<A> asIs() {
        return AS_IS;
    }

    /**
     * Builds a mapper that works with any permutation, without making assumptions about it.
     *
     * <p>The matrices it produces are views: read-related queries and all likelihood reads and writes are
     * forwarded to the original matrix after translating the allele index with the permutation.</p>
     *
     * @param permutation the permutation to apply to the requested matrix wrappers.
     * @param <A> the allele type.
     * @return never {@code null}.
     */
    private static <A extends Allele> AlleleLikelihoodMatrixMapper<A> general(final AlleleListPermutation<A> permutation) {
        return new AlleleLikelihoodMatrixMapper<A>() {
            @Override
            public ReadLikelihoods.Matrix<A> map(final ReadLikelihoods.Matrix<A> original) {
                return new ReadLikelihoods.Matrix<A>() {

                    // ---- allele-side queries: answered by the permutation ----

                    @Override
                    public List<A> alleles() {
                        return permutation.toList();
                    }

                    @Override
                    public int alleleCount() {
                        return permutation.toSize();
                    }

                    @Override
                    public int alleleIndex(final A allele) {
                        return permutation.alleleIndex(allele);
                    }

                    @Override
                    public A alleleAt(final int alleleIndex) {
                        final int sourceIndex = permutation.fromIndex(alleleIndex);
                        return original.alleleAt(sourceIndex);
                    }

                    // ---- read-side queries: forwarded untouched to the original matrix ----

                    @Override
                    public List<GATKSAMRecord> reads() {
                        return original.reads();
                    }

                    @Override
                    public int readCount() {
                        return original.readCount();
                    }

                    @Override
                    public int readIndex(GATKSAMRecord read) {
                        return original.readIndex(read);
                    }

                    @Override
                    public GATKSAMRecord readAt(final int readIndex) {
                        return original.readAt(readIndex);
                    }

                    // ---- likelihood access: allele index translated, then forwarded ----

                    @Override
                    public double get(final int alleleIndex, final int readIndex) {
                        final int sourceIndex = permutation.fromIndex(alleleIndex);
                        return original.get(sourceIndex, readIndex);
                    }

                    @Override
                    public void set(final int alleleIndex, final int readIndex, final double value) {
                        final int sourceIndex = permutation.fromIndex(alleleIndex);
                        original.set(sourceIndex, readIndex, value);
                    }

                    @Override
                    public void copyAlleleLikelihoods(final int alleleIndex, final double[] dest, final int offset) {
                        final int sourceIndex = permutation.fromIndex(alleleIndex);
                        original.copyAlleleLikelihoods(sourceIndex, dest, offset);
                    }
                };
            }
        };
    }
}

View File

@ -47,16 +47,16 @@
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.samtools.SAMUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACcounts;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACset;
import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.vcf.VCFConstants;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import java.util.*;
@ -424,18 +424,9 @@ public abstract class GeneralPloidyGenotypeLikelihoods {
*/
public static int[] getAlleleCountFromPLIndex(final int nAlleles, final int numChromosomes, final int PLindex) {
// todo - another brain-dead inefficient implementation, can do much better by computing in closed form
final SumIterator iterator = new SumIterator(nAlleles,numChromosomes);
while (iterator.hasNext()) {
final int[] plVec = iterator.getCurrentVector();
if (iterator.getLinearIndex() == PLindex)
return plVec;
iterator.next();
}
return null;
final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(numChromosomes, nAlleles);
final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(PLindex);
return alleleCounts.alleleCountsByIndex(nAlleles - 1);
}
/*

View File

@ -0,0 +1,707 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.utils.MathUtils;
import java.util.Arrays;
/**
* <p>Collection of allele counts for a genotype. It encompasses what alleles are present in the genotype and in what number.</p>
*
* <p>Alleles are represented herein by their indices running from <b>0</b> to <b>N-1</b> where <i>N</i> is the number of alleles.</p>
*
* <p>Each allele present in a genotype (count != 0) has a <i>rank</i>, that is the 0-based ordinal of
* that allele amongst the ones present in the genotype as sorted by their index.</p>
*
* <p>For example:</p>
*
* <p><b>0/0/2/2</b> has two alleles with indices <b>0</b> and <b>2</b>, both with count 2.
* The rank of <b>0</b> is <i>0</i> whereas the rank of <b>2</b> is <i>1</i>.</p>
*
* <p><b>2/4/4/7</b> has three alleles with indices <b>2</b>, <b>4</b> and <b>7</b>. <b>2</b> and <b>7</b> have count 1 whereas <b>4</b> has count 2.
* The rank of <b>2</b> is <i>0</i>, the rank of <b>4</b> is <i>1</i>. and the rank of <b>7</b> is <i>2</i>.</p>
*
* <p>In contrast, in both examples above both <b>3</b> and <b>10</b> (and many others) are absent thus they have no rank (represented by <i>-1</i> whenever applies).</p>
*
* <p>{@link GenotypeAlleleCounts} instances also have their own index (returned by {@link #index() index()}), which indicates their 0-based ordinal within the possible genotype combinations with the same ploidy.</p>
*
* <p>For example, for ploidy 3:</p>
*
* <table>
* <th>Index</th><th>Genotype</th>
* <tr><td>0</td><td><b>0/0/0</b></td></tr>
* <tr><td>1</td><td><b>0/0/1</b></td></tr>
* <tr><td>2</td><td><b>0/1/1</b></td></tr>
* <tr><td>3</td><td><b>1/1/1</b></td></tr>
* <tr><td>4</td><td><b>0/0/2</b></td></tr>
* <tr><td>5</td><td><b>0/1/2</b></td></tr>
* <tr><td>6</td><td><b>1/1/2</b></td></tr>
* <tr><td>7</td><td><b>0/2/2</b></td></tr>
* <tr><td>8</td><td><b>1/2/2</b></td></tr>
* <tr><td>9</td><td><b>2/2/2</b></td></tr>
* <tr><td>10</td><td><b>0/0/3</b></td></tr>
* <tr><td>11</td><td><b>0/1/3</b></td></tr>
* <tr><td>12</td><td><b>1/1/3</b></td></tr>
* <tr><td>13</td><td><b>0/2/3</b></td></tr>
* <tr><td>14</td><td><b>1/2/3</b></td></tr>
* <tr><td>15</td><td><b>2/2/3</b></td></tr>
* <tr><td>16</td><td><b>0/3/3</b></td></tr>
* <tr><td>...</td><td>...</td></tr>
* </table>
*
* The total number of possible genotypes is only bounded by the maximum allele index.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class GenotypeAlleleCounts implements Comparable<GenotypeAlleleCounts>, Cloneable {
private double log10CombinationCount;
/**
* The ploidy of the genotype.
*/
private final int ploidy;
/**
* Sorted array of integer pairs as described in {@link #GenotypeAlleleCounts(int, int, int...)}.
*/
private int[] sortedAlleleCounts;
/**
* Number of different alleles in the genotype.
*/
private int distinctAlleleCount;
/**
* Index of this genotype within genotypes of the same ploidy.
*/
private int index;
/**
 * Creates a new unphased genotype.
 *
 * <p>The caller is trusted to pass a well-formed, sorted allele-count array: no checks are performed,
 * for the sake of performance. The array is stored as given, without making a copy.</p>
 *
 * <p>
 *     {@code sortedAlleleCounts} lists the indices of the alleles included in the unphased genotype
 *     together with their frequency in the genotype, as consecutive pairs in a single array:<br/>
 *
 *     <pre> [allele_1,freq_1,allele_2,freq_2, ... , allele_i, freq_i, ... , allele_n, freq_n]</pre>
 *
 *     <br/>
 *     No entry can have frequency == 0 (these must be omitted) and entries are sorted by allele index
 *     without repetitions, so that if <i>i &lt; j</i> then <i>allele_i &lt; allele_j</i>.
 * </p>
 *
 * <p>
 *     The {@code ploidy} provided must be equal to the sum of all frequencies in {@code sortedAlleleCounts}.
 * </p>
 *
 * @param ploidy the genotype ploidy.
 * @param index the genotype index.
 * @param sortedAlleleCounts sorted allele counts following the restrictions above.
 */
private GenotypeAlleleCounts(final int ploidy, final int index, final int... sortedAlleleCounts) {
    this.ploidy = ploidy;
    this.index = index;
    this.sortedAlleleCounts = sortedAlleleCounts;
    // Two array slots (allele index, frequency) per distinct allele.
    this.distinctAlleleCount = sortedAlleleCounts.length / 2;
    // -1 marks the combination count as "not calculated yet".
    this.log10CombinationCount = -1;
}
/**
 * Returns the log10 of the number of possible allele combinations that would give rise to this allele count.
 *
 * <p>The value is calculated on first request and then cached; {@code -1} is used internally to mark
 * the cache as empty.</p>
 *
 * @return the log10 of a multinomial coefficient, thus expected to be 0 or greater.
 */
public double log10CombinationCount() {
    if (log10CombinationCount == -1) {
        log10CombinationCount = calculateLog10CombinationCount();
    }
    return log10CombinationCount;
}
/**
 * Calculates the log10 combination count from scratch.
 *
 * <p>For ploidy 0 or 1 the result is directly 0. Otherwise the frequencies of the alleles present are
 * extracted from the odd positions of the sorted allele-count array and passed, together with the ploidy,
 * to {@link MathUtils#log10MultinomialCoefficient}.</p>
 *
 * @return the log10 of a multinomial coefficient, thus expected to be 0 or greater.
 */
private double calculateLog10CombinationCount() {
    if (ploidy > 1) {
        final int[] alleleFrequencies = new int[distinctAlleleCount];
        for (int rank = 0; rank < distinctAlleleCount; rank++) {
            // The frequency of the allele with the given rank sits right after its index.
            alleleFrequencies[rank] = sortedAlleleCounts[(rank << 1) + 1];
        }
        return MathUtils.log10MultinomialCoefficient(ploidy, alleleFrequencies);
    }
    return 0;
}
/**
 * Gives the ploidy this genotype was created with.
 *
 * @return 0 or greater.
 */
public int ploidy() {
    return this.ploidy;
}
/**
 * Increases the allele counts a number of times, i.e. advances this instance {@code times} genotypes
 * forward by repeatedly calling {@link #increase()}.
 *
 * <p>
 *     This method must not be invoked on cached genotype-allele-counts that are meant to remain constant,
 *     such as the ones contained in {@link GenotypeLikelihoodCalculators#genotypeTableByPloidy}.
 * </p>
 *
 * @param times the number of times to increase; 0 leaves this instance unchanged.
 *
 * @throws IllegalArgumentException if {@code times} is negative.
 */
protected void increase(final int times) {
    // Enforce the documented contract: a negative count used to be silently ignored.
    if (times < 0) {
        throw new IllegalArgumentException("the number of times must not be negative: " + times);
    }
    for (int i = 0; i < times; i++) {
        increase();
    }
}
/**
 * Updates the genotype counts, in place, to match the next genotype.
 *
 * <p>
 *     The sorted allele-count array is modified directly (and only reallocated when it is too short), the
 *     genotype index is incremented by one and the cached log10 combination count is invalidated. When the
 *     genotype has no alleles at all (ploidy zero) nothing is changed, not even the index.
 * </p>
 *
 * <p>
 *     This method must not be invoked on cached genotype-allele-counts that are meant to remain constant,
 *     such as the ones contained in {@link GenotypeLikelihoodCalculators#genotypeTableByPloidy}
 * </p>
 */
protected void increase() {
    // if the ploidy is zero there is only one possible genotype.
    if (distinctAlleleCount == 0) {
        return;
    }

    // Worth making this case faster.
    if (distinctAlleleCount == 1) {
        if (ploidy == 1) { // A -> B, D -> E etc...
            sortedAlleleCounts[0]++;
        } else { // AAAAA -> AAAAB, DDD -> AAE etc...
            if (sortedAlleleCounts.length < 4) {
                sortedAlleleCounts = Arrays.copyOf(sortedAlleleCounts, 4);
            }
            // The second pair must be written first as it reads the current allele index at [0].
            sortedAlleleCounts[2] = sortedAlleleCounts[0] + 1;
            sortedAlleleCounts[3] = 1;
            sortedAlleleCounts[0] = 0;
            sortedAlleleCounts[1] = ploidy - 1;
            distinctAlleleCount = 2;
        }
    } else {
        // Now, all the following ifs are just the way to avoid working with dynamically sizing List<int[]>
        // as the final size of the resulting sorted-allele-counts array varies depending on the situation.
        // This is considerably faster and the logic complexity would not be that different actually so it is
        // worth the if indentations.
        //
        // Notice that at this point distinctAlleleCount >= 2 thus sortedAlleleCounts.length >= 4.
        //
        // We only need to look at the two lowest allele indices to decide what to do.
        final int allele0 = sortedAlleleCounts[0];
        final int freq0 = sortedAlleleCounts[1];
        final int allele1 = sortedAlleleCounts[2];
        final int allele0Plus1 = allele0 + 1;
        final boolean allele0And1AreConsecutive = allele0Plus1 == allele1;
        // Only this many leading positions are meaningful; the rest of the array contains junk.
        final int sortedAlleleCountsLength = distinctAlleleCount << 1;

        if (freq0 == 1) { // in this case allele0 won't be present in the result and all its frequency should go to allele0 + 1.
            if (allele0And1AreConsecutive) { // need just to remove the first allele and add 1 to the frequency of the second (freq1 += 1).
                System.arraycopy(sortedAlleleCounts, 2, sortedAlleleCounts, 0, sortedAlleleCountsLength - 2); // shift left the first component away.
                sortedAlleleCounts[1]++; // freq1 has become freq0.
                distinctAlleleCount--;
            } else { // just need to mutate allele0 to allele0 + 1.
                sortedAlleleCounts[0] = allele0Plus1;
            }
        } else { // freq0 > 1 as per sortedAlleleCounts format restrictions. In this case allele0 will be mutated to '0' with frequency decreased by 1.
            if (allele0And1AreConsecutive) { // we don't need to add a component for allele0 + 1 since it already exists.
                sortedAlleleCounts[0] = 0;
                sortedAlleleCounts[1] = freq0 - 1;
                sortedAlleleCounts[3]++;
            } else { // we need to insert allele0 + 1 in the sorted-allele-counts array and give it frequency 1.
                if (sortedAlleleCounts.length < sortedAlleleCountsLength + 2) { // make room for the new component.
                    sortedAlleleCounts = Arrays.copyOf(sortedAlleleCounts, sortedAlleleCountsLength + 2);
                }
                // Shift every pair but the first one position to the right to open a gap at [2],[3].
                System.arraycopy(sortedAlleleCounts, 2, sortedAlleleCounts, 4, sortedAlleleCountsLength - 2);
                sortedAlleleCounts[0] = 0;
                sortedAlleleCounts[1] = freq0 - 1;
                sortedAlleleCounts[2] = allele0Plus1;
                sortedAlleleCounts[3] = 1;
                distinctAlleleCount++;
            }
        }
    }
    index++;
    // The cached combination count belongs to the previous genotype; force its recalculation.
    log10CombinationCount = -1;
}
/**
 * Calculates the next genotype in likelihood indexing order.
 *
 * <p>
 * This instance is not modified. When there are no alleles at all (ploidy zero) there is only one possible
 * genotype and this same instance is returned; otherwise a new instance with {@code index + 1} is created.
 * </p>
 *
 * @return never {@code null}.
 */
protected GenotypeAlleleCounts next() {
    // If the ploidy is zero there is only one possible genotype.
    if (distinctAlleleCount == 0) {
        return this;
    }

    // Single-allele genotypes are worth a fast path.
    if (distinctAlleleCount == 1) {
        if (ploidy == 1) { // A -> B , D -> E etc...
            return new GenotypeAlleleCounts(1, index + 1, sortedAlleleCounts[0] + 1, 1);
        } else { // AAAAA -> AAAAB, DDD -> AAE etc...
            return new GenotypeAlleleCounts(ploidy, index + 1, 0, ploidy - 1, sortedAlleleCounts[0] + 1, 1);
        }
    }

    // All the following ifs are just a way to avoid working with a dynamically sized list,
    // as the final size of the new sorted-allele-counts array varies depending on the situation.
    //
    // At this point distinctAlleleCount >= 2, thus at least the first 4 positions of sortedAlleleCounts are valid.
    //
    // We only need to look at the two lowest allele indices to decide what to do.
    final int allele0 = sortedAlleleCounts[0];
    final int freq0 = sortedAlleleCounts[1];
    final int allele1 = sortedAlleleCounts[2];
    final int allele0Plus1 = allele0 + 1;
    final boolean allele0And1AreConsecutive = allele0Plus1 == allele1;
    final int[] newSortedAlleleCounts;
    // Only the first distinctAlleleCount * 2 positions are meaningful; the rest of the array may contain junk.
    final int sortedAlleleCountsLength = distinctAlleleCount << 1;

    if (freq0 == 1) { // allele0 won't be present in the result; its single copy goes to allele0 + 1.
        if (allele0And1AreConsecutive) { // drop the first component and add 1 to the frequency of the second.
            newSortedAlleleCounts = Arrays.copyOfRange(sortedAlleleCounts, 2, sortedAlleleCountsLength);
            newSortedAlleleCounts[1]++;
        } else { // just need to mutate allele0 to allele0 + 1; its frequency is already 1.
            newSortedAlleleCounts = Arrays.copyOf(sortedAlleleCounts, sortedAlleleCountsLength);
            newSortedAlleleCounts[0] = allele0Plus1;
        }
    } else { // freq0 > 1: allele0 is mutated to '0' with its frequency decreased by 1.
        if (allele0And1AreConsecutive) { // no new component needed, allele0 + 1 already exists.
            // Copy only the valid prefix. A plain clone() would also carry over trailing junk positions and,
            // since the new instance is built from the array alone, could corrupt the resulting genotype
            // (presumably the constructor derives the distinct allele count from the array length).
            newSortedAlleleCounts = Arrays.copyOf(sortedAlleleCounts, sortedAlleleCountsLength);
            newSortedAlleleCounts[0] = 0;
            newSortedAlleleCounts[1] = freq0 - 1;
            newSortedAlleleCounts[3]++;
        } else { // we need to insert allele0 + 1 as a new component with frequency 1.
            newSortedAlleleCounts = new int[sortedAlleleCountsLength + 2];
            newSortedAlleleCounts[0] = 0;
            newSortedAlleleCounts[1] = freq0 - 1;
            newSortedAlleleCounts[2] = allele0Plus1;
            newSortedAlleleCounts[3] = 1;
            System.arraycopy(sortedAlleleCounts, 2, newSortedAlleleCounts, 4, sortedAlleleCountsLength - 2);
        }
    }
    return new GenotypeAlleleCounts(ploidy, index + 1, newSortedAlleleCounts);
}
/**
 * Tells how many different alleles take part in this genotype.
 *
 * @return 0 or greater.
 */
public int distinctAlleleCount() {
    return this.distinctAlleleCount;
}
/**
 * Looks up the allele index that occupies a given rank in this genotype.
 *
 * @param rank the query rank.
 *
 * @throws IllegalArgumentException if {@code rank} is outside the valid range [0,{@link #distinctAlleleCount()}).
 *
 * @return 0 or greater.
 */
public int alleleIndexAt(final int rank) {
    final boolean rankIsValid = rank >= 0 && rank < distinctAlleleCount;
    if (!rankIsValid) {
        throw new IllegalArgumentException("the requested rank " + rank + " is out of range [0," + distinctAlleleCount + ")");
    }
    // Allele indices sit at the even positions of the sorted-allele-counts array.
    return sortedAlleleCounts[2 * rank];
}
/**
 * Finds the rank of an allele within this genotype given its index.
 *
 * @param index the target index.
 *
 * @throws IllegalArgumentException if {@code index} is less than 0. Indices can be arbitrarily large.
 *
 * @return -1 or less if the allele index is not present in the genotype, 0 to {@link #distinctAlleleCount()} - 1
 *  otherwise. When negative, {@code - result - 1} is the position within
 *  {@code [0,distinctAlleleCount()]} where that index would be inserted.
 */
public int alleleRankFor(final int index) {
    if (index >= 0) {
        return alleleIndexToRank(index, 0, distinctAlleleCount);
    }
    throw new IllegalArgumentException("the index must be 0 or greater");
}
/**
 * Generates a string that represents the unphased genotype with these allele counts.
 *
 * <p>
 *     In this string allele calls appear in allele index order with as many repeats as copies of each allele. So
 *     for example:<br/>
 *     <pre>
 *         0         # haploid reference.
 *         0/0       # typical diploid calls
 *         0/1
 *         1/1
 *         0/0/1/3/3 # pentaploid with two ref, one first alt. and two third alt. alleles
 *     </pre>
 * </p>
 *
 * @return never {@code null}, the empty string if the ploidy is 0.
 */
public String toUnphasedGenotypeString() {
    if (ploidy == 0) {
        return "";
    }
    final StringBuilder sb = new StringBuilder(distinctAlleleCount * 3);
    // Each distinct allele takes two positions (index, count) in sortedAlleleCounts, so the loop bound
    // is twice the distinct allele count; bounding it by distinctAlleleCount would skip alleles.
    final int sortedAlleleCountsLength = distinctAlleleCount << 1;
    for (int i = 0; i < sortedAlleleCountsLength; i += 2) {
        final int alleleIndex = sortedAlleleCounts[i];
        final int alleleCount = sortedAlleleCounts[i + 1];
        for (int j = 0; j < alleleCount; j++) {
            sb.append(alleleIndex).append('/');
        }
    }
    // Drop the trailing separator.
    sb.setLength(sb.length() - 1);
    return sb.toString();
}
@Override
public String toString() {
    // The unphased genotype representation is used as the general string form; this may change in the future.
    final String unphasedRepresentation = toUnphasedGenotypeString();
    return unphasedRepresentation;
}
/**
 * {@inheritDoc}
 *
 * <p>Only other {@link GenotypeAlleleCounts} instances can be equal to this one; the actual comparison is
 * delegated to {@link #equals(GenotypeAlleleCounts)}.</p>
 */
@Override
public boolean equals(final Object o) {
    return o instanceof GenotypeAlleleCounts && equals((GenotypeAlleleCounts) o);
}
/**
 * Compares with another genotype.
 *
 * <p>Two genotypes are equal when they have the same ploidy and the same alleles with the same counts.</p>
 *
 * @param o the other genotype.
 * @return {@code true} iff {@code o} is not {@code null} and represents the same genotype as this one.
 */
public boolean equals(final GenotypeAlleleCounts o) {
    if (o == this) {
        return true;
    }
    if (o == null) {
        return false;
    }
    if (ploidy != o.ploidy || distinctAlleleCount != o.distinctAlleleCount) {
        return false;
    }
    // Only the first distinctAlleleCount * 2 positions of each array are meaningful; the backing arrays
    // may be longer and hold stale values, so comparing them whole could report equal genotypes as different.
    final int sortedAlleleCountsLength = distinctAlleleCount << 1;
    for (int i = 0; i < sortedAlleleCountsLength; i++) {
        if (sortedAlleleCounts[i] != o.sortedAlleleCounts[i]) {
            return false;
        }
    }
    return true;
}
/**
 * Gives the position of this genotype allele count among all possible genotypes with the same ploidy.
 *
 * @return 0 or greater.
 */
public int index() {
    return this.index;
}
/**
 * Compares two genotypes.
 *
 * <p>A genotype with larger ploidy is considered greater than one with a lower ploidy. If both genotypes have
 * the same ploidy, they are ordered by their {@link #index()}.</p>
 *
 * @param other genotype to compare to.
 *
 * @throws IllegalArgumentException if {@code other} is {@code null}.
 *
 * @return 0 if both genotypes are equivalent, &lt; 0 if this genotype is less than {@code other} and &gt; 0
 *  if this genotype is greater than {@code other}.
 */
@Override
public int compareTo(final GenotypeAlleleCounts other) {
    if (other == null) {
        throw new IllegalArgumentException("input genotype cannot be null");
    }
    if (other == this) {
        return 0;
    }
    // Plain subtraction is kept on purpose; it cannot overflow as long as ploidy and index are non-negative.
    return other.ploidy == ploidy ? index - other.index : ploidy - other.ploidy;
}
@Override
public int hashCode() {
    // Base-31 polynomial over (ploidy, index) with seed 1.
    int hash = 31 + ploidy;
    hash = hash * 31 + index;
    return hash;
}
/**
 * Implements binary search across allele indexes.
 *
 * <p>Only the ranks within {@code [from,to)} are examined.</p>
 *
 * @param index the target index.
 * @param from first inclusive possible rank.
 * @param to last exclusive possible rank.
 * @return the rank of the allele if present, -1 or less if the allele index is not in the genotype. In the
 *   latter case you can obtain the potential insertion point (within the interval [from,to]) as
 *   {@code -result - 1}.
 */
private int alleleIndexToRank(final int index, final int from, final int to) {
    int low = from;   // first rank still under consideration (inclusive).
    int high = to;    // last rank still under consideration (exclusive).
    while (low < high) {
        final int mid = (low + high) >>> 1;
        final int midIndex = sortedAlleleCounts[mid << 1];
        if (midIndex == index) {
            return mid;
        } else if (midIndex < index) {
            low = mid + 1;
        } else {
            // Keep the lower bound: restarting from rank 0 would leave the requested [from,to) window.
            high = mid;
        }
    }
    // Not found: low is where the index would need to be inserted.
    return -low - 1;
}
/**
 * Gives the number of copies of the allele at a given rank in the genotype (not the allele index itself).
 *
 * @param rank of the requested allele within the genotype.
 *
 * @throws IllegalArgumentException if {@code rank} is out of the valid range [0,{@link #distinctAlleleCount})
 *
 * @return 1 or greater.
 */
public int alleleCountAt(final int rank) {
    if (rank >= 0 && rank < distinctAlleleCount) {
        // Counts sit at the odd positions, right after their allele index.
        return sortedAlleleCounts[2 * rank + 1];
    }
    throw new IllegalArgumentException("the rank is out of range");
}
/**
 * Tells whether this genotype has at least one call on a particular allele index.
 *
 * @param index the target allele.
 *
 * @throws IllegalArgumentException if {@code index} is negative.
 *
 * @return {@code true} iff the genotype contains that allele index.
 */
public boolean containsAllele(final int index) {
    final int rank = alleleRankFor(index);
    // A negative rank encodes an insertion point, that is, the allele is absent.
    return rank >= 0;
}
/**
 * Gives the number of copies of an allele in the genotype given its index.
 *
 * @param index the target allele index.
 *
 * @throws IllegalArgumentException if {@code index} is negative.
 *
 * @return 0 if the allele is not present in the genotype, 1 or more otherwise.
 */
public int alleleCountFor(final int index) {
    final int rank = alleleRankFor(index);
    if (rank < 0) {
        return 0;
    }
    return alleleCountAt(rank);
}
/**
 * Builds an array with the count of every allele index from 0 up to a maximum.
 *
 * @param maximumAlleleIndex the maximum allele index required.
 * @throws IllegalArgumentException if {@code maximumAlleleIndex} is less than 0.
 * @return never {@code null}, an array of exactly {@code maximumAlleleIndex + 1} positions where the value at
 * each position is the count of the allele whose index equals that position.
 */
public int[] alleleCountsByIndex(final int maximumAlleleIndex) {
    if (maximumAlleleIndex >= 0) {
        final int[] countsByIndex = new int[maximumAlleleIndex + 1];
        copyAlleleCountsByIndex(countsByIndex, 0, 0, maximumAlleleIndex);
        return countsByIndex;
    }
    throw new IllegalArgumentException("the requested allele count cannot be less than 0");
}
/**
 * Writes into {@code dest}, starting at {@code offset}, the count of every allele index in
 * {@code [minimumAlleleIndex,maximumAlleleIndex]}, one position per index; absent alleles get a 0.
 */
private void copyAlleleCountsByIndex(final int[] dest, final int offset, final int minimumAlleleIndex, final int maximumAlleleIndex) {
    // Translate the requested index range into the rank range [firstRank,lastRank] of alleles actually present.
    // A negative rank encodes an insertion point; using it avoids scanning components outside the range.
    final int rankOfMinimum = alleleRankFor(minimumAlleleIndex);
    final int rankOfMaximum = alleleRankFor(maximumAlleleIndex);
    final int firstRank = rankOfMinimum >= 0 ? rankOfMinimum : -rankOfMinimum - 1;
    final int lastRank = rankOfMaximum >= 0 ? rankOfMaximum : -rankOfMaximum - 2;

    int pendingIndex = minimumAlleleIndex; // next allele index whose count has to be written.
    int destPosition = offset;             // where in dest that count goes.

    for (int rank = firstRank; rank <= lastRank; rank++) {
        final int componentOffset = rank << 1;
        final int presentAlleleIndex = sortedAlleleCounts[componentOffset];
        // Absent alleles between the previous component and this one count as 0.
        for (; pendingIndex < presentAlleleIndex; pendingIndex++) {
            dest[destPosition++] = 0;
        }
        // Now pendingIndex == presentAlleleIndex, so its count can be copied over.
        dest[destPosition++] = sortedAlleleCounts[componentOffset + 1];
        pendingIndex++;
    }

    // Requested indices beyond the last present allele are all 0.
    for (; pendingIndex <= maximumAlleleIndex; pendingIndex++) {
        dest[destPosition++] = 0;
    }
}
/**
 * Copies the sorted allele counts into an array.
 *
 * <p>
 *     Sorted allele counts are laid out as an even-sized array where even positions indicate the allele index and
 *     the following odd positions the number of copies of that allele in this genotype allele count:
 * </p>
 * <p><pre>
 *     [ allele_0, freq_0, allele_1, freq_1 ... ]
 * </pre></p>
 *
 * <p>
 *     With {@code offset} you can indicate an alternative first position in the destination array.
 * </p>
 *
 * @param dest where to copy the counts.
 * @param offset starting position.
 *
 * @throws IllegalArgumentException if {@code dest} is {@code null}, {@code offset} is less than 0
 *  or {@code dest} is not large enough considering the number of alleles present in this genotype
 *  allele counts and the {@code offset} provided. A total of
 *  <code>{@link #distinctAlleleCount()} * 2 positions</code>
 *  are required for the job.
 */
public void copyAlleleCounts(final int[] dest, final int offset) {
    if (dest == null) {
        throw new IllegalArgumentException("the destination cannot be null");
    }
    if (offset < 0) {
        throw new IllegalArgumentException("the offset cannot be negative");
    }
    final int sortedAlleleCountsLength = distinctAlleleCount << 1;
    // Compare against the remaining room rather than adding to offset: offset + length could overflow
    // for very large offsets and wrongly pass the check.
    if (sortedAlleleCountsLength > dest.length - offset) {
        throw new IllegalArgumentException("the input array does not have enough capacity");
    }
    System.arraycopy(sortedAlleleCounts, 0, dest, offset, sortedAlleleCountsLength);
}
/**
 * Creates the first possible genotype for a given total ploidy.
 *
 * @param ploidy the ploidy of the genotype.
 *
 * @throws java.lang.IllegalArgumentException if ploidy is less than 0.
 *
 * @return never {@code null}.
 */
protected static GenotypeAlleleCounts first(final int ploidy) {
    if (ploidy < 0) {
        throw new IllegalArgumentException("the ploidy must be 0 or greater");
    }
    // Ploidy 0 has no components at all; otherwise every copy is allele 0.
    return ploidy == 0
            ? new GenotypeAlleleCounts(0, 0)
            : new GenotypeAlleleCounts(ploidy, 0, 0, ploidy);
}
/**
 * Makes the next genotype in likelihood indexing order.
 *
 * @param g the original genotype.
 *
 * @throws IllegalArgumentException if {@code g} is {@code null}.
 *
 * @return never {@code null}.
 */
public static GenotypeAlleleCounts makeNextGenotype(final GenotypeAlleleCounts g) {
    if (g == null) {
        // The previous message ("the next genotype") did not say what was wrong.
        throw new IllegalArgumentException("the input genotype cannot be null");
    }
    return g.next();
}
/**
 * Gives the largest allele index present in the genotype.
 *
 * @return -1 if there are no alleles (ploidy == 0), 0 or greater otherwise.
 */
public int maximumAlleleIndex() {
    // Components are sorted by allele index, so the largest one is in the last component.
    return distinctAlleleCount == 0 ? -1 : sortedAlleleCounts[(distinctAlleleCount - 1) << 1];
}
/**
 * Gives the smallest allele index present in the genotype.
 *
 * @return -1 if there are no alleles (ploidy == 0), 0 or greater otherwise.
 */
public int minimumAlleleIndex() {
    // Components are sorted by allele index, so the smallest one is in the first component.
    return distinctAlleleCount == 0 ? -1 : sortedAlleleCounts[0];
}
/**
 * Makes an independent copy of this genotype.
 *
 * <p>Only the meaningful part of the sorted-allele-counts array (two positions per distinct allele) is copied.</p>
 *
 * @return never {@code null}.
 */
@Override
protected GenotypeAlleleCounts clone() {
    final int sortedAlleleCountsLength = distinctAlleleCount << 1;
    final int[] sortedAlleleCountsCopy = Arrays.copyOf(sortedAlleleCounts, sortedAlleleCountsLength);
    return new GenotypeAlleleCounts(ploidy, index, sortedAlleleCountsCopy);
}
}

View File

@ -0,0 +1,567 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.collections.IntMaxHeap;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
/**
* Helper to calculate genotype likelihoods given a ploidy and an allele count (number of possible distinct alleles).
*
* <p>
* Notice that for performance this class is thread-unsafe and so it cannot be shared between threads in a multi-thread run.
* </p>
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class GenotypeLikelihoodCalculator {
/**
 * Maximum number of components (or distinct alleles) for any genotype with this calculator's ploidy and allele count.
 */
private int maximumDistinctAllelesInGenotype;
/**
 * Offset table for this calculator.
 *
 * <p>
 * This is a shallow copy of {@link GenotypeLikelihoodCalculators#alleleFirstGenotypeOffsetByPloidy} taken when the
 * calculator was created, thus it follows the same format as that array. Please refer to its documentation.
 * </p>
 *
 * <p>You can assume that this offset table contains at least (probably more than) the numbers corresponding to the
 * allele count and ploidy of this calculator. Since it might contain more than that, you must use
 * {@link #alleleCount} and {@link #ploidy} when iterating through this array rather than its length or the
 * length of its components.</p>
 */
private final int[][] alleleFirstGenotypeOffsetByPloidy;
/**
 * Genotype table for this calculator.
 *
 * <p>It is ensured that it contains all the genotypes for this calculator's ploidy and allele count, maybe more.
 * For that reason you must use {@link #genotypeCount} when iterating through this array and not rely on its
 * length.</p>
 *
 * <p>NOTE(review): {@link #genotypeAlleleCountsAt(int)} only reads this table for indices below
 * {@code GenotypeLikelihoodCalculators.MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY} and rebuilds the rest on demand,
 * so the table may not actually hold every genotype; confirm before relying on the statement above.</p>
 */
private final GenotypeAlleleCounts[] genotypeAlleleCounts;
/**
 * Number of genotypes given this calculator's {@link #ploidy} and {@link #alleleCount}.
 */
private final int genotypeCount;
/**
 * Number of genotyping alleles for this calculator.
 */
private final int alleleCount;
/**
 * Ploidy for this calculator.
 */
private final int ploidy;
/**
 * Max-heap for integers used internally by this calculator.
 */
private final IntMaxHeap alleleHeap;
/**
 * Cache of the last genotype-allele-count requested using {@link #genotypeAlleleCountsAt(int)}, when it
 * goes beyond the maximum genotype-allele-count static capacity. Check that method's documentation for details.
 */
private transient GenotypeAlleleCounts lastOverheadCounts;
/**
 * Buffer used as a temporary container for likelihood components for genotypes stratified by alleles, allele
 * frequency and reads.
 *
 * <p>To improve performance we use a 1-dimensional array to implement a 3-dimensional one, as some of those
 * dimensions typically have very low depths (allele and allele frequency).</p>
 *
 * <p>
 * The value contained in position <code>[a][f][r] == log10Lk(read[r] | allele[a]) + log10(f) </code>. The exception
 * is f == 0, whose value is undefined (in practice 0.0) and never used.
 * </p>
 *
 * <p>
 * It is indexed by read, then by allele and then by the number of copies of the allele. For the latter
 * there are as many entries as the ploidy of the calculator + 1 (to accommodate zero copies, although that is
 * never used in practice).
 * </p>
 *
 * <p>NOTE(review): the two paragraphs above disagree on the dimension order ([a][f][r] versus read, allele,
 * copies); confirm against the code that fills this buffer.</p>
 */
private double[] readAlleleLikelihoodByAlleleCount = null;
/**
 * Buffer used as a temporary container for likelihood components for genotypes stratified by reads.
 *
 * <p>
 * It is indexed by genotype index and then by read index. The read capacity is increased as needed by calling
 * {@link #ensureReadCapacity(int) ensureReadCapacity}.
 * </p>
 */
private final double[][] readLikelihoodsByGenotypeIndex;
/**
 * Indicates how many reads the calculator supports; -1 until the buffers are allocated for the first time.
 *
 * <p>This figure is increased dynamically as per the
 * calculation request by calling {@link #ensureReadCapacity(int) ensureReadCapacity}.</p>
 */
private int readCapacity = -1;
/**
 * Caches the log10 of the first few integers up to the ploidy supported by the calculator.
 * <p>This is in fact a shallow copy of {@link GenotypeLikelihoodCalculators#ploidyLog10} and is not meant to be
 * modified by this class.</p>
 */
private final double[] log10;
/**
 * Buffer field used as a temporary container for sorted allele counts when calculating the likelihood of a
 * read in a genotype.
 * <p>
 * This array follows the same format as {@link GenotypeAlleleCounts#sortedAlleleCounts}. Each component in the
 * genotype takes up two positions in the array, where the first indicates the allele index and the second its
 * frequency in the genotype. Only non-zero frequency alleles are represented, taking up the first positions of
 * the array.
 * </p>
 *
 * <p>
 * This array is sized so that it can accommodate the maximum possible number of distinct alleles in any
 * genotype supported by the calculator, a value stored in {@link #maximumDistinctAllelesInGenotype}.
 * </p>
 */
private final int[] genotypeAllelesAndCounts;
/**
 * Buffer field used as a temporary container for component likelihoods when calculating the likelihood of a
 * read in a genotype. It is stratified by read and the allele component of the genotype likelihood... that is
 * the part of the likelihood sum that corresponds to a particular allele in the genotype.
 *
 * <p>
 * It is implemented as a 1-dimensional array since typically one of the dimensions is rather small. Its size
 * is equal to {@link #readCapacity} times {@link #maximumDistinctAllelesInGenotype}.
 * </p>
 *
 * <p>
 * More concretely [r][i] == log10Lk(read[r] | allele[i]) + log(freq[i]) where allele[i] is the ith allele
 * in the genotype of interest and freq[i] is the number of times it occurs in that genotype.
 * </p>
 */
private double[] readGenotypeLikelihoodComponents;
/**
* Creates a new calculator providing its ploidy and number of genotyping alleles.
*/
protected GenotypeLikelihoodCalculator(final int ploidy, final int alleleCount,
final int[][] alleleFirstGenotypeOffsetByPloidy,
final GenotypeAlleleCounts[][] genotypeTableByPloidy,
final double[] ploidyLog10) {
this.alleleFirstGenotypeOffsetByPloidy = alleleFirstGenotypeOffsetByPloidy;
genotypeAlleleCounts = genotypeTableByPloidy[ploidy];
this.alleleCount = alleleCount;
this.ploidy = ploidy;
genotypeCount = this.alleleFirstGenotypeOffsetByPloidy[ploidy][alleleCount];
if (genotypeCount == GenotypeLikelihoodCalculators.GENOTYPE_COUNT_OVERFLOW)
throw new IllegalArgumentException(
String.format("the combination of ploidy (%s) and number of alleles (%s) results in a very large number of genotypes (> %s). You need to limit ploidy or the number of alternative alleles to analyze this locus",
ploidy,alleleCount,Integer.MAX_VALUE));
alleleHeap = new IntMaxHeap(ploidy);
readLikelihoodsByGenotypeIndex = new double[genotypeCount][];
log10 = ploidyLog10;
// The number of possible components is limited by distinct allele count and ploidy.
maximumDistinctAllelesInGenotype = Math.min(ploidy, alleleCount);
genotypeAllelesAndCounts = new int[maximumDistinctAllelesInGenotype << 1];
}
/**
 * Makes sure that temporary arrays and matrices are prepared for a number of reads to process.
 *
 * <p>
 * On the first call the buffers are allocated for at least 10 reads. On later calls they are only reallocated
 * when the requested capacity exceeds the current one, in which case twice the requested capacity is reserved.
 * Reallocation does not preserve the previous buffer contents.
 * </p>
 *
 * @param requestedCapacity number of reads that need to be processed.
 *
 * @throws IllegalArgumentException if {@code requestedCapacity} is negative.
 */
public void ensureReadCapacity(final int requestedCapacity) {
    if (requestedCapacity < 0) {
        throw new IllegalArgumentException("illegal capacity value");
    }
    final int newCapacity;
    if (readCapacity == -1) { // first time call.
        newCapacity = Math.max(requestedCapacity, 10); // Never go too small, 10 is the minimum.
    } else if (readCapacity < requestedCapacity) {
        newCapacity = requestedCapacity << 1; // reserve double to reduce the number of future reallocations.
    } else {
        return; // the current buffers are already large enough.
    }
    readAlleleLikelihoodByAlleleCount = new double[newCapacity * alleleCount * (ploidy + 1)];
    for (int i = 0; i < genotypeCount; i++) {
        readLikelihoodsByGenotypeIndex[i] = new double[newCapacity];
    }
    // Sized per maximumDistinctAllelesInGenotype on every path. The first-call path used to size it by
    // ploidy, which is never smaller, so it disagreed with the growth path and with the documented size.
    readGenotypeLikelihoodComponents = new double[maximumDistinctAllelesInGenotype * newCapacity];
    readCapacity = newCapacity;
}
/**
 * Given a list of alleles, returns the likelihood array index.
 *
 * <p>This operation is <b>thread-unsafe</b>.</p>
 *
 * @param alleleIndices the indices of the alleles in the genotype; there should be as many repetitions of an
 *                      index as copies of that allele in the genotype. Allele indices do not need to be sorted
 *                      in any particular way.
 *
 * @return the likelihood index of the genotype, always 0 when the ploidy is 0.
 */
public int allelesToIndex(final int... alleleIndices) {
    // With ploidy 0 there is a single genotype, at index 0.
    if (ploidy == 0) {
        return 0;
    }
    // Load the shared heap with the requested alleles and resolve the index from it.
    alleleHeap.clear();
    alleleHeap.add(alleleIndices);
    final int likelihoodIndex = alleleHeapToIndex();
    return likelihoodIndex;
}
/**
 * Gives the number of possible genotypes given this calculator's ploidy and allele count.
 *
 * @return the genotype count.
 */
public int genotypeCount() {
    return this.genotypeCount;
}
/**
 * Returns the genotype associated with a particular likelihood index.
 *
 * <p>If {@code index} is not below {@link GenotypeLikelihoodCalculators#MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY},
 *  this method reconstructs that genotype-allele-count iteratively, starting either from the largest strongly
 *  referenced count available or from the last genotype requested this way.
 * </p>
 *
 * <p>Therefore, if you are iterating through all genotype-allele-counts you should do it sequentially and
 * incrementally to avoid a large efficiency drop.</p>
 *
 * @param index query likelihood-index.
 *
 * @throws IllegalArgumentException if {@code index} is negative or not less than {@link #genotypeCount()}.
 *
 * @return never {@code null}.
 */
public GenotypeAlleleCounts genotypeAlleleCountsAt(final int index) {
    if (index < 0 || index >= genotypeCount) {
        throw new IllegalArgumentException("invalid likelihood index: " + index + " >= " + genotypeCount
                + " (genotype count for nalleles = " + alleleCount + " and ploidy " + ploidy );
    }
    final int strongRefLimit = GenotypeLikelihoodCalculators.MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY;
    if (index < strongRefLimit) {
        return genotypeAlleleCounts[index];
    }
    final boolean cannotReuseLast = lastOverheadCounts == null || lastOverheadCounts.index() > index;
    if (cannotReuseLast) {
        // Restart from the last genotype held in the table and advance up to the requested index.
        final GenotypeAlleleCounts restarted = genotypeAlleleCounts[strongRefLimit - 1].clone();
        restarted.increase(index - strongRefLimit + 1);
        lastOverheadCounts = restarted;
    } else {
        // The cached genotype is at or before the requested one: advance it in place.
        lastOverheadCounts.increase(index - lastOverheadCounts.index());
    }
    // Hand out a copy so that later in-place increases do not affect the caller.
    return lastOverheadCounts.clone();
}
/**
 * Calculates the genotype likelihoods given the read-vs-allele likelihood matrix.
 *
 * <p>This operation is <b>thread-unsafe</b>.</p>
 *
 * @param likelihoods the likelihood matrix, all alleles vs all reads.
 *
 * @throws IllegalArgumentException if {@code likelihoods} is {@code null} or its allele count does not match
 *  the allele count of this calculator.
 *
 * @return the result of {@code GenotypeLikelihoods.fromLog10Likelihoods} applied to the per-genotype totals.
 */
public <A extends Allele> GenotypeLikelihoods genotypeLikelihoods(final ReadLikelihoods.Matrix<A> likelihoods) {
    if (likelihoods == null)
        throw new IllegalArgumentException("the likelihood map cannot be null");
    if (likelihoods.alleleCount() != alleleCount)
        throw new IllegalArgumentException("mismatch between allele list and alleleCount");

    final int readCount = likelihoods.readCount();
    ensureReadCapacity(readCount);

    // Step 1: per allele, per copy number and per read likelihood components.
    final double[] componentsByAlleleAndCopies = readLikelihoodComponentsByAlleleCount(likelihoods);
    // Step 2: per genotype and per read likelihoods.
    final double[][] perReadByGenotype = genotypeLikelihoodByRead(componentsByAlleleAndCopies, readCount);
    // Step 3: collapse the reads into one value per genotype.
    return GenotypeLikelihoods.fromLog10Likelihoods(genotypeLikelihoods(perReadByGenotype, readCount));
}
/**
 * Calculates the final genotype likelihood array out of the likelihoods for each genotype per read.
 *
 * @param readLikelihoodsByGenotypeIndex <i>[g][r]</i> likelihoods for each genotype <i>g</i> and read <i>r</i>.
 * @param readCount number of reads to consider from each row of {@code readLikelihoodsByGenotypeIndex}.
 *
 * @return a new array with one position per genotype where the <i>i</i>-th entry (0-based) is the sum of the
 *  first {@code readCount} read likelihoods of the <i>i</i>-th genotype minus {@code readCount * log10[ploidy]}.
 */
private double[] genotypeLikelihoods(final double[][] readLikelihoodsByGenotypeIndex, final int readCount) {
    final double[] totals = new double[genotypeCount];
    // Instead of dividing each read likelihood by the ploidy (subtracting log10(ploidy) per read), the
    // correction for all the reads is applied at once: readCount * log10(ploidy).
    final double ploidyCorrection = readCount * log10[ploidy];
    int genotype = 0;
    while (genotype < genotypeCount) {
        final double[] perRead = readLikelihoodsByGenotypeIndex[genotype];
        double total = - ploidyCorrection;
        for (int read = 0; read < readCount; read++)
            total += perRead[read];
        totals[genotype++] = total;
    }
    return totals;
}
/**
 * Calculates the likelihood component of each read on each genotype.
 *
 * <p>
 *     The result is written into, and returned as, the instance's per-genotype read likelihood buffers, so
 *     its content is overwritten by the next call.
 * </p>
 *
 * @param readLikelihoodComponentsByAlleleCount likelihood components stratified by allele, copy number of the
 *                                              allele in the genotype and read.
 * @param readCount number of reads in {@code readLikelihoodComponentsByAlleleCount}.
 *
 * @return the instance's per-genotype buffers, where only the first {@code readCount} positions of each row
 *  are written by this call.
 */
private double[][] genotypeLikelihoodByRead(final double[] readLikelihoodComponentsByAlleleCount, final int readCount) {
    // {@link #genotypeAlleleCountsAt(int)} is deliberately not used inside the loop, to spare instantiations
    // of GenotypeAlleleCounts when dealing with many genotypes.
    final int lastGenotypeIndex = genotypeCount - 1;
    GenotypeAlleleCounts current = genotypeAlleleCounts[0];
    for (int g = 0; g < genotypeCount; g++) {
        final double[] target = this.readLikelihoodsByGenotypeIndex[g];
        final int distinctAlleles = current.distinctAlleleCount();
        if (distinctAlleles == 1)
            singleComponentGenotypeLikelihoodByRead(current, target, readLikelihoodComponentsByAlleleCount, readCount);
        else if (distinctAlleles == 2)
            twoComponentGenotypeLikelihoodByRead(current, target, readLikelihoodComponentsByAlleleCount, readCount);
        else
            manyComponentGenotypeLikelihoodByRead(current, target, readLikelihoodComponentsByAlleleCount, readCount);
        // Do not step past the last genotype.
        if (g < lastGenotypeIndex)
            current = nextGenotypeAlleleCounts(current);
    }
    return readLikelihoodsByGenotypeIndex;
}
/**
 * Returns the genotype allele counts that follow the ones provided in likelihood index order.
 *
 * <p>
 *     While the next index is still covered by the first
 *     {@link GenotypeLikelihoodCalculators#MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY} table positions, the table
 *     element is returned. When the input sits exactly at the last of those positions, a clone of it is
 *     increased and returned. Past that point the input itself is increased in place and returned.
 * </p>
 *
 * @param alleleCounts the current genotype allele counts; it is modified only in the last case above.
 *
 * @return the genotype allele counts following {@code alleleCounts}.
 */
private GenotypeAlleleCounts nextGenotypeAlleleCounts(final GenotypeAlleleCounts alleleCounts) {
    final int index = alleleCounts.index();
    // Negative: the next index is in the table; zero: index is the last strong-ref position; positive: beyond.
    final int positionPastLastStrongRef = index - GenotypeLikelihoodCalculators.MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY + 1;
    if (positionPastLastStrongRef < 0)
        return genotypeAlleleCounts[index + 1];
    if (positionPastLastStrongRef > 0) {
        alleleCounts.increase();
        return alleleCounts;
    }
    // Exactly at the last strong-ref position: work on a copy so that the table element is left untouched.
    final GenotypeAlleleCounts detached = genotypeAlleleCounts[index].clone();
    detached.increase();
    return detached;
}
/**
 * General genotype likelihood component by read calculator. It does not make any assumption on the exact
 * number of alleles present in the genotype.
 *
 * @param genotypeAlleleCounts the genotype to calculate the per-read likelihoods for.
 * @param likelihoodByRead destination array; its first {@code readCount} positions are overwritten.
 * @param readLikelihoodComponentsByAlleleCount likelihood components laid out by allele, then by copy number,
 *                                              then by read.
 * @param readCount number of reads to process.
 */
private void manyComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts,
                                                   final double[] likelihoodByRead,
                                                   final double[] readLikelihoodComponentsByAlleleCount,
                                                   final int readCount) {
    // First gather, read by read, the component contributed by each allele present in the genotype
    // into readGenotypeLikelihoodComponents: the components of a read are contiguous and consecutive
    // reads are maximumDistinctAllelesInGenotype positions apart.
    genotypeAlleleCounts.copyAlleleCounts(genotypeAllelesAndCounts,0);
    final int componentCount = genotypeAlleleCounts.distinctAlleleCount();
    final int alleleDataSize = (ploidy + 1) * readCount;
    final int readStride = maximumDistinctAllelesInGenotype;
    for (int component = 0; component < componentCount; component++) {
        // genotypeAllelesAndCounts holds (allele index, copy number) pairs.
        final int allele = genotypeAllelesAndCounts[2 * component];
        final int copies = genotypeAllelesAndCounts[2 * component + 1];
        // Position of the first read likelihood for that allele and copy number.
        final int sourceStart = alleleDataSize * allele + copies * readCount;
        for (int read = 0; read < readCount; read++)
            readGenotypeLikelihoodComponents[component + read * readStride] =
                    readLikelihoodComponentsByAlleleCount[sourceStart + read];
    }
    // Then combine the components of each read into its likelihood.
    for (int read = 0; read < readCount; read++) {
        final int from = read * readStride;
        likelihoodByRead[read] = MathUtils.approximateLog10SumLog10(readGenotypeLikelihoodComponents, from, from + componentCount);
    }
}
/**
 * Calculates the likelihood component by read for a given genotype allele count assuming that there are
 * exactly two alleles present in the genotype (with arbitrary non-zero counts each).
 *
 * @param genotypeAlleleCounts the genotype; only its first two allele entries are queried.
 * @param likelihoodByRead destination array; its first {@code readCount} positions are overwritten.
 * @param readLikelihoodComponentsByAlleleCount likelihood components laid out by allele, then by copy number,
 *                                              then by read.
 * @param readCount number of reads to process.
 */
private void twoComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts,
                                                  final double[] likelihoodByRead,
                                                  final double[] readLikelihoodComponentsByAlleleCount,
                                                  final int readCount) {
    final int firstAllele = genotypeAlleleCounts.alleleIndexAt(0);
    final int firstCopies = genotypeAlleleCounts.alleleCountAt(0);
    final int secondAllele = genotypeAlleleCounts.alleleIndexAt(1);
    // With only two alleles the second copy number is whatever is left of the ploidy.
    final int secondCopies = ploidy - firstCopies;
    final int copyNumberSlots = ploidy + 1;
    final int firstStart = readCount * (copyNumberSlots * firstAllele + firstCopies);
    final int secondStart = readCount * (copyNumberSlots * secondAllele + secondCopies);
    for (int read = 0; read < readCount; read++)
        likelihoodByRead[read] = MathUtils.approximateLog10SumLog10(
                readLikelihoodComponentsByAlleleCount[firstStart + read],
                readLikelihoodComponentsByAlleleCount[secondStart + read]);
}
/**
 * Calculates the likelihood component by read for a given genotype allele count assuming that there is
 * exactly one allele present in the genotype.
 *
 * @param genotypeAlleleCounts the genotype; only its first allele index is queried.
 * @param likelihoodByRead destination array; its first {@code readCount} positions are overwritten.
 * @param readLikelihoodComponentsByAlleleCount likelihood components laid out by allele, then by copy number,
 *                                              then by read.
 * @param readCount number of reads to process.
 */
private void singleComponentGenotypeLikelihoodByRead(final GenotypeAlleleCounts genotypeAlleleCounts,
                                                     final double[] likelihoodByRead,
                                                     final double[] readLikelihoodComponentsByAlleleCount,
                                                     final int readCount) {
    final int allele = genotypeAlleleCounts.alleleIndexAt(0);
    // The copy number of the only allele must be equal to the ploidy, so the per-read values
    // are already available as a contiguous run that is copied verbatim.
    final int sourceStart = (allele * (ploidy + 1) + ploidy) * readCount;
    System.arraycopy(readLikelihoodComponentsByAlleleCount, sourceStart, likelihoodByRead, 0, readCount);
}
/**
 * Fills and returns the flattened 3-dimensional table with the likelihood components.
 *
 * <p>
 *     Conceptually the table is indexed by allele <i>y</i>, copy number <i>z</i> and read <i>x</i>, with the
 *     read being the fastest changing coordinate. The entry for copy number 1 is the value provided by
 *     {@code likelihoods} and, for larger copy numbers:
 * </p>
 * <pre>
 *     result[y][z][x] := result[y][1][x] + log10[z]
 * </pre>
 * <p>
 *     Entries for copy number 0 are skipped and never written by this method.
 * </p>
 *
 * @param likelihoods source of the per allele and read likelihoods.
 *
 * @return the instance's shared buffer, whose content is overwritten on each call.
 */
private <A extends Allele> double[] readLikelihoodComponentsByAlleleCount(final ReadLikelihoods.Matrix<A> likelihoods) {
    final int readCount = likelihoods.readCount();
    final int alleleStride = readCount * (ploidy + 1);
    final double[] table = readAlleleLikelihoodByAlleleCount;
    for (int allele = 0; allele < alleleCount; allele++) {
        // Skip the unused copy number == 0 run; this is where the copy number == 1 run starts.
        final int singleCopyStart = allele * alleleStride + readCount;
        likelihoods.copyAlleleLikelihoods(allele, table, singleCopyStart);
        // Copy number 1 is already in place, so the remaining runs are derived from it starting at 2.
        for (int copies = 2; copies <= ploidy; copies++) {
            final double log10Copies = log10[copies];
            final int copiesStart = singleCopyStart + (copies - 1) * readCount;
            for (int read = 0; read < readCount; read++)
                table[copiesStart + read] = table[singleCopyStart + read] + log10Copies;
        }
    }
    return table;
}
/**
 * Returns the ploidy this genotype likelihood calculator works with.
 *
 * @return the ploidy of this calculator.
 */
public int ploidy() {
    return this.ploidy;
}
/**
 * Returns the total number of alleles this genotype likelihood calculator works with.
 *
 * @return the number of alleles considered by this calculator.
 */
public int alleleCount() {
    return this.alleleCount;
}
/**
 * Returns the likelihood index given the allele counts.
 *
 * <p>This operation clears and refills the instance's allele heap.</p>
 *
 * @param alleleCountArray the query allele counts as consecutive (allele index, count) pairs. This must follow
 *                         the format returned by {@link GenotypeAlleleCounts#copyAlleleCounts} with 0 offset.
 *
 * @throws IllegalArgumentException if {@code alleleCountArray} is not a valid {@code allele count array}:
 *     <ul>
 *         <li>is {@code null},</li>
 *         <li>or its length is not even,</li>
 *         <li>or it contains any negative count,</li>
 *         <li>or the count sum does not match the calculator ploidy,</li>
 *         <li>or any of the alleles therein is negative or greater than the maximum allele index.</li>
 *     </ul>
 *
 * @return the likelihood index of the genotype described by {@code alleleCountArray}.
 */
public int alleleCountsToIndex(final int ... alleleCountArray) {
    if (alleleCountArray == null)
        throw new IllegalArgumentException("the allele counts cannot be null");
    final int length = alleleCountArray.length;
    if (length % 2 != 0)
        throw new IllegalArgumentException("the allele counts array cannot have odd length");
    alleleHeap.clear();
    int position = 0;
    while (position < length) {
        final int allele = alleleCountArray[position++];
        final int copies = alleleCountArray[position++];
        if (copies < 0)
            throw new IllegalArgumentException("no allele count can be less than 0");
        // Expand the pair into as many heap entries as copies of the allele.
        for (int remaining = copies; remaining > 0; remaining--)
            alleleHeap.add(allele);
    }
    return alleleHeapToIndex();
}
/**
 * Transforms the content of the allele heap into a likelihood index.
 *
 * <p>
 *     The heap must contain exactly as many allele indices as the ploidy of this calculator. Alleles are
 *     removed from the heap one by one and each contributes the offset of the first genotype that contains
 *     it for the number of allele copies still to be placed.
 * </p>
 *
 * @throws IllegalArgumentException if the number of alleles in the heap differs from the ploidy, or any
 *  allele is negative or not less than the allele count of this calculator.
 *
 * @return the likelihood index of the genotype in the heap; {@code 0} when the ploidy is 0.
 */
private int alleleHeapToIndex() {
    if (alleleHeap.size() != ploidy)
        throw new IllegalArgumentException("the sum of allele counts must be equal to the ploidy of the calculator");
    // With ploidy 0 the heap is necessarily empty here, so there is no top element to validate and
    // the accumulation loop below would not execute: answer 0 directly rather than peeking.
    // NOTE(review): assumes peeking an empty heap is invalid (throws or yields a stale value) -- confirm
    // against the heap implementation.
    if (ploidy == 0)
        return 0;
    // The top of the heap is checked first, so a single comparison validates the upper bound of all alleles.
    if (alleleHeap.peek() >= alleleCount)
        throw new IllegalArgumentException("invalid allele " + alleleHeap.peek() + " more than the maximum " + (alleleCount - 1));
    int result = 0;
    for (int p = ploidy; p > 0; p--) {
        final int allele = alleleHeap.remove();
        if (allele < 0)
            throw new IllegalArgumentException("invalid allele " + allele + " must be equal or greater than 0 ");
        result += alleleFirstGenotypeOffsetByPloidy[p][allele];
    }
    return result;
}
}

View File

@ -0,0 +1,410 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import java.util.Arrays;
/**
* Genotype likelihood calculator utility.
*
* <p>
* This class provides genotype likelihood calculators for an arbitrary ploidy and allele
* count (number of distinct alleles).
* </p>
*
* <p>
* This class is thread-safe.
* </p>
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class GenotypeLikelihoodCalculators {
/**
* Maximum number of genotype-allele-counts that are stored per ploidy in the shared genotype table.
* <p>
* {@link #buildGenotypeAlleleCountsArray(int, int, int[][])} never creates arrays longer than this value;
* genotypes with an index equal or greater than it are therefore not held in the table.
* </p>
*/
public static final int MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY = 1000;
/**
* Mark to indicate genotype-count overflow due to a large number of alleles and ploidy.
*/
protected static final int GENOTYPE_COUNT_OVERFLOW = -1;
/**
 * The current maximum allele index supported by the tables.
 * <p>
 * Its initial value indicates the initial capacity of the shared {@link #alleleFirstGenotypeOffsetByPloidy} table.
 * Feel free to change it to any reasonable value of 1 or more.
 * </p>
 * <p>
 * This field is volatile because {@link #getInstance} reads it without holding the lock under which
 * {@link #ensureCapacity} updates it. Being volatile, a thread that observes an increased value is guaranteed
 * to also observe the tables that were assigned before it.
 * </p>
 */
private volatile static int maximumAllele = 1; // its initial value is the initial capacity of the shared tables.
/**
 * The current maximum ploidy supported by the tables.
 * <p>
 * Its initial value indicates the initial capacity of the shared {@link #genotypeTableByPloidy}. Feel free
 * to change it to anything reasonable that is non-negative.
 * </p>
 * <p>
 * This field is volatile for the same reason as {@link #maximumAllele}: it is read without the lock in
 * {@link #getInstance} and must never be seen ahead of the tables it describes.
 * </p>
 */
private volatile static int maximumPloidy = 2; // its initial value is the initial capacity of the shared tables.
/**
* Shared copy of the offset table as described in {@link #buildAlleleFirstGenotypeOffsetTable(int, int)}.
*
* <p>
* This reference holds the largest table requested so far in terms of maximum-allele and maximum-ploidy; it is
* replaced by a larger one in {@link #ensureCapacity(int, int)}.
* </p>
*/
// The initializer reads maximumPloidy and maximumAllele, so this declaration must stay after theirs.
private volatile static int[][] alleleFirstGenotypeOffsetByPloidy =
buildAlleleFirstGenotypeOffsetTable(maximumPloidy, maximumAllele);
/**
* Shared table of genotypes given the ploidy, sorted by their index in the likelihood array.
*
* <p>
* Its format is described in {@link #buildGenotypeAlleleCountsTable(int, int, int[][])}.
* </p>
*/
// The initializer reads alleleFirstGenotypeOffsetByPloidy, so this declaration must stay after that one.
private volatile static GenotypeAlleleCounts[][] genotypeTableByPloidy =
buildGenotypeAlleleCountsTable(maximumPloidy,maximumAllele,alleleFirstGenotypeOffsetByPloidy);
/**
 * Builds the table with the genotype offsets based on ploidy and the maximum allele index with representation
 * in the genotype.
 * <p>
 * The result is a matrix containing the offset of the first genotype that contains a particular allele,
 * stratified by ploidy. Rows (first dimension) represent the ploidy, whereas columns (second dimension)
 * represent the allele.
 * </p>
 *
 * <p>
 * Thus the value at position <i>[p][a]</i> indicates how many genotypes of ploidy <i>p</i> there are before the
 * first one that contains allele <i>a</i>. <br/>
 *
 * For example, considering ploidy 3 and alleles A, B, C, D, etc ... (indexed 0, 1, 2, ... respectively):
 * <br/>
 * [3][A] == [3][0] == 0 as the first genotype AAA contains A.
 * <br/>
 * [3][C] == [3][2] == 4 as the first genotype that contains C, AAC, follows: AAA AAB ABB BBB
 * <br/>
 * [4][D] == [4][3] == 15 as the first genotype that contains D, AAAD, follows: AAAA AAAB AABB ABBB BBBB AAAC
 * AABC ABBC BBBC AACC ABCC BBCC ACCC BCCC CCCC.
 * </p>
 *
 * <p>
 * These values are calculated recursively as follows:
 * </p>
 * <pre>
 *
 *     Offset[p][a] := Offset[p-1][a] + Offset[p][a-1]   when a > 0 and p > 0
 *                     0                                 when a == 0
 *                     1                                 otherwise
 *
 *
 *         0 1 1  1  1  1   1 ...
 *         0 1 2  3  4  5   6 ...
 *         0 1 3  6 10 15  21 ...
 *         0 1 4 10 20 35  56 ...
 *         0 1 5 15 35 70 126 ...
 *         0 ..................
 * </pre>
 *
 * <p>
 * Note: if someone can come up with a closed form computable in O(1) (with respect to ploidy and allele count)
 * please let the author know.
 * </p>
 *
 * <p>
 * Any position whose value does not fit in an {@code int} is set to {@link #GENOTYPE_COUNT_OVERFLOW}. The
 * mark propagates: a position that depends on an overflowed one is also marked as overflowed.
 * </p>
 *
 * <p>
 * The matrix is guaranteed to have as many rows as indicated by {@code maximumPloidy} + 1; the first
 * row refers to the special case of ploidy == 0, the second row to ploidy 1 and so forth. Thus the ploidy
 * matches the row index.
 * </p>
 * <p>
 * The matrix is guaranteed to have as many columns as indicated by {@code maximumAllele} + 1. The extra
 * column is needed because the total number of genotypes for a given allele count is equal to the offset of
 * the first genotype that contains the following allele.
 * </p>
 *
 * <p>
 * Although it might seem nonsensical to have genotypes of ploidy 0, the values in the first row are used when
 * filling up the values in row 1 and so forth, so it is present for programmatic convenience.
 * Offsets in this row are 0 for the first column and 1 for any others.
 * </p>
 *
 * @param maximumPloidy maximum supported ploidy.
 * @param maximumAllele maximum supported allele index.
 *
 * @throws IllegalArgumentException if {@code maximumPloidy} or {@code maximumAllele} is negative.
 *
 * @return never {@code null}, the matrix described with enough information to address
 *      problems concerning up to the requested maximum allele index and ploidy.
 */
private static int[][] buildAlleleFirstGenotypeOffsetTable(final int maximumPloidy, final int maximumAllele) {
    checkPloidyAndMaximumAllele(maximumPloidy, maximumAllele);
    final int rowCount = maximumPloidy + 1;
    final int colCount = maximumAllele + 1;
    final int[][] result = new int[rowCount][colCount];

    // Ploidy 0 array must be { 0, 1, 1, ...., 1}
    Arrays.fill(result[0],1,colCount,1);
    // Now we take care of the rest of ploidies.
    // We leave the first allele offset at its correct value 0 by starting with allele := 1.
    for (int ploidy = 1; ploidy < rowCount; ploidy++)
        for (int allele = 1; allele < colCount; allele++) {
            final int left = result[ploidy][allele - 1];
            final int above = result[ploidy - 1][allele];
            if (left == GENOTYPE_COUNT_OVERFLOW || above == GENOTYPE_COUNT_OVERFLOW) {
                // An overflowed addend makes the sum meaningless; adding the mark as if it were a count
                // would produce a plausible-looking but wrong offset, so the mark is carried over instead.
                result[ploidy][allele] = GENOTYPE_COUNT_OVERFLOW;
            } else {
                // Both addends are valid non-negative counts, thus the sum wraps around to a negative
                // value exactly when it does not fit in an int.
                final int sum = left + above;
                result[ploidy][allele] = sum < 0 ? GENOTYPE_COUNT_OVERFLOW : sum;
            }
        }
    return result;
}
/**
 * Composes a table with the lists of all possible genotype allele counts given the ploidy and maximum allele index.
 * <p>
 * The resulting matrix has exactly {@code maximumPloidy} + 1 rows, as the first row with index 0 corresponds
 * to ploidy == 0. Each row is built by {@link #buildGenotypeAlleleCountsArray(int, int, int[][])} and holds
 * the genotype-allele-counts for that ploidy in increasing index order, so the row length varies with the ploidy.
 * </p>
 *
 * <p>
 * Therefore <code>result[3][4]</code> would contain the 5th genotype with ploidy 3.
 * </p>
 *
 * @param maximumPloidy maximum ploidy to use in queries to the resulting table.
 * @param maximumAllele maximum allele index to use in queries to the resulting table.
 * @param offsetTable an allele first genotype offset table as constructed using {@link #buildAlleleFirstGenotypeOffsetTable(int, int)}
 *                    that supports at least up to {@code maximumAllele} and {@code maximumPloidy}.
 *
 * @throws IllegalArgumentException if {@code maximumPloidy} or {@code maximumAllele} is negative, or {@code offsetTable} is {@code null},
 *   or it does not have the capacity to handle the requested maximum ploidy or allele index.
 *
 * @return never {@code null}.
 */
private static GenotypeAlleleCounts[][] buildGenotypeAlleleCountsTable(final int maximumPloidy, final int maximumAllele, final int[][] offsetTable) {
    checkPloidyAndMaximumAllele(maximumPloidy, maximumAllele);
    checkOffsetTableCapacity(offsetTable,maximumPloidy,maximumAllele);
    // Only the outer dimension is allocated here: each row has a different number of columns.
    final GenotypeAlleleCounts[][] table = new GenotypeAlleleCounts[maximumPloidy + 1][];
    int ploidy = 0;
    while (ploidy < table.length) {
        table[ploidy] = buildGenotypeAlleleCountsArray(ploidy, maximumAllele, offsetTable);
        ploidy++;
    }
    return table;
}
/**
 * Builds a genotype-allele-counts array given the genotype ploidy and the number of alleles to support.
 * <p>
 * The length of the result is the number of genotypes indicated by {@code genotypeOffsetTable} for the
 * requested ploidy and allele count, capped to {@link #MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY} (also used
 * when the table reports {@link #GENOTYPE_COUNT_OVERFLOW}). Each element is obtained from the previous one
 * by calling {@code next()}, starting from {@code GenotypeAlleleCounts.first(ploidy)}.
 * </p>
 *
 * <p>
 * When the offset table reports no genotypes at all (as it does for {@code alleleCount == 0}) the result
 * is an empty array.
 * </p>
 *
 * <p>
 * For example, the expected content for the first few elements is:
 *
 * <pre>
 *     ploidy = 1, length = 5 : [ {A}, {B}, {C}, {D}, {E} ]
 *     ploidy = 2, length = 7 : [ {AA}, {AB}, {BB}, {AC}, {BC}, {CC}, {AD} ]
 *     ploidy = 3, length = 10 : [ {AAA}, {AAB}, {ABB}, {BBB}, {AAC}, {ABC}, {BBC}, {ACC}, {BCC}, {CCC} ]
 * </pre>
 * </p>
 *
 * @param ploidy requested ploidy.
 * @param alleleCount number of different alleles that the genotype table must support.
 * @param genotypeOffsetTable table with the offset of the first genotype that contains an allele given
 *                            the ploidy and its index.
 *
 * @throws IllegalArgumentException if {@code ploidy} or {@code alleleCount} is negative.
 *
 * @return never {@code null}, follows the specification above.
 */
private static GenotypeAlleleCounts[] buildGenotypeAlleleCountsArray(final int ploidy, final int alleleCount, final int[][] genotypeOffsetTable) {
    if (ploidy < 0)
        throw new IllegalArgumentException("the requested ploidy cannot be negative: " + ploidy);
    if (alleleCount < 0)
        throw new IllegalArgumentException("the requested maximum allele cannot be negative: " + alleleCount);
    // The total genotype count is the offset of the first genotype that would contain the next allele.
    final int length = genotypeOffsetTable[ploidy][alleleCount];
    final int strongRefLength = length == GENOTYPE_COUNT_OVERFLOW ? MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY : Math.min(length, MAXIMUM_STRONG_REF_GENOTYPE_PER_PLOIDY);
    final GenotypeAlleleCounts[] result = new GenotypeAlleleCounts[strongRefLength];
    // No genotypes to store: there is no position 0 to seed, so return the empty array as is.
    if (strongRefLength == 0)
        return result;
    result[0] = GenotypeAlleleCounts.first(ploidy);
    for (int genotypeIndex = 1; genotypeIndex < strongRefLength; genotypeIndex++)
        result[genotypeIndex] = result[genotypeIndex-1].next();
    return result;
}
/**
 * Cached log10 values for the integers from 0 up to the maximum ploidy requested thus far.
 */
private volatile static double[] ploidyLog10;

// Initialize {@link #ploidyLog10} to cover the initial maximum ploidy.
static {
    final double[] initialValues = new double[maximumPloidy + 1];
    for (int value = 0; value < initialValues.length; value++)
        initialValues[value] = Math.log10(value);
    ploidyLog10 = initialValues;
}
/**
 * Returns a calculator instance given its ploidy and the number of alleles.
 *
 * <p>
 *     The shared tables are expanded first when they do not cover the requested ploidy or allele count.
 * </p>
 *
 * @param ploidy the required ploidy.
 * @param alleleCount the required allele count.
 *
 * @throws IllegalArgumentException if either {@code ploidy} or {@code alleleCount} is negative.
 *  NOTE(review): the original documentation also mentions a failure when the resulting number of genotypes
 *  is too large; that check is not visible in this method -- confirm where it is performed.
 *
 * @return never {@code null}.
 */
public static GenotypeLikelihoodCalculator getInstance(final int ploidy,
                                                       final int alleleCount) {
    checkPloidyAndMaximumAllele(ploidy, alleleCount);
    if (alleleCount < 0)
        throw new IllegalArgumentException("the allele count cannot be negative");
    if (ploidy < 0)
        throw new IllegalArgumentException("the ploidy count cannot be negative");

    // Fast check on the table capacities done without taking the lock;
    // only when the capacity falls short are the tables expanded, under the lock.
    final boolean withinCapacity = alleleCount <= maximumAllele && ploidy <= maximumPloidy;
    if (!withinCapacity)
        ensureCapacity(alleleCount, ploidy);

    // At this point the tables must have at least the requested capacity, likely to be much more.
    final int[][] offsetTable = alleleFirstGenotypeOffsetByPloidy;
    final GenotypeAlleleCounts[][] genotypeTable = genotypeTableByPloidy;
    final double[] log10Table = ploidyLog10;
    return new GenotypeLikelihoodCalculator(ploidy, alleleCount, offsetTable, genotypeTable, log10Table);
}
/**
 * Expands the shared tables, under the class lock, so that they cover the requested maxima.
 *
 * <p>
 *     Nothing is done when the current capacity already covers both requests.
 * </p>
 *
 * @param requestedMaximumAllele the new requested allele maximum.
 * @param requestedMaximumPloidy the new requested ploidy maximum.
 */
private synchronized static void ensureCapacity(final int requestedMaximumAllele, final int requestedMaximumPloidy) {
    final boolean growAlleles = requestedMaximumAllele > maximumAllele;
    final boolean growPloidy = requestedMaximumPloidy > maximumPloidy;

    // Checked again with the lock held: another thread may have done the expansion in the meantime.
    if (growAlleles || growPloidy) {
        final int newMaximumPloidy = Math.max(maximumPloidy,requestedMaximumPloidy);
        final int newMaximumAllele = Math.max(maximumAllele,requestedMaximumAllele);

        // Tables go first ...
        alleleFirstGenotypeOffsetByPloidy = buildAlleleFirstGenotypeOffsetTable(newMaximumPloidy,newMaximumAllele);
        genotypeTableByPloidy = buildGenotypeAlleleCountsTable(newMaximumPloidy,newMaximumAllele,alleleFirstGenotypeOffsetByPloidy);
        if (growPloidy)
            ploidyLog10 = ploidyLog10Extension(newMaximumPloidy);

        // ... and the capacity fields last. The check in {@link #getInstance} that is done without the lock
        // compares against these fields, so they must never advertise more capacity than the tables
        // assigned above actually have.
        if (growAlleles)
            maximumAllele = requestedMaximumAllele;
        if (growPloidy)
            maximumPloidy = requestedMaximumPloidy;
    }
}
/**
 * Builds an extended copy of {@link #ploidyLog10} that also holds the log10 values needed after
 * a maximum-ploidy expansion. The current array is left untouched.
 *
 * @param newMaximumPloidy the new maximum ploidy.
 *
 * @return never {@code null}; an array of length {@code newMaximumPloidy + 1}.
 */
private static double[] ploidyLog10Extension(final int newMaximumPloidy) {
    final double[] current = ploidyLog10;
    final double[] extended = Arrays.copyOf(current, newMaximumPloidy + 1);
    // Only the newly added tail needs computing; the head was copied from the current cache.
    for (int value = current.length; value < extended.length; value++) {
        extended[value] = Math.log10(value);
    }
    return extended;
}
/**
 * Validates the ploidy and maximum allele values passed to diverse methods in this class.
 * <p>
 * The ploidy is checked first, so when both values are invalid the exception refers to the ploidy.
 * </p>
 *
 * @param ploidy the maximum ploidy value.
 * @param maximumAllele the maximum allele value.
 *
 * @throws IllegalArgumentException if either value is negative.
 */
private static void checkPloidyAndMaximumAllele(final int ploidy, final int maximumAllele) {
    final String problem;
    if (ploidy < 0) {
        problem = "the ploidy provided cannot be negative: " + ploidy;
    } else if (maximumAllele < 0) {
        problem = "the maximum allele index provided cannot be negative: " + maximumAllele;
    } else {
        return;
    }
    throw new IllegalArgumentException(problem);
}
/**
 * Verifies that an allele-first-genotype offset table is present and large enough.
 * <p>
 * The number of rows must exceed {@code maximumPloidy} and the number of columns of the
 * first row must be at least {@code maximumAllele}.
 * </p>
 *
 * @param offsetTable the table to check.
 * @param maximumPloidy the maximum ploidy the table must support.
 * @param maximumAllele the maximum allele the table must support.
 *
 * @throws IllegalArgumentException if {@code offsetTable} is {@code null} or fails either capacity test.
 */
private static void checkOffsetTableCapacity(final int[][] offsetTable, final int maximumPloidy, final int maximumAllele) {
    final String problem;
    if (offsetTable == null) {
        problem = "the allele first genotype offset table provided cannot be null";
    } else if (offsetTable.length <= maximumPloidy) {
        problem = "the allele first genotype offset table provided does not have enough "
                + "capacity for requested maximum ploidy: " + maximumPloidy;
    } else if (offsetTable[0].length < maximumAllele) {
        // NOTE(review): the ploidy test above uses '<=' whereas this one uses '<', so a row with
        // exactly maximumAllele columns is accepted; if maximumAllele is used as a column index
        // this may be an off-by-one -- confirm against the table builders.
        problem = "the allele first genotype offset table provided does not have enough "
                + "capacity for requested maximum allele index: " + maximumAllele;
    } else {
        return;
    }
    throw new IllegalArgumentException(problem);
}
}

View File

@ -0,0 +1,130 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
/**
 * Encapsulates the data used to make the genotype calls: a ploidy model and the read-likelihoods
 * collection it applies to.
 * <p>
 * Sample queries are answered by the ploidy model and allele queries by the read-likelihoods.
 * </p>
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class GenotypingData<A extends Allele> implements SampleList, AlleleList<A> {

    private final PloidyModel ploidyModel;

    private final ReadLikelihoods<A> likelihoods;

    /**
     * Constructs a new genotyping-data collection providing the ploidy model to apply to the input model
     * and the read-likelihoods collection.
     *
     * @param ploidyModel the ploidy model.
     * @param likelihoods the read-likelihoods collection.
     *
     * @throws IllegalArgumentException if either {@code ploidyModel} or {@code likelihoods} is {@code null},
     *      or they are not compatible in terms of the samples they contain; their lists must match.
     */
    public GenotypingData(final PloidyModel ploidyModel, final ReadLikelihoods<A> likelihoods) {
        if (ploidyModel == null) {
            throw new IllegalArgumentException("the ploidy model cannot be null");
        }
        if (likelihoods == null) {
            throw new IllegalArgumentException("the likelihood object cannot be null");
        }
        // Both components must describe the same sample list.
        if (!SampleListUtils.equals(ploidyModel, likelihoods)) {
            throw new IllegalArgumentException("sample list are different between ploidy-model and read-likelihood collection, perhaps just the order");
        }
        this.ploidyModel = ploidyModel;
        this.likelihoods = likelihoods;
    }

    /**
     * Returns the ploidy model that corresponds to the data provided.
     *
     * @return never {@code null}.
     */
    public PloidyModel ploidyModel() {
        return this.ploidyModel;
    }

    /**
     * Returns read-likelihoods to use for genotyping.
     *
     * @return never {@code null}.
     */
    public ReadLikelihoods<A> readLikelihoods() {
        return this.likelihoods;
    }

    // Sample-list operations, delegated to the ploidy model.

    @Override
    public int sampleCount() {
        return this.ploidyModel.sampleCount();
    }

    @Override
    public int sampleIndex(final String sample) {
        return this.ploidyModel.sampleIndex(sample);
    }

    @Override
    public String sampleAt(final int sampleIndex) {
        return this.ploidyModel.sampleAt(sampleIndex);
    }

    // Allele-list operations, delegated to the read-likelihoods collection.

    @Override
    public int alleleCount() {
        return this.likelihoods.alleleCount();
    }

    @Override
    public int alleleIndex(final A allele) {
        return this.likelihoods.alleleIndex(allele);
    }

    @Override
    public A alleleAt(final int index) {
        return this.likelihoods.alleleAt(index);
    }
}

View File

@ -48,10 +48,11 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
@ -69,8 +70,6 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFConstants;
import java.util.*;
@ -86,22 +85,20 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
protected Logger logger;
protected final GenomeAnalysisEngine toolkit;
protected final Config configuration;
protected VariantAnnotatorEngine annotationEngine;
protected final int numberOfGenomes;
protected final Collection<String> sampleNames;
protected final SampleList samples;
private final double[] log10AlleleFrequencyPriorsSNPs;
private final double[] log10AlleleFrequencyPriorsIndels;
private final GenomeLocParser genomeLocParser;
protected final GenomeLocParser genomeLocParser;
// the model used for calculating p(non-ref)
protected ThreadLocal<AFCalc> afcm = new ThreadLocal<AFCalc>() {
@ -111,59 +108,32 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return AFCalcFactory.createAFCalc(configuration, numberOfGenomes, logger);
}
};
/**
 * Creates a genotyping engine that works on the samples known to the toolkit.
 * <p>
 * Delegates to the three-argument constructor, passing the sample names obtained from
 * {@link #resolveSampleNamesFromToolkit}.
 * </p>
 *
 * @param toolkit reference to the genome-analysis toolkit.
 * @param configuration engine configuration object.
 *
 * @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
 */
protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration) {
    this(toolkit, configuration, resolveSampleNamesFromToolkit(toolkit));
}
/**
 * Collects the names of all the samples registered in the toolkit's sample database into a new
 * insertion-ordered set.
 *
 * @param toolkit reference to the toolkit.
 *
 * @throws IllegalArgumentException if the {@code toolkit} is {@code null}.
 *
 * @return never {@code null}, but empty if there are no samples.
 */
private static Set<String> resolveSampleNamesFromToolkit(final GenomeAnalysisEngine toolkit) {
    if (toolkit != null) {
        return new LinkedHashSet<>(toolkit.getSampleDB().getSampleNames());
    }
    throw new IllegalArgumentException("the toolkit cannot be null");
}
/**
* Construct a new genotyper engine, on a specific subset of samples.
*
* @param toolkit reference to the genome-analysis toolkit.
* @param configuration engine configuration object.
* @param sampleNames subset of sample to work on identified by their names. If {@code null}, the full toolkit
* @param samples subset of sample to work on identified by their names. If {@code null}, the full toolkit
* sample set will be used instead.
* @param genomeLocParser the genome-loc-parser
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code configuration} is {@code null}.
* @throws IllegalArgumentException if any of {@code samples}, {@code configuration} or {@code genomeLocParser} is {@code null}.
*/
protected GenotypingEngine(final GenomeAnalysisEngine toolkit, final Config configuration,final Set<String> sampleNames) {
if (toolkit == null)
throw new IllegalArgumentException("the toolkit cannot be null");
protected GenotypingEngine(final Config configuration,final SampleList samples, final GenomeLocParser genomeLocParser) {
if (configuration == null)
throw new IllegalArgumentException("the configuration cannot be null");
this.configuration = configuration;
logger = Logger.getLogger(getClass());
this.toolkit = toolkit;
this.sampleNames = sampleNames != null ? sampleNames : toolkit.getSampleDB().getSampleNames();
numberOfGenomes = this.sampleNames.size() * configuration.genotypeArgs.samplePloidy;
this.samples = samples;
numberOfGenomes = this.samples.sampleCount() * configuration.genotypeArgs.samplePloidy;
MathUtils.Log10Cache.ensureCacheContains(numberOfGenomes * 2);
log10AlleleFrequencyPriorsSNPs = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.genotypeArgs.snpHeterozygosity,configuration.genotypeArgs.inputPrior);
log10AlleleFrequencyPriorsIndels = computeAlleleFrequencyPriors(numberOfGenomes,
configuration.genotypeArgs.indelHeterozygosity,configuration.genotypeArgs.inputPrior);
genomeLocParser = toolkit.getGenomeLocParser();
this.genomeLocParser = genomeLocParser;
}
/**
@ -257,7 +227,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
! outputAlternativeAlleles.siteIsMonomorphic ||
configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES || configuration.annotateAllSitesWithPLs
? AFresult.getLog10PosteriorOfAFEq0() + 0.0
: AFresult.getLog10PosteriorOfAFGT0() + 0.0;
: AFresult.getLog10PosteriorOfAFGT0() + 0.0 ;
// Add 0.0 removes -0.0 occurrences.
@ -270,6 +240,9 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
return limitedContext ? null : estimateReferenceConfidence(vc, stratifiedContexts, getModelTheta(model), true, PoFGT0);
// start constructing the resulting VC
final GenomeLocParser genomeLocParser = this.genomeLocParser != null || refContext == null ? this.genomeLocParser : refContext.getGenomeLocParser();
if (genomeLocParser == null)
throw new IllegalStateException("this UG engine was created without a valid genomeLocParser and no refContext was provided");
final GenomeLoc loc = genomeLocParser.createGenomeLoc(vc);
final List<Allele> outputAlleles = outputAlternativeAlleles.outputAlleles(vc.getReference());
final VariantContextBuilder builder = new VariantContextBuilder(callSourceString(), loc.getContig(), loc.getStart(), loc.getStop(), outputAlleles);
@ -506,7 +479,9 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
double log10POfRef = Math.log10(initialPofRef);
// for each sample that we haven't examined yet
for ( String sample : sampleNames ) {
final int sampleCount = samples.sampleCount();
for (int i = 0; i < sampleCount; i++) {
final String sample = samples.sampleAt(i);
final AlignmentContext context = contexts.get(sample);
if ( ignoreCoveredSamples && context != null )
continue;

View File

@ -0,0 +1,159 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import java.util.List;
/**
 * Genotyping Likelihoods collection: one {@link GenotypeLikelihoods} per sample, together with the
 * genotyping alleles and the ploidy model they refer to.
 * <p>
 * Sample queries are answered by the ploidy model and allele queries by the allele list.
 * </p>
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class GenotypingLikelihoods<A extends Allele> implements SampleList, AlleleList<A> {

    private final GenotypeLikelihoods[] likelihoods;

    private final PloidyModel ploidyModel;

    private final AlleleList<A> alleles;

    /**
     * Creates a new genotyping-likelihoods collection given the genotype alleles, the sample ploidy model and the
     * likelihoods themselves.
     * <p>
     * Notice that this constructor does not check whether the likelihood array lengths correspond to the sample
     * ploidies and number of alleles.
     * </p>
     *
     * @param alleles the genotyping alleles.
     * @param ploidyModel the ploidy model.
     * @param likelihoods the actual genotype likelihoods, one element per sample.
     *
     * @throws IllegalArgumentException if any argument is {@code null}, any element of {@code likelihoods} is
     *      {@code null}, or the number of samples in {@code ploidyModel} does not correspond with the number
     *      of likelihoods arrays in {@code likelihoods}.
     */
    GenotypingLikelihoods(final AlleleList<A> alleles, final PloidyModel ploidyModel,
                          final List<GenotypeLikelihoods> likelihoods) {
        if (alleles == null)
            throw new IllegalArgumentException("allele list cannot be null");
        if (ploidyModel == null)
            throw new IllegalArgumentException("the ploidy model cannot be null");
        if (likelihoods == null)
            throw new IllegalArgumentException("the likelihood collection cannot be null");
        if (ploidyModel.sampleCount() != likelihoods.size())
            throw new IllegalArgumentException("there must be exactly one likelihood set for each sample");

        // Copy into a private array so that later changes to the input list do not affect this collection.
        this.likelihoods = likelihoods.toArray(new GenotypeLikelihoods[likelihoods.size()]);
        for (final GenotypeLikelihoods likelihood : this.likelihoods)
            if (likelihood == null)
                throw new IllegalArgumentException("no genotype likelihood is allowed to be null");

        this.alleles = alleles;
        this.ploidyModel = ploidyModel;
    }

    @Override
    public int sampleCount() {
        return ploidyModel.sampleCount();
    }

    @Override
    public int sampleIndex(final String sample) {
        return ploidyModel.sampleIndex(sample);
    }

    @Override
    public String sampleAt(final int sampleIndex) {
        return ploidyModel.sampleAt(sampleIndex);
    }

    /**
     * Returns the ploidy of the sample given its index in the collection.
     * <p>
     * The query, including any index validation, is delegated to the ploidy model.
     * </p>
     *
     * @param sampleIndex the query sample index.
     *
     * @throws IllegalArgumentException if {@code sampleIndex} is not a valid index for this collection:
     *      [0,{@link #sampleCount()}).
     *
     * @return 0 or greater.
     */
    public int samplePloidy(final int sampleIndex) {
        return ploidyModel.samplePloidy(sampleIndex);
    }

    /**
     * Returns the genotype-likelihoods of the sample given its index in the collection.
     *
     * @param sampleIndex the query sample index.
     *
     * @throws IllegalArgumentException if {@code sampleIndex} is not a valid index for this collection:
     *      [0,{@link #sampleCount()}).
     *
     * @return never {@code null}.
     */
    public GenotypeLikelihoods sampleLikelihoods(final int sampleIndex) {
        // Explicit range check so that the documented exception is thrown rather than an
        // ArrayIndexOutOfBoundsException from the array access below.
        if (sampleIndex < 0 || sampleIndex >= likelihoods.length)
            throw new IllegalArgumentException("invalid sample index " + sampleIndex
                    + "; it must be in [0," + likelihoods.length + ")");
        return likelihoods[sampleIndex];
    }

    @Override
    public int alleleCount() {
        return alleles.alleleCount();
    }

    @Override
    public int alleleIndex(final A allele) {
        return alleles.alleleIndex(allele);
    }

    @Override
    public A alleleAt(final int index) {
        return alleles.alleleAt(index);
    }
}

View File

@ -0,0 +1,75 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
/**
* Common interface for genotyping models.
* <p>
* Given the genotyping data (read-likelihoods and ploidy) and the set of alleles to genotype upon,
* a model calculates the corresponding genotype likelihoods.
* </p>
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public interface GenotypingModel {
/**
* Calculate genotype likelihoods given the genotype data and the set of
* alleles to genotype upon.
*
* @param genotypingAlleles the target alleles.
* @param data the data (read-likelihoods and ploidy) to genotype.
* @param <A> the allele type.
*
* @throws IllegalArgumentException if {@code data} or {@code genotypingAlleles} is {@code null},
* or {@code data} does not cover the requested alleles.
*
* @return never {@code null}.
*/
public <A extends Allele> GenotypingLikelihoods<A> calculateLikelihoods(final AlleleList<A> genotypingAlleles, final GenotypingData<A> data);
}

View File

@ -0,0 +1,116 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
/**
 * {@link PloidyModel} implementation tailored to work with a homogeneous constant ploidy
 * across samples and positions.
 * <p>
 * Sample-list queries are delegated to the {@link SampleList} provided at construction.
 * </p>
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class HomogeneousPloidyModel implements PloidyModel, SampleList {

    /** The samples covered by this model; never {@code null}. */
    private final SampleList sampleList;

    /** The ploidy shared by every sample; always greater than 0. */
    private final int ploidy;

    /**
     * Constructs a homogeneous ploidy model given the sample list and ploidy.
     *
     * @param samples the sample list.
     * @param ploidy the common ploidy for all samples in {@code samples}.
     *
     * @throws IllegalArgumentException if {@code samples} is {@code null},
     *    or ploidy is 0 or less.
     */
    public HomogeneousPloidyModel(final SampleList samples, final int ploidy) {
        // Fail fast on a null list, as documented, rather than with a late NullPointerException.
        if (samples == null)
            throw new IllegalArgumentException("the sample list cannot be null");
        if (ploidy <= 0)
            throw new IllegalArgumentException("does not support zero or negative ploidy: " + ploidy);
        this.ploidy = ploidy;
        sampleList = samples;
    }

    @Override
    public int sampleCount() {
        return sampleList.sampleCount();
    }

    @Override
    public String sampleAt(final int index) {
        return sampleList.sampleAt(index);
    }

    @Override
    public int sampleIndex(final String sample) {
        return sampleList.sampleIndex(sample);
    }

    /**
     * Returns the common ploidy after validating the sample index.
     *
     * @param sampleIndex the query sample index.
     *
     * @throws IllegalArgumentException if {@code sampleIndex} is outside [0,{@link #sampleCount()}).
     *
     * @return the ploidy passed at construction, always greater than 0.
     */
    @Override
    public int samplePloidy(final int sampleIndex) {
        checkSampleIndex(sampleIndex);
        return ploidy;
    }

    /**
     * Verifies that a sample index falls within [0,{@link #sampleCount()}).
     *
     * @param sampleIndex the index to check.
     *
     * @throws IllegalArgumentException if the index is negative or not less than the sample count.
     */
    private void checkSampleIndex(final int sampleIndex) {
        if (sampleIndex < 0)
            throw new IllegalArgumentException("the sample index cannot be negative: " + sampleIndex);
        if (sampleIndex >= sampleList.sampleCount())
            throw new IllegalArgumentException("the sample index is equal or larger than the sample count: " + sampleIndex + " >= " + sampleList.sampleCount());
    }

    /**
     * @return always {@code true}, since every sample shares the same ploidy.
     */
    @Override
    public boolean isHomogeneous() {
        return true;
    }

    /**
     * @return the common ploidy multiplied by the number of samples.
     */
    @Override
    public int totalPloidy() {
        return ploidy * sampleList.sampleCount();
    }
}

View File

@ -0,0 +1,176 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
* The infinite-population-genotyping assumes that samples belong to individuals taken at random
* from a very large population that mate at random.
* <p>
* Consequently genotypes calls between samples are totally independent conditional to the frequencies in
* the population they coming from. And genotypes should exhibit the ratios expected under HWE.
* </p>
* Therefore each sample genotype likelihoods can be considered to
* be independent from all other samples.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class InfiniteRandomMatingPopulationModel implements GenotypingModel {
private final int cachePloidyCapacity;
private final int cacheAlleleCountCapacity;
private ThreadLocal<GenotypeLikelihoodCalculator[][]> likelihoodCalculators;
/**
* Create a new infinite model instance.
*/
public InfiniteRandomMatingPopulationModel() {
this(10,50);
}
public InfiniteRandomMatingPopulationModel(final int calculatorCachePloidyCapacity, final int calculatorCacheAlleleCapacity) {
cachePloidyCapacity = calculatorCachePloidyCapacity;
cacheAlleleCountCapacity = calculatorCachePloidyCapacity;
likelihoodCalculators = new ThreadLocal<GenotypeLikelihoodCalculator[][]>( ) {
@Override
public GenotypeLikelihoodCalculator[][] initialValue() {
return new GenotypeLikelihoodCalculator[calculatorCachePloidyCapacity][calculatorCacheAlleleCapacity];
}
};
}
@Override
public <A extends Allele> GenotypingLikelihoods<A> calculateLikelihoods(final AlleleList<A> genotypingAlleles, final GenotypingData<A> data) {
if (genotypingAlleles == null)
throw new IllegalArgumentException("the allele cannot be null");
if (data == null)
throw new IllegalArgumentException("the genotyping data cannot be null");
final AlleleListPermutation<A> permutation = AlleleListUtils.permutation(data, genotypingAlleles);
final AlleleLikelihoodMatrixMapper<A> alleleLikelihoodMatrixMapper = AlleleLikelihoodMatrixMapper.newInstance(permutation);
final int sampleCount = data.sampleCount();
switch (sampleCount) {
case 0: return noSampleLikelihoods(permutation,data);
case 1: return singleSampleLikelihoods(genotypingAlleles,data,alleleLikelihoodMatrixMapper);
default:
final PloidyModel ploidyModel = data.ploidyModel();
return ploidyModel.isHomogeneous() ? multiSampleHomogeneousPloidyModelLikelihoods(genotypingAlleles, data, alleleLikelihoodMatrixMapper, sampleCount, ploidyModel)
: multiSampleHeterogeneousPloidyModelLikelihoods(genotypingAlleles, data, alleleLikelihoodMatrixMapper, sampleCount, ploidyModel);
}
}
private <A extends Allele> GenotypingLikelihoods<A> noSampleLikelihoods(final AlleleList<A> genotypingAlleles,
final GenotypingData<A> data) {
@SuppressWarnings("unchecked")
final List<GenotypeLikelihoods> likelihoods = Collections.EMPTY_LIST;
return new GenotypingLikelihoods<>(genotypingAlleles,data.ploidyModel(), likelihoods);
}
private <A extends Allele> GenotypingLikelihoods<A> singleSampleLikelihoods(final AlleleList<A> genotypingAlleles,
final GenotypingData<A> data,
final AlleleLikelihoodMatrixMapper<A> alleleLikelihoodMatrixMapper) {
final PloidyModel ploidyModel = data.ploidyModel();
final int samplePloidy = ploidyModel.samplePloidy(0);
final int alleleCount = genotypingAlleles.alleleCount();
final GenotypeLikelihoodCalculator likelihoodsCalculator = getLikelihoodsCalculator(samplePloidy,alleleCount);
final ReadLikelihoods.Matrix<A> sampleLikelihoods = alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(0));
final List<GenotypeLikelihoods> genotypeLikelihoods = Collections.singletonList(likelihoodsCalculator.genotypeLikelihoods(sampleLikelihoods));
return new GenotypingLikelihoods<>(genotypingAlleles,ploidyModel,genotypeLikelihoods);
}
private GenotypeLikelihoodCalculator getLikelihoodsCalculator(final int samplePloidy, final int alleleCount) {
if (samplePloidy >= cacheAlleleCountCapacity)
return GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount);
else if (alleleCount >= cacheAlleleCountCapacity)
return GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount);
final GenotypeLikelihoodCalculator[][] cache = likelihoodCalculators.get();
final GenotypeLikelihoodCalculator result = cache[samplePloidy][alleleCount];
return result != null ? result : (cache[samplePloidy][alleleCount] = GenotypeLikelihoodCalculators.getInstance(samplePloidy, alleleCount));
}
/**
 * Calculates the genotype likelihoods of every sample when the ploidy may differ between samples.
 * <p>
 * A likelihood calculator is looked up per sample, using that sample's own ploidy.
 * </p>
 *
 * @param genotypingAlleles the alleles to genotype.
 * @param data the genotyping data providing the per-sample read likelihoods.
 * @param alleleLikelihoodMatrixMapper maps each sample's read-likelihood matrix before it is handed
 *                                     to the likelihood calculator.
 * @param sampleCount number of samples to process; samples are visited by index from 0.
 * @param ploidyModel provides the ploidy of each sample.
 * @param <A> the allele type.
 * @return a result with one genotype-likelihoods entry per sample, in sample-index order.
 */
private <A extends Allele> GenotypingLikelihoods<A> multiSampleHeterogeneousPloidyModelLikelihoods(final AlleleList<A> genotypingAlleles,
                                                                                                   final GenotypingData<A> data,
                                                                                                   final AlleleLikelihoodMatrixMapper<A> alleleLikelihoodMatrixMapper,
                                                                                                   final int sampleCount,
                                                                                                   final PloidyModel ploidyModel) {
    final List<GenotypeLikelihoods> perSampleLikelihoods = new ArrayList<>(sampleCount);
    final int numberOfAlleles = genotypingAlleles.alleleCount();

    int sampleIndex = 0;
    while (sampleIndex < sampleCount) {
        // The ploidy can change from sample to sample, so the calculator is resolved on every pass.
        final int ploidy = ploidyModel.samplePloidy(sampleIndex);
        final GenotypeLikelihoodCalculator calculator = getLikelihoodsCalculator(ploidy, numberOfAlleles);
        final ReadLikelihoods.Matrix<A> mappedMatrix =
                alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(sampleIndex));
        perSampleLikelihoods.add(calculator.genotypeLikelihoods(mappedMatrix));
        sampleIndex++;
    }

    return new GenotypingLikelihoods<>(genotypingAlleles, ploidyModel, perSampleLikelihoods);
}
/**
 * Calculates the genotype likelihoods of every sample when all samples share the same ploidy.
 * <p>
 * The ploidy of sample 0 is taken as the common ploidy, so a single likelihood calculator is
 * resolved once and reused for all samples.
 * </p>
 *
 * @param genotypingAlleles the alleles to genotype.
 * @param data the genotyping data providing the per-sample read likelihoods.
 * @param alleleLikelihoodMatrixMapper maps each sample's read-likelihood matrix before it is handed
 *                                     to the likelihood calculator.
 * @param sampleCount number of samples to process; samples are visited by index from 0.
 * @param ploidyModel provides the common ploidy (read from sample 0).
 * @param <A> the allele type.
 * @return a result with one genotype-likelihoods entry per sample, in sample-index order.
 */
private <A extends Allele> GenotypingLikelihoods<A> multiSampleHomogeneousPloidyModelLikelihoods(final AlleleList<A> genotypingAlleles,
                                                                                                 final GenotypingData<A> data,
                                                                                                 final AlleleLikelihoodMatrixMapper<A> alleleLikelihoodMatrixMapper,
                                                                                                 final int sampleCount,
                                                                                                 final PloidyModel ploidyModel) {
    final int commonPloidy = ploidyModel.samplePloidy(0);
    final List<GenotypeLikelihoods> perSampleLikelihoods = new ArrayList<>(sampleCount);
    final int numberOfAlleles = genotypingAlleles.alleleCount();

    // One calculator serves every sample because ploidy and allele count do not vary.
    final GenotypeLikelihoodCalculator sharedCalculator = getLikelihoodsCalculator(commonPloidy, numberOfAlleles);

    int sampleIndex = 0;
    while (sampleIndex < sampleCount) {
        final ReadLikelihoods.Matrix<A> mappedMatrix =
                alleleLikelihoodMatrixMapper.map(data.readLikelihoods().sampleMatrix(sampleIndex));
        perSampleLikelihoods.add(sharedCalculator.genotypeLikelihoods(mappedMatrix));
        sampleIndex++;
    }

    return new GenotypingLikelihoods<>(genotypingAlleles, ploidyModel, perSampleLikelihoods);
}
}

View File

@ -0,0 +1,80 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
/**
 * Information about the number of chromosomes (the ploidy) assumed for each sample at a given location.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public interface PloidyModel extends SampleList {

    /**
     * Returns the assumed ploidy of a sample given its index.
     *
     * @param sampleIndex index of the target sample.
     * @return 0 or greater.
     */
    int samplePloidy(int sampleIndex);

    /**
     * Checks whether the ploidy is the same for every sample.
     *
     * @return {@code true} if all samples have the same ploidy.
     */
    boolean isHomogeneous();

    /**
     * Returns the total ploidy of the model.
     * <p>
     * It must be equal to the sum of {@link #samplePloidy(int)} over all samples.
     * </p>
     *
     * @return 0 or greater.
     */
    int totalPloidy();
}

View File

@ -46,6 +46,7 @@
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -199,14 +200,14 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
* Which annotations to add to the output VCF file. See the VariantAnnotator -list argument to view available annotations.
*/
@Argument(fullName="annotation", shortName="A", doc="One or more specific annotations to apply to variant calls", required=false)
protected List<String> annotationsToUse = new ArrayList<String>();
protected List<String> annotationsToUse = new ArrayList<>();
/**
* Which annotations to exclude from output in the VCF file. Note that this argument has higher priority than the -A or -G arguments,
* so annotations will be excluded even if they are explicitly included with the other options.
*/
@Argument(fullName="excludeAnnotation", shortName="XA", doc="One or more specific annotations to exclude", required=false)
protected List<String> annotationsToExclude = new ArrayList<String>();
protected List<String> annotationsToExclude = new ArrayList<>();
/**
* If specified, all available annotations in the group will be applied. See the VariantAnnotator -list argument to view available groups.
@ -218,11 +219,6 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
// the calculation arguments
private UnifiedGenotypingEngine genotypingEngine = null;
// the annotation engine
private VariantAnnotatorEngine annotationEngine;
private Set<String> samples;
// enable deletions in the pileup
@Override
public boolean includeReadsWithDeletionAtLoci() { return true; }
@ -256,17 +252,22 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
*
**/
public void initialize() {
super.initialize();
final GenomeAnalysisEngine toolkit = getToolkit();
final Set<String> sampleNameSet;
if ( UAC.TREAT_ALL_READS_AS_SINGLE_POOL ) {
samples.add(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
sampleNameSet = Collections.singleton(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
} else {
// get all of the unique sample names
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
sampleNameSet = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
if ( UAC.referenceSampleName != null )
samples.remove(UAC.referenceSampleName);
sampleNameSet.remove(UAC.referenceSampleName);
}
final SampleList samples = new IndexedSampleList(sampleNameSet);
if ( UAC.CONTAMINATION_FRACTION_FILE != null )
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, sampleNameSet, logger));
// check for a bad max alleles value
if ( UAC.genotypeArgs.MAX_ALTERNATE_ALLELES > GenotypeLikelihoods.MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED)
@ -282,8 +283,8 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tMLE\tMAP");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
genotypingEngine = new UnifiedGenotypingEngine(UAC, samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
genotypingEngine.setVerboseWriter(verboseWriter);
genotypingEngine.setAnnotationEngine(annotationEngine);
@ -298,11 +299,11 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
final Set<String> samplesForHeader;
if ( ! onlyEmitSamples.isEmpty() ) {
// make sure that onlyEmitSamples is a subset of samples
if ( ! samples.containsAll(onlyEmitSamples) )
if ( ! sampleNameSet.containsAll(onlyEmitSamples) )
throw new UserException.BadArgumentValue("onlyEmitSamples", "must be a strict subset of the samples in the BAM files but is wasn't");
samplesForHeader = onlyEmitSamples;
} else {
samplesForHeader = samples;
samplesForHeader = sampleNameSet;
}
writer.writeHeader(new VCFHeader(headerInfo, samplesForHeader));
}
@ -310,7 +311,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
public static Set<VCFHeaderLine> getHeaderInfo(final UnifiedArgumentCollection UAC,
final VariantAnnotatorEngine annotationEngine,
final DbsnpArgumentCollection dbsnp) {
Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
final Set<VCFHeaderLine> headerInfo = new HashSet<>();
// all annotation fields from VariantAnnotatorEngine
if ( annotationEngine != null )

View File

@ -45,6 +45,9 @@
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
@ -62,9 +65,6 @@ import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
import org.broadinstitute.gatk.utils.pileup.PileupElement;
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import java.io.PrintStream;
import java.lang.reflect.Constructor;
@ -88,7 +88,6 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
// the various loggers and writers
private PrintStream verboseWriter;
private final GenomeLocParser genomeLocParser;
private final boolean BAQEnabledOnCMDLine;
// ---------------------------------------------------------------------------------------------------------
@ -97,36 +96,42 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
//
// ---------------------------------------------------------------------------------------------------------
/**
* Constructs a new Unified-Genotyper engine.
* <p>The new engine won't emmit annotations, will use the full sample set and will not produce additional verbose
* output</p>
* Creates a new unified genotyping engine given the UG configuration parameters and the GA engine.
* @param configuration the UG configuration.
* @param toolkit the GA engine.
*
* @param toolkit reference to the enclosing genome analysis engine.
* @param configuration configuration object.
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}.
* @throws NullPointerException if either {@code configuration} or {@code toolkit} is {@code null}.
*/
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration) {
this(toolkit, configuration, null);
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
final GenomeAnalysisEngine toolkit) {
this(configuration,toolkit.getSampleList(),toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
}
/**
* Constructs a new Unified-Genotyper engine.
* Creates a new unified genotyping engine given the UG configuration parameters, the targeted set of samples,
* a genome location parser and the BAQ calculation mode.
*
* @param toolkit reference to the enclosing genome analysis engine.
* @param configuration configuration object.
* @param sampleNames subset of sample names to work on. If {@code null}, all it will use the {@code toolkit} full sample set.
* @param configuration the UG configuration.
* @param samples {@inheritDoc}
* @param baqCalculationMode the BAQ calculation mode.
*
* @throws IllegalArgumentException if either {@code toolkit} or {@code UAC} is {@code null}.
* @throws NullPointerException if any of {@code configuration}, {@code samples} or {@code genomeLocParser} is {@code null}.
*
* @throws IllegalArgumentException if {@code baqCalculationMode} is {@code null}.
*/
public UnifiedGenotypingEngine(final GenomeAnalysisEngine toolkit, final UnifiedArgumentCollection configuration,
final Set<String> sampleNames) {
public UnifiedGenotypingEngine(final UnifiedArgumentCollection configuration,
final SampleList samples, final GenomeLocParser genomeLocParser,
final BAQ.CalculationMode baqCalculationMode) {
super(toolkit,configuration,sampleNames);
super(configuration,samples,genomeLocParser);
this.BAQEnabledOnCMDLine = toolkit.getArguments().BAQMode != BAQ.CalculationMode.OFF;
genomeLocParser = toolkit.getGenomeLocParser();
if (baqCalculationMode == null)
throw new IllegalArgumentException("the BAQ calculation mode cannot be null");
this.BAQEnabledOnCMDLine = baqCalculationMode != BAQ.CalculationMode.OFF;
determineGLModelsToUse();
@ -302,7 +307,8 @@ public class UnifiedGenotypingEngine extends GenotypingEngine<UnifiedArgumentCol
final GenotypeLikelihoodsCalculationModel.Model model,
final Map<String, org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap> perReadAlleleLikelihoodMap) {
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine,
genomeLocParser != null || refContext == null ? genomeLocParser : refContext.getGenomeLocParser(), perReadAlleleLikelihoodMap);
}

View File

@ -48,10 +48,8 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import java.lang.reflect.Constructor;
import java.util.LinkedList;
@ -105,6 +103,24 @@ public class AFCalcFactory {
}
/**
 * Returns the calculation model to use by default.
 *
 * @return always {@link #EXACT_INDEPENDENT}.
 */
public static Calculation getDefaultModel() {
    return EXACT_INDEPENDENT;
}
/**
 * Returns the best (fastest) model given the required ploidy and alternative allele count.
 * <p>
 * Candidates are tried in this order and the first one that reports itself usable for the
 * parameters is returned: the preferred model (or the default model when none is given),
 * {@code EXACT_INDEPENDENT}, then {@code EXACT_REFERENCE}. If none of them is usable,
 * {@code EXACT_GENERAL_PLOIDY} is returned without any further check.
 * </p>
 *
 * @param requiredPloidy required ploidy.
 * @param requiredAlternativeAlleleCount required alternative allele count.
 * @param preferredModel the preferred model, if any. A {@code null} indicates that the default
 *                       model should be tried instead.
 * @return never {@code null}.
 */
public static Calculation getBestModel(final int requiredPloidy, final int requiredAlternativeAlleleCount, final Calculation preferredModel) {
    final Calculation firstChoice = preferredModel != null ? preferredModel : getDefaultModel();
    final Calculation[] candidates = { firstChoice, EXACT_INDEPENDENT, EXACT_REFERENCE };
    for (final Calculation candidate : candidates) {
        if (candidate.usableForParams(requiredPloidy, requiredAlternativeAlleleCount)) {
            return candidate;
        }
    }
    // Fallback of last resort; its usability for the parameters is not verified here.
    return EXACT_GENERAL_PLOIDY;
}
}
private static final Map<String, Class<? extends AFCalc>> afClasses;
@ -137,25 +153,10 @@ public class AFCalcFactory {
public static AFCalc createAFCalc(final StandardCallerArgumentCollection UAC,
final int nSamples,
final Logger logger) {
final int maxAltAlleles = UAC.genotypeArgs.MAX_ALTERNATE_ALLELES;
if ( ! UAC.AFmodel.usableForParams(UAC.genotypeArgs.samplePloidy, maxAltAlleles) ) {
logger.info("Requested ploidy " + UAC.genotypeArgs.samplePloidy + " maxAltAlleles " + maxAltAlleles + " not supported by requested model " + UAC.AFmodel + " looking for an option");
final List<Calculation> supportingCalculations = new LinkedList<Calculation>();
for ( final Calculation calc : Calculation.values() ) {
if ( calc.usableForParams(UAC.genotypeArgs.samplePloidy, maxAltAlleles) )
supportingCalculations.add(calc);
}
final Calculation afCalculationModel = Calculation.getBestModel(UAC.genotypeArgs.samplePloidy,UAC.genotypeArgs.MAX_ALTERNATE_ALLELES,
UAC.requestedAlleleFrequencyCalculationModel);
if ( supportingCalculations.isEmpty() )
throw new UserException("no AFCalculation model found that supports ploidy of " + UAC.genotypeArgs.samplePloidy + " and max alt alleles " + maxAltAlleles);
else if ( supportingCalculations.size() > 1 )
logger.debug("Warning, multiple supporting AFCalcs found " + Utils.join(",", supportingCalculations) + " choosing first arbitrarily");
else
UAC.AFmodel = supportingCalculations.get(0);
logger.info("Selecting model " + UAC.AFmodel);
}
final AFCalc calc = createAFCalc(UAC.AFmodel, nSamples, maxAltAlleles, UAC.genotypeArgs.samplePloidy);
final AFCalc calc = createAFCalc(afCalculationModel, nSamples, UAC.genotypeArgs.MAX_ALTERNATE_ALLELES, UAC.genotypeArgs.samplePloidy);
if ( logger != null ) calc.setLogger(logger);
if ( UAC.exactCallsLog != null ) calc.enableProcessLog(UAC.exactCallsLog);

View File

@ -56,7 +56,7 @@ import htsjdk.variant.variantcontext.*;
import java.util.*;
public class GeneralPloidyExactAFCalc extends ExactAFCalc {
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 10; // if PL vectors longer than this # of elements, don't log them
static final int MAX_LENGTH_FOR_POOL_PL_LOGGING = 100; // if PL vectors longer than this # of elements, don't log them
private final int ploidy;

View File

@ -47,6 +47,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.SeqGraph;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
@ -113,7 +114,7 @@ public class GraphBasedLikelihoodCalculationEngine implements ReadLikelihoodCalc
}
@Override
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final AssemblyResultSet assemblyResultSet, final List<String> samples, final Map<String, List<GATKSAMRecord>> perSampleReadList) {
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final AssemblyResultSet assemblyResultSet, final SampleList samples, final Map<String, List<GATKSAMRecord>> perSampleReadList) {
final GraphBasedLikelihoodCalculationEngineInstance graphLikelihoodEngine =
new GraphBasedLikelihoodCalculationEngineInstance(assemblyResultSet,
hmm,log10GlobalReadMismappingRate,heterogeneousKmerSizeResolution);

View File

@ -46,7 +46,11 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.AlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedAlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.MultiSampleEdge;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.Path;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.Route;
@ -61,7 +65,6 @@ import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.pairhmm.FlexibleHMM;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import java.util.*;
@ -233,12 +236,13 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
* @return never {@code null}, and with at least one entry for input sample (keys in {@code perSampleReadList}.
* The value maps can be potentially empty though.
*/
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final List<Haplotype> haplotypes, final List<String> samples,
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final List<Haplotype> haplotypes, final SampleList samples,
final Map<String, List<GATKSAMRecord>> perSampleReadList) {
// General preparation on the input haplotypes:
final ReadLikelihoods<Haplotype> result = new ReadLikelihoods<>(samples, haplotypes, perSampleReadList);
final List<Haplotype> sortedHaplotypes = new ArrayList<>(haplotypes);
Collections.sort(sortedHaplotypes, Haplotype.ALPHANUMERICAL_COMPARATOR);
final AlleleList<Haplotype> alleles = new IndexedAlleleList<>(sortedHaplotypes);
final ReadLikelihoods<Haplotype> result = new ReadLikelihoods<>(samples, alleles, perSampleReadList);
// The actual work:
final int sampleCount = result.sampleCount();
@ -315,7 +319,7 @@ public class GraphBasedLikelihoodCalculationEngineInstance {
private void calculatePerReadAlleleLikelihoodMapHaplotypeProcessing(final int haplotypeIndex,
final ReadLikelihoods.Matrix<Haplotype> likelihoods,
final Map<MultiDeBruijnVertex, Set<ReadSegmentCost>> costsEndingByVertex) {
final Haplotype haplotype = likelihoods.allele(haplotypeIndex);
final Haplotype haplotype = likelihoods.alleleAt(haplotypeIndex);
final HaplotypeRoute haplotypeRoute = haplotypeGraph.getHaplotypeRoute(haplotype);
final Set<MultiDeBruijnVertex> haplotypeVertices = haplotypeRoute.vertexSet();
final Map<GATKSAMRecord, ReadCost> readCostByRead = new HashMap<>();

View File

@ -52,6 +52,7 @@ import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
@ -67,12 +68,8 @@ import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalcFactory;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.ReadThreadingAssembler;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.SampleUtils;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegionReadState;
import org.broadinstitute.gatk.utils.activeregion.ActivityProfileState;
@ -606,7 +603,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// the minimum length of a read we'd consider using for genotyping
private final static int MIN_READ_LENGTH = 10;
private List<String> samplesList;
private SampleList samplesList;
private final static Allele FAKE_REF_ALLELE = Allele.create("N", true); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
private final static Allele FAKE_ALT_ALLELE = Allele.create("<FAKE_ALT>", false); // used in isActive function to call into UG Engine. Should never appear anywhere in a VCF file
@ -626,9 +623,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
public void initialize() {
super.initialize();
if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY)
throw new UserException.BadArgumentValue("-ploidy", "" + SCAC.genotypeArgs.samplePloidy + "; currently HaplotypeCaller only supports diploid sample analysis (-ploidy 2)");
if (dontGenotype && emitReferenceConfidence())
throw new UserException("You cannot request gVCF output and do not genotype at the same time");
@ -656,12 +650,9 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
logger.info("Disabling physical phasing, which is supported only for reference-model confidence output");
}
if ( SCAC.AFmodel == AFCalcFactory.Calculation.EXACT_GENERAL_PLOIDY )
throw new UserException.BadArgumentValue("pnrm", "HaplotypeCaller doesn't currently support " + SCAC.AFmodel);
samplesList = new ArrayList<>(SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader()));
Set<String> samplesSet = new LinkedHashSet<>(samplesList);
final GenomeAnalysisEngine toolkit = getToolkit();
samplesList = toolkit.getReadSampleList();
final Set<String> sampleSet = SampleListUtils.asSet(samplesList);
// create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
final UnifiedArgumentCollection simpleUAC = SCAC.cloneTo(UnifiedArgumentCollection.class);
@ -672,20 +663,24 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
simpleUAC.CONTAMINATION_FRACTION = 0.0;
simpleUAC.CONTAMINATION_FRACTION_FILE = null;
simpleUAC.exactCallsLog = null;
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(getToolkit(), simpleUAC, samplesSet);
// Seems that at least with some test data we can lose genuine haploid variation if we use
// UGs engine with ploidy == 1
simpleUAC.genotypeArgs.samplePloidy = Math.max(2,SCAC.genotypeArgs.samplePloidy);
activeRegionEvaluationGenotyperEngine = new UnifiedGenotypingEngine(simpleUAC, toolkit);
activeRegionEvaluationGenotyperEngine.setLogger(logger);
if( SCAC.CONTAMINATION_FRACTION_FILE != null )
SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, samplesSet, logger));
SCAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(SCAC.CONTAMINATION_FRACTION_FILE, SCAC.CONTAMINATION_FRACTION, sampleSet, logger));
if( SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES && consensusMode )
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
genotypingEngine = new HaplotypeCallerGenotypingEngine( getToolkit(), SCAC, !doNotRunPhysicalPhasing);
final GenomeLocParser genomeLocParser = toolkit.getGenomeLocParser();
genotypingEngine = new HaplotypeCallerGenotypingEngine( SCAC, samplesList, genomeLocParser, !doNotRunPhysicalPhasing);
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
Set<VCFHeaderLine> headerInfo = new HashSet<>();
final Set<VCFHeaderLine> headerInfo = new HashSet<>();
headerInfo.addAll(genotypingEngine.getAppropriateVCFInfoHeaders());
// all annotation fields from VariantAnnotatorEngine
@ -707,13 +702,20 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
headerInfo.add(new VCFFormatHeaderLine(HAPLOTYPE_CALLER_PHASING_GT_KEY, 1, VCFHeaderLineType.String, "Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"));
}
if (SCAC.genotypeArgs.samplePloidy != HomoSapiensConstants.DEFAULT_PLOIDY) {
if (SCAC.emitReferenceConfidence != ReferenceConfidenceMode.NONE)
throw new UserException.BadArgumentValue("ERC", "For now ploidies different that 2 are not allow for GVCF or BP_RESOLUTION outputs");
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Maximum likelihood expectation (MLE) for the alternate allele count, in the same order as listed, for each individual sample"));
headerInfo.add(new VCFFormatHeaderLine(VCFConstants.MLE_PER_SAMPLE_ALLELE_FRACTION_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Maximum likelihood expectation (MLE) for the alternate allele fraction, in the same order as listed, for each individual sample"));
}
// FILTER fields are added unconditionally as it's not always 100% certain the circumstances
// where the filters are used. For example, in emitting all sites the lowQual field is used
headerInfo.add(new VCFFilterHeaderLine(UnifiedGenotypingEngine.LOW_QUAL_FILTER_NAME, "Low quality"));
initializeReferenceConfidenceModel(samplesSet, headerInfo);
initializeReferenceConfidenceModel(samplesList, headerInfo);
vcfWriter.writeHeader(new VCFHeader(headerInfo, samplesSet));
vcfWriter.writeHeader(new VCFHeader(headerInfo, sampleSet));
try {
// fasta reference reader to supplement the edges of the reference sequence
@ -771,10 +773,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
SCAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES,emitReferenceConfidence());
}
private void initializeReferenceConfidenceModel(final Set<String> samples, final Set<VCFHeaderLine> headerInfo) {
private void initializeReferenceConfidenceModel(final SampleList samples, final Set<VCFHeaderLine> headerInfo) {
referenceConfidenceModel = new ReferenceConfidenceModel(getToolkit().getGenomeLocParser(), samples, getToolkit().getSAMFileHeader(), indelSizeToEliminateInRefModel);
if ( emitReferenceConfidence() ) {
if ( samples.size() != 1 ) throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
if ( samples.sampleCount() != 1 )
throw new UserException.BadArgumentValue("emitRefConfidence", "Can only be used in single sample mode currently");
headerInfo.addAll(referenceConfidenceModel.getVCFHeaderLines());
if ( SCAC.emitReferenceConfidence == ReferenceConfidenceMode.GVCF ) {
// a kluge to enforce the use of this indexing strategy
@ -784,7 +787,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
}
try {
vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands);
vcfWriter = new GVCFWriter(vcfWriter, GVCFGQBands,SCAC.genotypeArgs.samplePloidy);
} catch ( IllegalArgumentException e ) {
throw new UserException.BadArgumentValue("GQBands", "are malformed: " + e.getMessage());
}
@ -857,8 +860,12 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final Map<String, AlignmentContext> splitContexts = AlignmentContextUtils.splitContextBySampleName(context);
final GenotypesContext genotypes = GenotypesContext.create(splitContexts.keySet().size());
final MathUtils.RunningAverage averageHQSoftClips = new MathUtils.RunningAverage();
final GenotypingModel genotypingModel = genotypingEngine.getGenotypingModel();
for( final Map.Entry<String, AlignmentContext> sample : splitContexts.entrySet() ) {
final double[] genotypeLikelihoods = referenceConfidenceModel.calcGenotypeLikelihoodsOfRefVsAny(sample.getValue().getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, averageHQSoftClips).genotypeLikelihoods;
final String sampleName = sample.getKey();
// The ploidy here is not dictated by the sample but by the simple genotyping-engine used to determine whether regions are active or not.
final int activeRegionDetectionHackishSamplePloidy = activeRegionEvaluationGenotyperEngine.getConfiguration().genotypeArgs.samplePloidy;
final double[] genotypeLikelihoods = referenceConfidenceModel.calcGenotypeLikelihoodsOfRefVsAny(sampleName,activeRegionDetectionHackishSamplePloidy,genotypingModel,sample.getValue().getBasePileup(), ref.getBase(), MIN_BASE_QUALTY_SCORE, averageHQSoftClips).genotypeLikelihoods;
genotypes.add( new GenotypeBuilder(sample.getKey()).alleles(noCall).PL(genotypeLikelihoods).make() );
}
@ -969,8 +976,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
regionForGenotyping.getLocation(),
getToolkit().getGenomeLocParser(),
metaDataTracker,
( consensusMode ? Collections.<VariantContext>emptyList() : givenAlleles ),
emitReferenceConfidence() );
(consensusMode ? Collections.<VariantContext>emptyList() : givenAlleles),
emitReferenceConfidence());
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) {
@ -997,7 +1004,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
// output variant containing region.
result.addAll(referenceConfidenceModel.calculateRefConfidence(assemblyResult.getReferenceHaplotype(),
calledHaplotypes.getCalledHaplotypes(), assemblyResult.getPaddedReferenceLoc(), regionForGenotyping,
readLikelihoods, calledHaplotypes.getCalls()));
readLikelihoods, genotypingEngine.getPloidyModel(), genotypingEngine.getGenotypingModel(), calledHaplotypes.getCalls()));
// output right-flanking non-variant section:
if (trimmingResult.hasRightFlankingRegion())
result.addAll(referenceModelForNoVariation(trimmingResult.nonVariantRightFlankRegion(),false));
@ -1110,7 +1117,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
final List<Haplotype> haplotypes = Collections.singletonList(refHaplotype);
return referenceConfidenceModel.calculateRefConfidence(refHaplotype, haplotypes,
paddedLoc, region, createDummyStratifiedReadMap(refHaplotype, samplesList, region),
Collections.<VariantContext>emptyList());
genotypingEngine.getPloidyModel(), genotypingEngine.getGenotypingModel(), Collections.<VariantContext>emptyList());
} else
return NO_CALLS;
}
@ -1123,11 +1130,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
* @return a map from sample -> PerReadAlleleLikelihoodMap that maps each read to ref
*/
public static ReadLikelihoods<Haplotype> createDummyStratifiedReadMap(final Haplotype refHaplotype,
final List<String> samples,
final SampleList samples,
final ActiveRegion region) {
return new ReadLikelihoods<>(samples, Collections.singletonList(refHaplotype),
return new ReadLikelihoods<>(samples, new IndexedAlleleList<>(refHaplotype),
splitReadsBySample(samples, region.getReads()));
}
@ -1235,18 +1241,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
return splitReadsBySample(samplesList, reads);
}
public static Map<String, List<GATKSAMRecord>> splitReadsBySample( final List<String> samplesList, final Collection<GATKSAMRecord> reads ) {
private static Map<String, List<GATKSAMRecord>> splitReadsBySample( final SampleList samplesList, final Collection<GATKSAMRecord> reads ) {
final Map<String, List<GATKSAMRecord>> returnMap = new HashMap<>();
for( final String sample : samplesList) {
List<GATKSAMRecord> readList = returnMap.get( sample );
if( readList == null ) {
readList = new ArrayList<>();
returnMap.put(sample, readList);
}
}
for( final GATKSAMRecord read : reads ) {
final int sampleCount = samplesList.sampleCount();
for (int i = 0; i < sampleCount; i++)
returnMap.put(samplesList.sampleAt(i), new ArrayList<GATKSAMRecord>());
for( final GATKSAMRecord read : reads )
returnMap.get(read.getReadGroup().getSample()).add(read);
}
return returnMap;
}

View File

@ -45,9 +45,9 @@
*/
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Advanced;
import org.broadinstitute.gatk.utils.commandline.Argument;
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
/**
* Set of arguments for the {@link HaplotypeCaller}

View File

@ -48,12 +48,9 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode;
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.Utils;
@ -64,7 +61,6 @@ import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.haplotype.MergeVariantsAcrossHaplotypes;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import java.util.*;
@ -80,28 +76,33 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
private final boolean doPhysicalPhasing;
private final GenotypingModel genotypingModel;
private final PloidyModel ploidyModel;
/**
* {@inheritDoc}
* @param toolkit {@inheritDoc}
* @param configuration {@inheritDoc}
* @param samples {@inheritDoc}
* @param genomeLocParser {@inheritDoc}
* @param doPhysicalPhasing whether to try physical phasing.
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final boolean doPhysicalPhasing) {
super(toolkit,configuration);
this.doPhysicalPhasing = doPhysicalPhasing;
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser, final boolean doPhysicalPhasing) {
super(configuration,samples,genomeLocParser);
if (genomeLocParser == null)
throw new IllegalArgumentException("the genome location parser provided cannot be null");
this.doPhysicalPhasing= doPhysicalPhasing;
ploidyModel = new HomogeneousPloidyModel(samples,configuration.genotypeArgs.samplePloidy);
genotypingModel = new InfiniteRandomMatingPopulationModel();
}
/**
* {@inheritDoc}
* @param toolkit {@inheritDoc}
* @param configuration {@inheritDoc}
* @param sampleNames {@inheritDoc}
*/
public HaplotypeCallerGenotypingEngine(final GenomeAnalysisEngine toolkit, final HaplotypeCallerArgumentCollection configuration, final Set<String> sampleNames) {
super(toolkit,configuration,sampleNames);
doPhysicalPhasing = true;
public HaplotypeCallerGenotypingEngine(final HaplotypeCallerArgumentCollection configuration, final SampleList samples, final GenomeLocParser genomeLocParser) {
this(configuration,samples,genomeLocParser,false);
}
/**
* Change the merge variant across haplotypes for this engine.
*
@ -198,9 +199,9 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
if (readLikelihoods == null || readLikelihoods.sampleCount() == 0) throw new IllegalArgumentException("readLikelihoods input should be non-empty and non-null, got "+readLikelihoods);
if (ref == null || ref.length == 0 ) throw new IllegalArgumentException("ref bytes input should be non-empty and non-null, got " + Arrays.toString(ref));
if (refLoc == null || refLoc.size() != ref.length) throw new IllegalArgumentException(" refLoc must be non-null and length must match ref bytes, got "+refLoc);
if (activeRegionWindow == null ) throw new IllegalArgumentException("activeRegionWindow must be non-null, got "+activeRegionWindow);
if (activeAllelesToGenotype == null ) throw new IllegalArgumentException("activeAllelesToGenotype must be non-null, got "+activeAllelesToGenotype);
if (genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser must be non-null, got "+genomeLocParser);
if (activeRegionWindow == null ) throw new IllegalArgumentException("activeRegionWindow must be non-null");
if (activeAllelesToGenotype == null ) throw new IllegalArgumentException("activeAllelesToGenotype must be non-null");
if (genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser must be non-null");
// update the haplotypes so we're ready to call, getting the ordered list of positions on the reference
// that carry events among the haplotypes
@ -228,20 +229,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
final VariantContextBuilder vcb = new VariantContextBuilder(mergedVC);
if( mergedVC == null )
continue;
if( mergedVC == null ) { continue; }
final GenotypeLikelihoodsCalculationModel.Model calculationModel = mergedVC.isSNP()
? GenotypeLikelihoodsCalculationModel.Model.SNP : GenotypeLikelihoodsCalculationModel.Model.INDEL;
if (emitReferenceConfidence) {
final List<Allele> alleleList = new ArrayList<>();
alleleList.addAll(mergedVC.getAlleles());
alleleList.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
vcb.alleles(alleleList);
mergedVC = vcb.make();
}
if (emitReferenceConfidence)
mergedVC = addNonRefSymbolicAllele(mergedVC);
final Map<VariantContext, Allele> mergeMap = new LinkedHashMap<>();
mergeMap.put(null, mergedVC.getReference()); // the reference event (null) --> the reference allele
@ -264,7 +261,7 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
readAlleleLikelihoods.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
final GenotypesContext genotypes = calculateGLsForThisEvent( readAlleleLikelihoods, mergedVC );
final VariantContext call = calculateGenotypes(null,null,null,null,new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel, false,null);
final VariantContext call = calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), calculationModel);
if( call != null ) {
readAlleleLikelihoods = prepareReadAlleleLikelihoodsForAnnotation(readLikelihoods, perSampleFilteredReadList,
@ -494,6 +491,16 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return new VariantContextBuilder(vc).genotypes(phasedGenotypes).make();
}
/**
 * Builds a variant context based on {@code mergedVC} whose allele list is the original one
 * followed by the symbolic non-reference allele.
 *
 * @param mergedVC the variant context to extend.
 * @return the variant context produced by the builder with the extended allele list.
 */
private VariantContext addNonRefSymbolicAllele(final VariantContext mergedVC) {
    final List<Allele> currentAlleles = mergedVC.getAlleles();
    // Sized up-front: every existing allele plus the symbolic one appended last.
    final List<Allele> extendedAlleles = new ArrayList<>(currentAlleles.size() + 1);
    extendedAlleles.addAll(currentAlleles);
    extendedAlleles.add(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
    return new VariantContextBuilder(mergedVC).alleles(extendedAlleles).make();
}
// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
// used for genotyping.
private ReadLikelihoods<Allele> prepareReadAlleleLikelihoodsForAnnotation(
@ -653,22 +660,14 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
@Requires({"readLikelihoods!= null", "mergedVC != null"})
@Ensures("result != null")
private GenotypesContext calculateGLsForThisEvent( final ReadLikelihoods<Allele> readLikelihoods, final VariantContext mergedVC ) {
final GenotypesContext genotypes = GenotypesContext.create(readLikelihoods.sampleCount());
// Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
for (final String sample : readLikelihoods.samples() ) {
final int numHaplotypes = mergedVC.getAlleles().size();
final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
final double[][] haplotypeLikelihoodMatrix = PairHMMLikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, readLikelihoods, mergedVC.getAlleles(), true);
int glIndex = 0;
for( int iii = 0; iii < numHaplotypes; iii++ ) {
for( int jjj = 0; jjj <= iii; jjj++ ) {
genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
}
}
logger.debug(" Likelihoods for sample " + sample + " : " + Arrays.toString(genotypeLikelihoods));
genotypes.add(new GenotypeBuilder(sample).alleles(NO_CALL).PL(genotypeLikelihoods).make());
}
return genotypes;
final List<Allele> vcAlleles = mergedVC.getAlleles();
final AlleleList<Allele> alleleList = readLikelihoods.alleleCount() == vcAlleles.size() ? readLikelihoods : new IndexedAlleleList<>(vcAlleles);
final GenotypingLikelihoods<Allele> likelihoods = genotypingModel.calculateLikelihoods(alleleList,new GenotypingData<>(ploidyModel,readLikelihoods));
final int sampleCount = samples.sampleCount();
final GenotypesContext result = GenotypesContext.create(sampleCount);
for (int s = 0; s < sampleCount; s++)
result.add(new GenotypeBuilder(samples.sampleAt(s)).alleles(NO_CALL).PL(likelihoods.sampleLikelihoods(s).getAsPLs()).make());
return result;
}
/**
@ -779,4 +778,22 @@ public class HaplotypeCallerGenotypingEngine extends GenotypingEngine<HaplotypeC
return (vc == null ? -1 : vc.getAlleles().hashCode());
}
}
/**
 * Gives access to the ploidy model held by this genotyping engine.
 *
 * @return the engine's ploidy model; never {@code null}.
 */
public PloidyModel getPloidyModel() {
    return this.ploidyModel;
}
/**
 * Gives access to the genotyping model held by this genotyping engine.
 *
 * @return the engine's genotyping model; never {@code null}.
 */
public GenotypingModel getGenotypingModel() {
    return this.genotypingModel;
}
}

View File

@ -49,7 +49,11 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import htsjdk.samtools.SAMUtils;
import htsjdk.variant.variantcontext.Allele;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.tools.walkers.genotyper.AlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedAlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.QualityUtils;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -59,7 +63,6 @@ import org.broadinstitute.gatk.utils.pairhmm.*;
import org.broadinstitute.gatk.utils.recalibration.covariates.RepeatCovariate;
import org.broadinstitute.gatk.utils.recalibration.covariates.RepeatLengthCovariate;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.*;
import java.io.File;
import java.io.FileNotFoundException;
@ -249,12 +252,13 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
}
@Override
public ReadLikelihoods<Haplotype> computeReadLikelihoods( final AssemblyResultSet assemblyResultSet, final List<String> samples, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
public ReadLikelihoods<Haplotype> computeReadLikelihoods( final AssemblyResultSet assemblyResultSet, final SampleList samples, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
final List<Haplotype> haplotypes = assemblyResultSet.getHaplotypeList();
final List<Haplotype> haplotypeList = assemblyResultSet.getHaplotypeList();
final AlleleList<Haplotype> haplotypes = new IndexedAlleleList<>(haplotypeList);
// configure the HMM
initializePairHMM(haplotypes, perSampleReadList);
initializePairHMM(haplotypeList, perSampleReadList);
// Add likelihoods for each sample's reads to our result
final ReadLikelihoods<Haplotype> result = new ReadLikelihoods<>(samples, haplotypes, perSampleReadList);

View File

@ -46,11 +46,14 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.AlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedAlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import htsjdk.variant.variantcontext.Allele;
import java.util.HashMap;
import java.util.List;
@ -63,17 +66,15 @@ import java.util.Random;
public class RandomLikelihoodCalculationEngine implements ReadLikelihoodCalculationEngine {
@Override
public ReadLikelihoods computeReadLikelihoods(final AssemblyResultSet assemblyResultSet,
final List<String> samples,
public ReadLikelihoods<Haplotype> computeReadLikelihoods(final AssemblyResultSet assemblyResultSet,
final SampleList samples,
final Map<String, List<GATKSAMRecord>> reads) {
final List<Haplotype> haplotypes = assemblyResultSet.getHaplotypeList();
final AlleleList<Haplotype> haplotypes = new IndexedAlleleList<>(assemblyResultSet.getHaplotypeList());
final ReadLikelihoods result = new ReadLikelihoods(samples, haplotypes, reads);
final Map<Haplotype,Allele> alleles = new HashMap<>(haplotypes.size());
for (final Haplotype haplotype : haplotypes)
alleles.put(haplotype,Allele.create(haplotype,false));
final Map<Haplotype,Allele> alleles = new HashMap<>(haplotypes.alleleCount());
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
final int sampleCount = samples.size();
final int alleleCount = alleles.size();
final int sampleCount = samples.sampleCount();
final int alleleCount = haplotypes.alleleCount();
for (int i = 0; i < sampleCount; i++) {
final List<GATKSAMRecord> sampleReads = result.sampleReads(i);
final int readCount = sampleReads.size();

View File

@ -46,6 +46,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
@ -81,6 +82,7 @@ public interface ReadLikelihoodCalculationEngine {
* active region assembly process.
*
* @param assemblyResultSet the input assembly results.
* @param samples the list of targeted samples.
* @param perSampleReadList the input read sets stratified per sample.
*
* @throws NullPointerException if any parameter is {@code null}.
@ -88,7 +90,7 @@ public interface ReadLikelihoodCalculationEngine {
* @return never {@code null}, and with at least one entry for input sample (keys in {@code perSampleReadList}).
* The value maps can be potentially empty though.
*/
public ReadLikelihoods<Haplotype> computeReadLikelihoods(AssemblyResultSet assemblyResultSet, List<String> samples,
public ReadLikelihoods<Haplotype> computeReadLikelihoods(AssemblyResultSet assemblyResultSet, SampleList samples,
Map<String, List<GATKSAMRecord>> perSampleReadList);
public void close();

View File

@ -46,6 +46,8 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
/**
* Holds information about a genotype call of a single sample reference vs. any non-ref event
*
@ -58,7 +60,7 @@ final class RefVsAnyResult {
/**
* The genotype likelihoods for ref/ref ref/non-ref non-ref/non-ref
*/
final double[] genotypeLikelihoods = new double[3];
final double[] genotypeLikelihoods;
/**
* AD field value for ref / non-ref
@ -74,7 +76,31 @@ final class RefVsAnyResult {
* Cap the het and hom var likelihood values by the hom ref likelihood.
*/
protected void capByHomRefLikelihood() {
genotypeLikelihoods[1] = Math.min(genotypeLikelihoods[0], genotypeLikelihoods[1]);
genotypeLikelihoods[2] = Math.min(genotypeLikelihoods[0], genotypeLikelihoods[2]);
final int likelihoodCount = genotypeLikelihoods.length;
for (int i = 1; i < likelihoodCount; i++)
genotypeLikelihoods[i] = Math.min(genotypeLikelihoods[0],genotypeLikelihoods[i]);
}
/**
* Creates a new ref-vs-alt result assuming 3 as the number of genotype likelihoods (human ploidy.
*/
@Deprecated
public RefVsAnyResult() {
genotypeLikelihoods =
new double[(HomoSapiensConstants.DEFAULT_PLOIDY * (HomoSapiensConstants.DEFAULT_PLOIDY + 1)) >> 1];
}
/**
 * Creates a new ref-vs-alt result with a genotype likelihood array of the requested length.
 *
 * @param likelihoodCapacity the length of the likelihood array. It is expected to match the number of
 *                           possible genotypes given two alleles and the sample ploidy, minus any
 *                           genotypes the genotyping model rules out, if that applies.
 * @throws IllegalArgumentException if {@code likelihoodCapacity} is negative.
 */
public RefVsAnyResult(final int likelihoodCapacity) {
    if (likelihoodCapacity >= 0) {
        genotypeLikelihoods = new double[likelihoodCapacity];
    } else {
        throw new IllegalArgumentException("likelihood capacity is negative");
    }
}
}

View File

@ -51,6 +51,7 @@ import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFHeaderLine;
import htsjdk.variant.vcf.VCFSimpleHeaderLine;
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils;
@ -85,8 +86,8 @@ public class ReferenceConfidenceModel {
public final static String ALTERNATE_ALLELE_STRING = "ALT"; // arbitrary alternate allele
private final GenomeLocParser genomeLocParser;
private final Set<String> samples;
private final SAMFileHeader header; // TODO -- really shouldn't depend on this
private final SampleList samples;
private final int indelInformativeDepthIndelSize;
private final static boolean WRITE_DEBUGGING_BAM = false;
@ -103,18 +104,17 @@ public class ReferenceConfidenceModel {
* @param indelInformativeDepthIndelSize the max size of indels to consider when calculating indel informative depths
*/
public ReferenceConfidenceModel(final GenomeLocParser genomeLocParser,
final Set<String> samples,
final SampleList samples,
final SAMFileHeader header,
final int indelInformativeDepthIndelSize) {
if ( genomeLocParser == null ) throw new IllegalArgumentException("genomeLocParser cannot be null");
if ( samples == null ) throw new IllegalArgumentException("samples cannot be null");
if ( samples.isEmpty() ) throw new IllegalArgumentException("samples cannot be empty");
if ( samples.sampleCount() == 0) throw new IllegalArgumentException("samples cannot be empty");
if ( header == null ) throw new IllegalArgumentException("header cannot be empty");
if ( indelInformativeDepthIndelSize < 0) throw new IllegalArgumentException("indelInformativeDepthIndelSize must be >= 1 but got " + indelInformativeDepthIndelSize);
this.genomeLocParser = genomeLocParser;
this.samples = samples;
this.header = header;
this.indelInformativeDepthIndelSize = indelInformativeDepthIndelSize;
if ( WRITE_DEBUGGING_BAM ) {
@ -124,8 +124,6 @@ public class ReferenceConfidenceModel {
} else {
debuggingWriter = null;
}
initializeIndelPLCache();
}
/**
@ -151,7 +149,7 @@ public class ReferenceConfidenceModel {
/**
* Calculate the reference confidence for a single sample given its read data
*
* Returns a list of variant contexts, one for each position in the activeregion.getLoc(), each containing
* Returns a list of variant contexts, one for each position in the {@code activeRegion.getLoc()}, each containing
* detailed information about the certainty that the sample is hom-ref for each base in the region.
*
*
@ -162,6 +160,8 @@ public class ReferenceConfidenceModel {
* @param paddedReferenceLoc the location of refHaplotype (which might be larger than activeRegion.getLoc())
* @param activeRegion the active region we want to get the reference confidence over
* @param readLikelihoods a map from a single sample to its PerReadAlleleLikelihoodMap for each haplotype in calledHaplotypes
* @param ploidyModel indicates the ploidy of each sample in {@code readLikelihoods}.
* @param model genotyping model.
* @param variantCalls calls made in this region. The return result will contain any variant call in this list in the
* correct order by genomic position, and any variant in this list will stop us emitting a ref confidence
* under any position it covers (for snps and insertions that is 1 bp, but for deletions it's the entire ref span)
@ -173,6 +173,8 @@ public class ReferenceConfidenceModel {
final GenomeLoc paddedReferenceLoc,
final ActiveRegion activeRegion,
final ReadLikelihoods<Haplotype> readLikelihoods,
final PloidyModel ploidyModel,
final GenotypingModel model,
final List<VariantContext> variantCalls) {
if ( refHaplotype == null ) throw new IllegalArgumentException("refHaplotype cannot be null");
if ( calledHaplotypes == null ) throw new IllegalArgumentException("calledHaplotypes cannot be null");
@ -182,12 +184,15 @@ public class ReferenceConfidenceModel {
if ( readLikelihoods == null ) throw new IllegalArgumentException("readLikelihoods cannot be null");
if ( readLikelihoods.sampleCount() != 1 ) throw new IllegalArgumentException("readLikelihoods must contain exactly one sample but it contained " + readLikelihoods.sampleCount());
if ( refHaplotype.length() != activeRegion.getExtendedLoc().size() ) throw new IllegalArgumentException("refHaplotype " + refHaplotype.length() + " and activeRegion location size " + activeRegion.getLocation().size() + " are different");
if ( ploidyModel == null) throw new IllegalArgumentException("the ploidy model cannot be null");
if ( model == null) throw new IllegalArgumentException("the genotyping model cannot be null");
final int ploidy = ploidyModel.samplePloidy(0); // the first sample = the only sample in reference-confidence mode.
final GenomeLoc refSpan = activeRegion.getLocation();
final List<ReadBackedPileup> refPileups = getPileupsOverReference(refHaplotype, calledHaplotypes, paddedReferenceLoc, activeRegion, refSpan, readLikelihoods);
final byte[] ref = refHaplotype.getBases();
final List<VariantContext> results = new ArrayList<>(refSpan.size());
final String sampleName = readLikelihoods.sample(0);
final String sampleName = readLikelihoods.sampleAt(0);
final int globalRefOffset = refSpan.getStart() - activeRegion.getExtendedLoc().getStart();
for ( final ReadBackedPileup pileup : refPileups ) {
@ -201,20 +206,20 @@ public class ReferenceConfidenceModel {
// otherwise emit a reference confidence variant context
final int refOffset = offset + globalRefOffset;
final byte refBase = ref[refOffset];
final RefVsAnyResult homRefCalc = calcGenotypeLikelihoodsOfRefVsAny(pileup, refBase, (byte)6, null);
final RefVsAnyResult homRefCalc = calcGenotypeLikelihoodsOfRefVsAny(sampleName,ploidy,model,pileup, refBase, (byte)6, null);
homRefCalc.capByHomRefLikelihood();
final Allele refAllele = Allele.create(refBase, true);
final List<Allele> refSiteAlleles = Arrays.asList(refAllele, GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
final VariantContextBuilder vcb = new VariantContextBuilder("HC", curPos.getContig(), curPos.getStart(), curPos.getStart(), refSiteAlleles);
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Arrays.asList(refAllele, refAllele));
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, GATKVariantContextUtils.homozygousAlleleList(refAllele, ploidy));
gb.AD(homRefCalc.AD_Ref_Any);
gb.DP(homRefCalc.getDP());
// genotype likelihood calculation
final GenotypeLikelihoods snpGLs = GenotypeLikelihoods.fromLog10Likelihoods(homRefCalc.genotypeLikelihoods);
final int nIndelInformativeReads = calcNIndelInformativeReads(pileup, refOffset, ref, indelInformativeDepthIndelSize);
final GenotypeLikelihoods indelGLs = getIndelPLs(nIndelInformativeReads);
final GenotypeLikelihoods indelGLs = getIndelPLs(ploidy,nIndelInformativeReads);
// now that we have the SNP and indel GLs, we take the one with the least confidence,
// as this is the most conservative estimate of our certainty that we are hom-ref.
@ -251,23 +256,51 @@ public class ReferenceConfidenceModel {
* Get indel PLs corresponding to seeing N nIndelInformativeReads at this site
*
* @param nInformativeReads the number of reads that inform us about being ref without an indel at this site
* @param ploidy the requested ploidy.
* @return non-null GenotypeLikelihoods given N
*/
protected final GenotypeLikelihoods getIndelPLs(final int nInformativeReads) {
return indelPLCache[nInformativeReads > MAX_N_INDEL_INFORMATIVE_READS ? MAX_N_INDEL_INFORMATIVE_READS : nInformativeReads];
/**
 * Looks up the cached indel PLs for a sample of the given ploidy that has
 * {@code nInformativeReads} indel-informative reads at a site.
 *
 * @param ploidy the requested ploidy; must not exceed {@code MAX_N_INDEL_PLOIDY}.
 * @param nInformativeReads the number of reads that inform us about being ref without an indel at this site;
 *                          values above {@code MAX_N_INDEL_INFORMATIVE_READS} are treated as that maximum.
 * @throws IllegalArgumentException if {@code ploidy} is larger than {@code MAX_N_INDEL_PLOIDY}.
 * @return the cached GenotypeLikelihoods for that ploidy and (capped) read count.
 */
protected final GenotypeLikelihoods getIndelPLs(final int ploidy, final int nInformativeReads) {
    if (ploidy > MAX_N_INDEL_PLOIDY)
        throw new IllegalArgumentException("you have hit a current limitation of the GVCF output model that cannot handle ploidies larger than " + MAX_N_INDEL_PLOIDY + " , please let the GATK team know about it: " + ploidy);
    // Read counts beyond the cap all share the last cache entry.
    return indelPLCache(ploidy, Math.min(nInformativeReads, MAX_N_INDEL_INFORMATIVE_READS));
}
protected static final int MAX_N_INDEL_INFORMATIVE_READS = 40; // more than this is overkill because GQs are capped at 99 anyway
private static final GenotypeLikelihoods[] indelPLCache = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1];
// Largest ploidy for which indel PLs can be produced.
private static final int MAX_N_INDEL_PLOIDY = 20;
// Indexed directly by ploidy, so it needs MAX_N_INDEL_PLOIDY + 1 slots for the
// largest accepted ploidy (MAX_N_INDEL_PLOIDY itself) to be a valid index.
private static final GenotypeLikelihoods[][] indelPLCache = new GenotypeLikelihoods[MAX_N_INDEL_PLOIDY + 1][];
private static final double INDEL_ERROR_RATE = -4.5; // 10^-4.5 indel errors per bp
private void initializeIndelPLCache() {
for( int nInformativeReads = 0; nInformativeReads <= MAX_N_INDEL_INFORMATIVE_READS; nInformativeReads++ ) {
final double homRef = 0.0;
final double het = MathUtils.LOG_ONE_HALF * nInformativeReads;
final double homVar = INDEL_ERROR_RATE * nInformativeReads;
indelPLCache[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(new double[]{homRef, het, homVar});
/**
 * Fetches the indel PLs for a ploidy and read count, building the per-ploidy table first
 * if that ploidy's slot in the cache is still empty.
 *
 * @param ploidy index into the per-ploidy cache.
 * @param nInformativeReads index into the table for that ploidy.
 * @return the cached likelihoods at {@code [ploidy][nInformativeReads]}.
 */
private final GenotypeLikelihoods indelPLCache(final int ploidy, final int nInformativeReads) {
    final GenotypeLikelihoods[] cachedTable = indelPLCache[ploidy];
    final GenotypeLikelihoods[] table = cachedTable != null ? cachedTable : initializeIndelPLCache(ploidy);
    return table[nInformativeReads];
}
/**
 * Builds, stores and returns the indel PL table for one ploidy.
 *
 * <p>The table has one entry per informative read count from 0 to
 * {@code MAX_N_INDEL_INFORMATIVE_READS}; each entry holds {@code ploidy + 1} likelihoods,
 * one per number of alt allele copies in the genotype.</p>
 *
 * <p>The method is {@code static synchronized} because the cache it fills is a static field:
 * the lock must be shared by every instance of this class, not held per instance.</p>
 *
 * @param ploidy the ploidy whose table is requested; used as an index into the cache.
 * @return the table stored in the cache for {@code ploidy}, never {@code null}.
 */
private static synchronized GenotypeLikelihoods[] initializeIndelPLCache(final int ploidy) {
    // Re-check under the lock: another thread may have filled this slot while we waited.
    final GenotypeLikelihoods[] cached = indelPLCache[ploidy];
    if (cached != null)
        return cached;

    final double denominator = - MathUtils.Log10Cache.get(ploidy);

    // These depend only on the fixed indel error rate, so compute them once rather than per read count.
    final byte indelQual = (byte) Math.round((INDEL_ERROR_RATE * -10));
    final double refLikelihood = QualityUtils.qualToProbLog10(indelQual);
    final double altLikelihood = QualityUtils.qualToErrorProbLog10(indelQual);

    final GenotypeLikelihoods[] result = new GenotypeLikelihoods[MAX_N_INDEL_INFORMATIVE_READS + 1];
    // With no informative reads every genotype gets the same (zero) log10 likelihood.
    result[0] = GenotypeLikelihoods.fromLog10Likelihoods(new double[ploidy + 1]);
    for( int nInformativeReads = 1; nInformativeReads <= MAX_N_INDEL_INFORMATIVE_READS; nInformativeReads++ ) {
        final double[] PLs = new double[ploidy + 1];
        PLs[0] = nInformativeReads * refLikelihood;
        for (int altCount = 1; altCount <= ploidy; altCount++) {
            // Per-read likelihood is the mix of ref and alt weighted by their copy counts over the ploidy.
            final double refLikelihoodAccum = refLikelihood + MathUtils.Log10Cache.get(ploidy - altCount);
            final double altLikelihoodAccum = altLikelihood + MathUtils.Log10Cache.get(altCount);
            PLs[altCount] = nInformativeReads * (MathUtils.approximateLog10SumLog10(refLikelihoodAccum ,altLikelihoodAccum) + denominator);
        }
        result[nInformativeReads] = GenotypeLikelihoods.fromLog10Likelihoods(PLs);
    }
    indelPLCache[ploidy] = result;
    return result;
}
/**
@ -279,6 +312,7 @@ public class ReferenceConfidenceModel {
* @param hqSoftClips running average data structure (can be null) to collect information about the number of high quality soft clips
* @return a RefVsAnyResult genotype call
*/
@Deprecated
public RefVsAnyResult calcGenotypeLikelihoodsOfRefVsAny(final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final MathUtils.RunningAverage hqSoftClips) {
final RefVsAnyResult result = new RefVsAnyResult();
@ -305,6 +339,73 @@ public class ReferenceConfidenceModel {
return result;
}
/**
 * Computes, for one sample at one pileup position, the genotype likelihoods over the two-allele
 * list made of the reference base and the symbolic non-ref allele.
 *
 * <p>Each retained pileup element is scored as supporting exactly one of the two alleles; the
 * resulting per-read likelihoods are handed to {@code genotypingModel} to obtain the genotype
 * likelihoods for the requested ploidy. Non-deletion elements whose quality is at or below
 * {@code minBaseQual} are left out.</p>
 *
 * @param sampleName target sample name.
 * @param ploidy target sample ploidy.
 * @param genotypingModel model used to turn read likelihoods into genotype likelihoods.
 * @param pileup the read backed pileup containing the data we want to evaluate.
 * @param refBase the reference base at this pileup position.
 * @param minBaseQual non-deletion pileup elements with quality at or below this value are skipped.
 * @param hqSoftClips running average (can be null) that collects the number of high quality soft clips
 *                    of non-ref-supporting reads that are next to a soft clip.
 * @return a RefVsAnyResult holding the genotype likelihoods and the ref/non-ref allele depths.
 */
public RefVsAnyResult calcGenotypeLikelihoodsOfRefVsAny(final String sampleName, final int ploidy,
                                                        final GenotypingModel genotypingModel,
                                                        final ReadBackedPileup pileup, final byte refBase, final byte minBaseQual, final MathUtils.RunningAverage hqSoftClips) {
    // Index 0 is the reference allele, index 1 the symbolic non-ref allele.
    final AlleleList<Allele> refAndNonRef = new IndexedAlleleList<>(Allele.create(refBase,true),GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);

    // Upper bound on how many reads can be kept; the arrays below are sized with it.
    final int pileupReadCount = pileup.getReads().size();
    final List<GATKSAMRecord> keptReads = new ArrayList<>(pileupReadCount);
    final double[][] perAlleleLog10 = new double[2][pileupReadCount];
    final int[] alleleDepths = new int[2];

    int keptCount = 0;
    for (final PileupElement element : pileup) {
        final boolean deletion = element.isDeletion();
        final byte qual = deletion ? REF_MODEL_DELETION_QUAL : element.getQual();
        // Deletions are always kept; bases need a quality strictly above the threshold.
        if (!deletion && qual <= minBaseQual)
            continue;

        final GATKSAMRecord read = element.getRead();
        keptReads.add(read);

        final boolean supportsNonRef = element.getBase() != refBase || deletion || element.isBeforeDeletionStart()
                || element.isAfterDeletionEnd() || element.isBeforeInsertion() || element.isAfterInsertion() || element.isNextToSoftClip();
        final int supportedAllele = supportsNonRef ? 1 : 0;
        final int opposedAllele = 1 - supportedAllele;

        perAlleleLog10[supportedAllele][keptCount] = QualityUtils.qualToProbLog10(qual);
        perAlleleLog10[opposedAllele][keptCount] = QualityUtils.qualToErrorProbLog10(qual) + MathUtils.LOG_ONE_THIRD;
        keptCount++;
        alleleDepths[supportedAllele]++;

        if (supportsNonRef && hqSoftClips != null && element.isNextToSoftClip())
            hqSoftClips.add(AlignmentUtils.calcNumHighQualitySoftClips(read, (byte) 28));
    }

    // The same sample name must be used for the read map and for the sample list.
    final Map<String,List<GATKSAMRecord>> readsBySample = Collections.singletonMap(sampleName,keptReads);
    final ReadLikelihoods<Allele> readLikelihoods = new ReadLikelihoods<>(new IndexedSampleList(sampleName),refAndNonRef,readsBySample);
    final ReadLikelihoods.Matrix<Allele> matrix = readLikelihoods.sampleMatrix(0);
    final int matrixReadCount = matrix.readCount();
    for (int readIndex = 0; readIndex < matrixReadCount; readIndex++) {
        for (int alleleIndex = 0; alleleIndex < 2; alleleIndex++) {
            matrix.set(alleleIndex,readIndex,perAlleleLog10[alleleIndex][readIndex]);
        }
    }

    final PloidyModel ploidyModel = new HomogeneousPloidyModel(new IndexedSampleList(sampleName),ploidy);
    final GenotypingLikelihoods<Allele> genotypingLikelihoods = genotypingModel.calculateLikelihoods(refAndNonRef, new GenotypingData<>(ploidyModel, readLikelihoods));
    final double[] glVector = genotypingLikelihoods.sampleLikelihoods(0).getAsVector();

    final RefVsAnyResult result = new RefVsAnyResult(glVector.length);
    System.arraycopy(glVector,0,result.genotypeLikelihoods,0,glVector.length);
    System.arraycopy(alleleDepths,0,result.AD_Ref_Any,0,2);
    return result;
}
/**
* Get a list of pileups that span the entire active region span, in order, one for each position
*/
@ -330,7 +431,7 @@ public class ReferenceConfidenceModel {
debuggingWriter.addAlignment(read);
final LocusIteratorByState libs = new LocusIteratorByState(reads.iterator(), LocusIteratorByState.NO_DOWNSAMPLING,
true, genomeLocParser, samples, false);
true, genomeLocParser, SampleListUtils.asSet(samples), false);
final List<ReadBackedPileup> pileups = new LinkedList<>();
final int startPos = activeRegionSpan.getStart();

View File

@ -49,6 +49,9 @@ package org.broadinstitute.gatk.tools.walkers.indels;
import com.google.java.contract.Ensures;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
import org.broadinstitute.gatk.tools.walkers.genotyper.AlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedAlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.clipping.ReadClipper;
import org.broadinstitute.gatk.utils.exceptions.UserException;
@ -444,10 +447,10 @@ public class PairHMMIndelErrorModel {
// Apparently more than one allele can map to the same haplotype after trimming
final Set<Haplotype> distinctHaplotypesSet = new LinkedHashSet<>(trimmedHaplotypeMap.values());
final List<Haplotype> distinctHaplotypesList = Arrays.asList(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()]));
final AlleleList<Haplotype> distinctHaplotypesList = new IndexedAlleleList<>(distinctHaplotypesSet.toArray(new Haplotype[distinctHaplotypesSet.size()]));
// Get the likelihoods for our clipped read against each of our trimmed haplotypes.
final ReadLikelihoods<Haplotype> rl = new ReadLikelihoods<>(
Collections.singletonList("DUMMY_SAMPLE"),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead)));
new IndexedSampleList(Collections.singletonList("DUMMY_SAMPLE")),distinctHaplotypesList,Collections.singletonMap("DUMMY_SAMPLE",Collections.singletonList(processedRead)));
final ReadLikelihoods.Matrix<Haplotype> dummySampleLikelihoods = rl.sampleMatrix(0);
pairHMM.computeLikelihoods(rl.sampleMatrix(0), Collections.singletonList(processedRead), readGCPArrayMap);

View File

@ -46,6 +46,7 @@
package org.broadinstitute.gatk.tools.walkers.validation;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -349,13 +350,14 @@ public class GenotypeAndValidate extends RodWalker<GenotypeAndValidate.CountedDa
if (emitConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING = emitConf;
if (callConf >= 0) uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING = callConf;
final GenomeAnalysisEngine toolkit = getToolkit();
uac.GLmodel = GenotypeLikelihoodsCalculationModel.Model.SNP;
snpEngine = new UnifiedGenotypingEngine(getToolkit(), uac);
snpEngine = new UnifiedGenotypingEngine(uac,toolkit);
// Adding the INDEL calling arguments for UG
UnifiedArgumentCollection uac_indel = uac.clone();
uac_indel.GLmodel = GenotypeLikelihoodsCalculationModel.Model.INDEL;
indelEngine = new UnifiedGenotypingEngine(getToolkit(), uac_indel);
indelEngine = new UnifiedGenotypingEngine(uac_indel,toolkit);
// make sure we have callConf set to the threshold set by the UAC so we can use it later.
callConf = uac.genotypeArgs.STANDARD_CONFIDENCE_FOR_CALLING;

View File

@ -46,8 +46,12 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.CommandLineGATK;
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
@ -111,6 +115,7 @@ import java.util.*;
*/
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARDISC, extraDocs = {CommandLineGATK.class} )
@Reference(window=@Window(start=-10,stop=10))
@SuppressWarnings("unused")
public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWriter> implements AnnotatorCompatible, TreeReducible<VariantContextWriter> {
/**
@ -159,12 +164,13 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
for ( final RodBindingCollection<VariantContext> variantCollection : variantCollections )
variants.addAll(variantCollection.getRodBindings());
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(getToolkit(), variants);
final Set<String> samples = SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE);
final GenomeAnalysisEngine toolkit = getToolkit();
final Map<String, VCFHeader> vcfRods = GATKVCFUtils.getVCFHeadersFromRods(toolkit, variants);
final SampleList samples = new IndexedSampleList(SampleUtils.getSampleList(vcfRods, GATKVariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE));
// create the genotyping engine
genotypingEngine = new UnifiedGenotypingEngine(getToolkit(), createUAC(), samples);
genotypingEngine = new UnifiedGenotypingEngine(createUAC(), samples, toolkit.getGenomeLocParser(), toolkit.getArguments().BAQMode);
// create the annotation engine
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, getToolkit());
annotationEngine = new VariantAnnotatorEngine(Arrays.asList("none"), annotationsToUse, Collections.<String>emptyList(), this, toolkit);
// take care of the VCF headers
final Set<VCFHeaderLine> headerLines = VCFUtils.smartMergeHeaders(vcfRods.values(), true);
@ -179,7 +185,8 @@ public class GenotypeGVCFs extends RodWalker<VariantContext, VariantContextWrite
if ( dbsnp != null && dbsnp.dbsnp.isBound() )
VCFStandardHeaderLines.addStandardInfoLines(headerLines, true, VCFConstants.DBSNP_KEY);
final VCFHeader vcfHeader = new VCFHeader(headerLines, samples);
final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
final VCFHeader vcfHeader = new VCFHeader(headerLines, sampleNameSet);
vcfWriter.writeHeader(vcfHeader);
}

View File

@ -46,6 +46,10 @@
package org.broadinstitute.gatk.tools.walkers.variantutils;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
import org.broadinstitute.gatk.utils.commandline.Output;
import org.broadinstitute.gatk.engine.CommandLineGATK;
@ -120,14 +124,18 @@ public class RegenotypeVariants extends RodWalker<Integer, Integer> implements T
UAC.genotypingOutputMode = GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES;
String trackName = variantCollection.variants.getName();
Set<String> samples = SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName));
UG_engine = new UnifiedGenotypingEngine(getToolkit(), UAC, samples);
final GenomeAnalysisEngine toolkit = getToolkit();
final SampleList samples =
new IndexedSampleList(SampleUtils.getSampleListWithVCFHeader(getToolkit(), Arrays.asList(trackName)));
final Set<String> sampleNameSet = SampleListUtils.asSet(samples);
UG_engine = new UnifiedGenotypingEngine(UAC, samples,toolkit.getGenomeLocParser(),toolkit.getArguments().BAQMode);
final Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), Arrays.asList(trackName)));
hInfo.addAll(UnifiedGenotyper.getHeaderInfo(UAC, null, null));
vcfWriter.writeHeader(new VCFHeader(hInfo, samples));
vcfWriter.writeHeader(new VCFHeader(hInfo, sampleNameSet));
}
/**

View File

@ -0,0 +1,230 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.utils.collections;
import java.util.Arrays;
/**
 * Simple integer max-heap with quick look-up of the maximum value.
 *
 * <p>The heap is stored implicitly in an {@code int} array: the children of the element at
 * index {@code p} live at indices {@code 2p + 1} and {@code 2p + 2}, and every element is
 * greater than or equal to its children, so the maximum is always at index 0.</p>
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class IntMaxHeap {

    /** Number of elements currently in the heap; they occupy {@code values[0 .. size - 1]}. */
    private int size;

    /** Backing array; its length is the current capacity and is always at least 1. */
    private int[] values;

    /**
     * Creates a new empty heap indicating its initial capacity.
     *
     * @param initialCapacity number of elements you expect to have at most in the heap.
     *
     * @throws IllegalArgumentException if {@code initialCapacity} is negative.
     */
    public IntMaxHeap(final int initialCapacity) {
        if (initialCapacity < 0)
            throw new IllegalArgumentException("the initial capacity cannot be negative: " + initialCapacity);
        // Always allocate at least one slot so the backing array is never zero-length.
        values = new int[initialCapacity == 0 ? 1 : initialCapacity];
    }

    /**
     * Adds a new element to the heap.
     *
     * <p>The heap will grow if it runs out of capacity to hold the new element.</p>
     *
     * @param v the new element.
     */
    public void add(final int v) {
        ensureCapacity(size + 1);
        addWithoutCheckingCapacity(v);
    }

    /**
     * Implements the heap addition, floating the new value up until its parent is not smaller.
     *
     * @param v the value to add.
     */
    private void addWithoutCheckingCapacity(final int v) {
        int p = size++;
        values[p] = v;

        // Float up the recently added element:
        while (p > 0) {
            final int q = (p - 1) >> 1; // parent index.
            final int u = values[q];    // parent value.
            // The heap property holds once the parent is at least as large as v.
            if (u >= v)
                break;
            values[p] = u;
            values[q] = v;
            p = q;
        }
    }

    /**
     * Adds several integers to the heap.
     *
     * @param v values to add.
     *
     * @throws IllegalArgumentException if {@code v} is {@code null}.
     */
    public void add(final int ... v) {
        if (v == null)
            throw new IllegalArgumentException("the input array cannot be null");
        // Grow once for the whole batch rather than once per element.
        ensureCapacity(v.length + size);
        for (final int i : v)
            addWithoutCheckingCapacity(i);
    }

    /**
     * Makes sure the backing array can hold at least {@code newSize} elements.
     *
     * @param newSize the required capacity.
     */
    private void ensureCapacity(final int newSize) {
        if (newSize > values.length) {
            // Parentheses make the existing precedence explicit: '+' binds tighter than '<<'.
            final int grown = (10 + values.length) << 1;
            values = Arrays.copyOf(values, Math.max(newSize, grown));
        }
    }

    /**
     * Returns the current maximum element without removing it.
     *
     * @throws IllegalStateException if the heap is empty.
     *
     * @return the maximum element in the heap.
     */
    public int peek() {
        if (size == 0)
            throw new IllegalStateException("the heap is empty");
        return values[0];
    }

    /**
     * Returns the maximum element of the heap and removes it.
     *
     * @throws IllegalStateException if the heap is empty.
     *
     * @return the maximum element in the heap before removing it.
     */
    public int remove() {
        if (size == 0)
            throw new IllegalStateException("the heap is empty");
        final int result = values[0];
        removeUpdate();
        return result;
    }

    /**
     * Updates the heap after a removal, sinking the last element from the top down.
     */
    private void removeUpdate() {
        // If the removal leaves the heap empty there is nothing to rearrange.
        if (--size == 0)
            return;

        final int v = values[size]; // the last value, which takes the place of the removed root.
        int p = 0;
        values[p] = v;

        // limit := first index in the heap that does not have any descendants within the heap.
        final int limit = size >> 1;

        while (p < limit) {
            final int r = (p + 1) << 1; // right descendant index (no guarantee to be in the heap).
            final int l = r - 1;        // left descendant index (always in the heap since p < limit).

            int u = v; // will contain max(v, values[l], values[r]).
            int q = p; // will contain the index in {p, l, r} holding that maximum.

            // Check left descendant:
            final int lv = values[l];
            if (lv > u) {
                u = lv;
                q = l;
            }

            // Check right descendant, if it is within the heap:
            if (r < size) {
                final int rv = values[r];
                if (rv > u) {
                    u = rv;
                    q = r;
                }
            }

            // q == p when neither descendant is greater than v: v has reached its place.
            if (p == q)
                break;

            values[p] = u;
            values[q] = v;
            p = q;
        }
    }

    /**
     * Checks whether the heap is empty.
     *
     * @return {@code true} iff the heap is empty.
     */
    public boolean isEmpty() {
        return size == 0;
    }

    /**
     * Returns the current size of the heap.
     *
     * @return 0 or greater.
     */
    public int size() {
        return size;
    }

    /**
     * Removes all elements from the heap.
     */
    public void clear() {
        size = 0;
    }
}

View File

@ -46,15 +46,14 @@
package org.broadinstitute.gatk.utils.gvcf;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.*;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@ -70,9 +69,7 @@ public class GVCFWriter implements VariantContextWriter {
//
// static VCF field names
//
protected final static String BLOCK_SIZE_INFO_FIELD = "BLOCK_SIZE";
protected final static String MIN_DP_FORMAT_FIELD = "MIN_DP";
protected final static String MIN_GQ_FORMAT_FIELD = "MIN_GQ";
//
// Final fields initialized in constructor
@ -87,6 +84,7 @@ public class GVCFWriter implements VariantContextWriter {
String contigOfNextAvailableStart = null;
private String sampleName = null;
private HomRefBlock currentBlock = null;
private final int defaultPloidy;
/**
* Is the proposed GQ partitions well-formed?
@ -94,7 +92,7 @@ public class GVCFWriter implements VariantContextWriter {
* @param GQPartitions proposed GQ partitions
* @return a non-null string if something is wrong (string explains issue)
*/
protected static List<HomRefBlock> parsePartitions(final List<Integer> GQPartitions) {
protected static List<HomRefBlock> parsePartitions(final List<Integer> GQPartitions, final int defaultPloidy) {
if ( GQPartitions == null ) throw new IllegalArgumentException("GQpartitions cannot be null");
if ( GQPartitions.isEmpty() ) throw new IllegalArgumentException("GQpartitions cannot be empty");
@ -104,10 +102,10 @@ public class GVCFWriter implements VariantContextWriter {
if ( value == null ) throw new IllegalArgumentException("GQPartitions contains a null integer");
if ( value < lastThreshold ) throw new IllegalArgumentException("GQPartitions is out of order. Last is " + lastThreshold + " but next is " + value);
if ( value == lastThreshold ) throw new IllegalArgumentException("GQPartitions is equal elements: Last is " + lastThreshold + " but next is " + value);
result.add(new HomRefBlock(lastThreshold, value));
result.add(new HomRefBlock(lastThreshold, value,defaultPloidy));
lastThreshold = value;
}
result.add(new HomRefBlock(lastThreshold, Integer.MAX_VALUE));
result.add(new HomRefBlock(lastThreshold, Integer.MAX_VALUE,defaultPloidy));
return result;
}
@ -128,11 +126,13 @@ public class GVCFWriter implements VariantContextWriter {
*
* @param underlyingWriter the ultimate destination of the GVCF records
* @param GQPartitions a well-formed list of GQ partitions
* @param defaultPloidy the assumed ploidy for input variant context without one.
*/
public GVCFWriter(final VariantContextWriter underlyingWriter, final List<Integer> GQPartitions) {
public GVCFWriter(final VariantContextWriter underlyingWriter, final List<Integer> GQPartitions, final int defaultPloidy) {
if ( underlyingWriter == null ) throw new IllegalArgumentException("underlyingWriter cannot be null");
this.underlyingWriter = underlyingWriter;
this.GQPartitions = parsePartitions(GQPartitions);
this.GQPartitions = parsePartitions(GQPartitions,defaultPloidy);
this.defaultPloidy = defaultPloidy;
}
/**
@ -148,10 +148,6 @@ public class GVCFWriter implements VariantContextWriter {
header.addMetaDataLine(VCFStandardHeaderLines.getInfoLine(VCFConstants.END_KEY));
header.addMetaDataLine(new VCFFormatHeaderLine(MIN_DP_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum DP observed within the GVCF block"));
// These annotations are no longer standard
//header.addMetaDataLine(new VCFInfoHeaderLine(BLOCK_SIZE_INFO_FIELD, 1, VCFHeaderLineType.Integer, "Size of the homozygous reference GVCF block"));
//header.addMetaDataLine(new VCFFormatHeaderLine(MIN_GQ_FORMAT_FIELD, 1, VCFHeaderLineType.Integer, "Minimum GQ observed within the GVCF block"));
for ( final HomRefBlock partition : GQPartitions ) {
header.addMetaDataLine(partition.toVCFHeaderLine());
}
@ -188,27 +184,30 @@ public class GVCFWriter implements VariantContextWriter {
* @return a VariantContext to be emitted, or null if none is appropriate
*/
protected VariantContext addHomRefSite(final VariantContext vc, final Genotype g) {
if ( nextAvailableStart != -1 ) {
// don't create blocks while the hom-ref site falls before nextAvailableStart (for deletions)
if ( vc.getStart() <= nextAvailableStart && vc.getChr().equals(contigOfNextAvailableStart) ) {
if ( vc.getStart() <= nextAvailableStart && vc.getChr().equals(contigOfNextAvailableStart) )
return null;
}
// otherwise, reset to non-relevant
nextAvailableStart = -1;
contigOfNextAvailableStart = null;
}
if ( currentBlock == null ) {
currentBlock = createNewBlock(vc, g);
return null;
} else if ( currentBlock.withinBounds(g.getGQ()) ) {
final VariantContext result;
if (genotypeCanBeMergedInCurrentBlock(g)) {
currentBlock.add(vc.getStart(), g);
return null;
result = null;
} else {
final VariantContext result = blockToVCF(currentBlock);
result = blockToVCF(currentBlock);
currentBlock = createNewBlock(vc, g);
return result;
}
return result;
}
/**
 * Checks whether a genotype can be appended to the block that is currently being built.
 *
 * <p>
 * All of the following must hold: there is a current block, the genotype's GQ is within that block's
 * bounds, both have the same ploidy, and -- when both the block and the genotype carry PLs -- their
 * PL arrays have the same length.
 * </p>
 *
 * @param g the genotype to test.
 * @return {@code true} iff {@code g} can be merged into the current block.
 */
private boolean genotypeCanBeMergedInCurrentBlock(final Genotype g) {
    // No block in progress: nothing to merge into.
    if (currentBlock == null)
        return false;
    // The GQ must fall in the band covered by the block.
    if (!currentBlock.withinBounds(g.getGQ()))
        return false;
    // Blocks never mix ploidies.
    if (currentBlock.getPloidy() != g.getPloidy())
        return false;
    // If either side lacks PLs there is no PL length to reconcile.
    if (currentBlock.getMinPLs() == null || !g.hasPL())
        return true;
    return currentBlock.getMinPLs().length == g.getPL().length;
}
/**
@ -226,21 +225,20 @@ public class GVCFWriter implements VariantContextWriter {
* Convert a HomRefBlock into a VariantContext
*
* @param block the block to convert
* @return a VariantContext representing the gVCF encoding for this block
* @return a VariantContext representing the gVCF encoding for this block.
* It will return {@code null} if input {@code block} is {@code null}, indicating that there
* is no variant-context to be output into the VCF.
*/
private VariantContext blockToVCF(final HomRefBlock block) {
if ( block == null ) throw new IllegalArgumentException("block cannot be null");
if ( block == null ) return null;
final VariantContextBuilder vcb = new VariantContextBuilder(block.getStartingVC());
vcb.attributes(new HashMap<String, Object>(2)); // clear the attributes
vcb.stop(block.getStop());
vcb.attribute(VCFConstants.END_KEY, block.getStop());
// This annotation is no longer standard
//vcb.attribute(BLOCK_SIZE_INFO_FIELD, block.getSize());
// create the single Genotype with GQ and DP annotations
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, Collections.nCopies(2, block.getRef()));
final GenotypeBuilder gb = new GenotypeBuilder(sampleName, GATKVariantContextUtils.homozygousAlleleList(block.getRef(),block.getPloidy()));
gb.noAD().noPL().noAttributes(); // clear all attributes
gb.GQ(block.getMedianGQ());
gb.DP(block.getMedianDP());
@ -269,10 +267,12 @@ public class GVCFWriter implements VariantContextWriter {
break;
}
}
if ( partition == null ) throw new IllegalStateException("GQ " + g + " from " + vc + " didn't fit into any partition " + partition);
if ( partition == null )
throw new IllegalStateException("GQ " + g + " from " + vc + " didn't fit into any partition");
// create the block, add g to it, and return it for use
final HomRefBlock block = new HomRefBlock(vc, partition.getGQLowerBound(), partition.getGQUpperBound());
final HomRefBlock block = new HomRefBlock(vc, partition.getGQLowerBound(), partition.getGQUpperBound(), defaultPloidy);
block.add(vc.getStart(), g);
return block;
}

View File

@ -46,11 +46,11 @@
package org.broadinstitute.gatk.utils.gvcf;
import org.broadinstitute.gatk.utils.MathUtils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.broadinstitute.gatk.utils.MathUtils;
import java.util.ArrayList;
import java.util.List;
@ -75,6 +75,7 @@ final class HomRefBlock {
final private List<Integer> GQs = new ArrayList<>(100);
final private List<Integer> DPs = new ArrayList<>(100);
private final Allele ref;
private final int ploidy;
/**
* Create a new HomRefBlock
@ -83,7 +84,7 @@ final class HomRefBlock {
* @param minGQ the minGQ (inclusive) to use in this band
* @param maxGQ the maxGQ (exclusive) to use in this band
*/
public HomRefBlock(final VariantContext startingVC, int minGQ, int maxGQ) {
public HomRefBlock(final VariantContext startingVC, final int minGQ, final int maxGQ, final int defaultPloidy) {
if ( startingVC == null ) throw new IllegalArgumentException("startingVC cannot be null");
if ( minGQ > maxGQ ) throw new IllegalArgumentException("bad minGQ " + minGQ + " as its > maxGQ " + maxGQ);
@ -92,6 +93,7 @@ final class HomRefBlock {
this.ref = startingVC.getReference();
this.minGQ = minGQ;
this.maxGQ = maxGQ;
this.ploidy = startingVC.getMaxPloidy(defaultPloidy);
}
/**
@ -100,7 +102,7 @@ final class HomRefBlock {
* @param minGQ the minGQ (inclusive) to use in this band
* @param maxGQ the maxGQ (exclusive) to use in this band
*/
public HomRefBlock(int minGQ, int maxGQ) {
public HomRefBlock(final int minGQ, final int maxGQ, final int ploidy) {
if ( minGQ > maxGQ ) throw new IllegalArgumentException("bad minGQ " + minGQ + " as its > maxGQ " + maxGQ);
this.startingVC = null;
@ -108,6 +110,7 @@ final class HomRefBlock {
this.ref = null;
this.minGQ = minGQ;
this.maxGQ = maxGQ;
this.ploidy = ploidy;
}
/**
@ -119,19 +122,18 @@ final class HomRefBlock {
if ( ! g.hasGQ() ) throw new IllegalArgumentException("g must have GQ field");
if ( ! g.hasPL() ) throw new IllegalArgumentException("g must have PL field");
if ( pos != stop + 1 ) throw new IllegalArgumentException("adding genotype at pos " + pos + " isn't contiguous with previous stop " + stop);
if ( g.getPloidy() != ploidy)
throw new IllegalArgumentException("cannot add a genotype with a different ploidy: " + g.getPloidy() + " != " + ploidy);
if( minPLs == null ) { // if the minPLs vector has not been set yet, create it here by copying the provided genotype's PLs
if( minPLs == null )
minPLs = g.getPL();
else { // otherwise take the min with the provided genotype's PLs
final int[] PL = g.getPL();
if( PL.length == 3 ) {
minPLs = PL.clone();
}
} else { // otherwise take the min with the provided genotype's PLs
final int[] PL = g.getPL();
if( PL.length == 3 ) {
minPLs[0] = Math.min(minPLs[0], PL[0]);
minPLs[1] = Math.min(minPLs[1], PL[1]);
minPLs[2] = Math.min(minPLs[2], PL[2]);
}
if (PL.length != minPLs.length)
throw new IllegalStateException("trying to merge different PL array sizes: " + PL.length + " != " + minPLs.length);
for (int i = 0; i < PL.length; i++)
if (minPLs[i] > PL[i])
minPLs[i] = PL[i];
}
stop = pos;
GQs.add(Math.min(g.getGQ(), 99)); // cap the GQs by the max. of 99 emission
@ -182,4 +184,12 @@ final class HomRefBlock {
public VCFHeaderLine toVCFHeaderLine() {
return new VCFHeaderLine("GVCFBlock", "minGQ=" + getGQLowerBound() + "(inclusive),maxGQ=" + getGQUpperBound() + "(exclusive)");
}
/**
 * Returns the ploidy of this hom-ref block.
 *
 * @return the ploidy value assigned to this block when it was constructed.
 */
public int getPloidy() {
    return this.ploidy;
}
}

View File

@ -48,6 +48,9 @@ package org.broadinstitute.gatk.utils.haplotype;
import com.google.java.contract.Requires;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.tools.walkers.genotyper.AlleleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.AlleleListUtils;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.PairHMMLikelihoodCalculationEngine;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
@ -77,7 +80,9 @@ public class HaplotypeLDCalculator {
@SuppressWarnings("unchecked")
protected HaplotypeLDCalculator() {
haplotypes = Collections.emptyList();
readLikelihoods = new ReadLikelihoods<>((List<String>)Collections.EMPTY_LIST, (List<Haplotype>)Collections.EMPTY_LIST, Collections.EMPTY_MAP);
final AlleleList<Haplotype> alleleList = AlleleListUtils.emptyList();
readLikelihoods = new ReadLikelihoods<>(SampleListUtils.emptyList(),
alleleList, Collections.EMPTY_MAP);
}
public HaplotypeLDCalculator(final List<Haplotype> haplotypes, final ReadLikelihoods<Haplotype> haplotypeReadMap) {

View File

@ -0,0 +1,171 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.RandomDNA;
import org.testng.Assert;
import org.testng.SkipException;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
/**
 * Helper class for those unit-test classes that test implementations of {@link AlleleList}.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class AlleleListUnitTester {

    private static final Random rnd = GenomeAnalysisEngine.getRandomGenerator();

    private static final RandomDNA rndDNA = new RandomDNA(rnd);

    /**
     * Safe to assume that this allele will never be used.
     */
    private static final Allele NEVER_USE_ALLELE =
            Allele.create("ACTGACTGACTGACTGACTGACTGACTGACTGGTCAGTCAGTCAGTCAGTCAGTCA", false);

    /**
     * Test that the contents of an allele-list are the ones expected.
     * <p/>
     * <p>
     * This method performs various consistency checks involving all the {@link AlleleList} interface methods
     * used here ({@code alleleCount}, {@code alleleAt} and {@code alleleIndex}). Therefore calling this method
     * is a thorough check of the {@link AlleleList} aspect of the {@code actual} argument.
     * </p>
     *
     * @param actual the allele-list to assess.
     * @param expected the expected alleles, in the expected order.
     * @throws IllegalArgumentException if {@code expected} is {@code null}, contains {@code null}s,
     *                                  contains repeats or contains the reserved never-used allele;
     *                                  any of these is an indication of a bug in the testing code.
     * @throws RuntimeException if there is some testing assertion exception, which
     *                                  is an indication of an actual bug in the code that is being tested.
     */
    public static <A extends Allele> void assertAlleleList(final AlleleList<A> actual, final List<A> expected) {
        if (expected == null)
            throw new IllegalArgumentException("the expected list cannot be null");
        final Set<A> expectedAlleleSet = new HashSet<>(expected.size());
        Assert.assertNotNull(actual);
        Assert.assertEquals(actual.alleleCount(), expected.size());
        for (int i = 0; i < expected.size(); i++) {
            final A expectedAllele = expected.get(i);
            // Problems with the expected list are test-code bugs, hence reported as illegal arguments
            // rather than as assertion failures.
            if (expectedAllele == null)
                throw new IllegalArgumentException("the expected allele cannot be null");
            if (expectedAllele.equals(NEVER_USE_ALLELE))
                throw new IllegalArgumentException("you cannot use the forbidden allele");
            if (expectedAlleleSet.contains(expectedAllele))
                throw new IllegalArgumentException("repeated allele in the expected list, this is a test bug");
            final A actualAllele = actual.alleleAt(i);
            Assert.assertNotNull(actualAllele, "allele cannot be null");
            Assert.assertFalse(expectedAlleleSet.contains(actualAllele), "repeated allele: " + actualAllele);
            Assert.assertEquals(actualAllele, expectedAllele, "wrong allele order; index = " + i);
            Assert.assertEquals(actual.alleleIndex(actualAllele), i, "allele index mismatch");
            expectedAlleleSet.add(actualAllele);
        }
        // An allele that is not part of the list must be reported with index -1.
        // The cast is only needed to satisfy the generic signature of alleleIndex.
        @SuppressWarnings("unchecked")
        final A absentAllele = (A) NEVER_USE_ALLELE;
        Assert.assertEquals(actual.alleleIndex(absentAllele), -1);
    }

    /**
     * Generate testing alleles.
     *
     * <p>
     * Basically all are random alleles given the maximum allele length.
     * </p>
     *
     * <p>
     * So with a low max-allele-length and high allele-count you can force repeats.
     * </p>
     *
     * @param alleleCount number of alleles to generate.
     * @param maxAlleleLength the maximum length of the allele in bases.
     *
     * @throws IllegalArgumentException if {@code alleleCount} is negative or {@code maxAlleleLength} is less than 1.
     * @return never {@code null}.
     */
    public static Allele[] generateRandomAlleles(final int alleleCount, final int maxAlleleLength) {
        if (maxAlleleLength < 1)
            throw new IllegalArgumentException("the max allele length cannot be less than 1");
        if (alleleCount < 0)
            throw new IllegalArgumentException("the allele count cannot be negative");
        final Allele[] result = new Allele[alleleCount];
        for (int i = 0; i < alleleCount; i++) {
            // Lengths are drawn from [1, maxAlleleLength].
            final int alleleLength = rnd.nextInt(maxAlleleLength) + 1;
            result[i] = Allele.create(rndDNA.nextBases(alleleLength));
        }
        return result;
    }

    /**
     * Generate a testing allele-list.
     *
     * <p>
     * The alleles are random given the maximum allele length; when at least one allele is requested
     * the first one generated is turned into a reference allele.
     * </p>
     *
     * <p>
     * So with a low max-allele-length and high allele-count you can force repeats.
     * </p>
     *
     * @param alleleCount number of alleles to generate.
     * @param maxAlleleLength the maximum length of the allele in bases.
     * @param skipIfRepeats throw a test-skip exception {@link SkipException} if the resulting allele-list
     *                      has repeats, thus its size is less than {@code alleleCount}.
     *
     * @throws IllegalArgumentException if {@code alleleCount} is negative or {@code maxAlleleLength} is less than 1.
     * @throws SkipException if {@code skipIfRepeats} is {@code true} and the resulting list does not have
     *                       exactly {@code alleleCount} alleles.
     * @return never {@code null}.
     */
    static AlleleList<Allele> alleleList(final int alleleCount, final int maxAlleleLength, final boolean skipIfRepeats) {
        final Allele[] alleles = generateRandomAlleles(alleleCount, maxAlleleLength);
        if (alleleCount > 0)
            alleles[0] = Allele.create(alleles[0].getBases(), true);
        final AlleleList<Allele> alleleList = new IndexedAlleleList<>(alleles);
        // A size mismatch presumably means IndexedAlleleList collapsed repeated alleles.
        if (skipIfRepeats && alleleList.alleleCount() != alleles.length)
            throw new SkipException("repeated alleles, should be infrequent");
        return alleleList;
    }
}

View File

@ -0,0 +1,226 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.SkipException;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Test {@link org.broadinstitute.gatk.tools.walkers.genotyper.AlleleListUtils}.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class AlleleListUtilsUnitTest {
@Test(dataProvider = "singleAlleleListData")
public void testAsList(final List<Allele> alleles1) {
final Allele[] uniqueAlleles = new LinkedHashSet<>(alleles1).toArray(new Allele[0]);
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(alleles1);
final List<Allele> asList = AlleleListUtils.asList(alleleList);
final Allele[] asListArray = asList.toArray(new Allele[asList.size()]);
Assert.assertTrue(Arrays.equals(uniqueAlleles,asListArray));
}
@Test(dataProvider = "singleAlleleListData")
public void testIndexOfReference(final List<Allele> alleles1) {
final Allele[] uniqueAlleles = new LinkedHashSet<>(alleles1).toArray(new Allele[0]);
for (int i = 0; i < uniqueAlleles.length; i++) {
final Allele[] actualAlleles = uniqueAlleles.clone();
actualAlleles[i] = Allele.create(actualAlleles[i].getBases(),true);
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(actualAlleles);
Assert.assertEquals(AlleleListUtils.indexOfReference(alleleList),i);
}
final AlleleList<Allele> alleleList = new IndexedAlleleList<>(uniqueAlleles);
Assert.assertEquals(AlleleListUtils.indexOfReference(alleleList),-1);
}
@Test(dataProvider = "twoAlleleListData", dependsOnMethods={"testAsList"})
public void testEquals(final List<Allele> alleles1, final List<Allele> alleles2) {
final AlleleList<Allele> alleleList1 = new IndexedAlleleList<Allele>(alleles1);
final AlleleList<Allele> alleleList2 = new IndexedAlleleList<Allele>(alleles2);
Assert.assertTrue(AlleleListUtils.equals(alleleList1,alleleList1));
Assert.assertTrue(AlleleListUtils.equals(alleleList2,alleleList2));
Assert.assertEquals(AlleleListUtils.equals(alleleList1, alleleList2),
Arrays.equals(AlleleListUtils.asList(alleleList1).toArray(new Allele[alleleList1.alleleCount()]),
AlleleListUtils.asList(alleleList2).toArray(new Allele[alleleList2.alleleCount()]))
);
Assert.assertEquals(AlleleListUtils.equals(alleleList1,alleleList2),
AlleleListUtils.equals(alleleList2,alleleList1));
}
@Test(dataProvider = "singleAlleleListData", dependsOnMethods= "testEquals" )
public void testSelfPermutation(final List<Allele> alleles1) {
final AlleleList<Allele> originalAlleleList = new IndexedAlleleList<>(alleles1);
final AlleleListPermutation<Allele> selfPermutation = AlleleListUtils.permutation(originalAlleleList,originalAlleleList);
Assert.assertEquals(selfPermutation.fromSize(),originalAlleleList.alleleCount());
Assert.assertEquals(selfPermutation.toSize(),originalAlleleList.alleleCount());
Assert.assertTrue(selfPermutation.isNonPermuted());
Assert.assertFalse(selfPermutation.isPartial());
for (int i = 0; i < originalAlleleList.alleleCount(); i++) {
Assert.assertEquals(selfPermutation.fromIndex(i), i);
Assert.assertEquals(selfPermutation.toIndex(i),i);
Assert.assertEquals(selfPermutation.fromList(),selfPermutation.toList());
AlleleListUnitTester.assertAlleleList(originalAlleleList, selfPermutation.fromList());
}
Assert.assertTrue(AlleleListUtils.equals(selfPermutation,originalAlleleList));
}
private final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
@Test(dataProvider = "singleAlleleListData", dependsOnMethods = "testEquals")
public void testSubsetPermutation(final List<Allele> alleles1) {
final List<Allele> subsetAlleles = new ArrayList<>(alleles1.size());
for (final Allele allele : alleles1)
if (rnd.nextBoolean()) subsetAlleles.add(allele);
final AlleleList<Allele> originalAlleleList = new IndexedAlleleList<>(alleles1);
final AlleleList<Allele> targetAlleleList = new IndexedAlleleList<>(subsetAlleles);
final AlleleListPermutation<Allele> subset = AlleleListUtils.permutation(originalAlleleList,targetAlleleList);
if (originalAlleleList.alleleCount() == targetAlleleList.alleleCount())
throw new SkipException("no real subset");
Assert.assertTrue(subset.isPartial());
Assert.assertFalse(subset.isNonPermuted());
Assert.assertEquals(subset.fromSize(),originalAlleleList.alleleCount());
Assert.assertEquals(subset.toSize(),targetAlleleList.alleleCount());
AlleleListUnitTester.assertAlleleList(originalAlleleList,subset.fromList());
AlleleListUnitTester.assertAlleleList(targetAlleleList,subset.toList());
for (int i = 0; i < targetAlleleList.alleleCount(); i++)
Assert.assertEquals(subset.fromIndex(i), originalAlleleList.alleleIndex(targetAlleleList.alleleAt(i)));
for (int j = 0; j < originalAlleleList.alleleCount(); j++) {
final Allele allele = originalAlleleList.alleleAt(j);
Assert.assertEquals(subset.toIndex(j),targetAlleleList.alleleIndex(allele));
}
Assert.assertTrue(AlleleListUtils.equals(subset,targetAlleleList));
}
@Test(dataProvider = "singleAlleleListData", dependsOnMethods = {"testAsList","testEquals"})
public void testShufflePermutation(final List<Allele> alleles1) {
final AlleleList<Allele> originalAlleleList = new IndexedAlleleList<>(alleles1);
if (originalAlleleList.alleleCount() <= 1)
throw new SkipException("non-shuffle allele-list");
final Allele[] targetAlleleArray = AlleleListUtils.asList(originalAlleleList).toArray(new Allele[originalAlleleList.alleleCount()]);
final int[] fromIndex = new int[targetAlleleArray.length];
for (int i = 0; i < fromIndex.length; i++)
fromIndex[i] = i;
for (int i = 0; i < targetAlleleArray.length - 1; i++) {
final int swapIndex = rnd.nextInt(targetAlleleArray.length - i - 1);
final int otherIndex = fromIndex[swapIndex + i + 1];
final Allele other = targetAlleleArray[swapIndex + i + 1];
fromIndex[swapIndex + i + 1] = fromIndex[i];
fromIndex[i] = otherIndex;
targetAlleleArray[swapIndex + i + 1] = targetAlleleArray[i];
targetAlleleArray[i] = other;
}
final AlleleList<Allele> targetAlleleList = new IndexedAlleleList<>(targetAlleleArray);
final AlleleListPermutation<Allele> permutation = AlleleListUtils.permutation(originalAlleleList,targetAlleleList);
Assert.assertFalse(permutation.isNonPermuted());
AlleleListUnitTester.assertAlleleList(originalAlleleList,permutation.fromList());
AlleleListUnitTester.assertAlleleList(targetAlleleList,permutation.toList());
Assert.assertFalse(permutation.isPartial());
Assert.assertEquals(permutation.fromSize(),originalAlleleList.alleleCount());
Assert.assertEquals(permutation.toSize(),targetAlleleList.alleleCount());
for (int i = 0; i < permutation.fromSize(); i++) {
Assert.assertEquals(permutation.toIndex(i),targetAlleleList.alleleIndex(originalAlleleList.alleleAt(i)));
Assert.assertEquals(permutation.fromIndex(i),originalAlleleList.alleleIndex(targetAlleleList.alleleAt(i)));
Assert.assertEquals(permutation.fromIndex(i),fromIndex[i]);
}
Assert.assertTrue(AlleleListUtils.equals(permutation,targetAlleleList));
}
private List<Allele>[] alleleLists;
@BeforeClass
public void setUp() {
alleleLists = new List[ALLELE_COUNT.length * MAX_ALLELE_LENGTH.length];
int nextIndex = 0;
for (int i = 0; i < ALLELE_COUNT.length; i++)
for (int j = 0; j < MAX_ALLELE_LENGTH.length; j++)
alleleLists[nextIndex++] = Arrays.asList(AlleleListUnitTester.generateRandomAlleles(ALLELE_COUNT[i], MAX_ALLELE_LENGTH[j]));
}
private static final int[] ALLELE_COUNT = { 0, 1, 5, 10, 20};
private static final int[] MAX_ALLELE_LENGTH = { 1, 2, 3, 10 };
@DataProvider(name="singleAlleleListData")
public Object[][] singleAlleleListData() {
final Object[][] result = new Object[alleleLists.length][];
for (int i = 0; i < alleleLists.length; i++)
result[i] = new Object[] { alleleLists[i]};
return result;
}
@DataProvider(name="twoAlleleListData")
public Object[][] twoAlleleListData() {
final Object[][] result = new Object[alleleLists.length * alleleLists.length][];
int index = 0;
for (int i = 0; i < alleleLists.length; i++)
for (int j = 0; j < alleleLists.length; j++)
result[index++] = new Object[] { alleleLists[i], alleleLists[j]};
return result;
}
}

View File

@ -0,0 +1,329 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeAlleleCounts;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Test {@link org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeAlleleCounts}
 *
 * <p>
 * The {@code next()} tests and the in-place {@code increase()} tests verify the same
 * transition between two consecutive genotype allele counts, so both delegate to shared
 * assertion helpers that take the state before and after the step.
 * </p>
 *
 * <p>
 * NOTE(review): {@link #PLOIDY} currently contains no 0, so the zero-ploidy branches of
 * {@link #testNext(int)} and {@link #testIncrease(int)} are never reached through the data
 * provider. Adding 0 would also feed it to {@link #testFirst(int)}, which asserts one
 * distinct allele whereas the zero-ploidy helpers assert none; confirm the intended
 * behaviour of {@code first(0)} before enabling it.
 * </p>
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class GenotypeAlleleCountsUnitTest {

    /** Largest allele index explored; iteration stops once allele {@code MAXIMUM_ALLELE_INDEX + 1} appears. */
    private static final int MAXIMUM_ALLELE_INDEX = 10;

    /** Ploidies fed to every test through the {@code ploidyData} provider. */
    private static final int[] PLOIDY = new int[] { 1, 2, 3, 7, 10};

    /**
     * Checks the state of the first genotype allele counts for a ploidy: all copies on allele 0.
     *
     * @param ploidy the ploidy under test.
     */
    @Test(dataProvider = "ploidyData")
    public void testFirst(final int ploidy) {
        final GenotypeAlleleCounts subject = GenotypeAlleleCounts.first(ploidy);
        Assert.assertNotNull(subject);
        Assert.assertEquals(subject.ploidy(), ploidy);
        Assert.assertEquals(subject.distinctAlleleCount(), 1);
        Assert.assertEquals(subject.alleleCountAt(0), ploidy);
        Assert.assertEquals(subject.alleleCountFor(0), ploidy);
        Assert.assertEquals(subject.alleleRankFor(0), 0);
        Assert.assertEquals(subject.alleleRankFor(1), -2);
        Assert.assertTrue(subject.containsAllele(0));
        Assert.assertFalse(subject.containsAllele(1));
        Assert.assertEquals(subject.alleleIndexAt(0), 0);
        Assert.assertEquals(subject.maximumAlleleIndex(), 0);
        Assert.assertEquals(subject.minimumAlleleIndex(), 0);
        Assert.assertTrue(subject.compareTo(subject) == 0);
        Assert.assertTrue(subject.equals(subject));
        Assert.assertEquals(subject.index(), 0);
        for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) {
            final int[] expected = new int[maximumAlleleIndex + 1];
            expected[0] = ploidy;
            Assert.assertEquals(subject.alleleCountsByIndex(maximumAlleleIndex), expected);
        }
    }

    /**
     * Walks the genotype allele counts with {@code next()} and checks every transition.
     *
     * @param ploidy the ploidy under test.
     */
    @Test(dataProvider = "ploidyData", dependsOnMethods = "testFirst")
    public void testNext(final int ploidy) {
        if (ploidy == 0) {
            testNextZeroPloidy();
        } else if (ploidy == 1) {
            testNextOnePloidy();
        } else {
            testPloidyTwoOrMore(ploidy);
        }
    }

    /**
     * Walks the genotype allele counts with the in-place {@code increase()} and checks every transition.
     *
     * @param ploidy the ploidy under test.
     */
    @Test(dataProvider = "ploidyData", dependsOnMethods = "testNext")
    public void testIncrease(final int ploidy) {
        if (ploidy == 0) {
            testNextZeroPloidyIncrease();
        } else if (ploidy == 1) {
            testNextOnePloidyIncrease();
        } else {
            testPloidyTwoOrMoreIncrease(ploidy);
        }
    }

    /** Zero ploidy: {@code next()} is expected to yield an equal, empty genotype. */
    private void testNextZeroPloidy() {
        final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(0);
        final GenotypeAlleleCounts next = first.next();
        assertZeroPloidyStep(first, next);
    }

    /** Ploidy one: each {@code next()} moves the single copy to the following allele. */
    private void testNextOnePloidy() {
        GenotypeAlleleCounts current = GenotypeAlleleCounts.first(1);
        while (!current.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
            final GenotypeAlleleCounts next = current.next();
            assertOnePloidyStep(current, next);
            current = next;
        }
    }

    /**
     * Ploidy two or more: checks every {@code next()} transition.
     *
     * @param ploidy the ploidy under test.
     * @throws IllegalArgumentException if {@code ploidy} is less than 2.
     */
    private void testPloidyTwoOrMore(final int ploidy) {
        if (ploidy < 2) {
            throw new IllegalArgumentException();
        }
        GenotypeAlleleCounts current = GenotypeAlleleCounts.first(ploidy);
        while (!current.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
            final GenotypeAlleleCounts next = current.next();
            assertTwoOrMorePloidyStep(current, next, ploidy);
            current = next;
        }
    }

    /** Zero ploidy: {@code increase()} on a clone is expected to leave it equal to the original. */
    private void testNextZeroPloidyIncrease() {
        final GenotypeAlleleCounts first = GenotypeAlleleCounts.first(0);
        final GenotypeAlleleCounts next = first.clone();
        next.increase();
        assertZeroPloidyStep(first, next);
    }

    /** Ploidy one: each {@code increase()} moves the single copy to the following allele. */
    private void testNextOnePloidyIncrease() {
        final GenotypeAlleleCounts next = GenotypeAlleleCounts.first(1);
        while (!next.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
            // Snapshot the state before the in-place update so both sides can be compared.
            final GenotypeAlleleCounts current = next.clone();
            next.increase();
            assertOnePloidyStep(current, next);
        }
    }

    /**
     * Ploidy two or more: checks every {@code increase()} transition.
     *
     * @param ploidy the ploidy under test.
     * @throws IllegalArgumentException if {@code ploidy} is less than 2.
     */
    private void testPloidyTwoOrMoreIncrease(final int ploidy) {
        if (ploidy < 2) {
            throw new IllegalArgumentException();
        }
        final GenotypeAlleleCounts next = GenotypeAlleleCounts.first(ploidy);
        while (!next.containsAllele(MAXIMUM_ALLELE_INDEX + 1)) {
            // Snapshot the state before the in-place update so both sides can be compared.
            final GenotypeAlleleCounts current = next.clone();
            next.increase();
            assertTwoOrMorePloidyStep(current, next, ploidy);
        }
    }

    /**
     * Asserts that a zero-ploidy step left the genotype unchanged and empty.
     *
     * @param first the genotype before the step.
     * @param next the genotype after the step.
     */
    private static void assertZeroPloidyStep(final GenotypeAlleleCounts first, final GenotypeAlleleCounts next) {
        Assert.assertEquals(first, next);
        Assert.assertEquals(first.compareTo(next), 0);
        Assert.assertEquals(next.compareTo(first), 0);
        Assert.assertEquals(next.distinctAlleleCount(), 0);
        Assert.assertEquals(next.ploidy(), 0);
        Assert.assertEquals(next.index(), 0);
        for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) {
            final int[] expected = new int[maximumAlleleIndex + 1];
            Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex), expected);
        }
    }

    /**
     * Asserts a single step at ploidy one: the only copy moves from its allele to the next one.
     *
     * @param current the genotype before the step.
     * @param next the genotype after the step.
     */
    private static void assertOnePloidyStep(final GenotypeAlleleCounts current, final GenotypeAlleleCounts next) {
        Assert.assertEquals(next.minimumAlleleIndex(), next.maximumAlleleIndex());
        Assert.assertEquals(next.minimumAlleleIndex(), current.minimumAlleleIndex() + 1);
        Assert.assertEquals(next.alleleCountAt(0), 1);
        Assert.assertEquals(next.alleleIndexAt(0), next.minimumAlleleIndex());
        Assert.assertEquals(next.alleleRankFor(next.minimumAlleleIndex()), 0);
        Assert.assertEquals(next.alleleRankFor(next.minimumAlleleIndex() + 1), -2);
        Assert.assertEquals(next.alleleCountFor(next.minimumAlleleIndex()), 1);
        Assert.assertEquals(next.alleleCountFor(next.minimumAlleleIndex() + 1), 0);
        Assert.assertEquals(next.ploidy(), 1);
        assertStrictlyAfter(current, next);
        Assert.assertEquals(next.index(), current.index() + 1);
        Assert.assertEquals(next.ploidy(), current.ploidy());
        final int expectedAlleleIndex = current.minimumAlleleIndex() + 1;
        for (int maximumAlleleIndex = 0; maximumAlleleIndex <= MAXIMUM_ALLELE_INDEX; maximumAlleleIndex++) {
            final int[] expected = new int[maximumAlleleIndex + 1];
            // The single copy only shows up when the requested array is long enough to include its allele.
            if (maximumAlleleIndex >= expectedAlleleIndex) {
                expected[expectedAlleleIndex] = 1;
            }
            Assert.assertEquals(next.alleleCountsByIndex(maximumAlleleIndex), expected);
        }
    }

    /**
     * Asserts a single step at ploidy two or more.
     *
     * @param current the genotype before the step.
     * @param next the genotype after the step.
     * @param ploidy the ploidy both genotypes are expected to have.
     */
    private static void assertTwoOrMorePloidyStep(final GenotypeAlleleCounts current, final GenotypeAlleleCounts next,
                                                  final int ploidy) {
        final int currentMinimumAllele = current.minimumAlleleIndex();
        if (current.distinctAlleleCount() == 1) {
            Assert.assertEquals(next.maximumAlleleIndex(), current.maximumAlleleIndex() + 1);
            Assert.assertEquals(next.distinctAlleleCount(), 2);
            Assert.assertEquals(next.minimumAlleleIndex(), 0);
        } else {
            Assert.assertEquals(next.maximumAlleleIndex(), current.maximumAlleleIndex());
            final int lowestRankCount = current.alleleCountAt(0);
            final int expectedMinimumAllele;
            if (lowestRankCount > 1) {
                expectedMinimumAllele = 0;
            } else if (lowestRankCount == 1) {
                expectedMinimumAllele = currentMinimumAllele + 1;
            } else {
                expectedMinimumAllele = currentMinimumAllele;
            }
            Assert.assertEquals(next.minimumAlleleIndex(), expectedMinimumAllele);
        }

        // Checking on 0's new count and current.minAllele + 1 alleles.
        Assert.assertEquals(next.alleleCountFor(0), current.alleleCountFor(currentMinimumAllele) - 1);
        Assert.assertEquals(next.alleleCountFor(currentMinimumAllele + 1),
                current.alleleCountFor(currentMinimumAllele + 1) + 1);

        // Checks current.minAllele count
        Assert.assertEquals(next.alleleCountFor(currentMinimumAllele),
                currentMinimumAllele == 0 ? current.alleleCountAt(0) - 1 : 0);

        int totalCountSum = 0;
        final int countsMaximumIndex = Math.max(MAXIMUM_ALLELE_INDEX, next.maximumAlleleIndex());
        final int[] expectedAlleleCountsByIndex = new int[countsMaximumIndex + 1];
        for (int i = 0; i < next.distinctAlleleCount(); i++) {
            final int count = next.alleleCountAt(i);
            final int index = next.alleleIndexAt(i);
            expectedAlleleCountsByIndex[index] = count;
            // Check consistency of alleleCountAt(x) and alleleCountFor(alleleIndexAt(x))
            Assert.assertEquals(next.alleleCountFor(index), count);
            totalCountSum += count;
            // Check on counts of, in theory, unaffected allele counts.
            if (index > currentMinimumAllele + 1) {
                Assert.assertEquals(next.alleleCountFor(index), current.alleleCountFor(index));
            }
        }
        Assert.assertEquals(next.alleleCountsByIndex(countsMaximumIndex), expectedAlleleCountsByIndex);
        Assert.assertEquals(totalCountSum, ploidy);
        assertStrictlyAfter(current, next);
        Assert.assertEquals(next.index(), current.index() + 1);
        Assert.assertEquals(next.ploidy(), ploidy);
    }

    /**
     * Asserts that {@code next} compares strictly greater than {@code current} and that
     * {@code equals} agrees with that ordering in both directions.
     *
     * @param current the genotype before the step.
     * @param next the genotype after the step.
     */
    private static void assertStrictlyAfter(final GenotypeAlleleCounts current, final GenotypeAlleleCounts next) {
        Assert.assertTrue(next.compareTo(current) > 0);
        Assert.assertTrue(current.compareTo(next) < 0);
        Assert.assertTrue(next.compareTo(next) == 0);
        Assert.assertTrue(next.equals(next));
        Assert.assertFalse(next.equals(current));
        Assert.assertFalse(current.equals(next));
    }

    /**
     * Provides each ploidy in {@link #PLOIDY} as a single-argument row.
     *
     * @return one row per ploidy.
     */
    @DataProvider(name = "ploidyData")
    public Object[][] ploidyData() {
        final Object[][] result = new Object[PLOIDY.length][];
        for (int i = 0; i < PLOIDY.length; i++) {
            result[i] = new Object[] { PLOIDY[i] };
        }
        return result;
    }
}

View File

@ -0,0 +1,172 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Tests {@link org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculators} and {@link org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodCalculator}.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class GenotypeLikelihoodCalculatorUnitTest {

    /**
     * Maximum number of genotypes inspected per calculator; larger ploidy / allele-count
     * combinations are only checked on their first genotypes.
     */
    private static final int MAXIMUM_TESTED_GENOTYPE_COUNT = 30000;

    private static final int[] MAXIMUM_ALLELE = new int[] { 1, 2, 5, 6 };

    private static final int[] PLOIDY = new int[] { 1, 2, 3, 20 };

    private static final int[][] READ_COUNTS = new int[][] {
            { 10 , 100, 50 },
            { 0, 100, 10, 1 , 50 },
            { 1, 2, 3, 4, 20 },
            { 10, 0 },
    };

    /**
     * Checks that a calculator reports the requested ploidy and allele count, the expected
     * number of genotypes, and that its genotype enumeration is strictly increasing and
     * consistent with both index look-up methods.
     *
     * @param ploidy the ploidy requested from the calculator factory.
     * @param alleleCount the number of alleles requested from the calculator factory.
     */
    @Test(dataProvider = "ploidyAndMaximumAlleleData")
    public void testPloidyAndMaximumAllele(final int ploidy, final int alleleCount) {
        final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, alleleCount);

        Assert.assertNotNull(calculator);
        Assert.assertEquals(calculator.ploidy(),ploidy);
        Assert.assertEquals(calculator.alleleCount(), alleleCount);
        Assert.assertEquals(calculator.genotypeCount(),calculateGenotypeCount(ploidy, alleleCount)," ploidy = " + ploidy + " alleleCount = " + alleleCount);

        final int genotypeCount = calculator.genotypeCount();
        final int testGenotypeCount = Math.min(MAXIMUM_TESTED_GENOTYPE_COUNT, genotypeCount);
        for (int i = 0; i < testGenotypeCount; i++) {
            final GenotypeAlleleCounts alleleCounts = calculator.genotypeAlleleCountsAt(i);
            Assert.assertNotNull(alleleCounts);

            // Consecutive genotypes must compare as strictly increasing.
            if (i > 0) {
                Assert.assertTrue(calculator.genotypeAlleleCountsAt(i - 1).compareTo(alleleCounts) < 0);
            }

            // Expand the (allele index, count) pairs into a flat array with one entry per chromosome copy.
            final int[] alleleArray = new int[ploidy];
            int index = 0;
            for (int j = 0; j < alleleCounts.distinctAlleleCount(); j++) {
                final int end = index + alleleCounts.alleleCountAt(j);
                Arrays.fill(alleleArray, index, end, alleleCounts.alleleIndexAt(j));
                index = end;
            }

            // Two slots per distinct allele, filled in by the genotype itself.
            final int[] alleleCountArray = new int[alleleCounts.distinctAlleleCount() << 1];
            alleleCounts.copyAlleleCounts(alleleCountArray,0);

            // The per-allele counts must add up to the ploidy, and both encodings must map back to i.
            Assert.assertEquals(index,ploidy);
            Assert.assertEquals(calculator.allelesToIndex(alleleArray),i);
            Assert.assertEquals(calculator.alleleCountsToIndex(alleleCountArray),i);
        }
    }

    /**
     * Compares the genotype likelihoods produced by the calculator against a straightforward
     * re-computation from the per-read allele likelihoods of every sample.
     *
     * @param ploidy the ploidy requested from the calculator factory.
     * @param alleleCount the number of alleles requested from the calculator factory.
     * @param readCount number of reads for each sample; its length is the sample count.
     */
    @Test(dataProvider = "ploidyAndMaximumAlleleAndReadCountsData", dependsOnMethods = "testPloidyAndMaximumAllele")
    public void testLikelihoodCalculation(final int ploidy, final int alleleCount, final int[] readCount) {
        final ReadLikelihoods<Allele> readLikelihoods = ReadLikelihoodsUnitTester.readLikelihoods(alleleCount,readCount);
        final GenotypeLikelihoodCalculator calculator = GenotypeLikelihoodCalculators.getInstance(ploidy, alleleCount);
        final int genotypeCount = calculator.genotypeCount();
        final int testGenotypeCount = Math.min(MAXIMUM_TESTED_GENOTYPE_COUNT, genotypeCount);
        final int sampleCount = readCount.length;

        for (int s = 0; s < sampleCount ; s++) {
            final ReadLikelihoods.Matrix<Allele> sampleLikelihoods = readLikelihoods.sampleMatrix(s);
            final GenotypeLikelihoods genotypeLikelihoods = calculator.genotypeLikelihoods(sampleLikelihoods);
            // Check the result before it is dereferenced so a null shows up as an assertion failure.
            Assert.assertNotNull(genotypeLikelihoods);
            final double[] genotypeLikelihoodsDoubles = genotypeLikelihoods.getAsVector();
            Assert.assertEquals(genotypeLikelihoodsDoubles.length,genotypeCount);

            for (int i = 0; i < testGenotypeCount; i++) {
                final GenotypeAlleleCounts genotypeAlleleCounts = calculator.genotypeAlleleCountsAt(i);
                Assert.assertNotNull(genotypeAlleleCounts);

                final int distinctAlleleCount = genotypeAlleleCounts.distinctAlleleCount();
                final double[] readGenotypeLikelihoods = new double[sampleLikelihoods.readCount()];
                for (int r = 0; r < sampleLikelihoods.readCount(); r++) {
                    // One term per distinct allele: the read-vs-allele value plus log10 of the allele's copy number.
                    final double[] components = new double[distinctAlleleCount];
                    for (int ar = 0; ar < distinctAlleleCount; ar++) {
                        final int a = genotypeAlleleCounts.alleleIndexAt(ar);
                        final int aCount = genotypeAlleleCounts.alleleCountAt(ar);
                        final double readLk = sampleLikelihoods.get(a, r);
                        components[ar] = readLk + Math.log10(aCount);
                    }
                    // Combine the terms in log10 space and normalize by the ploidy.
                    readGenotypeLikelihoods[r] = MathUtils.approximateLog10SumLog10(components) - Math.log10(ploidy);
                }

                // The genotype value is the sum of the per-read values.
                final double genotypeLikelihood = MathUtils.sum(readGenotypeLikelihoods);
                Assert.assertEquals(genotypeLikelihoodsDoubles[i], genotypeLikelihood, 0.0001);
            }
        }
    }

    /**
     * Simple inefficient calculation of the genotype count given the ploidy and allele count.
     *
     * <p>
     * Ploidy 0, 1 and 2 are answered directly; any other ploidy recurses on
     * {@code count(ploidy - 1, alleleCount) + count(ploidy, alleleCount - 1)} until one of
     * the base cases is reached.
     * </p>
     *
     * @param ploidy the ploidy.
     * @param alleleCount the number of alleles.
     * @return the expected number of genotypes.
     */
    private int calculateGenotypeCount(final int ploidy, final int alleleCount) {
        if (ploidy == 0)
            return 0;
        else if (ploidy == 1)
            return alleleCount;
        else if (ploidy == 2)
            return ((alleleCount) * (alleleCount + 1)) >> 1;
        else if (alleleCount == 0)
            return 0;
        else {
            return calculateGenotypeCount(ploidy - 1, alleleCount) +
                    calculateGenotypeCount(ploidy, alleleCount - 1);
        }
    }

    /**
     * Provides every combination of {@link #PLOIDY}, {@link #MAXIMUM_ALLELE} and {@link #READ_COUNTS}.
     *
     * @return one row per combination: {@code { ploidy, alleleCount, readCounts }}.
     */
    @DataProvider(name="ploidyAndMaximumAlleleAndReadCountsData")
    public Object[][] ploidyAndMaximumAlleleAndReadCountsData() {
        final Object[][] result = new Object[PLOIDY.length * MAXIMUM_ALLELE.length * READ_COUNTS.length][];
        int index = 0;
        for (final int i : PLOIDY)
            for (final int j : MAXIMUM_ALLELE)
                for (final int[] k : READ_COUNTS)
                    result[index++] = new Object[] { i, j, k };
        return result;
    }

    /**
     * Provides every combination of {@link #PLOIDY} and {@link #MAXIMUM_ALLELE}.
     *
     * @return one row per combination: {@code { ploidy, alleleCount }}.
     */
    @DataProvider(name="ploidyAndMaximumAlleleData")
    public Object[][] ploidyAndMaximumAlleleData() {
        final Object[][] result = new Object[PLOIDY.length * MAXIMUM_ALLELE.length][];
        int index = 0;
        for (final int i : PLOIDY)
            for (final int j : MAXIMUM_ALLELE)
                result[index++] = new Object[] { i, j };
        return result;
    }
}

View File

@ -0,0 +1,103 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Tests the construction of {@link org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingData}
 * from a ploidy model and a set of read likelihoods.
 */
public class GenotypingDataUnitTest {

    // Per-sample ploidies; row i is paired with row i of READ_COUNTS.
    private static final int[][] PLOIDIES = new int[][]{
            {1, 1, 1, 1},
            {1, 2, 3, 4},
            {2, 2, 2, 2},
            {2, 1, 2, 1},
            {1},
            {2},
            {},
    };

    // Per-sample read counts; row i is paired with row i of PLOIDIES.
    private static final int[][] READ_COUNTS = new int[][] {
            { 10 , 100, 50, 20 },
            { 0, 100, 10, 1 },
            { 1, 2, 3, 4 },
            { 10, 20, 50, 40 },
            { 10 },
            { 20 },
            { }
    };

    /**
     * Builds a {@code GenotypingData} instance and checks that it exposes the same alleles,
     * samples, read likelihoods and ploidy model it was created with.
     *
     * @param ploidies ploidy of each sample.
     * @param readCounts number of reads of each sample.
     */
    @Test(dataProvider="ploidyAndMaximumAlleleAndReadCountsData")
    public void testInstantiation(final int[] ploidies, final int[] readCounts) {
        final ReadLikelihoods<Allele> likelihoods = ReadLikelihoodsUnitTester.readLikelihoods(2,readCounts);
        // The read likelihoods object also serves as the sample list of the ploidy model.
        final SampleList samples = likelihoods;
        final PloidyModel ploidyModel = new HeterogeneousPloidyModel(samples,ploidies);

        final GenotypingData<Allele> data = new GenotypingData<>(ploidyModel,likelihoods);

        Assert.assertTrue(AlleleListUtils.equals(data, likelihoods));
        Assert.assertTrue(SampleListUtils.equals(data, likelihoods));
        Assert.assertEquals(data.readLikelihoods(),likelihoods);
        Assert.assertEquals(data.ploidyModel(),ploidyModel);
    }

    /**
     * Pairs each row of {@link #PLOIDIES} with the row of {@link #READ_COUNTS} at the same position.
     *
     * @return one row per pairing: {@code { ploidies, readCounts }}.
     */
    @DataProvider(name="ploidyAndMaximumAlleleAndReadCountsData")
    public Object[][] ploidyAndMaximumAlleleAndReadCountsData() {
        final Object[][] rows = new Object[PLOIDIES.length][];
        for (int row = 0; row < rows.length; row++) {
            rows[row] = new Object[] { PLOIDIES[row], READ_COUNTS[row] };
        }
        return rows;
    }
}

View File

@ -0,0 +1,122 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.tools.walkers.genotyper.PloidyModel;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
/**
 * General heterogeneous ploidy model.
 *
 * <p>
 * Currently only available for testing but will be promoted at some point and have its own unit test.
 * </p>
 */
public class HeterogeneousPloidyModel implements PloidyModel {

    private final SampleList sampleList;

    private final int[] ploidies;

    private final int ploidySum;

    private final boolean isHomogeneous;

    /**
     * Creates a ploidy model that assigns each sample its own ploidy.
     *
     * @param sampleList the samples covered by the model.
     * @param ploidies ploidy of each sample, in the same order as {@code sampleList}; the
     *                 array is copied, so later changes by the caller do not affect the model.
     *
     * @throws IllegalArgumentException if either argument is {@code null}, if their sizes differ,
     *         or if any ploidy is negative.
     */
    public HeterogeneousPloidyModel(final SampleList sampleList, final int[] ploidies) {
        if (sampleList == null)
            throw new IllegalArgumentException("the sample list cannot be null");
        if (ploidies == null)
            throw new IllegalArgumentException("the ploidies cannot be null");
        if (sampleList.sampleCount() != ploidies.length)
            throw new IllegalArgumentException("sample-list and ploidy array length must match");

        this.ploidies = ploidies.clone();

        int sum = 0;
        // Compare every ploidy with the first one. Checking the sum against
        // length * first is not enough: e.g. {2, 1, 3, 2} adds up to 4 * 2.
        boolean allEqual = true;
        for (final int p : this.ploidies) {
            if (p < 0)
                throw new IllegalArgumentException("no ploidy can be less than 0");
            if (p != this.ploidies[0])
                allEqual = false;
            sum += p;
        }

        this.ploidySum = sum;
        this.isHomogeneous = allEqual;
        this.sampleList = sampleList;
    }

    /**
     * Returns the ploidy of a sample.
     *
     * @param sampleIndex index of the sample, from 0 to {@link #sampleCount()} - 1.
     * @return the ploidy given for that sample at construction.
     *
     * @throws IllegalArgumentException if {@code sampleIndex} is outside the valid range.
     */
    @Override
    public int samplePloidy(final int sampleIndex) {
        // The upper bound is exclusive: an index equal to the length is already out of range.
        if (sampleIndex < 0 || sampleIndex >= ploidies.length)
            throw new IllegalArgumentException("invalid sample index: " + sampleIndex);
        return ploidies[sampleIndex];
    }

    /**
     * Tells whether all samples have the same ploidy.
     *
     * @return {@code true} if there are no samples or every sample has the same ploidy.
     */
    @Override
    public boolean isHomogeneous() {
        return isHomogeneous;
    }

    /**
     * Returns the sum of the ploidies of all samples.
     *
     * @return 0 when there are no samples.
     */
    @Override
    public int totalPloidy() {
        return ploidySum;
    }

    /**
     * Returns the number of samples in the model.
     *
     * @return the length of the ploidy array given at construction.
     */
    @Override
    public int sampleCount() {
        return ploidies.length;
    }

    /**
     * Looks up the index of a sample by delegating to the underlying sample list.
     *
     * @param sample the sample name.
     * @return whatever the underlying sample list returns for that name.
     */
    @Override
    public int sampleIndex(final String sample) {
        return sampleList.sampleIndex(sample);
    }

    /**
     * Looks up a sample name by delegating to the underlying sample list.
     *
     * @param sampleIndex the index of the sample.
     * @return whatever the underlying sample list returns for that index.
     */
    @Override
    public String sampleAt(int sampleIndex) {
        return sampleList.sampleAt(sampleIndex);
    }
}

View File

@ -0,0 +1,92 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Unit tests for {@link org.broadinstitute.gatk.tools.walkers.genotyper.HomogeneousPloidyModel}.
 *
 * <p>
 *     Every combination of the ploidies in {@code PLOIDY} and the sample counts in {@code SAMPLE_COUNT}
 *     is exercised.
 * </p>
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class HomogeneousPloidyModelUnitTest {

    /** Ploidies to test. */
    private static final int[] PLOIDY = { 1, 2, 3, 7, 10 };

    /** Sample-list sizes to test, including the empty list. */
    private static final int[] SAMPLE_COUNT = { 0, 1, 3, 4, 5, 6, 10, 101 };

    /**
     * Provides all (ploidy, sample count) pairs, iterating sample counts within each ploidy.
     *
     * @return never {@code null}; one two-element row per combination.
     */
    @DataProvider(name="ploidyAndSampleListData")
    public Object[][] ploidyAndSampleListData() {
        final List<Object[]> rows = new ArrayList<>(PLOIDY.length * SAMPLE_COUNT.length);
        for (final int ploidy : PLOIDY) {
            for (final int sampleCount : SAMPLE_COUNT) {
                rows.add(new Object[] { ploidy, sampleCount });
            }
        }
        return rows.toArray(new Object[rows.size()][]);
    }

    /**
     * Checks that a model built with a single ploidy reports itself as homogeneous, has the expected
     * total ploidy, returns that ploidy for every sample and behaves as a proper sample list.
     *
     * @param ploidy the ploidy shared by all samples.
     * @param sampleCount number of samples in the model.
     */
    @Test(dataProvider = "ploidyAndSampleListData")
    public void testPloidyAndSampleList(final int ploidy, final int sampleCount) {
        final List<String> sampleNames = sampleNames(sampleCount);
        final HomogeneousPloidyModel ploidyModel =
                new HomogeneousPloidyModel(new IndexedSampleList(sampleNames), ploidy);

        Assert.assertTrue(ploidyModel.isHomogeneous());
        Assert.assertEquals(ploidyModel.totalPloidy(), sampleCount * ploidy);
        for (int sample = 0; sample < sampleCount; sample++) {
            Assert.assertEquals(ploidyModel.samplePloidy(sample), ploidy);
        }
        SampleListUnitTester.assertSampleList(ploidyModel, sampleNames);
    }

    /**
     * Builds {@code count} distinct sample names of the form {@code SAMPLE_<index>}.
     *
     * @param count number of names to create.
     * @return never {@code null}; a modifiable list with exactly {@code count} names.
     */
    private static List<String> sampleNames(final int count) {
        final List<String> names = new ArrayList<>(count);
        for (int index = 0; index < count; index++) {
            names.add("SAMPLE_" + index);
        }
        return names;
    }
}

View File

@ -0,0 +1,102 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
import static org.broadinstitute.gatk.tools.walkers.genotyper.AlleleListUnitTester.assertAlleleList;
/**
 * Tests {@link org.broadinstitute.gatk.tools.walkers.genotyper.IndexedAlleleList}.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class IndexedAlleleListUnitTest {

    /** Number of alleles handed to the constructor under test (the input may contain repeats). */
    private static final int[] ALLELE_COUNT = { 0, 1, 5, 10, 20 };

    /** Maximum allele lengths passed to the random allele generator. */
    private static final int[] MAX_ALLELE_LENGTH = { 1, 2, 3, 10 };

    /**
     * The no-argument constructor must result in an empty allele list.
     */
    @Test
    public void testEmptyConstructor() {
        final IndexedAlleleList<Allele> subject = new IndexedAlleleList<>();
        // Typed empty list rather than the raw Collections.EMPTY_LIST to avoid an unchecked conversion.
        assertAlleleList(subject, Collections.<Allele>emptyList());
    }

    /**
     * A list built from an array must contain the array's distinct alleles in first-occurrence order.
     *
     * @param alleleCount number of random alleles to generate.
     * @param maxAlleleLength maximum length requested from the random allele generator.
     */
    @Test(dataProvider= "alleleCountMaxAlleleLengthData")
    public void testArrayConstructor(final int alleleCount, final int maxAlleleLength) {
        final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount, maxAlleleLength);
        final IndexedAlleleList<Allele> subject = new IndexedAlleleList<>(alleles);
        assertAlleleList(subject, withoutRepeats(alleles));
    }

    /**
     * A list built from a collection must contain the collection's distinct alleles in first-occurrence order.
     *
     * @param alleleCount number of random alleles to generate.
     * @param maxAlleleLength maximum length requested from the random allele generator.
     */
    @Test(dataProvider= "alleleCountMaxAlleleLengthData")
    public void testCollectionConstructor(final int alleleCount, final int maxAlleleLength) {
        final Allele[] alleles = AlleleListUnitTester.generateRandomAlleles(alleleCount, maxAlleleLength);
        final List<Allele> alleleList = Arrays.asList(alleles);
        final IndexedAlleleList<Allele> subject = new IndexedAlleleList<>(alleleList);
        assertAlleleList(subject, withoutRepeats(alleles));
    }

    /**
     * Provides all (allele count, maximum allele length) combinations, iterating lengths within each count.
     *
     * @return never {@code null}; one two-element row per combination.
     */
    @DataProvider(name="alleleCountMaxAlleleLengthData")
    public Object[][] alleleCountMaxAlleleLengthData() {
        final Object[][] result = new Object[ALLELE_COUNT.length * MAX_ALLELE_LENGTH.length][];
        int nextIndex = 0;
        for (final int alleleCount : ALLELE_COUNT)
            for (final int maxAlleleLength : MAX_ALLELE_LENGTH)
                result[nextIndex++] = new Object[] { alleleCount, maxAlleleLength };
        return result;
    }

    /**
     * Returns the distinct alleles of the input, preserving the order of their first occurrence.
     *
     * @param alleles the input alleles, possibly with repeats.
     * @return never {@code null}; a new list without repeated elements.
     */
    private static List<Allele> withoutRepeats(final Allele[] alleles) {
        return new ArrayList<Allele>(new LinkedHashSet<Allele>(Arrays.asList(alleles)));
    }
}

View File

@ -0,0 +1,131 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
 * Tests {@link org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList}.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class IndexedSampleListUnitTest {

    /** Number of sample names handed to the constructor under test (the input may contain repeats). */
    private static final int[] SAMPLE_COUNT = { 0, 1, 5, 10, 20 };

    /** Upper bounds (inclusive) for the random numeric suffix of the generated sample names. */
    private static final int[] MAX_SAMPLE_INDEX = { 0, 1, 4, 9, 10000 };

    /** Random generator obtained from the engine. */
    private static final Random rnd = GenomeAnalysisEngine.getRandomGenerator();

    /**
     * The no-argument constructor must result in an empty sample list.
     */
    @Test
    public void testEmptyConstructor() {
        final IndexedSampleList subject = new IndexedSampleList();
        // Typed empty list rather than the raw Collections.EMPTY_LIST to avoid an unchecked conversion.
        SampleListUnitTester.assertSampleList(subject, Collections.<String>emptyList());
    }

    /**
     * A list built from an array must contain the array's distinct names in first-occurrence order.
     *
     * @param sampleCount number of sample names to generate.
     * @param maxSampleIndex maximum numeric suffix of the generated names.
     */
    @Test(dataProvider="sampleCountMaxSampleIndexData")
    public void testArrayConstructor(final int sampleCount, final int maxSampleIndex) {
        final String[] sampleNames = generateSampleNames(sampleCount,maxSampleIndex);
        final IndexedSampleList subject = new IndexedSampleList(sampleNames);
        SampleListUnitTester.assertSampleList(subject, withoutRepeats(sampleNames));
    }

    /**
     * A list built from a collection must contain the collection's distinct names in first-occurrence order.
     *
     * @param sampleCount number of sample names to generate.
     * @param maxSampleIndex maximum numeric suffix of the generated names.
     */
    @Test(dataProvider="sampleCountMaxSampleIndexData")
    public void testCollectionConstructor(final int sampleCount, final int maxSampleIndex) {
        final String[] sampleNames = generateSampleNames(sampleCount,maxSampleIndex);
        final List<String> sampleNameList = Arrays.asList(sampleNames);
        final IndexedSampleList subject = new IndexedSampleList(sampleNameList);
        SampleListUnitTester.assertSampleList(subject, withoutRepeats(sampleNames));
    }

    /**
     * Provides all (sample count, maximum sample index) combinations, iterating indices within each count.
     *
     * @return never {@code null}; one two-element row per combination.
     */
    @DataProvider(name="sampleCountMaxSampleIndexData")
    public Object[][] sampleCountMaxSampleIndexData() {
        final Object[][] result = new Object[SAMPLE_COUNT.length * MAX_SAMPLE_INDEX.length][];
        int nextIndex = 0;
        for (final int sampleCount : SAMPLE_COUNT)
            for (final int maxSampleIndex : MAX_SAMPLE_INDEX)
                result[nextIndex++] = new Object[] { sampleCount, maxSampleIndex };
        return result;
    }

    /**
     * Generate testing sample names.
     *
     * <p>
     *     Basically all have a common prefix "SAMPLE_" followed by a random numeric index between
     *     0 and {@code maxSampleIndex}, both inclusive.
     * </p>
     *
     * <p>
     *     With {@code maxSampleIndex} you can force some repeated sample names: repeats are guaranteed
     *     whenever {@code sampleCount > maxSampleIndex + 1}, and may happen by chance otherwise.
     * </p>
     *
     * @param sampleCount number of sample names to generate.
     * @param maxSampleIndex the maximum sample numeric index.
     *
     * @throws IllegalArgumentException if {@code sampleCount} or {@code maxSampleIndex} are negative.
     * @return never {@code null}.
     */
    private static String[] generateSampleNames(final int sampleCount, final int maxSampleIndex) {
        // Fail explicitly: without these checks a negative maxSampleIndex would go unnoticed when
        // sampleCount is 0, contradicting the documented contract.
        if (sampleCount < 0)
            throw new IllegalArgumentException("the sample count cannot be negative: " + sampleCount);
        if (maxSampleIndex < 0)
            throw new IllegalArgumentException("the maximum sample index cannot be negative: " + maxSampleIndex);
        final String[] result = new String[sampleCount];
        for (int i = 0; i < sampleCount; i++)
            result[i] = "SAMPLE_" + rnd.nextInt(maxSampleIndex + 1);
        return result;
    }

    /**
     * Returns the distinct names of the input, preserving the order of their first occurrence.
     *
     * @param sampleNames the input names, possibly with repeats.
     * @return never {@code null}; a new list without repeated elements.
     */
    private static List<String> withoutRepeats(final String[] sampleNames) {
        return new ArrayList<String>(new LinkedHashSet<String>(Arrays.asList(sampleNames)));
    }
}

View File

@ -0,0 +1,145 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * Unit tests for {@link org.broadinstitute.gatk.tools.walkers.genotyper.InfiniteRandomMatingPopulationModel}.
 *
 * <p>
 * The three scenario tables below ({@link #ALLELE_COUNTS}, {@link #PLOIDIES} and {@link #READ_COUNTS})
 * are read in parallel: row {@code i} of each one describes the same scenario.
 * </p>
 */
public class InfiniteRandomMatingPopulationModelUnitTest {

    /**
     * Each entry contains two values: the first is the total number of alleles and the second is
     * the number of alleles to discard at random before genotyping in {@link #testCalculateLikelihoods}.
     */
    private static final int[][] ALLELE_COUNTS = new int[][] {
            {1, 0},
            {2, 1},
            {5, 2},
            {10, 4},
            {1, 0},
            {2, 1},
            {10, 7}
    };

    /**
     * Per-scenario ploidies, one element per sample; the last scenario has no samples at all.
     */
    private static final int[][] PLOIDIES = new int[][]{
            {1, 1, 1, 1},
            {1, 2, 3, 4},
            {2, 2, 2, 2},
            {2, 1, 2, 1},
            {1},
            {2},
            {},
    };

    /**
     * Per-scenario read counts, one element per sample.
     */
    private static final int[][] READ_COUNTS = new int[][] {
            { 10 , 100, 50, 20 },
            { 0, 100, 10, 1 },
            { 1, 2, 3, 4 },
            { 10, 20, 50, 40 },
            { 10 },
            { 20 },
            { }
    };

    /**
     * Runs the model on synthetic read-likelihoods and checks the structure of the result: alleles and
     * samples match the request, each sample has one value per possible genotype and no value is positive.
     *
     * @param ploidies ploidy of each sample.
     * @param alleleCount number of alleles in the read-likelihoods.
     * @param discardAlleleCount number of those alleles to drop at random before genotyping; 0 to keep all.
     * @param readCounts number of reads of each sample.
     */
    @Test(dataProvider="ploidyAndMaximumAlleleAndReadCountsData")
    public void testCalculateLikelihoods(final int[] ploidies, final int alleleCount, final int discardAlleleCount, final int[] readCounts) {
        final ReadLikelihoods<Allele> readLikelihoods = ReadLikelihoodsUnitTester.readLikelihoods(alleleCount, readCounts);

        // With nothing to discard, the read-likelihoods double as the genotyping allele list.
        final AlleleList<Allele> genotypingAlleles;
        if (discardAlleleCount == 0) {
            genotypingAlleles = readLikelihoods;
        } else {
            genotypingAlleles = discardAllelesAtRandom(readLikelihoods, discardAlleleCount);
        }

        final SampleList samples = SampleListUnitTester.sampleList(ploidies.length);
        final PloidyModel ploidyModel = new HeterogeneousPloidyModel(samples, ploidies);
        final GenotypingData<Allele> data = new GenotypingData<>(ploidyModel, readLikelihoods);
        final InfiniteRandomMatingPopulationModel model = new InfiniteRandomMatingPopulationModel();
        final GenotypingLikelihoods<Allele> result = model.calculateLikelihoods(genotypingAlleles, data);

        Assert.assertNotNull(result);
        AlleleListUnitTester.assertAlleleList(result, AlleleListUtils.asList(genotypingAlleles));
        SampleListUnitTester.assertSampleList(result, SampleListUtils.asList(samples));

        final int sampleCount = result.sampleCount();
        for (int s = 0; s < sampleCount; s++) {
            final GenotypeLikelihoods perSample = result.sampleLikelihoods(s);
            Assert.assertNotNull(perSample);
            final double[] vector = perSample.getAsVector();
            Assert.assertNotNull(vector);
            // one value per genotype possible with this sample's ploidy and the genotyping alleles.
            Assert.assertEquals(vector.length, GenotypeLikelihoodCalculators.getInstance(ploidies[s], genotypingAlleles.alleleCount()).genotypeCount());
            for (final double value : vector) {
                Assert.assertTrue(value <= 0);
            }
        }
    }

    /**
     * Copies the input allele list and removes alleles from the copy at random positions.
     *
     * @param likelihoods the allele list to take the alleles from; it is not modified.
     * @param discardAlleleCount how many alleles to remove.
     * @return a new allele list holding the surviving alleles.
     */
    private AlleleList<Allele> discardAllelesAtRandom(final AlleleList<Allele> likelihoods, final int discardAlleleCount) {
        final Random random = GenomeAnalysisEngine.getRandomGenerator();
        final List<Allele> remaining = new ArrayList<>(AlleleListUtils.asList(likelihoods));
        int pending = discardAlleleCount;
        while (pending-- > 0) {
            final int victim = random.nextInt(remaining.size());
            remaining.remove(victim);
        }
        return new IndexedAlleleList<>(remaining);
    }

    /**
     * Provides the arguments for {@link #testCalculateLikelihoods}.
     *
     * <p>
     * Each scenario yields a row that keeps all alleles and, when its discard count is not zero,
     * a second row that discards that many alleles.
     * </p>
     *
     * @return never {@code null}.
     */
    @DataProvider(name="ploidyAndMaximumAlleleAndReadCountsData")
    public Object[][] ploidyAndMaximumAlleleAndReadCountsData() {
        final List<Object[]> cases = new ArrayList<>(PLOIDIES.length * 2);
        for (int i = 0; i < PLOIDIES.length; i++) {
            final int totalAlleles = ALLELE_COUNTS[i][0];
            final int discarded = ALLELE_COUNTS[i][1];
            cases.add(new Object[] { PLOIDIES[i], totalAlleles, 0, READ_COUNTS[i] });
            if (discarded != 0) {
                cases.add(new Object[] { PLOIDIES[i], totalAlleles, discarded, READ_COUNTS[i] });
            }
        }
        return cases.toArray(new Object[cases.size()][]);
    }
}

View File

@ -0,0 +1,124 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Contains utilities for tests that need to create read-likelihoods.
 */
public class ReadLikelihoodsUnitTester {

    /**
     * Header shared by every artificial read created by this class. It is a constant, hence final.
     */
    private static final SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader(10, 0, 1000);

    /**
     * Creates a read-likelihoods collection filled in with deterministic test values.
     *
     * <p>
     * There is one sample per element in {@code readCount}; the value set for each
     * (sample, allele, read) combination is a function of the three indices only.
     * </p>
     *
     * @param alleleCount number of alleles to include.
     * @param readCount number of reads for each sample.
     * @return never {@code null}.
     */
    static ReadLikelihoods<Allele> readLikelihoods(final int alleleCount, final int[] readCount) {
        final int sampleCount = readCount.length;
        final AlleleList<Allele> alleleList = AlleleListUnitTester.alleleList(alleleCount,100,true);
        final SampleList sampleList = SampleListUnitTester.sampleList(sampleCount);
        final Map<String,List<GATKSAMRecord>> sampleToReads = new HashMap<>(sampleCount);
        for (int s = 0; s < sampleCount; s++) {
            sampleToReads.put(sampleList.sampleAt(s), readList(s, readCount[s]));
        }
        final ReadLikelihoods<Allele> likelihoods = new ReadLikelihoods<>(sampleList, alleleList, sampleToReads);
        for (int s = 0; s < sampleCount; s++) {
            final ReadLikelihoods.Matrix<Allele> sampleLikelihoods = likelihoods.sampleMatrix(s);
            for (int a = 0; a < alleleCount; a++) {
                for (int r = 0; r < readCount[s]; r++) {
                    sampleLikelihoods.set(a, r, testLikelihood(s, a, r));
                }
            }
        }
        return likelihoods;
    }

    /**
     * Produces a test likelihood that depends on the sample, allele and read index.
     *
     * @return the negated absolute value of a weighted sum of the three (1-based) indices,
     *    so the result is never positive.
     */
    private static double testLikelihood(final int sampleIndex, final int alleleIndex, final int readIndex) {
        return - Math.abs(3 * (sampleIndex + 1) + 7 * (alleleIndex + 1) + 11 * (readIndex + 1));
    }

    /**
     * Creates the artificial reads of one sample, named {@code READ_<sampleIndex>_<readIndex>}.
     *
     * @param sampleIndex index of the sample; only used to compose the read names.
     * @param readCount number of reads to create.
     * @return never {@code null}; a new list with {@code readCount} reads.
     */
    static List<GATKSAMRecord> readList(final int sampleIndex, final int readCount) {
        final List<GATKSAMRecord> reads = new ArrayList<>(readCount);
        // the loop index is the read index; no separate counter is needed.
        for (int r = 0; r < readCount; r++) {
            reads.add(ArtificialSAMUtils.createArtificialRead(SAM_HEADER, "READ_" + sampleIndex + "_" + r, 1, 1, 100));
        }
        return reads;
    }

    /**
     * Creates a sampleToReads map given the sample list and the required read counts.
     *
     * <p>
     * Reads are named {@code READ_<n>} where {@code n} keeps increasing across samples,
     * so names are unique within the returned map.
     * </p>
     *
     * @param sampleList the target sample-list.
     * @param readCounts the target read-counts, one per sample in {@code sampleList}.
     * @return never {@code null}.
     */
    public static Map<String,List<GATKSAMRecord>> sampleToReads(final SampleList sampleList, final int[] readCounts) {
        final int sampleCount = sampleList.sampleCount();
        final Map<String,List<GATKSAMRecord>> result = new HashMap<>(sampleCount);
        int readIndex = 0; // running across all samples.
        for (int i = 0; i < sampleCount; i++) {
            final int readCount = readCounts[i];
            final String sample = sampleList.sampleAt(i);
            final List<GATKSAMRecord> records = new ArrayList<>(readCount);
            for (int j = 0; j < readCount; j++) {
                records.add(ArtificialSAMUtils.createArtificialRead(SAM_HEADER,"READ_" + (readIndex++),1,1,100));
            }
            result.put(sample,records);
        }
        return result;
    }
}

View File

@ -0,0 +1,122 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.testng.Assert;
import java.util.*;
/**
* Helper class for those unit-test classes that test on implementations of SampleList.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class SampleListUnitTester {

    /**
     * Sample name that is safe to assume no caller will ever use. It is looked up at the end of
     * {@link #assertSampleList} to check that unknown names map to index {@code -1}.
     */
    private static final String NEVER_USE_SAMPLE_NAME =
            "WHY_WOULD_YOU_CALL_A_SAMPLE_LIKE_THIS? ArE yOu Crazzzzy? " + new Date().toString();

    /**
     * Asserts that a sample-list contains exactly the expected samples, in the expected order.
     *
     * <p>
     * The check goes through the sample count, the sample at each index and the index of each sample,
     * and finally verifies that a name that is never used resolves to index {@code -1}.
     * </p>
     *
     * @param actual the sample-list to assess.
     * @param expected the expected sample names, in order.
     *
     * @throws IllegalArgumentException if {@code expected} is {@code null}, or contains a {@code null},
     *     the reserved never-used name or a repeated name; any of these indicates a bug in the
     *     testing code itself.
     * @throws AssertionError (raised by the TestNG assertions) if {@code actual} does not match
     *     {@code expected}, which indicates a bug in the code under test.
     */
    public static void assertSampleList(final SampleList actual, final List<String> expected) {
        if (expected == null) {
            throw new IllegalArgumentException("the expected list cannot be null");
        }
        final int expectedCount = expected.size();
        final Set<String> seenNames = new HashSet<>(expectedCount);
        Assert.assertNotNull(actual);
        Assert.assertEquals(actual.sampleCount(), expectedCount);

        int index = 0;
        for (final String expectedSample : expected) {
            validateExpectedSample(expectedSample, seenNames);

            final String actualSample = actual.sampleAt(index);
            Assert.assertNotNull(actualSample, "sample name cannot be null");
            Assert.assertFalse(seenNames.contains(actualSample), "repeated sample name: " + actualSample);
            Assert.assertEquals(actualSample, expectedSample, "wrong sample name order; index = " + index);
            Assert.assertEquals(actual.sampleIndex(actualSample), index, "sample index mismatch");
            seenNames.add(actualSample);
            index++;
        }

        // A name that is not in the list must not be found.
        Assert.assertEquals(actual.sampleIndex(NEVER_USE_SAMPLE_NAME), -1);
    }

    /**
     * Rejects expected sample names that reveal a mistake in the test itself.
     *
     * @param expectedSample the expected name to validate.
     * @param seenNames the names already processed from the expected list.
     */
    private static void validateExpectedSample(final String expectedSample, final Set<String> seenNames) {
        if (expectedSample == null) {
            throw new IllegalArgumentException("the expected sample cannot be null");
        }
        if (expectedSample.equals(NEVER_USE_SAMPLE_NAME)) {
            throw new IllegalArgumentException("you cannot use the forbidden sample name");
        }
        if (seenNames.contains(expectedSample)) {
            throw new IllegalArgumentException("repeated names in the expected list, this is a test bug");
        }
    }

    /**
     * Builds a sample list for testing with the requested number of samples, named
     * {@code SAMPLE_0}, {@code SAMPLE_1}, and so on.
     *
     * @param sampleCount the required sample count; must not be negative.
     * @return never {@code null}.
     * @throws IllegalArgumentException if {@code sampleCount} is negative.
     */
    static SampleList sampleList(final int sampleCount) {
        if (sampleCount < 0) {
            throw new IllegalArgumentException("the number of sample cannot be negative");
        }
        final List<String> names = new ArrayList<>(sampleCount);
        int next = 0;
        while (next < sampleCount) {
            names.add("SAMPLE_" + next);
            next++;
        }
        return new IndexedSampleList(names);
    }
}

View File

@ -0,0 +1,128 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Test {@link org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils}.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class SampleListUtilsUnitTest {

    /** Sizes of the sample lists used as test data; the empty list is included on purpose. */
    private static final int[] SAMPLE_COUNT = { 0, 1, 5, 10, 20};

    /** Test sample-name lists, one per entry in {@link #SAMPLE_COUNT}; populated by {@link #setUp()}. */
    private List<String>[] sampleLists;

    /**
     * Builds one sample-name list per entry in {@link #SAMPLE_COUNT}, each holding exactly that many
     * distinct names ({@code SAMPLE_0}, {@code SAMPLE_1}, ...).
     *
     * <p>
     * Shorter lists are therefore strict prefixes of longer ones, so the equality test must take
     * the list length into account and not just the leading elements.
     * </p>
     */
    @BeforeClass
    @SuppressWarnings("unchecked") // generic array creation; only List<String> instances are ever stored.
    public void setUp() {
        sampleLists = new List[SAMPLE_COUNT.length];
        for (int i = 0; i < SAMPLE_COUNT.length; i++) {
            final int sampleCount = SAMPLE_COUNT[i];
            final List<String> sampleList = new ArrayList<>(sampleCount);
            // Fill the list up to the requested size; adding a single element would leave every
            // list with one sample regardless of SAMPLE_COUNT.
            for (int j = 0; j < sampleCount; j++)
                sampleList.add("SAMPLE_" + j);
            sampleLists[i] = sampleList;
        }
    }

    /**
     * Checks that {@code SampleListUtils.asList} returns the names the sample-list was built from.
     *
     * @param samples the sample names used to build the sample-list under test.
     */
    @Test(dataProvider = "singleSampleListData")
    public void testAsList(final List<String> samples) {
        final SampleList sampleList = new IndexedSampleList(samples);
        final List<String> asList = SampleListUtils.asList(sampleList);
        // TestNG convention: actual value first, expected value second.
        Assert.assertEquals(asList, samples);
    }

    /**
     * Checks that {@code SampleListUtils.equals} is reflexive, symmetric and agrees with an
     * element-wise comparison of the two lists' contents.
     *
     * @param samples1 the sample names of the first list.
     * @param samples2 the sample names of the second list.
     */
    @Test(dataProvider = "twoSampleListData", dependsOnMethods={"testAsList"})
    public void testEquals(final List<String> samples1, final List<String> samples2) {
        final SampleList sampleList1 = new IndexedSampleList(samples1);
        final SampleList sampleList2 = new IndexedSampleList(samples2);

        // Reflexivity.
        Assert.assertTrue(SampleListUtils.equals(sampleList1, sampleList1));
        Assert.assertTrue(SampleListUtils.equals(sampleList2, sampleList2));

        // Agreement with a plain array comparison of the contents.
        final String[] names1 = SampleListUtils.asList(sampleList1).toArray(new String[sampleList1.sampleCount()]);
        final String[] names2 = SampleListUtils.asList(sampleList2).toArray(new String[sampleList2.sampleCount()]);
        Assert.assertEquals(SampleListUtils.equals(sampleList1, sampleList2), Arrays.equals(names1, names2));

        // Symmetry.
        Assert.assertEquals(SampleListUtils.equals(sampleList1, sampleList2),
                SampleListUtils.equals(sampleList2, sampleList1));
    }

    /**
     * Provides each test sample-name list on its own.
     *
     * @return one row per list, each row holding that single list.
     */
    @DataProvider(name="singleSampleListData")
    public Object[][] singleSampleListData() {
        final Object[][] result = new Object[sampleLists.length][];
        for (int i = 0; i < sampleLists.length; i++)
            result[i] = new Object[] { sampleLists[i]};
        return result;
    }

    /**
     * Provides every ordered pair of test sample-name lists, including each list paired with itself.
     *
     * @return one row per ordered pair of lists.
     */
    @DataProvider(name="twoSampleListData")
    public Object[][] twoAlleleListData() {
        final Object[][] result = new Object[sampleLists.length * sampleLists.length][];
        int index = 0;
        for (int i = 0; i < sampleLists.length; i++)
            for (int j = 0; j < sampleLists.length; j++)
                result[index++] = new Object[] { sampleLists[i], sampleLists[j]};
        return result;
    }
}

View File

@ -50,15 +50,15 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
// the imports for unit testing.
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.Utils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
@ -76,9 +76,9 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
engine.setArguments(new GATKArgumentCollection());
final UnifiedArgumentCollection args = new UnifiedArgumentCollection();
final Set<String> fakeSamples = Collections.singleton("fake");
final SampleList fakeSamples = SampleListUtils.singletonList("fake");
ugEngine = new UnifiedGenotypingEngine(engine, args,fakeSamples);
ugEngine = new UnifiedGenotypingEngine(args,fakeSamples,engine.getGenomeLocParser(),engine.getArguments().BAQMode);
}
private UnifiedGenotypingEngine getEngine() {
@ -87,7 +87,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
@DataProvider(name = "ReferenceQualityCalculation")
public Object[][] makeReferenceQualityCalculation() {
List<Object[]> tests = new ArrayList<Object[]>();
final List<Object[]> tests = new ArrayList<>();
// this functionality can be adapted to provide input data for whatever you might want in your data
final double p = Math.log10(0.5);
@ -114,7 +114,7 @@ public class UnifiedGenotyperEngineUnitTest extends BaseTest {
for ( Integer numAltAlleles = 0; numAltAlleles < 100; numAltAlleles++ ) {
Set<Allele> alleles = new HashSet<Allele>();
final Set<Allele> alleles = new HashSet<>();
alleles.add(Allele.create("A", true)); // ref allele
for (int len = 1; len <=numAltAlleles; len++) {

View File

@ -69,12 +69,12 @@ public class UnifiedGenotyperGeneralPloidySuite1IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testBOTH_GGA_Pools() {
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "05b8af0db7b009721df209eea96bdf1a");
executor.PC_LSV_Test(String.format(" -maxAltAlleles 2 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_BOTH_GGA", "BOTH", "4b646b6fc9c5c2ef88433a5b350310fe");
}
@Test(enabled = true)
public void testINDEL_GGA_Pools() {
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "1ac510860b295d66e1da7b27ba7cafb8");
executor.PC_LSV_Test(String.format(" -maxAltAlleles 1 -ploidy 24 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s", LSV_ALLELES), "LSV_INDEL_GGA", "INDEL", "171355e4d0648fdd50d7d56de950d338");
}
@Test(enabled = true)

View File

@ -68,6 +68,6 @@ public class UnifiedGenotyperGeneralPloidySuite2IntegrationTest extends WalkerTe
@Test(enabled = true)
public void testMT_SNP_GGA_sp10() {
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "654059dda19cb2cf546097e44753ea14");
executor.PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "0f6fdf60d7f93b2db8c8cb92c1fd3e00");
}
}

View File

@ -47,6 +47,7 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import com.google.caliper.Param;
import com.google.caliper.SimpleBenchmark;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.broadinstitute.gatk.utils.pairhmm.ActiveRegionTestDataSet;
import org.broadinstitute.gatk.utils.pairhmm.FastLoglessPairHMM;
import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
@ -112,7 +113,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
public void timeGraphBasedLikelihoods(final int reps) {
for (int i = 0; i < reps; i++) {
final GraphBasedLikelihoodCalculationEngineInstance rtlce = new GraphBasedLikelihoodCalculationEngineInstance(dataSet.assemblyResultSet(), new FastLoglessPairHMM((byte)10),Double.NEGATIVE_INFINITY,HeterogeneousKmerSizeResolution.COMBO_MAX);
rtlce.computeReadLikelihoods(dataSet.haplotypeList(), Collections.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
rtlce.computeReadLikelihoods(dataSet.haplotypeList(), SampleListUtils.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
}
}
@ -121,7 +122,7 @@ public class HCLikelihoodCalculationEnginesBenchmark extends SimpleBenchmark {
for (int i = 0; i < reps; i++) {
final PairHMMLikelihoodCalculationEngine engine = new PairHMMLikelihoodCalculationEngine((byte) 10,
PairHMM.HMM_IMPLEMENTATION.LOGLESS_CACHING, -3, true, PairHMMLikelihoodCalculationEngine.PCR_ERROR_MODEL.NONE);
engine.computeReadLikelihoods(dataSet.assemblyResultSet(), Collections.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
engine.computeReadLikelihoods(dataSet.assemblyResultSet(), SampleListUtils.singletonList("anonymous"), Collections.singletonMap("anonymous", dataSet.readList()));
}
}

View File

@ -47,18 +47,36 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import org.broadinstitute.gatk.engine.walkers.WalkerTest;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
/**
 * Test cases for {@code testHCWithGVCFHaploid}.
 *
 * @return one row per case, laid out as { bam, reference confidence mode, intervals argument, expected md5 }.
 */
@DataProvider(name = "MyDataProviderHaploid")
public Object[][] makeMyDataProviderHaploid() {
    final String pcrFree = "-L 20:10,000,000-10,010,000";
    final String wex = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;

    return new Object[][] {
            {NA12878_PCRFREE, ReferenceConfidenceMode.NONE, pcrFree, "5cc1858896aca6683282f53054bb7a61"},
            {NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, pcrFree, "010a747f5c41ddb7889168e499eb40bb"},
            {NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, pcrFree, "d7dbc1c8e11a277e9db857eb766fd2c6"},
            {NA12878_WEx, ReferenceConfidenceMode.NONE, wex, "799752d88c4e15e19a953add764d2239"},
            {NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, wex, "fa057b35d6fe9588c2653b6560d6e3c2"},
            {NA12878_WEx, ReferenceConfidenceMode.GVCF, wex, "d10e8907594414890cbf80d282426812"},
    };
}
@DataProvider(name = "MyDataProvider")
public Object[][] makeMyDataProvider() {
List<Object[]> tests = new ArrayList<>();
@ -77,6 +95,24 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
return tests.toArray(new Object[][]{});
}
/**
 * Test cases for {@code testHCWithGVCFTetraploid}.
 *
 * @return one row per case, laid out as { bam, reference confidence mode, intervals argument, expected md5 }.
 */
@DataProvider(name = "MyDataProviderTetraploid")
public Object[][] makeMyDataProviderTetraploid() {
    final String pcrFree = "-L 20:10,000,000-10,010,000";
    final String wex = "-L 20:10,000,000-10,100,000 -isr INTERSECTION -L " + hg19Chr20Intervals;

    return new Object[][] {
            {NA12878_PCRFREE, ReferenceConfidenceMode.NONE, pcrFree, "6e157b6fdf4071fcb7da74f40146a611"},
            {NA12878_PCRFREE, ReferenceConfidenceMode.BP_RESOLUTION, pcrFree, "354b84dbfaf55947aea40865e74ce66b"},
            {NA12878_PCRFREE, ReferenceConfidenceMode.GVCF, pcrFree, "fc4b7e6528747cb20e0c92699a0787cb"},
            {NA12878_WEx, ReferenceConfidenceMode.NONE, wex, "6e0f5d82b77ea79a639d43b2db70e751"},
            {NA12878_WEx, ReferenceConfidenceMode.BP_RESOLUTION, wex, "a3daf472f7ab16667e5f6dab1af392ff"},
            {NA12878_WEx, ReferenceConfidenceMode.GVCF, wex, "af9230fa56752b732572ce956101a2be"},
    };
}
/**
* Example testng test using MyDataProvider
*/
@ -86,7 +122,31 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
b37KGReference, bam, intervals, mode, HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
final String name = "testHCWithGVCF bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList(md5));
final Pair<List<File>,List<String>> executionOutput = executeTest(name, spec);
executeTest(name, spec);
}
/**
 * Runs HaplotypeCaller with {@code -ploidy 1} on each case supplied by {@code MyDataProviderHaploid}.
 * Currently disabled ({@code enabled=false}).
 *
 * @param bam the input BAM, passed with {@code -I}.
 * @param mode the reference confidence mode, passed with {@code -ERC}.
 * @param intervals the complete intervals argument(s), inserted verbatim into the command line.
 * @param md5 the expected md5 handed to the test spec.
 */
@Test(dataProvider = "MyDataProviderHaploid", enabled=false)
public void testHCWithGVCFHaploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
    final String testName = "testHCWithGVCFHaploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
    final String arguments = String.format(
            "-T HaplotypeCaller -ploidy 1 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
            b37KGReference,
            bam,
            intervals,
            mode,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
    // The output placeholder is appended after formatting so that String.format does not consume it.
    executeTest(testName, new WalkerTestSpec(arguments + " -o %s", Arrays.asList(md5)));
}
/**
 * Runs HaplotypeCaller with {@code -ploidy 4} on each case supplied by {@code MyDataProviderTetraploid}.
 * Currently disabled ({@code enabled=false}).
 *
 * @param bam the input BAM, passed with {@code -I}.
 * @param mode the reference confidence mode, passed with {@code -ERC}.
 * @param intervals the complete intervals argument(s), inserted verbatim into the command line.
 * @param md5 the expected md5 handed to the test spec.
 */
@Test(dataProvider = "MyDataProviderTetraploid", enabled=false)
public void testHCWithGVCFTetraploid(final String bam, final ReferenceConfidenceMode mode, final String intervals, final String md5) {
    final String testName = "testHCWithGVCFTetraploid bam=" + bam + " intervals= " + intervals + " gvcf= " + mode;
    final String arguments = String.format(
            "-T HaplotypeCaller -ploidy 4 --disableDithering --pcr_indel_model NONE -R %s -I %s %s -ERC %s --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d",
            b37KGReference,
            bam,
            intervals,
            mode,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
    // The output placeholder is appended after formatting so that String.format does not consume it.
    executeTest(testName, new WalkerTestSpec(arguments + " -o %s", Arrays.asList(md5)));
}
@Test
@ -144,6 +204,11 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
private static final String NOCALL_GVCF_BUGFIX_INTERVALS = privateTestDir + "gvcf_nocall_bug.interval_list";
private static final String NOCALL_GVCF_BUGFIX_BAM = privateTestDir + "gvcf_nocall_bug.bam";
private static final String GENERAL_PLOIDY_BUGFIX1_BAM = privateTestDir + "general-ploidy-arrayindex-bug-1.bam";
private static final String GENERAL_PLOIDY_BUGFIX1_INTERVALS = privateTestDir + "general-ploidy-arrayindex-bug-1.intervals";
private static final String GENERAL_PLOIDY_BUGFIX2_BAM = privateTestDir + "general-ploidy-arrayindex-bug-2.bam";
private static final String GENERAL_PLOIDY_BUGFIX2_INTERVALS = privateTestDir + "general-ploidy-arrayindex-bug-2.intervals";
@Test
public void testNoCallGVCFMissingPLsBugFix() {
@ -153,4 +218,23 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
spec.disableShadowBCF();
executeTest("testNoCallGVCFMissingPLsBugFix", spec);
}
/**
 * Runs HaplotypeCaller in GVCF mode with {@code -ploidy 1 -maxAltAlleles 2} on the
 * "general-ploidy-arrayindex-bug-1" BAM and intervals. Currently disabled ({@code enabled=false}).
 */
// NOTE(review): the expected md5 is identical to the one in testGeneralPloidyArrayIndexBug2Fix, although
// the inputs and arguments differ - confirm the value before enabling this test.
@Test(enabled=false)
public void testGeneralPloidyArrayIndexBug1Fix() {
    final String arguments = String.format(
            "-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 1 -maxAltAlleles 2 -isr INTERSECTION -L 1:23696115-23696189",
            b37KGReference,
            GENERAL_PLOIDY_BUGFIX1_BAM,
            GENERAL_PLOIDY_BUGFIX1_INTERVALS,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
    final WalkerTestSpec testSpec = new WalkerTestSpec(arguments + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce"));
    testSpec.disableShadowBCF();
    executeTest(" testGeneralPloidyArrayIndexBug1Fix", testSpec);
}
/**
 * Runs HaplotypeCaller in GVCF mode with {@code -ploidy 2 -maxAltAlleles 2} and the DepthPerSampleHC and
 * StrandBiasBySample annotations on the "general-ploidy-arrayindex-bug-2" BAM and intervals.
 * Currently disabled ({@code enabled=false}).
 */
// NOTE(review): the expected md5 is identical to the one in testGeneralPloidyArrayIndexBug1Fix, although
// the inputs and arguments differ - confirm the value before enabling this test.
@Test(enabled=false)
public void testGeneralPloidyArrayIndexBug2Fix() {
    final String arguments = String.format(
            "-T HaplotypeCaller --pcr_indel_model NONE -R %s -I %s -L %s -ERC GVCF --no_cmdline_in_header -variant_index_type %s -variant_index_parameter %d -ploidy 2 -maxAltAlleles 2 -A DepthPerSampleHC -A StrandBiasBySample -L 1:38052860-38052937",
            b37KGReference,
            GENERAL_PLOIDY_BUGFIX2_BAM,
            GENERAL_PLOIDY_BUGFIX2_INTERVALS,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE,
            HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
    final WalkerTestSpec testSpec = new WalkerTestSpec(arguments + " -o %s", Arrays.asList("7c263d77bf831551366c6e36233b46ce"));
    testSpec.disableShadowBCF();
    executeTest(" testGeneralPloidyArrayIndexBug2Fix", testSpec);
}
}

View File

@ -93,24 +93,55 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
HCTest(NA12878_BAM, "", "42de756c08b028be70287ada1022526e");
}
/**
 * Runs {@code HCTest} on {@code CEUTRIO_BAM} with {@code -ploidy 1}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerMultiSampleHaploid() {
    final String args = "-ploidy 1";
    final String expected = "b9e43506af628768fc9fd1ced49822b1";
    HCTest(CEUTRIO_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code NA12878_BAM} with {@code -ploidy 1}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerSingleSampleHaploid() {
    final String args = "-ploidy 1";
    final String expected = "fb584b8c3f371ee2e438a3fc2335b26f";
    HCTest(NA12878_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code NA12878_BAM} with {@code -ploidy 4}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerSingleSampleTetraploid() {
    final String args = "-ploidy 4";
    final String expected = "d450b486c76520f9c803c603f25563e4";
    HCTest(NA12878_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code NA12878_BAM} with {@code -mbq 15}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerMinBaseQuality() {
    final String args = "-mbq 15";
    final String expected = "d063c0e5af1fd413be0500609ae36d46";
    HCTest(NA12878_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code NA12878_BAM} with {@code -mbq 15 -ploidy 1}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerMinBaseQualityHaploid() {
    final String args = "-mbq 15 -ploidy 1";
    final String expected = "40259040f6febd8ea5931132cf5d8958";
    HCTest(NA12878_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code NA12878_BAM} with {@code -mbq 15 -ploidy 4}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerMinBaseQualityTetraploid() {
    final String args = "-mbq 15 -ploidy 4";
    final String expected = "ca11eae5def67ca9717d129348e4cda7";
    HCTest(NA12878_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code NA12878_BAM} with {@code -likelihoodEngine GraphBased}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerGraphBasedSingleSample() {
    final String args = "-likelihoodEngine GraphBased";
    final String expected = "6cf15ddbfa4a3738e891fd9a09da8d07";
    HCTest(NA12878_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code CEUTRIO_BAM} with {@code -likelihoodEngine GraphBased -ploidy 1}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerGraphBasedMultiSampleHaploid() {
    final String args = "-likelihoodEngine GraphBased -ploidy 1";
    final String expected = "f0677e5a2882f947f437e8d2049172cb";
    HCTest(CEUTRIO_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code CEUTRIO_BAM} with {@code -likelihoodEngine GraphBased}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerGraphBasedMultiSample() {
    final String args = "-likelihoodEngine GraphBased";
    final String expected = "4c2a2dad6379b13fee4c7faca17441f5";
    HCTest(CEUTRIO_BAM, args, expected);
}
@Test(enabled = false) // can't annotate the rsID's yet
@Test
public void testHaplotypeCallerSingleSampleWithDbsnp() {
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "");
HCTest(NA12878_BAM, "-D " + b37dbSNP132, "9d7067648561aa35b04d355184a5dea2");
}
@Test
@ -120,6 +151,18 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
"669aac2aa9c22881eda86ee53b13351a");
}
/**
 * Runs {@code HCTest} on {@code CEUTRIO_BAM} in GENOTYPE_GIVEN_ALLELES mode with {@code -ploidy 1},
 * taking the alleles from the combined phase1 chr20 indel sites VCF under {@code validationDataLocation}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerMultiSampleGGAHaploid() {
    final String allelesFile = validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
    final String args = "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 1 -alleles " + allelesFile;
    final String expected = "e50c55c65db3fa55c75ba03b4dd2f1a8";
    HCTest(CEUTRIO_BAM, args, expected);
}
/**
 * Runs {@code HCTest} on {@code CEUTRIO_BAM} in GENOTYPE_GIVEN_ALLELES mode with {@code -ploidy 4},
 * taking the alleles from the combined phase1 chr20 indel sites VCF under {@code validationDataLocation}.
 * The last argument is presumably the expected output md5 (HCTest is defined outside this view).
 */
@Test
public void testHaplotypeCallerMultiSampleGGATetraploid() {
    final String allelesFile = validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf";
    final String args = "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -ploidy 4 -alleles " + allelesFile;
    final String expected = "374d6db6e5f3f4fdb5ede26a529caa8b";
    HCTest(CEUTRIO_BAM, args, expected);
}
@Test
public void testHaplotypeCallerInsertionOnEdgeOfContig() {
HCTest(CEUTRIO_MT_TEST_BAM, "-L MT:1-10", "7f1fb8f9587f64643f6612ef1dd6d4ae");
@ -265,7 +308,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + hg19Reference + " --no_cmdline_in_header -I " + NA12878_PCRFREE250_ADAPTER_TRIMMED + " -o %s -L 20:10,024,000-10,024,500 "
, 1,
Arrays.asList(""));
executeTest("HC calling with dbSNP ID annotation on WEx intervals", spec);
executeTest("HCTestGraphBasedPCRFreePositiveLogLkFix", spec);
}
// --------------------------------------------------------------------------------------------------------------
@ -346,5 +389,4 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
executeTest("testDifferentIndelLocationsDueToSWExactDoubleComparisonsFix::longInterval",longSpec);
}
}

View File

@ -47,6 +47,7 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleListUtils;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.readthreading.HaplotypeGraph;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
@ -262,7 +263,7 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
dataSet = (ActiveRegionTestDataSet) params[0];
if (INTRODUCE_READ_ERRORS) dataSet.introduceErrors(new Random(13));
graphEngine = new GraphBasedLikelihoodCalculationEngineInstance(dataSet.assemblyResultSet(),hmm,Double.NEGATIVE_INFINITY, HeterogeneousKmerSizeResolution.COMBO_MAX);
graphLks = graphEngine.computeReadLikelihoods(dataSet.haplotypeList(),Collections.singletonList("anonymous"),Collections.singletonMap("anonymous",dataSet.readList())).toPerReadAlleleLikelihoodMap(0);
graphLks = graphEngine.computeReadLikelihoods(dataSet.haplotypeList(), SampleListUtils.singletonList("anonymous"),Collections.singletonMap("anonymous",dataSet.readList())).toPerReadAlleleLikelihoodMap(0);
// clip reads at the anchors.
final Map<GATKSAMRecord,GATKSAMRecord> clippedReads = anchorClippedReads(graphEngine.getHaplotypeGraph(),dataSet.readList());
@ -272,7 +273,7 @@ public class ReadThreadingLikelihoodCalculationEngineUnitTest extends ActiveRegi
clippedReadList.add(clippedReads.containsKey(r) ? clippedReads.get(r) : r);
}
loglessLks = fullPairHMM.computeReadLikelihoods(dataSet.assemblyResultSet(),Collections.singletonList("anonymous"),Collections.singletonMap("anonymous",clippedReadList)).toPerReadAlleleLikelihoodMap(0);
loglessLks = fullPairHMM.computeReadLikelihoods(dataSet.assemblyResultSet(),SampleListUtils.singletonList("anonymous"),Collections.singletonMap("anonymous",clippedReadList)).toPerReadAlleleLikelihoodMap(0);
// Change clipped by unclipped in the resulting likelihood map.
for (final GATKSAMRecord r : clippedReads.keySet()) {

View File

@ -47,11 +47,12 @@
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
import htsjdk.samtools.SAMFileHeader;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.UnvalidatingGenomeLoc;
import org.broadinstitute.gatk.utils.Utils;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
import htsjdk.variant.variantcontext.GenotypeType;
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
@ -61,7 +62,7 @@ import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
@ -75,7 +76,7 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final String RGID = "ID1";
GATKSAMReadGroupRecord rg;
final String sample = "NA12878";
final Set<String> samples = Collections.singleton(sample);
final SampleList samples = SampleListUtils.singletonList(sample);
SAMFileHeader header;
ReferenceConfidenceModel model;
@ -179,12 +180,12 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
@Test
public void testIndelLikelihoods() {
GenotypeLikelihoods prev = model.getIndelPLs(0);
GenotypeLikelihoods prev = model.getIndelPLs(HomoSapiensConstants.DEFAULT_PLOIDY,0);
Assert.assertEquals(prev.getAsPLs(), new int[]{0, 0, 0});
Assert.assertEquals(-10 * prev.getLog10GQ(GenotypeType.HOM_REF), 0.0);
for ( int i = 1; i <= ReferenceConfidenceModel.MAX_N_INDEL_INFORMATIVE_READS; i++ ) {
final GenotypeLikelihoods current = model.getIndelPLs(i);
final GenotypeLikelihoods current = model.getIndelPLs(HomoSapiensConstants.DEFAULT_PLOIDY,i);
final double prevGQ = -10 * prev.getLog10GQ(GenotypeType.HOM_REF);
final double currGQ = -10 * current.getLog10GQ(GenotypeType.HOM_REF);
Assert.assertTrue(prevGQ < currGQ, "GQ Failed with prev " + prev + " curr " + current + " at " + i);
@ -288,15 +289,20 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
data.getActiveRegion().add(data.makeRead(0, data.getRefLength()));
}
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final List<Integer> expectedDPs = Collections.nCopies(data.getActiveRegion().getLocation().size(), nReads);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
@Test
public void testRefConfidencePartialReads() {
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
final String ref = "ACGTAACCGGTT";
for ( int readLen = 3; readLen < ref.length(); readLen++ ) {
for ( int start = 0; start < ref.length() - readLen; start++ ) {
@ -305,11 +311,11 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final List<VariantContext> calls = Collections.emptyList();
data.getActiveRegion().add(data.makeRead(start, readLen));
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final List<Integer> expectedDPs = new ArrayList<>(Collections.nCopies(data.getActiveRegion().getLocation().size(), 0));
for ( int i = start; i < readLen + start; i++ ) expectedDPs.set(i, 1);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
}
@ -321,6 +327,9 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
final int start = xxxdata.getStart();
final int stop = xxxdata.getEnd();
final PloidyModel ploidyModel = new HomogeneousPloidyModel(samples,2);
final GenotypingModel genotypingModel = new InfiniteRandomMatingPopulationModel();
for ( int nReads = 0; nReads < 2; nReads++ ) {
final VariantContext vcStart = GATKVariantContextUtils.makeFromAlleles("test", "chr1", start, Arrays.asList("A", "C"));
@ -340,10 +349,10 @@ public class ReferenceConfidenceModelUnitTest extends BaseTest {
data.getActiveRegion().add(data.makeRead(0, data.getRefLength()));
}
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), new ArrayList<>(samples), data.getActiveRegion());
final ReadLikelihoods<Haplotype> likelihoods = HaplotypeCaller.createDummyStratifiedReadMap(data.getRefHap(), samples, data.getActiveRegion());
final List<Integer> expectedDPs = Collections.nCopies(data.getActiveRegion().getLocation().size(), nReads);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, calls);
final List<VariantContext> contexts = model.calculateRefConfidence(data.getRefHap(), haplotypes, data.getPaddedRefLoc(), data.getActiveRegion(), likelihoods, ploidyModel, genotypingModel, calls);
checkReferenceModelResult(data, contexts, expectedDPs, calls);
}
}

View File

@ -56,7 +56,8 @@ import java.util.Random;
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class RandomDNA {
public class
RandomDNA {
private Random random;
@ -73,6 +74,19 @@ public class RandomDNA {
random = new Random();
}
/**
 * Builds a random DNA generator on top of a caller-supplied random number generator.
 *
 * @param rnd the random number generator to use; it is stored as-is, not copied.
 *
 * @throws IllegalArgumentException if {@code rnd} is {@code null}.
 */
public RandomDNA(final Random rnd) {
    if (rnd != null) {
        random = rnd;
    } else {
        throw new IllegalArgumentException("the random number generator cannot be null");
    }
}
/**
* Constructs a new random DNA generator providing a seed.
*

View File

@ -0,0 +1,281 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.utils.collections;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.*;
/**
* Tests the working of {@link IndexedSet}
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class IndexedSetUnitTest {
/**
 * Adds randomly drawn elements one at a time to an {@link IndexedSet} and to a
 * {@link LinkedHashSet}, checking after every addition that both report the same outcome
 * and size, and that an element not seen before ends up at the last index.
 *
 * @param initialCapacity initial capacity passed to the {@link IndexedSet} constructor.
 * @param elementCount number of additions to attempt.
 * @param maxElement largest value (inclusive) that may be drawn.
 */
@Test(dataProvider = "initialCapacityElementCountMaxElementData")
public void testCompositionBySingleElementAddition(final int initialCapacity,
                                                   final int elementCount, final int maxElement) {
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    final IndexedSet<Integer> subject = new IndexedSet<>(initialCapacity);
    final Set<Integer> expected = new LinkedHashSet<>();
    int attempts = 0;
    while (attempts++ < elementCount) {
        final int candidate = random.nextInt(maxElement + 1);
        final boolean unseen = !expected.contains(candidate);
        final boolean addedToSubject = subject.add(candidate);
        final boolean addedToExpected = expected.add(candidate);
        Assert.assertEquals(addedToSubject, addedToExpected);
        Assert.assertEquals(subject.size(), expected.size());
        if (!unseen) {
            continue;
        }
        // A brand-new element must have been appended at the end.
        Assert.assertEquals(subject.indexOf(candidate), expected.size() - 1);
    }
    assertEquals(subject, expected);
}
/**
 * Fills an initially empty {@link IndexedSet} through a single {@code addAll} call and checks
 * the result against a {@link LinkedHashSet} populated from the same list.
 *
 * @param initialCapacity initial capacity passed to the {@link IndexedSet} constructor.
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value (inclusive) that may be generated.
 */
@Test(dataProvider = "initialCapacityElementCountMaxElementData")
public void testCompositionByCollectionAddition(final int initialCapacity,
                                                final int elementCount, final int maxElement) {
    final IndexedSet<Integer> subject = new IndexedSet<>(initialCapacity);
    final List<Integer> elements = generateElementCollection(elementCount, maxElement);
    // Adding to an empty set must report a change exactly when there was something to add.
    final boolean changed = subject.addAll(elements);
    final boolean changeExpected = !elements.isEmpty();
    Assert.assertEquals(changed, changeExpected);
    final Set<Integer> expected = new LinkedHashSet<>(elementCount);
    expected.addAll(elements);
    assertEquals(subject, expected);
}
/**
 * Builds an {@link IndexedSet} directly from a collection, compares it with a
 * {@link LinkedHashSet} built from the same collection, and checks that re-adding the same
 * collection afterwards reports no change.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value (inclusive) that may be generated.
 */
@Test(dataProvider = "elementCountMaxElementData")
public void testCompositionByCollectionConstructor(final int elementCount, final int maxElement) {
    final List<Integer> elements = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(elements);
    final Set<Integer> expected = new LinkedHashSet<>(elements);
    assertEquals(subject, expected);
    final boolean changedByReAdding = subject.addAll(elements);
    Assert.assertFalse(changedByReAdding);
}
/**
 * Generates a list of random integers using the engine's shared random generator.
 *
 * @param elementCount number of elements in the returned list.
 * @param maxElement largest value (inclusive) an element may take; the smallest is 0.
 * @return a new list with {@code elementCount} values, possibly containing repeats.
 */
private List<Integer> generateElementCollection(final int elementCount, final int maxElement) {
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    final List<Integer> result = new ArrayList<>(elementCount);
    final int exclusiveBound = maxElement + 1;
    int remaining = elementCount;
    while (remaining-- > 0) {
        result.add(random.nextInt(exclusiveBound));
    }
    return result;
}
/**
 * Checks that {@code get(i)} on the set and on its {@code asList()} view both return the
 * element found at position {@code i} of an equivalent {@link LinkedHashSet}.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value (inclusive) that may be generated.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor"})
public void testLookupByIndex(final int elementCount, final int maxElement) {
    final List<Integer> elements = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(elements);
    final Set<Integer> expected = new LinkedHashSet<>(elements);
    final Integer[] expectedByIndex = expected.toArray(new Integer[expected.size()]);
    final List<Integer> subjectView = subject.asList();
    int index = 0;
    while (index < subject.size()) {
        final int expectedElement = expectedByIndex[index];
        final int fromSet = subject.get(index);
        final int fromList = subjectView.get(index);
        Assert.assertEquals(fromSet, expectedElement);
        Assert.assertEquals(fromList, expectedElement);
        index++;
    }
}
/**
 * Checks that {@code indexOf} returns the insertion position of every present element and
 * {@code -1} for a value that is not in the set.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value (inclusive) that may be generated.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor"})
public void testIndexOf(final int elementCount, final int maxElement) {
    final List<Integer> elements = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(elements);
    final Set<Integer> expected = new LinkedHashSet<>(elements);
    final Integer[] expectedByIndex = expected.toArray(new Integer[expected.size()]);
    final List<Integer> subjectView = subject.asList();
    for (int position = 0; position < subject.size(); position++) {
        final int expectedElement = expectedByIndex[position];
        final int listElement = subjectView.get(position);
        final int reportedIndex = subject.indexOf(expectedElement);
        Assert.assertEquals(listElement, expectedElement);
        Assert.assertEquals(reportedIndex, position);
        // Generated elements are never negative, so this negative value cannot be in the set.
        final int absentElement = -expectedElement - 1;
        Assert.assertEquals(subject.indexOf(absentElement), -1);
    }
}
/**
 * Removes half of the elements (rounded up), chosen at random, from both an {@link IndexedSet}
 * and an equivalent {@link LinkedHashSet}, then compares the two.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value (inclusive) that may be generated.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
public void testRemoveHalf(final int elementCount, final int maxElement) {
    final List<Integer> elements = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(elements);
    final Set<Integer> expected = new LinkedHashSet<>(elements);
    final int toRemove = (subject.size() + 1) / 2;
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    for (int remaining = toRemove; remaining > 0; remaining--) {
        final int victimIndex = random.nextInt(subject.size());
        final int victim = subject.get(victimIndex);
        subject.remove(victim);
        expected.remove(victim);
    }
    assertEquals(subject, expected);
}
/**
 * Removes every element, one at a time in random order, from both an {@link IndexedSet} and
 * an equivalent {@link LinkedHashSet}, then compares the two.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value (inclusive) that may be generated.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
public void testRemoveAll(final int elementCount, final int maxElement) {
    final List<Integer> elements = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(elements);
    final Set<Integer> expected = new LinkedHashSet<>(elements);
    final int toRemove = subject.size();
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    int removed = 0;
    while (removed < toRemove) {
        final int victimIndex = random.nextInt(subject.size());
        final int victim = subject.get(victimIndex);
        subject.remove(victim);
        expected.remove(victim);
        removed++;
    }
    assertEquals(subject, expected);
}
/**
 * Clears both an {@link IndexedSet} and an equivalent {@link LinkedHashSet} and checks that
 * they still compare as equivalent afterwards.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value (inclusive) that may be generated.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor"})
public void testClear(final int elementCount, final int maxElement) {
    final List<Integer> elements = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(elements);
    final Set<Integer> expected = new LinkedHashSet<>(elements);

    subject.clear();
    expected.clear();

    assertEquals(subject, expected);
}
/**
 * Empties both an {@link IndexedSet} and an equivalent {@link LinkedHashSet} by random
 * single-element removals, re-adds the original list to both, and compares the two.
 *
 * @param elementCount number of random elements to generate.
 * @param maxElement largest value (inclusive) that may be generated.
 */
@Test(dataProvider = "elementCountMaxElementData",
      dependsOnMethods = {"testCompositionByCollectionConstructor","testIndexOf"})
public void testRemoveAndAdd(final int elementCount, final int maxElement) {
    final List<Integer> elements = generateElementCollection(elementCount, maxElement);
    final IndexedSet<Integer> subject = new IndexedSet<>(elements);
    final Set<Integer> expected = new LinkedHashSet<>(elements);
    final int toRemove = subject.size();
    final Random random = GenomeAnalysisEngine.getRandomGenerator();
    for (int remaining = toRemove; remaining > 0; remaining--) {
        final int victimIndex = random.nextInt(subject.size());
        final int victim = subject.get(victimIndex);
        subject.remove(victim);
        expected.remove(victim);
    }
    // Refill both collections from the same source list.
    subject.addAll(elements);
    expected.addAll(elements);
    assertEquals(subject, expected);
}
/** Initial capacities combined into the rows of the three-column data provider. */
private static final int[] INITIAL_CAPACITY = { 0, 10, 100 };
/** Element counts combined into the rows of both data providers. */
private static final int[] ELEMENT_COUNT = { 0, 1, 10, 100 , 1000 };
/** Maximum element values combined into the rows of both data providers. */
private static final int[] MAX_ELEMENT = { 0, 1, 5, 10, 50, 100, 500 };
/**
 * Produces every combination of initial capacity, element count and maximum element value.
 *
 * @return one row per combination, laid out as {@code {initialCapacity, elementCount, maxElement}},
 *   with the capacity varying slowest and the maximum element fastest.
 */
@DataProvider(name="initialCapacityElementCountMaxElementData")
public Object[][] initialCapacityElementCountMaxElementData() {
    final List<Object[]> rows =
            new ArrayList<>(INITIAL_CAPACITY.length * ELEMENT_COUNT.length * MAX_ELEMENT.length);
    for (final int capacity : INITIAL_CAPACITY) {
        for (final int count : ELEMENT_COUNT) {
            for (final int max : MAX_ELEMENT) {
                rows.add(new Object[] { capacity, count, max });
            }
        }
    }
    return rows.toArray(new Object[rows.size()][]);
}
/**
 * Produces every combination of element count and maximum element value.
 *
 * @return one row per combination, laid out as {@code {elementCount, maxElement}}, with the
 *   element count varying slowest.
 */
@DataProvider(name="elementCountMaxElementData")
public Object[][] elementCountMaxElementData() {
    final int columns = MAX_ELEMENT.length;
    final Object[][] result = new Object[ELEMENT_COUNT.length * columns][];
    // Row r pairs ELEMENT_COUNT[r / columns] with MAX_ELEMENT[r % columns].
    for (int row = 0; row < result.length; row++) {
        result[row] = new Object[] { ELEMENT_COUNT[row / columns], MAX_ELEMENT[row % columns] };
    }
    return result;
}
/**
 * Asserts that an indexed-set is equivalent to the insertion-ordered reference set provided.
 * <p>
 * Both must have the same size and iterate over equal elements in the same order. The
 * indexed-set's {@code asList()} view and {@code indexOf} must agree with that order as well.
 * </p>
 *
 * @param subject the indexed-set to test.
 * @param elementSet the reference set; its iteration order is taken as the expected order.
 */
private void assertEquals(final IndexedSet<Integer> subject, final Set<Integer> elementSet) {
    Assert.assertEquals(subject.size(), elementSet.size());
    final List<Integer> subjectList = subject.asList();
    Assert.assertEquals(subjectList.size(), elementSet.size());
    final Iterator<Integer> subjectIterator = subject.iterator();
    // The reference iterator must come from elementSet. Taking it from subject would compare
    // the indexed-set with itself and never check content or order against the reference.
    final Iterator<Integer> elementSetIterator = elementSet.iterator();
    final ListIterator<Integer> subjectListIterator = subjectList.listIterator();
    while (subjectIterator.hasNext()) {
        Assert.assertTrue(elementSetIterator.hasNext(), "more elements in indexed-set than in the equivalent hash-set");
        Assert.assertTrue(subjectListIterator.hasNext());
        final Integer nextElement = subjectIterator.next();
        Assert.assertEquals(nextElement, elementSetIterator.next(), "elements in indexed-set do not follow the same order as equivalent linked hash-set's");
        Assert.assertEquals(subjectListIterator.next(), nextElement);
        // previousIndex() is the position of the element just returned by next().
        Assert.assertEquals(subject.indexOf(nextElement), subjectListIterator.previousIndex());
    }
    // Neither the reference set nor the list view may have elements left over.
    Assert.assertFalse(elementSetIterator.hasNext());
    Assert.assertFalse(subjectListIterator.hasNext());
}
}

View File

@ -0,0 +1,171 @@
/*
* By downloading the PROGRAM you agree to the following terms of use:
*
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
*
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
*
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
*
* 1. DEFINITIONS
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
*
* 2. LICENSE
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
*
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
* Copyright 2012 Broad Institute, Inc.
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
*
* 4. INDEMNIFICATION
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
*
* 5. NO REPRESENTATIONS OR WARRANTIES
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
*
* 6. ASSIGNMENT
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
*
* 7. MISCELLANEOUS
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
*/
package org.broadinstitute.gatk.utils.collections;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
/**
* Tests {@link IntMaxHeap}.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class IntMaxHeapUnitTest {

    /**
     * Fills a heap with random values; the test passes as long as no exception is thrown.
     *
     * @param initialCapacity capacity passed to the heap constructor.
     * @param elementCount number of random values to add.
     */
    @Test(dataProvider = "capacityData")
    public void testCapacity(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
        int pending = elementCount;
        while (pending-- > 0) {
            heap.add(rnd.nextInt());
        }
    }

    /**
     * Checks that a new heap is empty and that size and emptiness track every addition.
     *
     * @param initialCapacity capacity passed to the heap constructor.
     * @param elementCount number of random values to add.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testCapacity"})
    public void testEmptynessAndSize(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
        Assert.assertEquals(heap.size(), 0);
        Assert.assertTrue(heap.isEmpty());
        for (int added = 1; added <= elementCount; added++) {
            heap.add(rnd.nextInt());
            Assert.assertEquals(heap.size(), added);
            Assert.assertFalse(heap.isEmpty());
        }
    }

    /**
     * Checks that clearing a populated heap leaves it empty with size zero.
     *
     * @param initialCapacity capacity passed to the heap constructor.
     * @param elementCount number of random values to add before clearing.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testEmptynessAndSize"})
    public void testClear(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
        int pending = elementCount;
        while (pending-- > 0) {
            heap.add(rnd.nextInt());
        }
        heap.clear();
        Assert.assertEquals(heap.size(), 0);
        Assert.assertTrue(heap.isEmpty());
    }

    /**
     * Checks that adding a whole array yields the same heap content as adding its values one by one.
     *
     * @param initialCapacity capacity passed to both heap constructors.
     * @param elementCount number of random values to add.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testCapacity"})
    public void testAddArray(final int initialCapacity, final int elementCount) {
        final IntMaxHeap oneByOneHeap = new IntMaxHeap(initialCapacity);
        final IntMaxHeap bulkHeap = new IntMaxHeap(initialCapacity);
        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
        final int[] values = new int[elementCount];
        for (int i = 0; i < values.length; i++) {
            values[i] = rnd.nextInt();
            oneByOneHeap.add(values[i]);
        }
        bulkHeap.add(values);
        Assert.assertEquals(bulkHeap.size(), oneByOneHeap.size());
        // Drain both heaps in parallel: they must hand out the same sequence.
        while (!bulkHeap.isEmpty()) {
            Assert.assertEquals(bulkHeap.remove(), oneByOneHeap.remove());
        }
    }

    /**
     * Checks that elements are removed in descending order and that the size shrinks by one each time.
     *
     * @param initialCapacity capacity passed to the heap constructor.
     * @param elementCount number of random values to add and then remove.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testEmptynessAndSize"})
    public void testRemove(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
        final List<Integer> expectedOrder = new ArrayList<>(elementCount);
        for (int i = 0; i < elementCount; i++) {
            final int value = rnd.nextInt();
            expectedOrder.add(value);
            heap.add(value);
        }
        // Largest first, which is the order a max-heap must return its content.
        Collections.sort(expectedOrder);
        Collections.reverse(expectedOrder);
        int remaining = elementCount;
        for (final int expected : expectedOrder) {
            Assert.assertEquals(heap.remove(), expected, "element-count = " + elementCount + ", initial-capacity = " + initialCapacity);
            remaining--;
            Assert.assertEquals(heap.size(), remaining);
        }
    }

    /**
     * Checks that {@code peek} always reports the largest value added so far.
     * At least one value is added regardless of {@code elementCount}.
     *
     * @param initialCapacity capacity passed to the heap constructor.
     * @param elementCount total number of random values to add.
     */
    @Test(dataProvider = "capacityData", dependsOnMethods = {"testCapacity"})
    public void testPeek(final int initialCapacity, final int elementCount) {
        final IntMaxHeap heap = new IntMaxHeap(initialCapacity);
        final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
        int largest = rnd.nextInt();
        heap.add(largest);
        Assert.assertEquals(heap.peek(), largest);
        for (int added = 1; added < elementCount; added++) {
            final int value = rnd.nextInt();
            largest = Math.max(largest, value);
            heap.add(value);
            Assert.assertEquals(heap.peek(), largest);
        }
    }

    /**
     * Provides {initial-capacity, element-count} pairs; every pair adds more elements than the initial capacity.
     *
     * @return never {@code null}.
     */
    @DataProvider(name = "capacityData")
    public Object[][] capacityData() {
        return new Object[][] {
                {0, 100},
                {1, 113},
                {20, 301}
        };
    }
}

View File

@ -48,12 +48,15 @@ package org.broadinstitute.gatk.utils.genotyper;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
import org.broadinstitute.gatk.utils.GenomeLoc;
import org.broadinstitute.gatk.utils.GenomeLocParser;
import org.broadinstitute.gatk.utils.MathUtils;
import org.broadinstitute.gatk.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.testng.Assert;
import org.testng.SkipException;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@ -72,7 +75,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testInstantiationAndQuery(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> result = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> result = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
Assert.assertEquals(result.sampleCount(), samples.length);
Assert.assertEquals(result.alleleCount(), alleles.length);
@ -85,7 +88,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testLikelihoodFillingAndQuery(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> result = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> result = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] likelihoods = fillWithRandomLikelihoods(samples, alleles, result);
testLikelihoodMatrixQueries(samples, result, likelihoods);
}
@ -106,7 +109,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testBestAlleles(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples,alleles,original);
final int alleleCount = alleles.length;
for (int s = 0; s < samples.length; s++) {
@ -146,7 +149,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testBestAlleleMap(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples,alleles,original);
final Map<Allele,List<GATKSAMRecord>> expected = new HashMap<>(alleles.length);
for (final Allele allele : alleles)
@ -171,7 +174,7 @@ public class ReadLikelihoodsUnitTest
}
}
if ((bestAlleleLk - secondBestAlleleLk) > ReadLikelihoods.BestAllele.INFORMATIVE_THRESHOLD)
expected.get(alleles[bestAlleleIndex]).add(sampleMatrix.read(r));
expected.get(alleles[bestAlleleIndex]).add(sampleMatrix.readAt(r));
}
}
@ -189,7 +192,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testFilterPoorlyModeledReads(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
for (int s = 0; s < samples.length; s++) {
final int sampleReadCount = original.sampleReadCount(s);
@ -220,7 +223,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testFilterReadsToOverlap(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final GenomeLoc evenReadOverlap = locParser.createGenomeLoc(SAM_HEADER.getSequenceDictionary().getSequences().get(0).getSequenceName(),EVEN_READ_START ,EVEN_READ_START );
fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result = original.clone();
@ -231,7 +234,7 @@ public class ReadLikelihoodsUnitTest
newLikelihoods[s][a] = new double[(original.sampleReadCount(s) + 1) / 2];
final ReadLikelihoods.Matrix<Allele> sampleMatrix = original.sampleMatrix(s);
for (int r = 0; r < newLikelihoods[s][a].length; r++) {
Assert.assertEquals(result.readIndex(s,sampleMatrix.read(r << 1)),r);
Assert.assertEquals(result.readIndex(s,sampleMatrix.readAt(r << 1)),r);
newLikelihoods[s][a][r] = sampleMatrix.get(a, r << 1);
}
}
@ -240,14 +243,14 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "marginalizationDataSets")
public void testMarginalizationWithOverlap(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads, final Map<Allele,List<Allele>> newToOldAlleleMapping) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final GenomeLoc evenReadOverlap = locParser.createGenomeLoc(SAM_HEADER.getSequenceDictionary().getSequences().get(0).getSequenceName(),EVEN_READ_START ,EVEN_READ_START );
fillWithRandomLikelihoods(samples, alleles, original);
final ReadLikelihoods<Allele> marginalized = original.marginalize(newToOldAlleleMapping,evenReadOverlap);
Assert.assertNotNull(marginalized);
Assert.assertEquals(newToOldAlleleMapping.size(),marginalized.alleleCount());
for (int a = 0; a < marginalized.alleleCount(); a++) {
final List<Allele> oldAlleles = newToOldAlleleMapping.get(marginalized.allele(a));
final List<Allele> oldAlleles = newToOldAlleleMapping.get(marginalized.alleleAt(a));
Assert.assertNotNull(oldAlleles);
for (int s = 0; s < samples.length; s++) {
final ReadLikelihoods.Matrix<Allele> oldSmapleLikelihoods = original.sampleMatrix(s);
@ -268,13 +271,13 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "marginalizationDataSets")
public void testMarginalization(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads, final Map<Allele,List<Allele>> newToOldAlleleMapping) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
fillWithRandomLikelihoods(samples, alleles, original);
final ReadLikelihoods<Allele> marginalized = original.marginalize(newToOldAlleleMapping);
Assert.assertNotNull(marginalized);
Assert.assertEquals(newToOldAlleleMapping.size(),marginalized.alleleCount());
for (int a = 0; a < marginalized.alleleCount(); a++) {
final List<Allele> oldAlleles = newToOldAlleleMapping.get(marginalized.allele(a));
final List<Allele> oldAlleles = newToOldAlleleMapping.get(marginalized.alleleAt(a));
Assert.assertNotNull(oldAlleles);
for (int s = 0; s < samples.length; s++) {
final ReadLikelihoods.Matrix<Allele> oldSmapleLikelihoods = original.sampleMatrix(s);
@ -295,7 +298,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeBestToZero(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result= original.clone();
result.normalizeLikelihoods(true, Double.NEGATIVE_INFINITY);
@ -321,7 +324,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeCapWorstLK(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result= original.clone();
result.normalizeLikelihoods(false, - 0.001);
@ -354,7 +357,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testNormalizeCapWorstLKAndBestToZero(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result= original.clone();
result.normalizeLikelihoods(true, - 0.001);
@ -390,7 +393,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testAddMissingAlleles(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result = original.clone();
@ -408,11 +411,11 @@ public class ReadLikelihoodsUnitTest
// We add a single missing.
result.addMissingAlleles(Arrays.asList(newOne = Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-12345.6);
Assert.assertEquals(original.alleleCount() + 1, result.alleleCount());
Assert.assertEquals(result.alleleCount(), original.alleleCount() + 1);
// We add two more amongst existing alleles:
result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.allele(1),
result.allele(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321);
result.addMissingAlleles(Arrays.asList(newTwo = Allele.create("ATATATTATATTAATATT".getBytes(), false),result.alleleAt(1),
result.alleleAt(0),newThree = Allele.create("TGTGTGTATTG".getBytes(),false),Allele.create("ACCCCCAAAATTTAAAGGG".getBytes(),false)),-6.54321);
Assert.assertEquals(original.alleleCount()+3,result.alleleCount());
@ -439,7 +442,7 @@ public class ReadLikelihoodsUnitTest
@Test(dataProvider = "dataSets")
public void testAddNonRefAllele(final String[] samples, final Allele[] alleles, final Map<String,List<GATKSAMRecord>> reads) {
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(Arrays.asList(samples), Arrays.asList(alleles), reads);
final ReadLikelihoods<Allele> original = new ReadLikelihoods<>(new IndexedSampleList(samples), new IndexedAlleleList<>(alleles), reads);
final double[][][] originalLikelihoods = fillWithRandomLikelihoods(samples,alleles,original);
final ReadLikelihoods<Allele> result = original.clone();
result.addNonReferenceAllele(GATKVariantContextUtils.NON_REF_SYMBOLIC_ALLELE);
@ -473,13 +476,13 @@ public class ReadLikelihoodsUnitTest
private void testLikelihoodMatrixQueries(String[] samples, ReadLikelihoods<Allele> result, final double[][][] likelihoods) {
for (final String sample : samples) {
final int sampleIndex = result.sampleIndex(sample);
final double[][] likelihoodMatrix = result.sampleValues(sampleIndex);
final int sampleReadCount = result.sampleReadCount(sampleIndex);
Assert.assertEquals(result.alleleCount(), likelihoodMatrix.length);
for (int a = 0; a < likelihoodMatrix.length; a++) {
Assert.assertEquals(likelihoodMatrix[a].length,sampleReadCount);
final int alleleCount = result.alleleCount();
Assert.assertEquals(result.alleleCount(), alleleCount);
for (int a = 0; a < alleleCount; a++) {
Assert.assertEquals(result.sampleReadCount(sampleIndex),sampleReadCount);
for (int r = 0; r < sampleReadCount; r++)
Assert.assertEquals(likelihoodMatrix[a][r],
Assert.assertEquals(result.sampleMatrix(sampleIndex).get(a,r),
likelihoods == null ? 0.0 : likelihoods[sampleIndex][a][r], EPSILON);
}
}
@ -539,7 +542,7 @@ public class ReadLikelihoodsUnitTest
final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
final Object[][] result = new Object[SAMPLE_SETS.length * ALLELE_SETS.length * ALLELE_SETS.length][];
int nextIndex = 0;
for (int s = 0; s < SAMPLE_SETS.length; s++)
for (int s = 0; s < SAMPLE_SETS.length; s++) {
for (int a = 0; a < ALLELE_SETS.length; a++) {
for (int b = 0; b < ALLELE_SETS.length; b++) {
if (ALLELE_SETS[b].length < ALLELE_SETS[a].length)
@ -548,6 +551,7 @@ public class ReadLikelihoodsUnitTest
};
}
}
}
return Arrays.copyOf(result,nextIndex);
}catch (final Throwable e) {
throw new RuntimeException(e);
@ -588,9 +592,6 @@ public class ReadLikelihoodsUnitTest
}
}
final SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader();
final GenomeLocParser locParser = new GenomeLocParser(SAM_HEADER.getSequenceDictionary());
private Map<String,List<GATKSAMRecord>> dataSetReads(final String[] samples,
final Random rnd) {
final Map<String,List<GATKSAMRecord>> result = new HashMap<>(samples.length);
@ -606,4 +607,245 @@ public class ReadLikelihoodsUnitTest
}
return result;
}
/**
 * Builds a {@link ReadLikelihoods} from generated samples, alleles and reads and checks that its
 * allele-list, sample-list, reference-index, matrix, allele and sample queries agree with the inputs.
 *
 * @param readCounts number of reads per sample; its length is the number of samples.
 * @param alleleCount number of alleles to generate.
 * @param hasReference whether one of the generated alleles is marked as reference.
 */
@Test(dataProvider="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
public void testInstantiationAndBasicQueries(final int[] readCounts, final int alleleCount, final boolean hasReference) {
    final SampleList sampleList = sampleList(readCounts);
    final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
    final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList, readCounts);
    final ReadLikelihoods<Allele> subject = new ReadLikelihoods<>(sampleList,alleleList,sampleToReads);

    AlleleListUnitTester.assertAlleleList(subject, AlleleListUtils.asList(alleleList));
    SampleListUnitTester.assertSampleList(subject,SampleListUtils.asList(sampleList));

    if (hasReference) {
        final int referenceIndex = AlleleListUtils.indexOfReference(alleleList);
        Assert.assertTrue(referenceIndex >= 0);
        // Query the subject, not the input list again: comparing the input with itself can never fail.
        Assert.assertEquals(AlleleListUtils.indexOfReference(subject),referenceIndex);
    } else {
        Assert.assertEquals(AlleleListUtils.indexOfReference(subject), -1);
    }

    testLikelihoodMatrixQueries(alleleList, sampleList, sampleToReads, subject);
    testAlleleQueries(alleleList, subject);
    testSampleQueries(sampleList, sampleToReads, subject);
}
/**
 * Writes a distinct likelihood into every (sample, allele, read) cell through the per-sample matrix
 * and checks that each value can be read back, and that the number of cells written matches the
 * number expected from the read and allele counts.
 *
 * @param readCounts number of reads per sample; its length is the number of samples.
 * @param alleleCount number of alleles to generate.
 * @param hasReference whether one of the generated alleles is marked as reference.
 */
@Test(dataProvider="readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference")
public void testLikelihoodWriting(final int[] readCounts, final int alleleCount, final boolean hasReference) {
    final SampleList sampleList = sampleList(readCounts);
    final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
    final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList,readCounts);
    final ReadLikelihoods<Allele> subject = new ReadLikelihoods<>(sampleList,alleleList,sampleToReads);

    final int sampleCount = readCounts.length;
    int totalLikelihoodsSet = 0;
    int expectedLikelihoodsSet = 0;
    for (int s = 0; s < sampleCount; s++) {
        expectedLikelihoodsSet += readCounts[s] * alleleCount;
        final ReadLikelihoods.Matrix<Allele> matrix = subject.sampleMatrix(s);
        final int readCount = matrix.readCount();
        for (int a = 0; a < alleleCount; a++) {
            for (int r = 0; r < readCount; r++) {
                final double likelihood = testLikelihood(s, a, r);
                // Paranoia: the written value must differ from zero so the read-back check is meaningful.
                // Compared as primitives; a boxed Double-vs-Integer comparison would be "not equal" for any value.
                Assert.assertTrue(likelihood != 0.0, "test likelihood must not be zero");
                totalLikelihoodsSet++;
                matrix.set(a,r,likelihood);
                Assert.assertEquals(matrix.get(a, r),likelihood);
            }
        }
    }
    Assert.assertEquals(totalLikelihoodsSet,expectedLikelihoodsSet);
}
/**
 * Checks the conversion from per-sample {@link PerReadAlleleLikelihoodMap} instances into a
 * {@link ReadLikelihoods}: every sample must be indexed, alleles and reads must be indexed exactly
 * when a likelihood was stored for them, and every stored likelihood must be read back unchanged.
 *
 * @param readCounts number of reads per sample; its length is the number of samples.
 * @param alleleCount number of alleles to generate.
 * @param hasReference whether one of the generated alleles is marked as reference.
 */
@Test(dependsOnMethods={"testLikelihoodWriting","testInstantiationAndBasicQueries"},
      dataProvider="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
public void testMapConversion(final int[] readCounts, final int alleleCount, final boolean hasReference) {
    final SampleList sampleList = sampleList(readCounts);
    final AlleleList<Allele> alleleList = alleleList(alleleCount,hasReference);
    final Map<String,List<GATKSAMRecord>> sampleToReads = ReadLikelihoodsUnitTester.sampleToReads(sampleList,readCounts);

    // Alleles and reads for which at least one likelihood is stored in the maps.
    final Set<Allele> allelesSeen = new HashSet<>();
    final Set<GATKSAMRecord> readsSeen = new HashSet<>();

    final int sampleCount = sampleList.sampleCount();
    final Map<String,PerReadAlleleLikelihoodMap> likelihoodMaps = new HashMap<>(sampleCount);

    // Populate one map per sample with the deterministic test likelihoods.
    for (int s = 0; s < sampleCount; s++) {
        final String sample = sampleList.sampleAt(s);
        final List<GATKSAMRecord> reads = sampleToReads.get(sample);
        final PerReadAlleleLikelihoodMap perSampleMap = new PerReadAlleleLikelihoodMap();
        for (int a = 0; a < alleleCount; a++) {
            for (int r = 0; r < reads.size(); r++) {
                final Allele allele = alleleList.alleleAt(a);
                final GATKSAMRecord read = reads.get(r);
                perSampleMap.add(read, allele, testLikelihood(s, a, r));
                allelesSeen.add(allele);
                readsSeen.add(read);
            }
        }
        likelihoodMaps.put(sample,perSampleMap);
    }

    final ReadLikelihoods<Allele> subject = ReadLikelihoods.fromPerAlleleReadLikelihoodsMap(likelihoodMaps);

    for (int s = 0; s < sampleCount; s++) {
        final String sample = sampleList.sampleAt(s);
        final int sIndex = subject.sampleIndex(sample);
        Assert.assertTrue(sIndex >= 0);
        Assert.assertTrue(sIndex < sampleCount);
        final List<GATKSAMRecord> reads = sampleToReads.get(sample);
        final int sampleReadCount = reads.size();
        final ReadLikelihoods.Matrix<Allele> sampleLikelihoods = subject.sampleMatrix(sIndex);
        for (int a = 0; a < alleleCount; a++) {
            final Allele allele = alleleList.alleleAt(a);
            final int aIndex = subject.alleleIndex(allele);
            Assert.assertEquals(aIndex >= 0,allelesSeen.contains(allele));
            Assert.assertTrue(aIndex < alleleCount);
            if (aIndex != -1) {
                for (int r = 0; r < sampleReadCount; r++) {
                    final GATKSAMRecord read = reads.get(r);
                    final int rIndex = subject.readIndex(sIndex,read);
                    final int rIndex2 = sampleLikelihoods.readIndex(read);
                    Assert.assertEquals(rIndex,rIndex2);
                    Assert.assertEquals(rIndex >= 0,readsSeen.contains(read));
                    Assert.assertTrue(rIndex < sampleReadCount);
                    if (rIndex != -1) {
                        // Expected values use the original (s, a, r) indices, not the subject's own.
                        final double likelihood = sampleLikelihoods.get(aIndex,rIndex);
                        Assert.assertEquals(likelihood,testLikelihood(s,a,r));
                    }
                }
            }
        }
    }
}
/**
 * Produces the deterministic likelihood used to fill and verify matrix cells.
 * The value is the negated absolute value of a weighted sum of the three indices.
 *
 * @param sampleIndex index of the sample.
 * @param alleleIndex index of the allele.
 * @param readIndex index of the read within the sample.
 * @return the test likelihood for the given cell.
 */
private double testLikelihood(final int sampleIndex, final int alleleIndex, final int readIndex) {
    final int weightedSum = 31 * (sampleIndex + 1) + 101 * alleleIndex + 1009 * readIndex;
    return -Math.abs(weightedSum);
}
// Random generator obtained from GenomeAnalysisEngine; alleleList(...) uses it to pick which allele becomes the reference.
private final Random rnd = GenomeAnalysisEngine.getRandomGenerator();
/**
 * Checks every per-sample matrix of a newly built likelihoods collection: its dimensions, its
 * alleles and reads at each index, and that every cell holds {@code 0.0}.
 *
 * @param alleles the alleles the collection was built from.
 * @param samples the samples the collection was built from.
 * @param sampleToReads the reads of each sample, keyed by sample name.
 * @param result the collection under test.
 */
private void testLikelihoodMatrixQueries(final AlleleList<Allele> alleles, final SampleList samples,
                                         final Map<String,List<GATKSAMRecord>> sampleToReads, ReadLikelihoods<Allele> result) {
    for (final String sample : SampleListUtils.asList(samples)) {
        final ReadLikelihoods.Matrix<Allele> matrix = result.sampleMatrix(result.sampleIndex(sample));
        final List<GATKSAMRecord> expectedReads = sampleToReads.get(sample);
        final int expectedReadCount = expectedReads.size();

        Assert.assertEquals(matrix.alleleCount(), alleles.alleleCount());
        Assert.assertEquals(matrix.readCount(), expectedReadCount);

        for (int a = 0; a < matrix.alleleCount(); a++) {
            Assert.assertEquals(matrix.alleleAt(a),alleles.alleleAt(a));
            for (int r = 0; r < expectedReadCount; r++) {
                Assert.assertEquals(matrix.readAt(r),expectedReads.get(r));
                Assert.assertEquals(matrix.get(a, r), 0.0);
            }
        }
    }
}
/**
 * Checks that every input allele has a non-negative index in the collection, that no two alleles
 * share an index, and that the index points back to the very same allele in the input list.
 *
 * @param alleles the alleles the collection was built from.
 * @param result the collection under test.
 */
private void testAlleleQueries(final AlleleList<Allele> alleles, ReadLikelihoods<Allele> result) {
    final Set<Integer> usedIndices = new HashSet<>();
    for (final Allele current : AlleleListUtils.asList(alleles)) {
        final int index = result.alleleIndex(current);
        Assert.assertTrue(index >= 0);
        // Each index may be handed out only once.
        Assert.assertFalse(usedIndices.contains(index));
        usedIndices.add(index);
        Assert.assertSame(current,alleles.alleleAt(index));
    }
}
/**
 * Checks that every input sample has a unique non-negative index in the collection and that the
 * reads reported for it are the same instances, in the same order, as the input reads.
 *
 * @param samples the samples the collection was built from.
 * @param reads the reads of each sample, keyed by sample name.
 * @param result the collection under test.
 */
private void testSampleQueries(final SampleList samples, Map<String, List<GATKSAMRecord>> reads,
                               final ReadLikelihoods<Allele> result) {
    final Set<Integer> usedIndices = new HashSet<>(samples.sampleCount());
    for (final String sample : SampleListUtils.asList(samples)) {
        final int sampleIndex = result.sampleIndex(sample);
        Assert.assertTrue(sampleIndex >= 0);
        Assert.assertFalse(usedIndices.contains(sampleIndex));
        usedIndices.add(sampleIndex);

        final List<GATKSAMRecord> actualReads = result.sampleReads(sampleIndex);
        final List<GATKSAMRecord> expectedReads = reads.get(sample);

        // First compare as sets, then position by position.
        final Set<GATKSAMRecord> actualReadSet = new HashSet<>(actualReads);
        final Set<GATKSAMRecord> expectedReadSet = new HashSet<>(expectedReads);
        Assert.assertEquals(actualReadSet,expectedReadSet);

        final int actualReadCount = actualReads.size();
        for (int r = 0; r < actualReadCount; r++) {
            final GATKSAMRecord actualRead = actualReads.get(r);
            Assert.assertSame(actualRead, expectedReads.get(r));
            final int readIndex = result.readIndex(sampleIndex, actualRead);
            Assert.assertEquals(readIndex,r);
        }
    }
}
/**
 * Generates a random allele list for a test case.
 *
 * @param alleleCount number of alleles to generate.
 * @param hasReference if {@code true}, one randomly chosen allele is replaced by a reference allele with the same bases.
 * @return the indexed allele list.
 * @throws SkipException if the indexed list ends up with a different size than the generated array.
 */
private AlleleList<Allele> alleleList(final int alleleCount, final boolean hasReference) {
    final Allele[] generated = AlleleListUnitTester.generateRandomAlleles(alleleCount,100);
    if (hasReference) {
        final int chosen = rnd.nextInt(alleleCount);
        final byte[] bases = generated[chosen].getBases();
        generated[chosen] = Allele.create(bases,true);
    }
    final AlleleList<Allele> indexed = new IndexedAlleleList<>(generated);
    if (indexed.alleleCount() == generated.length) {
        return indexed;
    }
    throw new SkipException("repeated alleles, should be infrequent");
}
// Artificial SAM header whose sequence dictionary backs the genome locations used by the tests.
// NOTE(review): the arguments (10, 0, 1000) presumably mean 10 contigs, numbered from 0, of 1000 bases each -- confirm against ArtificialSAMUtils.
private SAMFileHeader SAM_HEADER = ArtificialSAMUtils.createArtificialSamHeader(10, 0, 1000);
// Genome location parser built on the artificial header's sequence dictionary.
final GenomeLocParser locParser = new GenomeLocParser(SAM_HEADER.getSequenceDictionary());
// Read-count scenarios for the data providers: each row is one scenario and each entry is the number
// of reads of one sample. Includes no samples at all, samples without reads, and mixed cases.
private int[][] READ_COUNTS = new int[][] {
{},
{ 100 },
{ 0 },
{ 0, 0, 0 },
{ 1, 0, 1 },
{ 100, 10 , 100},
{ 1000, 10, 100, 20, 23 }
};
// Allele counts that readCountsAndAlleleCountData() combines with every READ_COUNTS row.
private int[] ALLELE_COUNTS = new int[] { 0, 1, 2, 3, 10, 20 };
/**
 * Provides every combination of a {@code READ_COUNTS} row and an {@code ALLELE_COUNTS} value,
 * each one first without and then with a reference allele.
 *
 * @return parameter sets of the form {read-counts, allele-count, has-reference}.
 */
@DataProvider(name="readCountsAndAlleleCountData")
public Object[][] readCountsAndAlleleCountData() {
    final List<Object[]> combinations = new ArrayList<>(READ_COUNTS.length * ALLELE_COUNTS.length * 2);
    for (int rc = 0; rc < READ_COUNTS.length; rc++) {
        for (int ac = 0; ac < ALLELE_COUNTS.length; ac++) {
            combinations.add(new Object[]{ READ_COUNTS[rc], ALLELE_COUNTS[ac], false});
            combinations.add(new Object[]{ READ_COUNTS[rc], ALLELE_COUNTS[ac], true});
        }
    }
    return combinations.toArray(new Object[combinations.size()][]);
}
/**
 * Provides the same parameter sets as {@link #readCountsAndAlleleCountData()} except the impossible
 * ones that ask for a reference allele while having zero alleles.
 *
 * @return parameter sets of the form {read-counts, allele-count, has-reference}.
 */
@DataProvider(name="readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference")
public Object[][] readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference() {
    final Object[][] all = readCountsAndAlleleCountData();
    final List<Object[]> kept = new ArrayList<>(all.length);
    for (final Object[] candidate : all) {
        final boolean wantsReference = candidate[2].equals(true);
        final boolean hasNoAlleles = candidate[1].equals(0);
        if (wantsReference && hasNoAlleles) {
            continue;
        }
        kept.add(candidate);
    }
    return kept.toArray(new Object[kept.size()][]);
}
/**
 * Further restricts {@link #readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference()} to the
 * parameter sets whose read counts add up to more than zero.
 *
 * @return parameter sets of the form {read-counts, allele-count, has-reference}.
 */
@DataProvider(name="readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference")
public Object[][] readCountsAndAlleleCountDataSkippingNoLikelihoodsOrNoAlleleAndWithReference() {
    final Object[][] candidates = readCountsAndAlleleCountDataSkippingNoAlleleAndWithReference();
    final List<Object[]> kept = new ArrayList<>(candidates.length);
    for (final Object[] candidate : candidates) {
        final long totalReadCount = MathUtils.sum((int[]) candidate[0]);
        if (totalReadCount <= 0) {
            continue;
        }
        kept.add(candidate);
    }
    return kept.toArray(new Object[kept.size()][]);
}
/**
 * Creates a sample list with one sample per entry of {@code readCounts}, named
 * {@code SAMPLE_0}, {@code SAMPLE_1}, and so on.
 *
 * @param readCounts per-sample read counts; only its length is used here.
 * @return the indexed sample list.
 */
private SampleList sampleList(final int[] readCounts) {
    final int sampleCount = readCounts.length;
    final List<String> names = new ArrayList<>(sampleCount);
    while (names.size() < sampleCount) {
        names.add("SAMPLE_" + names.size());
    }
    return new IndexedSampleList(names);
}
}

View File

@ -46,14 +46,14 @@
package org.broadinstitute.gatk.utils.gvcf;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceModel;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.Utils;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
@ -100,21 +100,21 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testHeaderWriting() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.writeHeader(new VCFHeader());
Assert.assertTrue(mockWriter.headerWritten);
}
@Test
public void testClose() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.close();
Assert.assertTrue(mockWriter.closed);
}
@Test
public void testCloseWithoutClosingUnderlyingWriter() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.close(false);
Assert.assertFalse(mockWriter.closed);
}
@ -164,7 +164,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCloseEmitsLastVariant() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 0);
@ -176,7 +176,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCloseDoesntEmitsLastVariantWhenNonRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@ -188,7 +188,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@ -204,7 +204,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryToLowerPositionsRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 30, 30));
writer.add(makeHomRef("20", 31, 30));
@ -220,7 +220,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryFromNonRefToLowerPositionsRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 20, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@ -235,7 +235,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryNonRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@ -248,7 +248,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testCrossingContigBoundaryNonRefThenNonRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeNonRef("20", 1, 30));
Assert.assertEquals(mockWriter.emitted.size(), 1);
@ -283,7 +283,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testVariantForcesNonRef() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 30));
writer.add(makeHomRef("20", 2, 30));
@ -300,7 +300,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testEmittingTwoBands() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@ -315,7 +315,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testNonContiguousBlocks() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@ -329,7 +329,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testDeletion() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@ -347,7 +347,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test
public void testHomRefAlt() {
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition);
final GVCFWriter writer = new GVCFWriter(mockWriter, standardPartition, HomoSapiensConstants.DEFAULT_PLOIDY);
writer.add(makeHomRef("20", 1, 0));
writer.add(makeHomRef("20", 2, 0));
@ -383,7 +383,7 @@ public class GVCFWriterUnitTest extends BaseTest {
@Test(dataProvider = "BandPartitionData")
public void testMyData(final List<Integer> partitions, final boolean expectedGood) {
try {
GVCFWriter.parsePartitions(partitions);
GVCFWriter.parsePartitions(partitions,2);
Assert.assertTrue(expectedGood, "Expected to fail but didn't");
} catch ( Exception e ) {
Assert.assertTrue(! expectedGood, "Expected to succeed but failed with message " + e.getMessage());

View File

@ -46,11 +46,12 @@
package org.broadinstitute.gatk.utils.gvcf;
import org.broadinstitute.gatk.utils.BaseTest;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.gatk.utils.BaseTest;
import org.broadinstitute.gatk.utils.variant.HomoSapiensConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
@ -58,7 +59,6 @@ import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
public class HomRefBlockUnitTest extends BaseTest {
@ -71,7 +71,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testBasicConstruction() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
Assert.assertSame(band.getStartingVC(), vc);
Assert.assertEquals(band.getRef(), vc.getReference());
Assert.assertEquals(band.getGQLowerBound(), 10);
@ -86,8 +86,9 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testMinMedian() {
//TODO - might be better to make this test use a data provider?
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
gb.alleles(vc.getAlleles());
int pos = vc.getStart();
band.add(pos++, gb.DP(10).GQ(11).PL(new int[]{0,11,100}).make());
@ -117,8 +118,9 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test
public void testBigGQIsCapped() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
gb.alleles(vc.getAlleles());
band.add(vc.getStart(), gb.DP(1000).GQ(1000).PL(new int[]{0,10,100}).make());
assertValues(band, 1000, 1000, 99, 99);
@ -126,7 +128,7 @@ public class HomRefBlockUnitTest extends BaseTest {
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBadAdd() {
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
final GenotypeBuilder gb = new GenotypeBuilder("NA12878");
band.add(vc.getStart() + 10, gb.DP(10).GQ(11).PL(new int[]{0,10,100}).make());
@ -156,7 +158,7 @@ public class HomRefBlockUnitTest extends BaseTest {
/**
 * Data-driven check of {@code HomRefBlock.isContiguous}.
 *
 * <p>
 * A block is built from the fixture {@code vc}; a copy of {@code vc} moved to the given contig and
 * single-base position is then tested against it.
 * </p>
 *
 * @param contig contig assigned to the test variant context.
 * @param pos start and stop position assigned to the test variant context.
 * @param expected the value {@code isContiguous} must return.
 */
@Test(dataProvider = "ContiguousData")
public void testIsContiguous(final String contig, final int pos, final boolean expected) {
final HomRefBlock band = new HomRefBlock(vc, 10, 20);
final HomRefBlock band = new HomRefBlock(vc, 10, 20, HomoSapiensConstants.DEFAULT_PLOIDY);
// Same content as vc, but relocated to (contig, pos) and spanning a single position.
final VariantContext testVC = new VariantContextBuilder(vc).chr(contig).start(pos).stop(pos).make();
Assert.assertEquals(band.isContiguous(testVC), expected);
}

View File

@ -26,14 +26,13 @@
package org.broadinstitute.gatk.engine;
import com.google.java.contract.Ensures;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.reference.IndexedFastaSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.variant.vcf.VCFConstants;
import org.apache.log4j.Logger;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
import org.broadinstitute.gatk.engine.arguments.ValidationExclusion;
import org.broadinstitute.gatk.engine.datasources.reads.*;
@ -55,8 +54,12 @@ import org.broadinstitute.gatk.engine.refdata.utils.RMDTriplet;
import org.broadinstitute.gatk.engine.resourcemanagement.ThreadAllocation;
import org.broadinstitute.gatk.engine.samples.SampleDB;
import org.broadinstitute.gatk.engine.samples.SampleDBBuilder;
import org.broadinstitute.gatk.engine.walkers.*;
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedSampleList;
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
import org.broadinstitute.gatk.utils.*;
import org.broadinstitute.gatk.utils.classloader.PluginManager;
import org.broadinstitute.gatk.utils.commandline.*;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.exceptions.UserException;
import org.broadinstitute.gatk.utils.interval.IntervalUtils;
@ -64,7 +67,6 @@ import org.broadinstitute.gatk.utils.progressmeter.ProgressMeter;
import org.broadinstitute.gatk.utils.recalibration.BQSRArgumentSet;
import org.broadinstitute.gatk.utils.text.XReadLines;
import org.broadinstitute.gatk.utils.threading.ThreadEfficiencyMonitor;
import htsjdk.variant.vcf.VCFConstants;
import java.io.File;
import java.io.FileNotFoundException;
@ -1138,7 +1140,7 @@ public class GenomeAnalysisEngine {
* Returns data source objects encapsulating all rod data;
* individual rods can be accessed through the returned data source objects.
*
* @return the rods data sources
* @return the rods data sources, never {@code null}.
*/
public List<ReferenceOrderedDataSource> getRodDataSources() {
return this.rodDataSources;
@ -1254,4 +1256,20 @@ public class GenomeAnalysisEngine {
runtimeLimitInNanoseconds = TimeUnit.NANOSECONDS.convert(args.maxRuntime, args.maxRuntimeUnits);
}
}
/**
 * Returns the sample list including all samples.
 *
 * <p>
 * The result is a new {@link IndexedSampleList} built, on every call, from the names returned by
 * {@code getSampleDB().getSampleNames()}.
 * </p>
 *
 * @return never {@code null}.
 */
public SampleList getSampleList() {
return new IndexedSampleList(getSampleDB().getSampleNames());
}
/**
 * Returns the sample list including samples in read inputs.
 *
 * <p>
 * The result is a new {@link IndexedSampleList} built, on every call, from
 * {@code SampleUtils.getSAMFileSamples(getSAMFileHeader())}.
 * </p>
 *
 * @return never {@code null}.
 */
public SampleList getReadSampleList() {
return new IndexedSampleList(SampleUtils.getSAMFileSamples(getSAMFileHeader()));
}
}

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
/**
 * Read-only, index-addressable collection of alleles.
 *
 * <p>
 * Positions run from 0 to {@code alleleCount() - 1}.
 * </p>
 *
 * @param <A> the type of allele held by the list.
 */
public interface AlleleList<A extends Allele> {

    /**
     * Returns the number of alleles in the list.
     *
     * @return the allele count.
     */
    int alleleCount();

    /**
     * Returns the position of an allele in the list.
     *
     * <p>
     * Callers in this code base treat a negative result as "not in the list"
     * (the empty list in {@code AlleleListUtils} returns -1).
     * </p>
     *
     * @param allele the allele to look up.
     * @return the index of {@code allele} in this list.
     */
    int alleleIndex(A allele);

    /**
     * Returns the allele stored at a given position.
     *
     * @param index the position to query.
     * @return the allele at {@code index}.
     */
    A alleleAt(int index);
}

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.collections.Permutation;
/**
 * Marks allele list permutation implementation classes.
 *
 * <p>
 * The interface declares no members of its own; it only combines {@link Permutation} and
 * {@link AlleleList} over the same allele type.
 * </p>
 *
 * @param <A> the allele type of the permuted lists.
 */
public interface AlleleListPermutation<A extends Allele> extends Permutation<A>, AlleleList<A> {
}

View File

@ -0,0 +1,334 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import java.util.AbstractList;
import java.util.List;
/**
 * Utility operations on {@link AlleleList} instances.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class AlleleListUtils {

    /**
     * Shared empty list. It never hands out an element, so a single instance can stand in for an
     * empty list of any allele type (see {@link #emptyList()}).
     */
    private static final AlleleList<Allele> EMPTY_LIST = new AlleleList<Allele>() {
        @Override
        public int alleleCount() {
            return 0;
        }

        @Override
        public int alleleIndex(final Allele allele) {
            return -1;
        }

        @Override
        public Allele alleleAt(final int index) {
            throw new IllegalArgumentException("allele index is out of range");
        }
    };

    /**
     * Checks whether two allele lists are in fact the same: same size and equal alleles at every position.
     *
     * @param first one list to compare.
     * @param second another list to compare.
     * @param <A> allele component type.
     *
     * @throws IllegalArgumentException if either list is {@code null}, or if {@code second} contains
     *         a {@code null} allele at a position that is reached during the comparison.
     * @throws IllegalStateException if {@code first} contains a {@code null} allele at a position that
     *         is reached during the comparison.
     *
     * @return {@code true} iff both lists are equal.
     */
    public static <A extends Allele> boolean equals(final AlleleList<A> first, final AlleleList<A> second) {
        if (first == null || second == null) {
            throw new IllegalArgumentException("no null list allowed");
        }
        final int alleleCount = first.alleleCount();
        if (alleleCount != second.alleleCount()) {
            return false;
        }

        for (int i = 0; i < alleleCount; i++) {
            final A firstAllele = first.alleleAt(i);
            if (firstAllele == null) {
                throw new IllegalStateException("no null alleles allowed in allele-lists: first list at " + i);
            }
            final A secondAllele = second.alleleAt(i);
            if (secondAllele == null) {
                throw new IllegalArgumentException("no null alleles allowed in allele-lists: second list at " + i);
            }
            if (!firstAllele.equals(secondAllele)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Resolves the index of the reference allele in an allele-list.
     *
     * <p>
     *     If there is no reference allele, it returns -1. If there is more than one reference allele,
     *     it returns the first occurrence (lowest index).
     * </p>
     *
     * @param list the allele-list to search.
     * @param <A> allele component type.
     *
     * @throws IllegalArgumentException if {@code list} is {@code null}.
     *
     * @return -1 if there is no reference allele, or a value in [0,{@code list.alleleCount()}).
     */
    public static <A extends Allele> int indexOfReference(final AlleleList<A> list) {
        if (list == null) {
            throw new IllegalArgumentException("the input list cannot be null");
        }
        final int alleleCount = list.alleleCount();
        for (int i = 0; i < alleleCount; i++) {
            if (list.alleleAt(i).isReference()) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns an unmodifiable {@link java.util.List} view of an allele-list.
     *
     * @param list the allele-list to wrap.
     * @param <A> allele component type.
     *
     * @throws IllegalArgumentException if {@code list} is {@code null}.
     *
     * @return never {@code null}.
     */
    public static <A extends Allele> List<A> asList(final AlleleList<A> list) {
        if (list == null) {
            throw new IllegalArgumentException("the list cannot be null");
        }
        return new AsList<>(list);
    }

    /**
     * Returns an unmodifiable empty allele-list.
     *
     * @param <A> the allele class.
     * @return never {@code null}.
     */
    @SuppressWarnings("unchecked") // safe: the shared instance never returns or stores an element.
    public static final <A extends Allele> AlleleList<A> emptyList() {
        return (AlleleList<A>) EMPTY_LIST;
    }

    /**
     * Simple list view of an allele-list; only the read operations are implemented.
     */
    private static class AsList<A extends Allele> extends AbstractList<A> {

        private final AlleleList<A> list;

        private AsList(final AlleleList<A> list) {
            this.list = list;
        }

        @Override
        public A get(int index) {
            return list.alleleAt(index);
        }

        @Override
        public int size() {
            return list.alleleCount();
        }
    }

    /**
     * Returns a permutation between two allele lists.
     *
     * <p>
     *     When both lists are equal (see {@link #equals(AlleleList, AlleleList)}) the result is an
     *     identity mapping; otherwise every allele in {@code target} is looked up in {@code original}.
     * </p>
     *
     * @param original the original allele list.
     * @param target the target allele list.
     * @param <A> the allele type.
     *
     * @throws IllegalArgumentException if {@code original} or {@code target} is {@code null}, or
     *         if {@code target} is larger than {@code original}, or if any element of {@code target}
     *         is not contained in {@code original}.
     *
     * @return never {@code null}
     */
    public static <A extends Allele> AlleleListPermutation<A> permutation(final AlleleList<A> original, final AlleleList<A> target) {
        if (equals(original, target)) {
            return new NonPermutation<>(original);
        } else {
            return new ActualPermutation<>(original, target);
        }
    }

    /**
     * Identity permutation: the source and the target are the same list and every index maps to itself.
     */
    private static class NonPermutation<A extends Allele> implements AlleleListPermutation<A> {

        private final AlleleList<A> list;

        public NonPermutation(final AlleleList<A> original) {
            list = original;
        }

        @Override
        public boolean isPartial() {
            return false;
        }

        @Override
        public boolean isNonPermuted() {
            return true;
        }

        @Override
        public int toIndex(int fromIndex) {
            return fromIndex;
        }

        @Override
        public int fromIndex(int toIndex) {
            return toIndex;
        }

        @Override
        public int fromSize() {
            return list.alleleCount();
        }

        @Override
        public int toSize() {
            return list.alleleCount();
        }

        @Override
        public List<A> fromList() {
            return asList(list);
        }

        @Override
        public List<A> toList() {
            return asList(list);
        }

        @Override
        public int alleleCount() {
            return list.alleleCount();
        }

        @Override
        public int alleleIndex(final A allele) {
            return list.alleleIndex(allele);
        }

        @Override
        public A alleleAt(final int index) {
            return list.alleleAt(index);
        }
    }

    /**
     * Permutation between two different lists, where the target may reorder the original and may
     * drop some of its alleles. The {@link AlleleList} methods of this class describe the target list.
     */
    private static class ActualPermutation<A extends Allele> implements AlleleListPermutation<A> {

        private final AlleleList<A> from;

        private final AlleleList<A> to;

        /** For each position in the target list, the position of that allele in the original list. */
        private final int[] fromIndexByToIndex;

        private final boolean nonPermuted;

        private final boolean isPartial;

        private ActualPermutation(final AlleleList<A> original, final AlleleList<A> target) {
            this.from = original;
            this.to = target;
            final int toSize = target.alleleCount();
            final int fromSize = original.alleleCount();
            if (fromSize < toSize) {
                throw new IllegalArgumentException("target allele list is not a permutation of the original allele list");
            }

            fromIndexByToIndex = new int[toSize];
            // The mapping is partial when the target has fewer alleles than the original.
            this.isPartial = fromSize != toSize;
            // It is non-permuted only if nothing was dropped and every allele kept its position.
            boolean sameOrder = fromSize == toSize;
            for (int i = 0; i < toSize; i++) {
                final int originalIndex = original.alleleIndex(target.alleleAt(i));
                if (originalIndex < 0) {
                    throw new IllegalArgumentException("target allele list is not a permutation of the original allele list");
                }
                fromIndexByToIndex[i] = originalIndex;
                sameOrder &= originalIndex == i;
            }
            this.nonPermuted = sameOrder;
        }

        @Override
        public boolean isPartial() {
            return isPartial;
        }

        @Override
        public boolean isNonPermuted() {
            return nonPermuted;
        }

        @Override
        public int toIndex(int fromIndex) {
            // Resolved on demand through the target list; the result is whatever the target
            // reports for the allele found at that position of the original list.
            return to.alleleIndex(from.alleleAt(fromIndex));
        }

        @Override
        public int fromIndex(int toIndex) {
            return fromIndexByToIndex[toIndex];
        }

        @Override
        public int fromSize() {
            return from.alleleCount();
        }

        @Override
        public int toSize() {
            return to.alleleCount();
        }

        @Override
        public List<A> fromList() {
            return asList(from);
        }

        @Override
        public List<A> toList() {
            return asList(to);
        }

        @Override
        public int alleleCount() {
            return to.alleleCount();
        }

        @Override
        public int alleleIndex(final A allele) {
            return to.alleleIndex(allele);
        }

        @Override
        public A alleleAt(final int index) {
            return to.alleleAt(index);
        }
    }
}

View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.gatk.utils.collections.IndexedSet;
import java.util.Collection;
/**
 * Allele list implementation backed by an indexed-set.
 *
 * @param <A> the type of allele held by the list.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class IndexedAlleleList<A extends Allele> implements AlleleList<A> {

    /** Backing indexed-set; every query of this list is answered by it. */
    private final IndexedSet<A> indexedAlleles;

    /**
     * Constructs a new empty allele-list.
     */
    public IndexedAlleleList() {
        this.indexedAlleles = new IndexedSet<>();
    }

    /**
     * Constructs a new allele-list from an array of alleles.
     *
     * <p>
     *     The array is handed as-is to the backing {@link IndexedSet}. As documented for this class,
     *     repeats in the input array are ignored (keeping the first one) and the alleles keep the
     *     order they have in the array.
     * </p>
     *
     * @param alleles the original allele array
     *
     * @throws java.lang.IllegalArgumentException if {@code alleles} is {@code null} or contains {@code null}s.
     */
    public IndexedAlleleList(final A ... alleles) {
        this.indexedAlleles = new IndexedSet<>(alleles);
    }

    /**
     * Constructs a new allele-list from a collection of alleles.
     *
     * <p>
     *     The collection is handed as-is to the backing {@link IndexedSet}. As documented for this class,
     *     repeats in the input collection are ignored (keeping the first one) and the alleles keep the
     *     order of the natural traversal of the collection.
     * </p>
     *
     * @param alleles the original allele collection
     *
     * @throws java.lang.IllegalArgumentException if {@code alleles} is {@code null} or contains {@code null}s.
     */
    public IndexedAlleleList(final Collection<A> alleles) {
        this.indexedAlleles = new IndexedSet<>(alleles);
    }

    /** Number of alleles held, i.e. the size of the backing set. */
    @Override
    public int alleleCount() {
        return indexedAlleles.size();
    }

    /** Index of {@code allele} as reported by the backing set's {@code indexOf}. */
    @Override
    public int alleleIndex(final A allele) {
        return indexedAlleles.indexOf(allele);
    }

    /** Allele stored at {@code index} in the backing set. */
    @Override
    public A alleleAt(final int index) {
        return indexedAlleles.get(index);
    }
}

View File

@ -0,0 +1,96 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import org.broadinstitute.gatk.utils.collections.IndexedSet;
import java.util.Collection;
/**
 * Simple implementation of a sample-list backed by an indexed-set.
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class IndexedSampleList implements SampleList {

    /** Backing indexed-set; every query of this list is answered by it. */
    private final IndexedSet<String> sampleNames;

    /**
     * Constructs an empty sample-list.
     */
    public IndexedSampleList() {
        this.sampleNames = new IndexedSet<>(0);
    }

    /**
     * Constructs a sample-list from a collection of samples.
     *
     * <p>
     *     The collection is handed as-is to the backing {@link IndexedSet}. As documented for this class,
     *     repeats in the input collection are ignored (just the first occurrence is kept) and the
     *     sample names are kept in the traversal order of the original collection.
     * </p>
     *
     * @param samples input sample collection.
     *
     * @throws IllegalArgumentException if {@code samples} is {@code null} or it contains {@code nulls}.
     */
    public IndexedSampleList(final Collection<String> samples) {
        this.sampleNames = new IndexedSet<>(samples);
    }

    /**
     * Constructs a sample-list from an array of samples.
     *
     * <p>
     *     The array is handed as-is to the backing {@link IndexedSet}. As documented for this class,
     *     repeats in the input array are ignored (just the first occurrence is kept) and the
     *     sample names are kept in the order of the original array.
     * </p>
     *
     * @param samples input sample array.
     *
     * @throws IllegalArgumentException if {@code samples} is {@code null} or it contains {@code nulls}.
     */
    public IndexedSampleList(final String ... samples) {
        this.sampleNames = new IndexedSet<>(samples);
    }

    /** Number of samples held, i.e. the size of the backing set. */
    @Override
    public int sampleCount() {
        return sampleNames.size();
    }

    /** Index of {@code sample} as reported by the backing set's {@code indexOf}. */
    @Override
    public int sampleIndex(final String sample) {
        return sampleNames.indexOf(sample);
    }

    /** Sample name stored at {@code sampleIndex} in the backing set. */
    @Override
    public String sampleAt(int sampleIndex) {
        return sampleNames.get(sampleIndex);
    }
}

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
/**
 * An indexed set of samples.
 *
 * <p>
 *     Implementing classes must guarantee that the sample list will remain <b>constant</b> through the life of the object.
 * </p>
 */
public interface SampleList {

    /**
     * Returns the number of samples in the list.
     *
     * @return the sample count.
     */
    int sampleCount();

    /**
     * Returns the position of a sample in the list.
     *
     * <p>
     *     The utility implementations in {@code SampleListUtils} answer -1 for a sample
     *     that is not part of the list.
     * </p>
     *
     * @param sample the name of the sample to look up.
     * @return the index of {@code sample} in this list.
     */
    int sampleIndex(String sample);

    /**
     * Returns the name of the sample stored at a given position.
     *
     * @param sampleIndex the position to query.
     * @return the sample name at {@code sampleIndex}.
     */
    String sampleAt(int sampleIndex);
}

View File

@ -0,0 +1,224 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.tools.walkers.genotyper;
import java.util.*;
/**
* Some utility operations on sample lists.
*
* @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
*/
public class SampleListUtils {
private static final SampleList EMPTY_LIST = new SampleList() {
@Override
public int sampleCount() {
return 0;
}
@Override
public int sampleIndex(String sample) {
return -1;
}
@Override
public String sampleAt(final int sampleIndex) {
throw new IllegalArgumentException("index is out of valid range");
}
};
/**
* Empty list.
*
* @return never {@code null}
*/
public static SampleList emptyList() {
return EMPTY_LIST;
}
/**
* Checks whether two sample lists are in fact the same.
* @param first one list to compare.
* @param second another list to compare.
*
* @throws IllegalArgumentException if if either list is {@code null}.
*
* @return {@code true} iff both list are equal.
*/
public static boolean equals(final SampleList first, final SampleList second) {
if (first == null || second == null)
throw new IllegalArgumentException("no null list allowed");
final int sampleCount = first.sampleCount();
if (sampleCount != second.sampleCount())
return false;
for (int i = 0; i < sampleCount; i++) {
final String firstSample = first.sampleAt(i);
if (firstSample == null)
throw new IllegalStateException("no null samples allowed in sample-lists: first list at " + i);
final String secondSample = second.sampleAt(i);
if (secondSample == null)
throw new IllegalArgumentException("no null samples allowed in sample-list: second list at " + i);
if (!firstSample.equals(secondSample))
return false;
}
return true;
}
/**
* Returns a {@link List} unmodifiable view of a sample-list
* @param list the sample-list to wrap.
*
* @throws IllegalArgumentException if {@code list} is {@code null}.
*
* @return never {@code null}.
*/
public static List<String> asList(final SampleList list) {
if (list == null)
throw new IllegalArgumentException("the list cannot be null");
return new AsList(list);
}
/**
* Returns a {@link Set} unmodifiable view of the sample-list
*
* @param list the sample-list to wrap.
*
* @throws IllegalArgumentException if {@code list} is {@code null}
*/
public static Set<String> asSet(final SampleList list) {
if (list == null)
throw new IllegalArgumentException("the list cannot be null");
return new AsSet(list);
}
/**
* Creates a list with a single sample.
*
* @param sampleName the sample name.
* @return never {@code sampleName}
*/
public static SampleList singletonList(final String sampleName) {
if (sampleName == null)
throw new IllegalArgumentException("the sample name cannot be null");
return new SampleList() {
@Override
public int sampleCount() {
return 1;
}
@Override
public int sampleIndex(final String sample) {
return sampleName.equals(sample) ? 0 : -1;
}
@Override
public String sampleAt(int sampleIndex) {
if (sampleIndex == 0)
return sampleName;
throw new IllegalArgumentException("index is out of bounds");
}
};
}
/**
 * Read-only {@link List} adapter over a sample-list.
 *
 * <p>
 * Element access and size are forwarded to the wrapped sample-list on every call.
 * </p>
 */
private static class AsList extends AbstractList<String> {

    /** The sample-list this view delegates to. */
    private final SampleList samples;

    private AsList(final SampleList list) {
        samples = list;
    }

    @Override
    public String get(int index) {
        return samples.sampleAt(index);
    }

    @Override
    public int size() {
        return samples.sampleCount();
    }
}
/**
 * Read-only {@link Set} adapter over a sample-list.
 *
 * <p>
 * Iteration follows the index order of the wrapped sample-list; membership queries are answered
 * through {@code sampleIndex}.
 * </p>
 */
private static class AsSet extends AbstractSet<String> {

    /** The sample-list this view delegates to. */
    private final SampleList samples;

    private AsSet(final SampleList list) {
        samples = list;
    }

    @Override
    public Iterator<String> iterator() {
        return new Iterator<String>() {

            /** Index of the next sample to hand out. */
            private int cursor = 0;

            @Override
            public boolean hasNext() {
                return cursor < samples.sampleCount();
            }

            @Override
            public String next() {
                if (!hasNext()) {
                    throw new NoSuchElementException("iterating beyond sample list end");
                }
                return samples.sampleAt(cursor++);
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException("unsupported operation exception");
            }
        };
    }

    @Override
    public int size() {
        return samples.sampleCount();
    }

    @Override
    public boolean contains(final Object obj) {
        // instanceof is false for null, so null and non-string queries are rejected in one test.
        return obj instanceof String && samples.sampleIndex((String) obj) >= 0;
    }
}
}

View File

@ -151,6 +151,32 @@ public class MathUtils {
return approximateLog10SumLog10(vals, vals.length);
}
/**
 * Calculates the approximate log10 sum of a range of an array of log10 values.
 *
 * <p>
 * The running sum starts at the largest value in the range; every other finite value is then folded in
 * using the correction term provided by {@code JacobianLogTable}, unless its distance to the running sum
 * reaches {@code JacobianLogTable.MAX_TOLERANCE}, in which case it is ignored.
 * </p>
 *
 * <p>
 * NOTE(review): this method performs no argument validation of its own; when the range is empty
 * ({@code fromIndex == toIndex}) it returns before touching {@code vals} at all. Any range checking is
 * presumably done by {@code MathUtils.maxElementIndex} -- confirm.
 * </p>
 *
 * @param vals the input values.
 * @param fromIndex the first inclusive index in the input array.
 * @param toIndex index following the last element to sum in the input array (exclusive).
 * @return the approximate sum; {@link Double#NEGATIVE_INFINITY} if the range is empty.
 */
public static double approximateLog10SumLog10(final double[] vals, final int fromIndex, final int toIndex) {
    if (fromIndex == toIndex) {
        return Double.NEGATIVE_INFINITY;
    }
    final int indexOfMax = MathUtils.maxElementIndex(vals, fromIndex, toIndex);
    double runningSum = vals[indexOfMax];
    for (int i = fromIndex; i < toIndex; i++) {
        // The maximum is already accounted for as the starting value of the sum.
        if (i == indexOfMax) {
            continue;
        }
        final double current = vals[i];
        if (current == Double.NEGATIVE_INFINITY) {
            continue;
        }
        final double gap = runningSum - current;
        if (gap < JacobianLogTable.MAX_TOLERANCE) {
            runningSum += JacobianLogTable.get(gap);
        }
    }
    return runningSum;
}
public static double approximateLog10SumLog10(final double[] vals, final int endIndex) {
final int maxElementIndex = MathUtils.maxElementIndex(vals, endIndex);

View File

@ -60,9 +60,9 @@ public class SampleUtils {
* @param header the sam file header
* @return list of strings representing the sample names
*/
public static Set<String> getSAMFileSamples(SAMFileHeader header) {
public static Set<String> getSAMFileSamples(final SAMFileHeader header) {
// get all of the unique sample names
Set<String> samples = new TreeSet<String>();
final Set<String> samples = new TreeSet<String>();
List<SAMReadGroupRecord> readGroups = header.getReadGroups();
for ( SAMReadGroupRecord readGroup : readGroups )
samples.add(readGroup.getSample());

View File

@ -0,0 +1,342 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.utils.collections;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import java.util.*;
/**
 * Set where each element can be referenced by a unique integer index that runs from
 * 0 to the size of the set - 1.
 *
 * <p>
 * Elements are indexed in the order they are added and {@code null} elements are not allowed. Removing
 * an element decreases by one the index of every element that followed it.
 * </p>
 *
 * <p>
 * No synchronization is performed by this class.
 * </p>
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public class IndexedSet<E> extends AbstractSet<E> implements Set<E> {

    /**
     * Elements stored in an array-list by their index.
     */
    private final ArrayList<E> elements;

    /**
     * An unmodifiable view of the element list. Initially {@code null}, it is lazily instantiated, without
     * any synchronization, the first time it is requested through {@link #asList}. Therefore it is typically
     * shared by invoking code but there could be some extra copies (rare though) in multi-thread runs.
     */
    private transient List<E> unmodifiableElementsListView;

    /**
     * Quick element to index lookup map.
     * <p>
     * Uses a primitive int value map for efficiency sake.
     * </p>
     */
    private final Object2IntMap<E> indexByElement;

    /**
     * Creates an empty indexed set indicating the expected number of elements.
     *
     * @param initialCapacity the initial number of elements.
     */
    public IndexedSet(final int initialCapacity) {
        elements = new ArrayList<>(initialCapacity);
        indexByElement = new Object2IntOpenHashMap<>(initialCapacity);
    }

    /**
     * Creates a new indexed set from an existing collection of elements.
     *
     * <p>
     * Elements will be indexed as they appear in the collection's iteration order. Repeats will be ignored.
     * </p>
     *
     * @param values the original elements.
     *
     * @throws IllegalArgumentException
     * if {@code values} collection is {@code null} itself, or it contains {@code null}.
     */
    public IndexedSet(final Collection<E> values) {
        if (values == null)
            throw new IllegalArgumentException("input values cannot be null");
        final int initialCapacity = values.size();
        elements = new ArrayList<>(initialCapacity);
        indexByElement = new Object2IntOpenHashMap<>(initialCapacity);
        for (final E value : values)
            appendIfAbsent(value);
    }

    /**
     * Creates a new indexed set from an existing array of elements.
     *
     * <p>
     * Elements will be indexed as they appear in the input array. Repeats will be ignored.
     * </p>
     *
     * @param values the original elements.
     *
     * @throws IllegalArgumentException
     * if {@code values} array is {@code null} itself, or it contains {@code null}.
     */
    @SafeVarargs // the array is only read, never stored nor written to.
    public IndexedSet(final E ... values) {
        if (values == null)
            throw new IllegalArgumentException("input values cannot be null");
        final int initialCapacity = values.length;
        elements = new ArrayList<>(initialCapacity);
        indexByElement = new Object2IntOpenHashMap<>(initialCapacity);
        for (final E value : values)
            appendIfAbsent(value);
    }

    /**
     * Construction-time helper: gives {@code value} the next free index unless it is already present.
     *
     * @param value the element to index.
     *
     * @throws IllegalArgumentException if {@code value} is {@code null}.
     */
    private void appendIfAbsent(final E value) {
        // The next free index always equals the number of elements indexed so far.
        final int nextIndex = elements.size();
        if (value == null)
            throw new IllegalArgumentException("null element not allowed: index == " + nextIndex);
        if (indexByElement.containsKey(value))
            return;
        indexByElement.put(value, nextIndex);
        elements.add(value);
    }

    /**
     * Returns a list view of the elements in the set.
     *
     * <p>
     * Elements are sorted by their index within the set.
     * </p>
     *
     * <p>
     * This view changes as the indexed set changes but it cannot be used to update its contents.
     * An {@link UnsupportedOperationException} will be thrown if the calling code tries to do just that.
     * </p>
     *
     * @return never {@code null}.
     */
    public List<E> asList() {
        if (unmodifiableElementsListView == null)
            unmodifiableElementsListView = Collections.unmodifiableList(elements);
        return unmodifiableElementsListView;
    }

    /**
     * Throws an exception if an index is out of bounds.
     *
     * <p>
     * An element index is valid iff it is within [0,{@link #size()}).
     * </p>
     *
     * @param index the query index.
     *
     * @throws IllegalArgumentException {@code index} is out of bounds.
     */
    protected void checkIndex(final int index) {
        if (index < 0)
            throw new IllegalArgumentException("the index cannot be negative: " + index);
        if (index >= size())
            throw new IllegalArgumentException("the index is equal or larger than the list length: " + index + " >= " + size());
    }

    /**
     * Returns an iterator over the elements sorted by index.
     *
     * <p>
     * The iterator comes from the unmodifiable list view, so it does not support {@link Iterator#remove()}.
     * </p>
     *
     * @return never {@code null}.
     */
    @Override
    public Iterator<E> iterator() {
        return asList().iterator();
    }

    /**
     * Returns number of elements in the set.
     * @return 0 or greater.
     */
    @Override
    public int size() {
        return elements.size();
    }

    /**
     * Checks whether an object is an element of this set.
     *
     * @param o the query object.
     * @return {@code true} iff {@code o} is not {@code null} and it is an element of this set.
     */
    @Override
    @SuppressWarnings("all")
    public boolean contains(final Object o) {
        return o != null && indexByElement.containsKey(o);
    }

    /**
     * Adds a new element to the set.
     *
     * <p>
     * If the element was already in the set nothing will happen and the method will return {@code false}.
     * However, if the element is new to this set, it will be assigned the next index available (equal to
     * the size before addition). The method will return {@code true} in this case.
     * </p>
     *
     * @param o the object to add.
     *
     * @throws IllegalArgumentException if {@code o} is {@code null}.
     *
     * @return {@code true} iff the set was modified by this operation.
     */
    @Override
    public boolean add(final E o) {
        if (o == null)
            throw new IllegalArgumentException("the input argument cannot be null");
        if (contains(o))
            return false;
        final int nextIndex = size();
        elements.add(o);
        indexByElement.put(o, nextIndex);
        return true;
    }

    /**
     * Removes an element from the set.
     *
     * <p>
     * If the element was not present in the set (this includes {@code null}, which is never an element),
     * nothing happens and the method returns {@code false}. Otherwise the element is removed, the index of
     * every element that followed it is decreased by one, and the method returns {@code true}.
     * </p>
     *
     * @param o the object to remove.
     *
     * @return {@code true} iff the set was modified by this operation.
     */
    @Override
    public boolean remove(final Object o) {
        // Presence must be tested explicitly: this class never configures -1 as the index map's default
        // return value, so a missing key cannot be told apart from a valid index by looking at what
        // removeInt returns.
        if (o == null || !indexByElement.containsKey(o))
            return false;
        final int index = indexByElement.removeInt(o);
        elements.remove(index);
        // Re-index the elements that shifted down to fill the gap.
        final ListIterator<E> it = elements.listIterator(index);
        int nextIndex = index;
        while (it.hasNext())
            indexByElement.put(it.next(), nextIndex++);
        return true;
    }

    /**
     * Removes all elements in the set.
     */
    @Override
    public void clear() {
        elements.clear();
        indexByElement.clear();
    }

    /**
     * Compares this with another indexed set.
     * @param o the other object to compare to.
     * @return {@code false} unless {@code o} is an indexed-set that contains the same elements in the same order.
     */
    @Override
    public boolean equals(final Object o) {
        if (o == this)
            return true;
        // instanceof is false for null, so no separate null check is needed.
        return o instanceof IndexedSet<?> && equals((IndexedSet<?>) o);
    }

    /**
     * Compare to another indexed set.
     *
     * @param other the target indexed set.
     *
     * @throws java.lang.IllegalArgumentException if {@code other} is {@code null}.
     *
     * @return {@code true} iff {@code other} contains exactly the same elements
     * (as compared using {@link Object#equals}) as this set with matching indices.
     */
    public boolean equals(final IndexedSet<?> other) {
        if (other == null)
            throw new IllegalArgumentException("other cannot be null");
        // List equality is exactly "same size and pairwise-equal elements at the same index".
        return elements.equals(other.elements);
    }

    /**
     * Hash code consistent with {@link #equals(IndexedSet)}: it depends on the elements and on their order.
     *
     * @return the hash code of the index-sorted element list.
     */
    @Override
    public int hashCode() {
        return elements.hashCode();
    }

    /**
     * Returns the element given its index within the set.
     * @param index the target element's index.
     *
     * @throws IllegalArgumentException if {@code index} is not valid; i.e. it is not in [0,{@link #size()}).
     *
     * @return never {@code null}; as null is not a valid element.
     */
    public E get(final int index) {
        checkIndex(index);
        return elements.get(index);
    }

    /**
     * Returns the index of an object.
     * @param o the object of interest.
     *
     * @throws IllegalArgumentException if {@code o} is {@code null}.
     *
     * @return {@code -1} if such an object is not an element of this set, otherwise its index in the set,
     * thus a value within [0,{@link #size()}).
     */
    public int indexOf(final E o) {
        if (o == null)
            throw new IllegalArgumentException("the query object cannot be null");
        return indexByElement.containsKey(o) ? indexByElement.getInt(o) : -1;
    }
}

View File

@ -0,0 +1,103 @@
/*
* Copyright (c) 2012 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.gatk.utils.collections;
import java.util.List;
/**
 * Represents a permutation of an ordered set or list of elements.
 *
 * <p>
 * The "from" side refers to the original element list and the "to" side to the resulting one.
 * </p>
 *
 * @author Valentin Ruano-Rubio &lt;valentin@broadinstitute.org&gt;
 */
public interface Permutation<E> {

    /**
     * Checks whether this permutation is a partial one of the original list.
     *
     * <p>
     * A partial permutation is one in which not all the original elements take part.
     * </p>
     *
     * @return {@code true} iff this is a partial permutation.
     */
    boolean isPartial();

    /**
     * Checks whether this is a trivial permutation where the resulting element list is the same as the original.
     *
     * @return {@code true} iff the resulting element list is the same as the original.
     */
    boolean isNonPermuted();

    /**
     * Given an index on the original list, returns the position of that element in the resulting list.
     *
     * @param fromIndex the query original element index.
     *
     * @throws IllegalArgumentException if {@code fromIndex} is not a valid index within the original list.
     *
     * @return -1 if that element is not part of the resulting (partial) permutation, otherwise some number
     * between 0 and {@link #toSize()} - 1.
     */
    int toIndex(int fromIndex);

    /**
     * Given an index on the resulting list, returns the index of that element on the original list.
     *
     * @param toIndex the query resulting list index.
     *
     * @throws IllegalArgumentException if {@code toIndex} is not a valid index, i.e. in [0,{@link #toSize()}).
     *
     * @return a value between 0 and {@link #fromSize()} - 1.
     */
    int fromIndex(int toIndex);

    /**
     * Length of the original element list.
     *
     * @return 0 or greater.
     */
    int fromSize();

    /**
     * Length of the resulting element list.
     *
     * @return 0 or greater.
     */
    int toSize();

    /**
     * Returns an unmodifiable view to the original element list.
     *
     * @return never {@code null}.
     */
    List<E> fromList();

    /**
     * Returns an unmodifiable view to the resulting element list.
     *
     * @return never {@code null}.
     */
    List<E> toList();
}

View File

@ -45,9 +45,12 @@ import java.util.*;
*/
public class PerReadAlleleLikelihoodMap {
/** A set of all of the allele, so we can efficiently determine if an allele is already present */
private final Set<Allele> allelesSet = new HashSet<>();
private final Map<Allele,Integer> allelesSet = new HashMap<>();
/** A list of the unique allele, as an ArrayList so we can call get(i) efficiently */
protected final List<Allele> alleles = new ArrayList<>();
protected final Map<GATKSAMRecord, Map<Allele, Double>> likelihoodReadMap = new LinkedHashMap<>();
public PerReadAlleleLikelihoodMap() { }
@ -64,6 +67,10 @@ public class PerReadAlleleLikelihoodMap {
if ( likelihood == null ) throw new IllegalArgumentException("Likelihood cannot be null");
if ( likelihood > 0.0 ) throw new IllegalArgumentException("Likelihood must be negative (L = log(p))");
if (!allelesSet.containsKey(a)) {
allelesSet.put(a,alleles.size());
alleles.add(a);
}
Map<Allele,Double> likelihoodMap = likelihoodReadMap.get(read);
if (likelihoodMap == null){
// LinkedHashMap will ensure iterating through alleles will be in consistent order
@ -73,10 +80,7 @@ public class PerReadAlleleLikelihoodMap {
likelihoodMap.put(a,likelihood);
if (!allelesSet.contains(a)) {
allelesSet.add(a);
alleles.add(a);
}
}
public ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction) {
@ -198,7 +202,7 @@ public class PerReadAlleleLikelihoodMap {
* @return the log10 likelihood that this read matches this allele
*/
public double getLikelihoodAssociatedWithReadAndAllele(final GATKSAMRecord read, final Allele allele){
if (!allelesSet.contains(allele) || !likelihoodReadMap.containsKey(read))
if (!allelesSet.containsKey(allele) || !likelihoodReadMap.containsKey(read))
return 0.0;
return likelihoodReadMap.get(read).get(allele);
@ -381,7 +385,7 @@ public class PerReadAlleleLikelihoodMap {
* @return a non-null unmodifiable map
*/
public Set<Allele> getAllelesSet() {
return Collections.unmodifiableSet(allelesSet);
return Collections.unmodifiableSet(allelesSet.keySet());
}
/**

View File

@ -85,6 +85,24 @@ public class GATKVariantContextUtils {
return true;
}
/**
 * Returns a homozygous call allele list given the only allele and the ploidy.
 *
 * <p>
 * The result is the immutable list produced by {@link Collections#nCopies}: it contains {@code ploidy}
 * references to {@code allele} and is empty when {@code ploidy} is 0.
 * </p>
 *
 * @param allele the only allele in the allele list.
 * @param ploidy the ploidy of the resulting allele list.
 *
 * @throws IllegalArgumentException if {@code allele} is {@code null} or ploidy is negative.
 *
 * @return never {@code null}.
 */
public static List<Allele> homozygousAlleleList(final Allele allele, final int ploidy) {
    // Each precondition is reported separately so the failure message says which argument is at fault.
    if (allele == null)
        throw new IllegalArgumentException("the allele cannot be null");
    if (ploidy < 0)
        throw new IllegalArgumentException("the ploidy cannot be negative: " + ploidy);
    return Collections.nCopies(ploidy, allele);
}
public enum GenotypeMergeType {
/**
* Make all sample genotypes unique by file. Each sample shared across RODs gets named sample.ROD.

View File

@ -26,20 +26,9 @@
package org.broadinstitute.gatk.utils;
import htsjdk.tribble.Tribble;
import htsjdk.tribble.util.TabixUtils;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.spi.LoggingEvent;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.tribble.readers.PositionalBufferedStream;
import org.broadinstitute.gatk.utils.commandline.CommandLineUtils;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.crypt.CryptUtils;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.io.IOUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
import htsjdk.tribble.util.TabixUtils;
import htsjdk.variant.bcf2.BCF2Codec;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
@ -47,6 +36,17 @@ import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLine;
import org.apache.log4j.AppenderSkeleton;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.apache.log4j.spi.LoggingEvent;
import org.broadinstitute.gatk.utils.collections.Pair;
import org.broadinstitute.gatk.utils.commandline.CommandLineUtils;
import org.broadinstitute.gatk.utils.crypt.CryptUtils;
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
import org.broadinstitute.gatk.utils.io.IOUtils;
import org.broadinstitute.gatk.utils.variant.GATKVCFUtils;
import org.testng.Assert;
import org.testng.Reporter;
import org.testng.SkipException;
@ -132,6 +132,7 @@ public abstract class BaseTest {
protected static final String publicTestDirRoot = publicTestDir.replace(publicTestDirRelative, "");
public static final String keysDataLocation = validationDataLocation + "keys/";
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
public static final String exampleFASTA = publicTestDir + "exampleFASTA.fasta";