Merge pull request #762 from broadinstitute/ks_refactor_mvn_packages
Refactor maven packages
This commit is contained in:
commit
2d7f906a27
25
pom.xml
25
pom.xml
|
|
@ -32,6 +32,7 @@
|
|||
<resource.bundle.skip>false</resource.bundle.skip>
|
||||
<!-- TODO: Need a better way to say "don't include hidden" by default -->
|
||||
<gatkdocs.include.hidden>-build-timestamp "${maven.build.timestamp}"</gatkdocs.include.hidden>
|
||||
<gatk.shell.directory>${gatk.basedir}/public/src/main/scripts/shell</gatk.shell.directory>
|
||||
|
||||
<!--
|
||||
Phases of the build that may be disabled to speed up compilation.
|
||||
|
|
@ -173,7 +174,7 @@
|
|||
<outputDirectory>${project.reporting.outputDirectory}/apidocs</outputDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>${gatk.basedir}/gatk-utils/src/main/config/org/broadinstitute/gatk/utils/help</directory>
|
||||
<directory>${gatk.basedir}/public/gatk-utils/src/main/config/org/broadinstitute/gatk/utils/help</directory>
|
||||
</resource>
|
||||
</resources>
|
||||
</configuration>
|
||||
|
|
@ -198,8 +199,7 @@
|
|||
<docletPath>${project.build.outputDirectory}</docletPath>
|
||||
<docletArtifact>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<!-- TODO: THIS IS SUPPOSED TO BE GATK-UTILS! -->
|
||||
<artifactId>gatk-tools-public</artifactId>
|
||||
<artifactId>gatk-utils</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</docletArtifact>
|
||||
<maxmemory>2g</maxmemory>
|
||||
|
|
@ -624,6 +624,21 @@
|
|||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>exec-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<!--
|
||||
TODO: Separate maven modules into separate git repos?
|
||||
Until then, keep devs from accidentally mixing utils/engine/tools.
|
||||
-->
|
||||
<id>check-utils-engine-tools</id>
|
||||
<goals>
|
||||
<goal>exec</goal>
|
||||
</goals>
|
||||
<phase>process-sources</phase>
|
||||
<inherited>false</inherited>
|
||||
<configuration>
|
||||
<executable>${gatk.shell.directory}/check_utils_engine_tools.sh</executable>
|
||||
</configuration>
|
||||
</execution>
|
||||
<execution>
|
||||
<!--
|
||||
TODO: Remove after 3.3+ release.
|
||||
|
|
@ -637,7 +652,7 @@
|
|||
<phase>process-test-resources</phase>
|
||||
<inherited>false</inherited>
|
||||
<configuration>
|
||||
<executable>${gatk.basedir}/public/src/main/scripts/shell/delete_maven_links.sh</executable>
|
||||
<executable>${gatk.shell.directory}/delete_maven_links.sh</executable>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
|
|
@ -689,7 +704,7 @@
|
|||
<!-- Only generate the GATK Docs across the parent aggregation, not the children too. -->
|
||||
<inherited>false</inherited>
|
||||
<configuration>
|
||||
<doclet>org.broadinstitute.gatk.utils.help.GATKDoclet</doclet>
|
||||
<doclet>org.broadinstitute.gatk.tools.walkers.help.WalkerDoclet</doclet>
|
||||
<docletArtifact>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>gatk-package-distribution</artifactId>
|
||||
|
|
|
|||
|
|
@ -43,6 +43,11 @@
|
|||
<artifactId>gatk-tools-protected</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
<!-- slf4j bindings must only be at the package level: http://www.slf4j.org/manual.html -->
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
</dependency>
|
||||
<!-- Tribble codecs & the variant package (VCF, BCF, and VariantContext) -->
|
||||
<dependency>
|
||||
<groupId>samtools</groupId>
|
||||
|
|
@ -73,7 +78,7 @@
|
|||
<!-- Required for binary-dist assembly, excluded by shade -->
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>gatk-engine</artifactId>
|
||||
<artifactId>gatk-utils</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>example-resources</classifier>
|
||||
<type>tar.bz2</type>
|
||||
|
|
|
|||
|
|
@ -41,6 +41,10 @@
|
|||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>picard</groupId>
|
||||
<artifactId>picard</artifactId>
|
||||
</dependency>
|
||||
<!--
|
||||
Extensions generator dependency only applies to the exec:exec,
|
||||
not the artifact, but don't know another way to include
|
||||
|
|
@ -66,7 +70,7 @@
|
|||
-->
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>gatk-tools-public</artifactId>
|
||||
<artifactId>gatk-utils</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
|
|
|
|||
|
|
@ -80,7 +80,7 @@
|
|||
<!-- Required for binary-dist assembly, excluded by shade -->
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>gatk-engine</artifactId>
|
||||
<artifactId>gatk-utils</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<classifier>example-resources</classifier>
|
||||
<type>tar.bz2</type>
|
||||
|
|
|
|||
|
|
@ -48,7 +48,15 @@
|
|||
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>gatk-tools-public</artifactId>
|
||||
<artifactId>gatk-utils</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>${project.groupId}</groupId>
|
||||
<artifactId>gatk-engine</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<type>test-jar</type>
|
||||
<scope>test</scope>
|
||||
|
|
|
|||
|
|
@ -1,232 +0,0 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.arguments;
|
||||
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorImplementation;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode;
|
||||
import org.broadinstitute.gatk.utils.collections.DefaultHashMap;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 8/20/12
|
||||
* A collection of arguments that are common to the various callers.
|
||||
* This is pulled out so that every caller isn't exposed to the arguments from every other caller.
|
||||
*/
|
||||
|
||||
public class StandardCallerArgumentCollection implements Cloneable {
|
||||
|
||||
@ArgumentCollection
|
||||
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
|
||||
|
||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
||||
public GenotypingOutputMode genotypingOutputMode = GenotypingOutputMode.DISCOVERY;
|
||||
|
||||
/**
|
||||
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
|
||||
*/
|
||||
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
|
||||
public RodBinding<VariantContext> alleles;
|
||||
|
||||
/**
|
||||
* If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
|
||||
* Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we
|
||||
* will try to remove (N * contamination fraction) bases for each alternate allele.
|
||||
*/
|
||||
@Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
|
||||
public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION;
|
||||
public static final double DEFAULT_CONTAMINATION_FRACTION = 0.0;
|
||||
|
||||
/**
|
||||
* This argument specifies a file with two columns "sample" and "contamination" specifying the contamination level for those samples.
|
||||
* Samples that do not appear in this file will be processed with CONTAMINATION_FRACTION.
|
||||
**/
|
||||
@Advanced
|
||||
@Argument(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header.", required = false)
|
||||
public File CONTAMINATION_FRACTION_FILE = null;
|
||||
|
||||
/**
|
||||
* Indicates whether there is some sample contamination present.
|
||||
*/
|
||||
private boolean sampleContaminationWasLoaded = false;
|
||||
|
||||
/**
|
||||
*
|
||||
* @return an _Immutable_ copy of the Sample-Contamination Map, defaulting to CONTAMINATION_FRACTION so that if the sample isn't in the map map(sample)==CONTAMINATION_FRACTION
|
||||
*/
|
||||
public Map<String,Double> getSampleContamination(){
|
||||
//make sure that the default value is set up right
|
||||
sampleContamination.setDefaultValue(CONTAMINATION_FRACTION);
|
||||
if (!Double.isNaN(CONTAMINATION_FRACTION) && CONTAMINATION_FRACTION > 0.0)
|
||||
sampleContaminationWasLoaded = true;
|
||||
return Collections.unmodifiableMap(sampleContamination);
|
||||
}
|
||||
|
||||
public void setSampleContamination(DefaultHashMap<String, Double> sampleContamination) {
|
||||
this.sampleContamination.clear();
|
||||
this.sampleContaminationWasLoaded = !Double.isNaN(CONTAMINATION_FRACTION) && CONTAMINATION_FRACTION > 0.0;
|
||||
if (!sampleContaminationWasLoaded)
|
||||
for (final Double d : sampleContamination.values())
|
||||
if (!Double.isNaN(d) && d > 0.0) {
|
||||
sampleContaminationWasLoaded = true;
|
||||
break;
|
||||
}
|
||||
this.sampleContamination.putAll(sampleContamination);
|
||||
this.sampleContamination.setDefaultValue(CONTAMINATION_FRACTION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if there is some sample contamination present, false otherwise.
|
||||
* @return {@code true} iff there is some sample contamination
|
||||
*/
|
||||
public boolean isSampleContaminationPresent() {
|
||||
return (!Double.isNaN(CONTAMINATION_FRACTION) && CONTAMINATION_FRACTION > 0.0) || sampleContaminationWasLoaded;
|
||||
}
|
||||
|
||||
//Needs to be here because it uses CONTAMINATION_FRACTION
|
||||
private DefaultHashMap<String,Double> sampleContamination = new DefaultHashMap<String,Double>(CONTAMINATION_FRACTION);
|
||||
|
||||
/**
|
||||
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
||||
public AFCalculatorImplementation requestedAlleleFrequencyCalculationModel;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName = "logExactCalls", doc="x", required=false)
|
||||
public File exactCallsLog = null;
|
||||
|
||||
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
|
||||
public OutputMode outputMode = OutputMode.EMIT_VARIANTS_ONLY;
|
||||
|
||||
/**
|
||||
* Advanced, experimental argument: if SNP likelihood model is specified, and if EMIT_ALL_SITES output mode is set, when we set this argument then we will also emit PLs at all sites.
|
||||
* This will give a measure of reference confidence and a measure of which alt alleles are more plausible (if any).
|
||||
* WARNINGS:
|
||||
* - This feature will inflate VCF file size considerably.
|
||||
* - All SNP ALT alleles will be emitted with corresponding 10 PL values.
|
||||
* - An error will be emitted if EMIT_ALL_SITES is not set, or if anything other than diploid SNP model is used
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false)
|
||||
public boolean annotateAllSitesWithPLs = false;
|
||||
|
||||
/**
|
||||
* Creates a Standard caller argument collection with default values.
|
||||
*/
|
||||
public StandardCallerArgumentCollection() { }
|
||||
|
||||
/**
|
||||
* "Casts" a caller argument collection into another type.
|
||||
*
|
||||
* <p>Common fields values are copied across</p>
|
||||
* @param clazz the class of the result.
|
||||
* @param <T> result argument collection class.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public <T extends StandardCallerArgumentCollection> T cloneTo(final Class<T> clazz) {
|
||||
// short cut: just use regular clone if it happens to be the same class.
|
||||
if (clazz == getClass())
|
||||
return (T) clone();
|
||||
try {
|
||||
final T result = clazz.newInstance();
|
||||
for (final Field field : getClass().getFields()) {
|
||||
// just copy common fields.
|
||||
if (!field.getDeclaringClass().isAssignableFrom(clazz))
|
||||
continue;
|
||||
final int fieldModifiers = field.getModifiers();
|
||||
if ((fieldModifiers & UNCOPYABLE_MODIFIER_MASK) != 0) continue;
|
||||
//Use the clone() method if appropriate
|
||||
if (Cloneable.class.isAssignableFrom(field.getType())) {
|
||||
Method clone = field.getType().getMethod("clone");
|
||||
field.set(result, clone.invoke(field.get(this)));
|
||||
} else
|
||||
field.set(result,field.get(this));
|
||||
}
|
||||
return result;
|
||||
} catch (final Exception ex) {
|
||||
throw new IllegalStateException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a copy of this configuration.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
@Override
|
||||
public StandardCallerArgumentCollection clone() {
|
||||
try {
|
||||
StandardCallerArgumentCollection cloned = (StandardCallerArgumentCollection) super.clone();
|
||||
cloned.genotypeArgs = genotypeArgs.clone();
|
||||
return cloned;
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new IllegalStateException("unreachable code");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds a modifiers mask that identifies those fields that cannot be copied between
|
||||
* StandardCallerArgumentCollections.
|
||||
*/
|
||||
private final int UNCOPYABLE_MODIFIER_MASK = Modifier.PRIVATE | Modifier.STATIC | Modifier.FINAL;
|
||||
}
|
||||
|
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.commandline.Gatherer;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReport;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* User: carneiro
|
||||
* Date: 3/29/11
|
||||
*/
|
||||
|
||||
|
||||
public class BQSRGatherer extends Gatherer {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(BQSRGatherer.class);
|
||||
private static final String EMPTY_INPUT_LIST = "list of inputs files is empty or there is no usable data in any input file";
|
||||
private static final String MISSING_OUTPUT_FILE = "missing output file name";
|
||||
private static final String MISSING_READ_GROUPS = "Missing read group(s)";
|
||||
|
||||
@Override
|
||||
public void gather(final List<File> inputs, final File output) {
|
||||
final PrintStream outputFile;
|
||||
try {
|
||||
outputFile = new PrintStream(output);
|
||||
} catch(FileNotFoundException e) {
|
||||
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
|
||||
}
|
||||
final GATKReport report = gatherReport(inputs);
|
||||
report.print(outputFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gathers the input recalibration reports into a single report.
|
||||
*
|
||||
* @param inputs Input recalibration GATK reports
|
||||
* @return gathered recalibration GATK report
|
||||
*/
|
||||
public static GATKReport gatherReport(final List<File> inputs) {
|
||||
final SortedSet<String> allReadGroups = new TreeSet<String>();
|
||||
final LinkedHashMap<File, Set<String>> inputReadGroups = new LinkedHashMap<File, Set<String>>();
|
||||
|
||||
// Get the read groups from each input report
|
||||
for (final File input : inputs) {
|
||||
final Set<String> readGroups = RecalibrationReport.getReadGroups(input);
|
||||
inputReadGroups.put(input, readGroups);
|
||||
allReadGroups.addAll(readGroups);
|
||||
}
|
||||
|
||||
// Log the read groups that are missing from specific inputs
|
||||
for (Map.Entry<File, Set<String>> entry: inputReadGroups.entrySet()) {
|
||||
final File input = entry.getKey();
|
||||
final Set<String> readGroups = entry.getValue();
|
||||
if (allReadGroups.size() != readGroups.size()) {
|
||||
// Since this is not completely unexpected, more than debug, but less than a proper warning.
|
||||
logger.info(MISSING_READ_GROUPS + ": " + input.getAbsolutePath());
|
||||
for (final Object readGroup: CollectionUtils.subtract(allReadGroups, readGroups)) {
|
||||
logger.info(" " + readGroup);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RecalibrationReport generalReport = null;
|
||||
for (File input : inputs) {
|
||||
final RecalibrationReport inputReport = new RecalibrationReport(input, allReadGroups);
|
||||
if( inputReport.isEmpty() ) { continue; }
|
||||
|
||||
if (generalReport == null)
|
||||
generalReport = inputReport;
|
||||
else
|
||||
generalReport.combine(inputReport);
|
||||
}
|
||||
if (generalReport == null)
|
||||
throw new ReviewedGATKException(EMPTY_INPUT_LIST);
|
||||
|
||||
generalReport.calculateQuantizedQualities();
|
||||
|
||||
return generalReport.createGATKReport();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.WalkerManager;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* A ReadTransformer that applies BQSR on the fly to reads
|
||||
*
|
||||
* User: rpoplin
|
||||
* Date: 2/13/12
|
||||
*/
|
||||
public class BQSRReadTransformer extends ReadTransformer {
|
||||
private boolean enabled;
|
||||
private BaseRecalibration bqsr = null;
|
||||
|
||||
@Override
|
||||
public OrderingConstraint getOrderingConstraint() { return OrderingConstraint.MUST_BE_FIRST; }
|
||||
|
||||
@Override
|
||||
public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
|
||||
this.enabled = engine.hasBQSRArgumentSet();
|
||||
if ( enabled ) {
|
||||
// TODO -- See important note below about applying BQSR to a reduced BAM file:
|
||||
// If it is important to make sure that BQSR is not applied (as opposed to having the covariates computed) against a reduced bam file,
|
||||
// we need to figure out how to make this work. The problem is that the ReadTransformers are initialized before the ReadDataSource
|
||||
// inside the GenomeAnalysisEngine, so we generate a NPE when trying to retrieve the SAMFileHeaders. Ultimately, I don't think this is
|
||||
// a necessary check anyways since we disallow running BaseRecalibrator on reduced bams (so we can't generate the recal tables to use here).
|
||||
// Although we could add this check to the apply() method below, it's kind of ugly and inefficient.
|
||||
// The call here would be: RecalUtils.checkForInvalidRecalBams(engine.getSAMFileHeaders(), engine.getArguments().ALLOW_BQSR_ON_REDUCED_BAMS);
|
||||
final BQSRArgumentSet args = engine.getBQSRArgumentSet();
|
||||
this.bqsr = new BaseRecalibration(args.getRecalFile(), args.getQuantizationLevels(), args.shouldDisableIndelQuals(), args.getPreserveQscoresLessThan(), args.shouldEmitOriginalQuals(), args.getGlobalQScorePrior());
|
||||
}
|
||||
final BQSRMode mode = WalkerManager.getWalkerAnnotation(walker, BQSRMode.class);
|
||||
return mode.ApplicationTime();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean enabled() {
|
||||
return enabled;
|
||||
}
|
||||
|
||||
/**
|
||||
* initialize a new BQSRReadTransformer that applies BQSR on the fly to incoming reads.
|
||||
*/
|
||||
@Override
|
||||
public GATKSAMRecord apply(GATKSAMRecord read) {
|
||||
bqsr.recalibrateRead(read);
|
||||
return read;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,208 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import htsjdk.samtools.SAMTag;
|
||||
import htsjdk.samtools.SAMUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.recalibration.EventType;
|
||||
import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Utility methods to facilitate on-the-fly base quality score recalibration.
|
||||
*
|
||||
* User: carneiro and rpoplin
|
||||
* Date: 2/4/12
|
||||
*/
|
||||
|
||||
public class BaseRecalibration {
|
||||
private static Logger logger = Logger.getLogger(BaseRecalibration.class);
|
||||
private final static boolean TEST_CACHING = false;
|
||||
|
||||
private final QuantizationInfo quantizationInfo; // histogram containing the map for qual quantization (calculated after recalibration is done)
|
||||
private final RecalibrationTables recalibrationTables;
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
|
||||
private final boolean disableIndelQuals;
|
||||
private final int preserveQLessThan;
|
||||
private final double globalQScorePrior;
|
||||
private final boolean emitOriginalQuals;
|
||||
|
||||
/**
|
||||
* Constructor using a GATK Report file
|
||||
*
|
||||
* @param RECAL_FILE a GATK Report file containing the recalibration information
|
||||
* @param quantizationLevels number of bins to quantize the quality scores
|
||||
* @param disableIndelQuals if true, do not emit base indel qualities
|
||||
* @param preserveQLessThan preserve quality scores less than this value
|
||||
*/
|
||||
public BaseRecalibration(final File RECAL_FILE, final int quantizationLevels, final boolean disableIndelQuals, final int preserveQLessThan, final boolean emitOriginalQuals, final double globalQScorePrior) {
|
||||
RecalibrationReport recalibrationReport = new RecalibrationReport(RECAL_FILE);
|
||||
|
||||
recalibrationTables = recalibrationReport.getRecalibrationTables();
|
||||
requestedCovariates = recalibrationReport.getRequestedCovariates();
|
||||
quantizationInfo = recalibrationReport.getQuantizationInfo();
|
||||
if (quantizationLevels == 0) // quantizationLevels == 0 means no quantization, preserve the quality scores
|
||||
quantizationInfo.noQuantization();
|
||||
else if (quantizationLevels > 0 && quantizationLevels != quantizationInfo.getQuantizationLevels()) // any other positive value means, we want a different quantization than the one pre-calculated in the recalibration report. Negative values mean the user did not provide a quantization argument, and just wants to use what's in the report.
|
||||
quantizationInfo.quantizeQualityScores(quantizationLevels);
|
||||
|
||||
this.disableIndelQuals = disableIndelQuals;
|
||||
this.preserveQLessThan = preserveQLessThan;
|
||||
this.globalQScorePrior = globalQScorePrior;
|
||||
this.emitOriginalQuals = emitOriginalQuals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recalibrates the base qualities of a read
|
||||
*
|
||||
* It updates the base qualities of the read with the new recalibrated qualities (for all event types)
|
||||
*
|
||||
* Implements a serial recalibration of the reads using the combinational table.
|
||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
||||
*
|
||||
* Given the full recalibration table, we perform the following preprocessing steps:
|
||||
*
|
||||
* - calculate the global quality score shift across all data [DeltaQ]
|
||||
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
||||
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
||||
* - The final shift equation is:
|
||||
*
|
||||
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
||||
*
|
||||
* @param read the read to recalibrate
|
||||
*/
|
||||
public void recalibrateRead(final GATKSAMRecord read) {
|
||||
if (emitOriginalQuals && read.getAttribute(SAMTag.OQ.name()) == null) { // Save the old qualities if the tag isn't already taken in the read
|
||||
try {
|
||||
read.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(read.getBaseQualities()));
|
||||
} catch (IllegalArgumentException e) {
|
||||
throw new UserException.MalformedBAM(read, "illegal base quality encountered; " + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
final ReadCovariates readCovariates = RecalUtils.computeCovariates(read, requestedCovariates);
|
||||
final int readLength = read.getReadLength();
|
||||
|
||||
for (final EventType errorModel : EventType.values()) { // recalibrate all three quality strings
|
||||
if (disableIndelQuals && errorModel != EventType.BASE_SUBSTITUTION) {
|
||||
read.setBaseQualities(null, errorModel);
|
||||
continue;
|
||||
}
|
||||
|
||||
final byte[] quals = read.getBaseQualities(errorModel);
|
||||
|
||||
// get the keyset for this base using the error model
|
||||
final int[][] fullReadKeySet = readCovariates.getKeySet(errorModel);
|
||||
|
||||
// the rg key is constant over the whole read, the global deltaQ is too
|
||||
final int rgKey = fullReadKeySet[0][0];
|
||||
final RecalDatum empiricalQualRG = recalibrationTables.getReadGroupTable().get(rgKey, errorModel.ordinal());
|
||||
|
||||
if( empiricalQualRG != null ) {
|
||||
final double epsilon = ( globalQScorePrior > 0.0 && errorModel.equals(EventType.BASE_SUBSTITUTION) ? globalQScorePrior : empiricalQualRG.getEstimatedQReported() );
|
||||
|
||||
for (int offset = 0; offset < readLength; offset++) { // recalibrate all bases in the read
|
||||
final byte origQual = quals[offset];
|
||||
|
||||
// only recalibrate usable qualities (the original quality will come from the instrument -- reported quality)
|
||||
if ( origQual >= preserveQLessThan ) {
|
||||
// get the keyset for this base using the error model
|
||||
final int[] keySet = fullReadKeySet[offset];
|
||||
final RecalDatum empiricalQualQS = recalibrationTables.getQualityScoreTable().get(keySet[0], keySet[1], errorModel.ordinal());
|
||||
final List<RecalDatum> empiricalQualCovs = new ArrayList<RecalDatum>();
|
||||
for (int i = 2; i < requestedCovariates.length; i++) {
|
||||
if (keySet[i] < 0) {
|
||||
continue;
|
||||
}
|
||||
empiricalQualCovs.add(recalibrationTables.getTable(i).get(keySet[0], keySet[1], keySet[i], errorModel.ordinal()));
|
||||
}
|
||||
|
||||
double recalibratedQualDouble = hierarchicalBayesianQualityEstimate( epsilon, empiricalQualRG, empiricalQualQS, empiricalQualCovs );
|
||||
|
||||
// recalibrated quality is bound between 1 and MAX_QUAL
|
||||
final byte recalibratedQual = QualityUtils.boundQual(MathUtils.fastRound(recalibratedQualDouble), RecalDatum.MAX_RECALIBRATED_Q_SCORE);
|
||||
|
||||
// return the quantized version of the recalibrated quality
|
||||
final byte recalibratedQualityScore = quantizationInfo.getQuantizedQuals().get(recalibratedQual);
|
||||
|
||||
quals[offset] = recalibratedQualityScore;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// finally update the base qualities in the read
|
||||
read.setBaseQualities(quals, errorModel);
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("result > 0.0")
|
||||
protected static double hierarchicalBayesianQualityEstimate( final double epsilon, final RecalDatum empiricalQualRG, final RecalDatum empiricalQualQS, final List<RecalDatum> empiricalQualCovs ) {
|
||||
final double globalDeltaQ = ( empiricalQualRG == null ? 0.0 : empiricalQualRG.getEmpiricalQuality(epsilon) - epsilon );
|
||||
final double deltaQReported = ( empiricalQualQS == null ? 0.0 : empiricalQualQS.getEmpiricalQuality(globalDeltaQ + epsilon) - (globalDeltaQ + epsilon) );
|
||||
double deltaQCovariates = 0.0;
|
||||
for( final RecalDatum empiricalQualCov : empiricalQualCovs ) {
|
||||
deltaQCovariates += ( empiricalQualCov == null ? 0.0 : empiricalQualCov.getEmpiricalQuality(deltaQReported + globalDeltaQ + epsilon) - (deltaQReported + globalDeltaQ + epsilon) );
|
||||
}
|
||||
|
||||
return epsilon + globalDeltaQ + deltaQReported + deltaQCovariates;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,500 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReport;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReportTable;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A general algorithm for quantizing quality score distributions to use a specific number of levels
|
||||
*
|
||||
* Takes a histogram of quality scores and a desired number of levels and produces a
|
||||
* map from original quality scores -> quantized quality scores.
|
||||
*
|
||||
* Note that this data structure is fairly heavy-weight, holding lots of debugging and
|
||||
* calculation information. If you want to use it efficiently at scale with lots of
|
||||
* read groups the right way to do this:
|
||||
*
|
||||
* Map<ReadGroup, List<Byte>> map
|
||||
* for each read group rg:
|
||||
* hist = getQualHist(rg)
|
||||
* QualQuantizer qq = new QualQuantizer(hist, nLevels, minInterestingQual)
|
||||
* map.set(rg, qq.getOriginalToQuantizedMap())
|
||||
*
|
||||
* This map would then be used to look up the appropriate original -> quantized
|
||||
* quals for each read as it comes in.
|
||||
*
|
||||
* @author Mark Depristo
|
||||
* @since 3/2/12
|
||||
*/
|
||||
public class QualQuantizer {
|
||||
final private static Set<QualInterval> MY_EMPTY_SET = Collections.emptySet();
|
||||
|
||||
private static Logger logger = Logger.getLogger(QualQuantizer.class);
|
||||
|
||||
/**
|
||||
* Inputs to the QualQuantizer
|
||||
*/
|
||||
final int nLevels, minInterestingQual;
|
||||
final List<Long> nObservationsPerQual;
|
||||
|
||||
/**
|
||||
* Map from original qual (e.g., Q30) to new quantized qual (e.g., Q28).
|
||||
*
|
||||
* Has the same range as nObservationsPerQual
|
||||
*/
|
||||
final List<Byte> originalToQuantizedMap;
|
||||
|
||||
/** Sorted set of qual intervals.
|
||||
*
|
||||
* After quantize() this data structure contains only the top-level qual intervals
|
||||
*/
|
||||
final TreeSet<QualInterval> quantizedIntervals;
|
||||
|
||||
/**
|
||||
* Protected creator for testng use only
|
||||
*/
|
||||
protected QualQuantizer(final int minInterestingQual) {
|
||||
this.nObservationsPerQual = Collections.emptyList();
|
||||
this.nLevels = 0;
|
||||
this.minInterestingQual = minInterestingQual;
|
||||
this.quantizedIntervals = null;
|
||||
this.originalToQuantizedMap = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a QualQuantizer for the histogram that has nLevels
|
||||
*
|
||||
* Note this is the only interface to the system. After creating this object
|
||||
* the map can be obtained via getOriginalToQuantizedMap()
|
||||
*
|
||||
* @param nObservationsPerQual A histogram of counts of bases with quality scores. Note that
|
||||
* this histogram must start at 0 (i.e., get(0) => count of Q0 bases) and must include counts all the
|
||||
* way up to the largest quality score possible in the reads. OK if the histogram includes many 0
|
||||
* count bins, as these are quantized for free.
|
||||
* @param nLevels the desired number of distinct quality scores to represent the full original range. Must
|
||||
* be at least 1.
|
||||
* @param minInterestingQual All quality scores <= this value are considered uninteresting and are freely
|
||||
* merged together. For example, if this value is 10, then Q0-Q10 are all considered free to merge, and
|
||||
* quantized into a single value. For ILMN data with lots of Q2 bases this results in a Q2 bin containing
|
||||
* all data with Q0-Q10.
|
||||
*/
|
||||
public QualQuantizer(final List<Long> nObservationsPerQual, final int nLevels, final int minInterestingQual) {
|
||||
this.nObservationsPerQual = nObservationsPerQual;
|
||||
this.nLevels = nLevels;
|
||||
this.minInterestingQual = minInterestingQual;
|
||||
|
||||
// some sanity checking
|
||||
if ( Collections.min(nObservationsPerQual) < 0 ) throw new ReviewedGATKException("Quality score histogram has negative values at: " + Utils.join(", ", nObservationsPerQual));
|
||||
if ( nLevels < 0 ) throw new ReviewedGATKException("nLevels must be >= 0");
|
||||
if ( minInterestingQual < 0 ) throw new ReviewedGATKException("minInterestingQual must be >= 0");
|
||||
|
||||
// actually run the quantizer
|
||||
this.quantizedIntervals = quantize();
|
||||
|
||||
// store the map
|
||||
this.originalToQuantizedMap = intervalsToMap(quantizedIntervals);
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents an contiguous interval of quality scores.
|
||||
*
|
||||
* qStart and qEnd are inclusive, so qStart = qEnd = 2 is the quality score bin of 2
|
||||
*/
|
||||
@Invariant({
|
||||
"qStart <= qEnd",
|
||||
"qStart >= 0",
|
||||
"qEnd <= 1000",
|
||||
"nObservations >= 0",
|
||||
"nErrors >= 0",
|
||||
"nErrors <= nObservations",
|
||||
"fixedQual >= -1 && fixedQual <= QualityUtils.MAX_SAM_QUAL_SCORE",
|
||||
"mergeOrder >= 0"})
|
||||
protected final class QualInterval implements Comparable<QualInterval> {
|
||||
final int qStart, qEnd, fixedQual, level;
|
||||
final long nObservations, nErrors;
|
||||
final Set<QualInterval> subIntervals;
|
||||
|
||||
/** for debugging / visualization. When was this interval created? */
|
||||
int mergeOrder;
|
||||
|
||||
protected QualInterval(final int qStart, final int qEnd, final long nObservations, final long nErrors, final int level) {
|
||||
this(qStart, qEnd, nObservations, nErrors, level, -1, MY_EMPTY_SET);
|
||||
}
|
||||
|
||||
protected QualInterval(final int qStart, final int qEnd, final long nObservations, final long nErrors, final int level, final Set<QualInterval> subIntervals) {
|
||||
this(qStart, qEnd, nObservations, nErrors, level, -1, subIntervals);
|
||||
}
|
||||
|
||||
protected QualInterval(final int qStart, final int qEnd, final long nObservations, final long nErrors, final int level, final int fixedQual) {
|
||||
this(qStart, qEnd, nObservations, nErrors, level, fixedQual, MY_EMPTY_SET);
|
||||
}
|
||||
|
||||
@Requires("level >= 0")
|
||||
public QualInterval(final int qStart, final int qEnd, final long nObservations, final long nErrors, final int level, final int fixedQual, final Set<QualInterval> subIntervals) {
|
||||
this.qStart = qStart;
|
||||
this.qEnd = qEnd;
|
||||
this.nObservations = nObservations;
|
||||
this.nErrors = nErrors;
|
||||
this.fixedQual = fixedQual;
|
||||
this.level = level;
|
||||
this.mergeOrder = 0;
|
||||
this.subIntervals = Collections.unmodifiableSet(subIntervals);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Human readable name of this interval: e.g., 10-12
|
||||
*/
|
||||
public String getName() {
|
||||
return qStart + "-" + qEnd;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "QQ:" + getName();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the error rate (in real space) of this interval, or 0 if there are no observations
|
||||
*/
|
||||
@Ensures("result >= 0.0")
|
||||
public double getErrorRate() {
|
||||
if ( hasFixedQual() )
|
||||
return QualityUtils.qualToErrorProb((byte)fixedQual);
|
||||
else if ( nObservations == 0 )
|
||||
return 0.0;
|
||||
else
|
||||
return (nErrors+1) / (1.0 * (nObservations+1));
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the QUAL of the error rate of this interval, or the fixed qual if this interval was created with a fixed qual.
|
||||
*/
|
||||
@Ensures("result >= 0 && result <= QualityUtils.MAX_SAM_QUAL_SCORE")
|
||||
public byte getQual() {
|
||||
if ( ! hasFixedQual() )
|
||||
return QualityUtils.errorProbToQual(getErrorRate());
|
||||
else
|
||||
return (byte)fixedQual;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if this bin is using a fixed qual
|
||||
*/
|
||||
public boolean hasFixedQual() {
|
||||
return fixedQual != -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final QualInterval qualInterval) {
|
||||
return Integer.valueOf(this.qStart).compareTo(qualInterval.qStart);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a interval representing the merge of this interval and toMerge
|
||||
*
|
||||
* Errors and observations are combined
|
||||
* Subintervals updated in order of left to right (determined by qStart)
|
||||
* Level is 1 + highest level of this and toMerge
|
||||
* Order must be updated elsewhere
|
||||
*
|
||||
* @param toMerge
|
||||
* @return newly created merged QualInterval
|
||||
*/
|
||||
@Requires({"toMerge != null"})
|
||||
@Ensures({
|
||||
"result != null",
|
||||
"result.nObservations >= this.nObservations",
|
||||
"result.nObservations >= toMerge.nObservations",
|
||||
"result.nErrors >= this.nErrors",
|
||||
"result.nErrors >= toMerge.nErrors",
|
||||
"result.qStart == Math.min(this.qStart, toMerge.qStart)",
|
||||
"result.qEnd == Math.max(this.qEnd, toMerge.qEnd)",
|
||||
"result.level > Math.max(this.level, toMerge.level)",
|
||||
"result.subIntervals.size() == 2"
|
||||
})
|
||||
public QualInterval merge(final QualInterval toMerge) {
|
||||
final QualInterval left = this.compareTo(toMerge) < 0 ? this : toMerge;
|
||||
final QualInterval right = this.compareTo(toMerge) < 0 ? toMerge : this;
|
||||
|
||||
if ( left.qEnd + 1 != right.qStart )
|
||||
throw new ReviewedGATKException("Attempting to merge non-contiguous intervals: left = " + left + " right = " + right);
|
||||
|
||||
final long nCombinedObs = left.nObservations + right.nObservations;
|
||||
final long nCombinedErr = left.nErrors + right.nErrors;
|
||||
|
||||
final int level = Math.max(left.level, right.level) + 1;
|
||||
final Set<QualInterval> subIntervals = new HashSet<QualInterval>(Arrays.asList(left, right));
|
||||
QualInterval merged = new QualInterval(left.qStart, right.qEnd, nCombinedObs, nCombinedErr, level, subIntervals);
|
||||
|
||||
return merged;
|
||||
}
|
||||
|
||||
public double getPenalty() {
|
||||
return calcPenalty(getErrorRate());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Calculate the penalty of this interval, given the overall error rate for the interval
|
||||
*
|
||||
* If the globalErrorRate is e, this value is:
|
||||
*
|
||||
* sum_i |log10(e_i) - log10(e)| * nObservations_i
|
||||
*
|
||||
* each the index i applies to all leaves of the tree accessible from this interval
|
||||
* (found recursively from subIntervals as necessary)
|
||||
*
|
||||
* @param globalErrorRate overall error rate in real space against which we calculate the penalty
|
||||
* @return the cost of approximating the bins in this interval with the globalErrorRate
|
||||
*/
|
||||
@Requires("globalErrorRate >= 0.0")
|
||||
@Ensures("result >= 0.0")
|
||||
private double calcPenalty(final double globalErrorRate) {
|
||||
if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty
|
||||
return 0.0;
|
||||
|
||||
if ( subIntervals.isEmpty() ) {
|
||||
// this is leave node
|
||||
if ( this.qEnd <= minInterestingQual )
|
||||
// It's free to merge up quality scores below the smallest interesting one
|
||||
return 0;
|
||||
else {
|
||||
return (Math.abs(Math.log10(getErrorRate()) - Math.log10(globalErrorRate))) * nObservations;
|
||||
}
|
||||
} else {
|
||||
double sum = 0;
|
||||
for ( final QualInterval interval : subIntervals )
|
||||
sum += interval.calcPenalty(globalErrorRate);
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Main method for computing the quantization intervals.
|
||||
*
|
||||
* Invoked in the constructor after all input variables are initialized. Walks
|
||||
* over the inputs and builds the min. penalty forest of intervals with exactly nLevel
|
||||
* root nodes. Finds this min. penalty forest via greedy search, so is not guarenteed
|
||||
* to find the optimal combination.
|
||||
*
|
||||
* TODO: develop a smarter algorithm
|
||||
*
|
||||
* @return the forest of intervals with size == nLevels
|
||||
*/
|
||||
@Ensures({"! result.isEmpty()", "result.size() == nLevels"})
|
||||
private TreeSet<QualInterval> quantize() {
|
||||
// create intervals for each qual individually
|
||||
final TreeSet<QualInterval> intervals = new TreeSet<QualInterval>();
|
||||
for ( int qStart = 0; qStart < getNQualsInHistogram(); qStart++ ) {
|
||||
final long nObs = nObservationsPerQual.get(qStart);
|
||||
final double errorRate = QualityUtils.qualToErrorProb((byte)qStart);
|
||||
final double nErrors = nObs * errorRate;
|
||||
final QualInterval qi = new QualInterval(qStart, qStart, nObs, (int)Math.floor(nErrors), 0, (byte)qStart);
|
||||
intervals.add(qi);
|
||||
}
|
||||
|
||||
// greedy algorithm:
|
||||
// while ( n intervals >= nLevels ):
|
||||
// find intervals to merge with least penalty
|
||||
// merge it
|
||||
while ( intervals.size() > nLevels ) {
|
||||
mergeLowestPenaltyIntervals(intervals);
|
||||
}
|
||||
|
||||
return intervals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that finds and merges together the lowest penalty pair of intervals
|
||||
* @param intervals
|
||||
*/
|
||||
@Requires("! intervals.isEmpty()")
|
||||
private void mergeLowestPenaltyIntervals(final TreeSet<QualInterval> intervals) {
|
||||
// setup the iterators
|
||||
final Iterator<QualInterval> it1 = intervals.iterator();
|
||||
final Iterator<QualInterval> it1p = intervals.iterator();
|
||||
it1p.next(); // skip one
|
||||
|
||||
// walk over the pairs of left and right, keeping track of the pair with the lowest merge penalty
|
||||
QualInterval minMerge = null;
|
||||
if ( logger.isDebugEnabled() ) logger.debug("mergeLowestPenaltyIntervals: " + intervals.size());
|
||||
int lastMergeOrder = 0;
|
||||
while ( it1p.hasNext() ) {
|
||||
final QualInterval left = it1.next();
|
||||
final QualInterval right = it1p.next();
|
||||
final QualInterval merged = left.merge(right);
|
||||
lastMergeOrder = Math.max(Math.max(lastMergeOrder, left.mergeOrder), right.mergeOrder);
|
||||
if ( minMerge == null || (merged.getPenalty() < minMerge.getPenalty() ) ) {
|
||||
if ( logger.isDebugEnabled() ) logger.debug(" Updating merge " + minMerge);
|
||||
minMerge = merged;
|
||||
}
|
||||
}
|
||||
|
||||
// now actually go ahead and merge the minMerge pair
|
||||
if ( logger.isDebugEnabled() ) logger.debug(" => final min merge " + minMerge);
|
||||
intervals.removeAll(minMerge.subIntervals);
|
||||
intervals.add(minMerge);
|
||||
minMerge.mergeOrder = lastMergeOrder + 1;
|
||||
if ( logger.isDebugEnabled() ) logger.debug("updated intervals: " + intervals);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a final forest of intervals constructs a list mapping
|
||||
* list.get(i) => quantized qual to use for original quality score i
|
||||
*
|
||||
* This function should be called only once to initialize the corresponding
|
||||
* cached value in this object, as the calculation is a bit costly.
|
||||
*
|
||||
* @param intervals
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result.size() == getNQualsInHistogram()")
|
||||
private List<Byte> intervalsToMap(final TreeSet<QualInterval> intervals) {
|
||||
final List<Byte> map = new ArrayList<Byte>(getNQualsInHistogram());
|
||||
map.addAll(Collections.nCopies(getNQualsInHistogram(), Byte.MIN_VALUE));
|
||||
for ( final QualInterval interval : intervals ) {
|
||||
for ( int q = interval.qStart; q <= interval.qEnd; q++ ) {
|
||||
map.set(q, interval.getQual());
|
||||
}
|
||||
}
|
||||
|
||||
if ( Collections.min(map) == Byte.MIN_VALUE )
|
||||
throw new ReviewedGATKException("quantized quality score map contains an un-initialized value");
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
@Ensures("result > 0")
|
||||
private final int getNQualsInHistogram() {
|
||||
return nObservationsPerQual.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Write out a GATKReport to visualize the QualQuantization process of this data
|
||||
* @param out
|
||||
*/
|
||||
public void writeReport(PrintStream out) {
|
||||
final GATKReport report = new GATKReport();
|
||||
|
||||
addQualHistogramToReport(report);
|
||||
addIntervalsToReport(report);
|
||||
|
||||
report.print(out);
|
||||
}
|
||||
|
||||
private final void addQualHistogramToReport(final GATKReport report) {
|
||||
report.addTable("QualHistogram", "Quality score histogram provided to report", 2);
|
||||
GATKReportTable table = report.getTable("QualHistogram");
|
||||
|
||||
table.addColumn("qual");
|
||||
table.addColumn("count");
|
||||
|
||||
for ( int q = 0; q < nObservationsPerQual.size(); q++ ) {
|
||||
table.set(q, "qual", q);
|
||||
table.set(q, "count", nObservationsPerQual.get(q));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private final void addIntervalsToReport(final GATKReport report) {
|
||||
report.addTable("QualQuantizerIntervals", "Table of QualQuantizer quantization intervals", 10);
|
||||
GATKReportTable table = report.getTable("QualQuantizerIntervals");
|
||||
|
||||
table.addColumn("name");
|
||||
table.addColumn("qStart");
|
||||
table.addColumn("qEnd");
|
||||
table.addColumn("level");
|
||||
table.addColumn("merge.order");
|
||||
table.addColumn("nErrors");
|
||||
table.addColumn("nObservations");
|
||||
table.addColumn("qual");
|
||||
table.addColumn("penalty");
|
||||
table.addColumn("root.node");
|
||||
//table.addColumn("subintervals", "NA");
|
||||
|
||||
for ( QualInterval interval : quantizedIntervals )
|
||||
addIntervalToReport(table, interval, true);
|
||||
}
|
||||
|
||||
private final void addIntervalToReport(final GATKReportTable table, final QualInterval interval, final boolean atRootP) {
|
||||
final String name = interval.getName();
|
||||
table.set(name, "name", name);
|
||||
table.set(name, "qStart", interval.qStart);
|
||||
table.set(name, "qEnd", interval.qEnd);
|
||||
table.set(name, "level", interval.level);
|
||||
table.set(name, "merge.order", interval.mergeOrder);
|
||||
table.set(name, "nErrors", interval.nErrors);
|
||||
table.set(name, "nObservations", interval.nObservations);
|
||||
table.set(name, "qual", interval.getQual());
|
||||
table.set(name, "penalty", String.format("%.1f", interval.getPenalty()));
|
||||
table.set(name, "root.node", atRootP);
|
||||
|
||||
for ( final QualInterval sub : interval.subIntervals )
|
||||
addIntervalToReport(table, sub, false);
|
||||
}
|
||||
|
||||
public List<Byte> getOriginalToQuantizedMap() {
|
||||
return originalToQuantizedMap;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,151 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import org.broadinstitute.gatk.utils.report.GATKReportTable;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.collections.NestedIntegerArray;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Class that encapsulates the information necessary for quality score quantization for BQSR
|
||||
*
|
||||
* @author carneiro
|
||||
* @since 3/26/12
|
||||
*/
|
||||
public class QuantizationInfo {
|
||||
private List<Byte> quantizedQuals;
|
||||
private List<Long> empiricalQualCounts;
|
||||
private int quantizationLevels;
|
||||
|
||||
private QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts, int quantizationLevels) {
|
||||
this.quantizedQuals = quantizedQuals;
|
||||
this.empiricalQualCounts = empiricalQualCounts;
|
||||
this.quantizationLevels = quantizationLevels;
|
||||
}
|
||||
|
||||
public QuantizationInfo(List<Byte> quantizedQuals, List<Long> empiricalQualCounts) {
|
||||
this(quantizedQuals, empiricalQualCounts, calculateQuantizationLevels(quantizedQuals));
|
||||
}
|
||||
|
||||
public QuantizationInfo(final RecalibrationTables recalibrationTables, final int quantizationLevels) {
|
||||
final Long [] qualHistogram = new Long[QualityUtils.MAX_SAM_QUAL_SCORE +1]; // create a histogram with the empirical quality distribution
|
||||
for (int i = 0; i < qualHistogram.length; i++)
|
||||
qualHistogram[i] = 0L;
|
||||
|
||||
final NestedIntegerArray<RecalDatum> qualTable = recalibrationTables.getQualityScoreTable(); // get the quality score table
|
||||
|
||||
for (final RecalDatum value : qualTable.getAllValues()) {
|
||||
final RecalDatum datum = value;
|
||||
final int empiricalQual = MathUtils.fastRound(datum.getEmpiricalQuality()); // convert the empirical quality to an integer ( it is already capped by MAX_QUAL )
|
||||
qualHistogram[empiricalQual] += (long) datum.getNumObservations(); // add the number of observations for every key
|
||||
}
|
||||
empiricalQualCounts = Arrays.asList(qualHistogram); // histogram with the number of observations of the empirical qualities
|
||||
quantizeQualityScores(quantizationLevels);
|
||||
|
||||
this.quantizationLevels = quantizationLevels;
|
||||
}
|
||||
|
||||
|
||||
public void quantizeQualityScores(int nLevels) {
|
||||
QualQuantizer quantizer = new QualQuantizer(empiricalQualCounts, nLevels, QualityUtils.MIN_USABLE_Q_SCORE); // quantize the qualities to the desired number of levels
|
||||
quantizedQuals = quantizer.getOriginalToQuantizedMap(); // map with the original to quantized qual map (using the standard number of levels in the RAC)
|
||||
}
|
||||
|
||||
public void noQuantization() {
|
||||
this.quantizationLevels = QualityUtils.MAX_SAM_QUAL_SCORE;
|
||||
for (int i = 0; i < this.quantizationLevels; i++)
|
||||
quantizedQuals.set(i, (byte) i);
|
||||
}
|
||||
|
||||
public List<Byte> getQuantizedQuals() {
|
||||
return quantizedQuals;
|
||||
}
|
||||
|
||||
public int getQuantizationLevels() {
|
||||
return quantizationLevels;
|
||||
}
|
||||
|
||||
public GATKReportTable generateReportTable(boolean sortByCols) {
|
||||
GATKReportTable quantizedTable;
|
||||
if(sortByCols) {
|
||||
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
quantizedTable = new GATKReportTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE, "Quality quantization map", 3);
|
||||
}
|
||||
quantizedTable.addColumn(RecalUtils.QUALITY_SCORE_COLUMN_NAME);
|
||||
quantizedTable.addColumn(RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
quantizedTable.addColumn(RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
|
||||
for (int qual = 0; qual <= QualityUtils.MAX_SAM_QUAL_SCORE; qual++) {
|
||||
quantizedTable.set(qual, RecalUtils.QUALITY_SCORE_COLUMN_NAME, qual);
|
||||
quantizedTable.set(qual, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME, empiricalQualCounts.get(qual));
|
||||
quantizedTable.set(qual, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME, quantizedQuals.get(qual));
|
||||
}
|
||||
return quantizedTable;
|
||||
}
|
||||
|
||||
private static int calculateQuantizationLevels(List<Byte> quantizedQuals) {
|
||||
byte lastByte = -1;
|
||||
int quantizationLevels = 0;
|
||||
for (byte q : quantizedQuals) {
|
||||
if (q != lastByte) {
|
||||
quantizationLevels++;
|
||||
lastByte = q;
|
||||
}
|
||||
}
|
||||
return quantizationLevels;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,176 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.LRUCache;
|
||||
import org.broadinstitute.gatk.utils.recalibration.EventType;
|
||||
|
||||
/**
|
||||
* The object temporarily held by a read that describes all of its covariates.
|
||||
*
|
||||
* In essence, this is an array of CovariateValues, but it also has some functionality to deal with the optimizations of the NestedHashMap
|
||||
*
|
||||
* @author Mauricio Carneiro
|
||||
* @since 2/8/12
|
||||
*/
|
||||
public class ReadCovariates {
|
||||
private final static Logger logger = Logger.getLogger(ReadCovariates.class);
|
||||
|
||||
/**
|
||||
* How big should we let the LRU cache grow
|
||||
*/
|
||||
private static final int LRU_CACHE_SIZE = 500;
|
||||
|
||||
/**
|
||||
* Use an LRU cache to keep cache of keys (int[][][]) arrays for each read length we've seen.
|
||||
* The cache allows us to avoid the expense of recreating these arrays for every read. The LRU
|
||||
* keeps the total number of cached arrays to less than LRU_CACHE_SIZE.
|
||||
*
|
||||
* This is a thread local variable, so the total memory required may grow to N_THREADS x LRU_CACHE_SIZE
|
||||
*/
|
||||
private final static ThreadLocal<LRUCache<Integer, int[][][]>> keysCache = new ThreadLocal<LRUCache<Integer, int[][][]>>() {
|
||||
@Override protected LRUCache<Integer, int[][][]> initialValue() {
|
||||
return new LRUCache<Integer, int[][][]>(LRU_CACHE_SIZE);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* The keys cache is only valid for a single covariate count. Normally this will remain constant for the analysis.
|
||||
* If running multiple analyses (or the unit test suite), it's necessary to clear the cache.
|
||||
*/
|
||||
public static void clearKeysCache() {
|
||||
keysCache.remove();
|
||||
}
|
||||
|
||||
/**
|
||||
* Our keys, indexed by event type x read length x covariate
|
||||
*/
|
||||
private final int[][][] keys;
|
||||
|
||||
/**
|
||||
* The index of the current covariate, used by addCovariate
|
||||
*/
|
||||
private int currentCovariateIndex = 0;
|
||||
|
||||
public ReadCovariates(final int readLength, final int numberOfCovariates) {
|
||||
final LRUCache<Integer, int[][][]> cache = keysCache.get();
|
||||
final int[][][] cachedKeys = cache.get(readLength);
|
||||
if ( cachedKeys == null ) {
|
||||
// There's no cached value for read length so we need to create a new int[][][] array
|
||||
if ( logger.isDebugEnabled() ) logger.debug("Keys cache miss for length " + readLength + " cache size " + cache.size());
|
||||
keys = new int[EventType.values().length][readLength][numberOfCovariates];
|
||||
cache.put(readLength, keys);
|
||||
} else {
|
||||
keys = cachedKeys;
|
||||
}
|
||||
}
|
||||
|
||||
public void setCovariateIndex(final int index) {
|
||||
currentCovariateIndex = index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the keys for mismatch, insertion, and deletion for the current covariate at read offset
|
||||
*
|
||||
* NOTE: no checks are performed on the number of covariates, for performance reasons. If the count increases
|
||||
* after the keysCache has been accessed, this method will throw an ArrayIndexOutOfBoundsException. This currently
|
||||
* only occurs in the testing harness, and we don't anticipate that it will become a part of normal runs.
|
||||
*
|
||||
* @param mismatch the mismatch key value
|
||||
* @param insertion the insertion key value
|
||||
* @param deletion the deletion key value
|
||||
* @param readOffset the read offset, must be >= 0 and <= the read length used to create this ReadCovariates
|
||||
*/
|
||||
public void addCovariate(final int mismatch, final int insertion, final int deletion, final int readOffset) {
|
||||
keys[EventType.BASE_SUBSTITUTION.ordinal()][readOffset][currentCovariateIndex] = mismatch;
|
||||
keys[EventType.BASE_INSERTION.ordinal()][readOffset][currentCovariateIndex] = insertion;
|
||||
keys[EventType.BASE_DELETION.ordinal()][readOffset][currentCovariateIndex] = deletion;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the keys for all covariates at read position for error model
|
||||
*
|
||||
* @param readPosition
|
||||
* @param errorModel
|
||||
* @return
|
||||
*/
|
||||
public int[] getKeySet(final int readPosition, final EventType errorModel) {
|
||||
return keys[errorModel.ordinal()][readPosition];
|
||||
}
|
||||
|
||||
public int[][] getKeySet(final EventType errorModel) {
|
||||
return keys[errorModel.ordinal()];
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// routines for testing
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
protected int[][] getMismatchesKeySet() { return getKeySet(EventType.BASE_SUBSTITUTION); }
|
||||
protected int[][] getInsertionsKeySet() { return getKeySet(EventType.BASE_INSERTION); }
|
||||
protected int[][] getDeletionsKeySet() { return getKeySet(EventType.BASE_DELETION); }
|
||||
|
||||
protected int[] getMismatchesKeySet(final int readPosition) {
|
||||
return getKeySet(readPosition, EventType.BASE_SUBSTITUTION);
|
||||
}
|
||||
|
||||
protected int[] getInsertionsKeySet(final int readPosition) {
|
||||
return getKeySet(readPosition, EventType.BASE_INSERTION);
|
||||
}
|
||||
|
||||
protected int[] getDeletionsKeySet(final int readPosition) {
|
||||
return getKeySet(readPosition, EventType.BASE_DELETION);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,434 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import htsjdk.samtools.SAMUtils;
|
||||
import org.apache.commons.math.optimization.fitting.GaussianFunction;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
|
||||
|
||||
/**
|
||||
* An individual piece of recalibration data. Each bin counts up the number of observations and the number
|
||||
* of reference mismatches seen for that combination of covariates.
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*/
|
||||
@Invariant({
|
||||
"estimatedQReported >= 0.0",
|
||||
"! Double.isNaN(estimatedQReported)",
|
||||
"! Double.isInfinite(estimatedQReported)",
|
||||
"empiricalQuality >= 0.0 || empiricalQuality == UNINITIALIZED",
|
||||
"! Double.isNaN(empiricalQuality)",
|
||||
"! Double.isInfinite(empiricalQuality)",
|
||||
"numObservations >= 0",
|
||||
"numMismatches >= 0",
|
||||
"numMismatches <= numObservations"
|
||||
})
|
||||
public class RecalDatum {
|
||||
public final static byte MAX_RECALIBRATED_Q_SCORE = SAMUtils.MAX_PHRED_SCORE;
|
||||
private static final double UNINITIALIZED = -1.0;
|
||||
|
||||
/**
|
||||
* estimated reported quality score based on combined data's individual q-reporteds and number of observations
|
||||
*/
|
||||
private double estimatedQReported;
|
||||
|
||||
/**
|
||||
* the empirical quality for datums that have been collapsed together (by read group and reported quality, for example)
|
||||
*/
|
||||
private double empiricalQuality;
|
||||
|
||||
/**
|
||||
* number of bases seen in total
|
||||
*/
|
||||
private long numObservations;
|
||||
|
||||
/**
|
||||
* number of bases seen that didn't match the reference
|
||||
*/
|
||||
private double numMismatches;
|
||||
|
||||
/**
|
||||
* used when calculating empirical qualities to avoid division by zero
|
||||
*/
|
||||
private static final int SMOOTHING_CONSTANT = 1;
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// constructors
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Create a new RecalDatum with given observation and mismatch counts, and an reported quality
|
||||
*
|
||||
* @param _numObservations observations
|
||||
* @param _numMismatches mismatches
|
||||
* @param reportedQuality Qreported
|
||||
*/
|
||||
public RecalDatum(final long _numObservations, final double _numMismatches, final byte reportedQuality) {
|
||||
if ( _numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
|
||||
if ( _numMismatches < 0.0 ) throw new IllegalArgumentException("numMismatches < 0");
|
||||
if ( reportedQuality < 0 ) throw new IllegalArgumentException("reportedQuality < 0");
|
||||
|
||||
numObservations = _numObservations;
|
||||
numMismatches = _numMismatches;
|
||||
estimatedQReported = reportedQuality;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Copy copy into this recal datum, overwriting all of this objects data
|
||||
* @param copy RecalDatum to copy
|
||||
*/
|
||||
public RecalDatum(final RecalDatum copy) {
|
||||
this.numObservations = copy.getNumObservations();
|
||||
this.numMismatches = copy.getNumMismatches();
|
||||
this.estimatedQReported = copy.estimatedQReported;
|
||||
this.empiricalQuality = copy.empiricalQuality;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add in all of the data from other into this object, updating the reported quality from the expected
|
||||
* error rate implied by the two reported qualities
|
||||
*
|
||||
* @param other RecalDatum to combine
|
||||
*/
|
||||
public synchronized void combine(final RecalDatum other) {
|
||||
final double sumErrors = this.calcExpectedErrors() + other.calcExpectedErrors();
|
||||
increment(other.getNumObservations(), other.getNumMismatches());
|
||||
estimatedQReported = -10 * Math.log10(sumErrors / getNumObservations());
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
public synchronized void setEstimatedQReported(final double estimatedQReported) {
|
||||
if ( estimatedQReported < 0 ) throw new IllegalArgumentException("estimatedQReported < 0");
|
||||
if ( Double.isInfinite(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is infinite");
|
||||
if ( Double.isNaN(estimatedQReported) ) throw new IllegalArgumentException("estimatedQReported is NaN");
|
||||
|
||||
this.estimatedQReported = estimatedQReported;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
public final double getEstimatedQReported() {
|
||||
return estimatedQReported;
|
||||
}
|
||||
public final byte getEstimatedQReportedAsByte() {
|
||||
return (byte)(int)(Math.round(getEstimatedQReported()));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Empirical quality score -- derived from the num mismatches and observations
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns the error rate (in real space) of this interval, or 0 if there are no observations
|
||||
* @return the empirical error rate ~= N errors / N obs
|
||||
*/
|
||||
@Ensures({"result >= 0.0"})
|
||||
public double getEmpiricalErrorRate() {
|
||||
if ( numObservations == 0 )
|
||||
return 0.0;
|
||||
else {
|
||||
// cache the value so we don't call log over and over again
|
||||
final double doubleMismatches = numMismatches + SMOOTHING_CONSTANT;
|
||||
// smoothing is one error and one non-error observation, for example
|
||||
final double doubleObservations = numObservations + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT;
|
||||
return doubleMismatches / doubleObservations;
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized void setEmpiricalQuality(final double empiricalQuality) {
|
||||
if ( empiricalQuality < 0 ) throw new IllegalArgumentException("empiricalQuality < 0");
|
||||
if ( Double.isInfinite(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is infinite");
|
||||
if ( Double.isNaN(empiricalQuality) ) throw new IllegalArgumentException("empiricalQuality is NaN");
|
||||
|
||||
this.empiricalQuality = empiricalQuality;
|
||||
}
|
||||
|
||||
public final double getEmpiricalQuality() {
|
||||
return getEmpiricalQuality(getEstimatedQReported());
|
||||
}
|
||||
|
||||
public synchronized final double getEmpiricalQuality(final double conditionalPrior) {
|
||||
if (empiricalQuality == UNINITIALIZED) {
|
||||
calcEmpiricalQuality(conditionalPrior);
|
||||
}
|
||||
return empiricalQuality;
|
||||
}
|
||||
|
||||
public final byte getEmpiricalQualityAsByte() {
|
||||
return (byte)(Math.round(getEmpiricalQuality()));
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// toString methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%d,%.2f,%.2f", getNumObservations(), getNumMismatches(), getEmpiricalQuality());
|
||||
}
|
||||
|
||||
public String stringForCSV() {
|
||||
return String.format("%s,%.2f,%.2f", toString(), getEstimatedQReported(), getEmpiricalQuality() - getEstimatedQReported());
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// increment methods
|
||||
//
|
||||
//---------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public final long getNumObservations() {
|
||||
return numObservations;
|
||||
}
|
||||
|
||||
public final synchronized void setNumObservations(final long numObservations) {
|
||||
if ( numObservations < 0 ) throw new IllegalArgumentException("numObservations < 0");
|
||||
this.numObservations = numObservations;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
public final double getNumMismatches() {
|
||||
return numMismatches;
|
||||
}
|
||||
|
||||
@Requires({"numMismatches >= 0"})
|
||||
public final synchronized void setNumMismatches(final double numMismatches) {
|
||||
if ( numMismatches < 0 ) throw new IllegalArgumentException("numMismatches < 0");
|
||||
this.numMismatches = numMismatches;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires({"by >= 0"})
|
||||
public final synchronized void incrementNumObservations(final long by) {
|
||||
numObservations += by;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires({"by >= 0"})
|
||||
public final synchronized void incrementNumMismatches(final double by) {
|
||||
numMismatches += by;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Requires({"incObservations >= 0", "incMismatches >= 0"})
|
||||
@Ensures({"numObservations == old(numObservations) + incObservations", "numMismatches == old(numMismatches) + incMismatches"})
|
||||
public final synchronized void increment(final long incObservations, final double incMismatches) {
|
||||
numObservations += incObservations;
|
||||
numMismatches += incMismatches;
|
||||
empiricalQuality = UNINITIALIZED;
|
||||
}
|
||||
|
||||
@Ensures({"numObservations == old(numObservations) + 1", "numMismatches >= old(numMismatches)"})
|
||||
public final synchronized void increment(final boolean isError) {
|
||||
increment(1, isError ? 1.0 : 0.0);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// Private implementation helper functions
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* calculate the expected number of errors given the estimated Q reported and the number of observations
|
||||
* in this datum.
|
||||
*
|
||||
* @return a positive (potentially fractional) estimate of the number of errors
|
||||
*/
|
||||
@Ensures("result >= 0.0")
|
||||
private double calcExpectedErrors() {
|
||||
return getNumObservations() * QualityUtils.qualToErrorProb(estimatedQReported);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate and cache the empirical quality score from mismatches and observations (expensive operation)
|
||||
*/
|
||||
@Requires("empiricalQuality == UNINITIALIZED")
|
||||
@Ensures("empiricalQuality != UNINITIALIZED")
|
||||
private synchronized void calcEmpiricalQuality(final double conditionalPrior) {
|
||||
|
||||
// smoothing is one error and one non-error observation
|
||||
final long mismatches = (long)(getNumMismatches() + 0.5) + SMOOTHING_CONSTANT;
|
||||
final long observations = getNumObservations() + SMOOTHING_CONSTANT + SMOOTHING_CONSTANT;
|
||||
|
||||
final double empiricalQual = RecalDatum.bayesianEstimateOfEmpiricalQuality(observations, mismatches, conditionalPrior);
|
||||
|
||||
// This is the old and busted point estimate approach:
|
||||
//final double empiricalQual = -10 * Math.log10(getEmpiricalErrorRate());
|
||||
|
||||
empiricalQuality = Math.min(empiricalQual, (double) MAX_RECALIBRATED_Q_SCORE);
|
||||
}
|
||||
|
||||
//static final boolean DEBUG = false;
|
||||
static private final double RESOLUTION_BINS_PER_QUAL = 1.0;
|
||||
|
||||
static public double bayesianEstimateOfEmpiricalQuality(final long nObservations, final long nErrors, final double QReported) {
|
||||
|
||||
final int numBins = (QualityUtils.MAX_REASONABLE_Q_SCORE + 1) * (int)RESOLUTION_BINS_PER_QUAL;
|
||||
|
||||
final double[] log10Posteriors = new double[numBins];
|
||||
|
||||
for ( int bin = 0; bin < numBins; bin++ ) {
|
||||
|
||||
final double QEmpOfBin = bin / RESOLUTION_BINS_PER_QUAL;
|
||||
|
||||
log10Posteriors[bin] = log10QempPrior(QEmpOfBin, QReported) + log10QempLikelihood(QEmpOfBin, nObservations, nErrors);
|
||||
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("bin = %d, Qreported = %f, nObservations = %f, nErrors = %f, posteriors = %f", bin, QReported, nObservations, nErrors, log10Posteriors[bin]));
|
||||
}
|
||||
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("Qreported = %f, nObservations = %f, nErrors = %f", QReported, nObservations, nErrors));
|
||||
|
||||
final double[] normalizedPosteriors = MathUtils.normalizeFromLog10(log10Posteriors);
|
||||
final int MLEbin = MathUtils.maxElementIndex(normalizedPosteriors);
|
||||
|
||||
final double Qemp = MLEbin / RESOLUTION_BINS_PER_QUAL;
|
||||
return Qemp;
|
||||
}
|
||||
|
||||
/**
|
||||
* Quals above this value should be capped down to this value (because they are too high)
|
||||
* in the base quality score recalibrator
|
||||
*/
|
||||
public final static byte MAX_GATK_USABLE_Q_SCORE = 40;
|
||||
static private final double[] log10QempPriorCache = new double[MAX_GATK_USABLE_Q_SCORE + 1];
|
||||
static {
|
||||
// f(x) = a + b*exp(-((x - c)^2 / (2*d^2)))
|
||||
// Note that b is the height of the curve's peak, c is the position of the center of the peak, and d controls the width of the "bell".
|
||||
final double GF_a = 0.0;
|
||||
final double GF_b = 0.9;
|
||||
final double GF_c = 0.0;
|
||||
final double GF_d = 0.5; // with these parameters, deltas can shift at most ~20 Q points
|
||||
|
||||
final GaussianFunction gaussian = new GaussianFunction(GF_a, GF_b, GF_c, GF_d);
|
||||
for ( int i = 0; i <= MAX_GATK_USABLE_Q_SCORE; i++ ) {
|
||||
double log10Prior = Math.log10(gaussian.value((double) i));
|
||||
if ( Double.isInfinite(log10Prior) )
|
||||
log10Prior = -Double.MAX_VALUE;
|
||||
log10QempPriorCache[i] = log10Prior;
|
||||
}
|
||||
}
|
||||
|
||||
static protected double log10QempPrior(final double Qempirical, final double Qreported) {
|
||||
final int difference = Math.min(Math.abs((int) (Qempirical - Qreported)), MAX_GATK_USABLE_Q_SCORE);
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("Qemp = %f, log10Priors = %f", Qempirical, log10QempPriorCache[difference]));
|
||||
return log10QempPriorCache[difference];
|
||||
}
|
||||
|
||||
static private final long MAX_NUMBER_OF_OBSERVATIONS = Integer.MAX_VALUE - 1;
|
||||
|
||||
static protected double log10QempLikelihood(final double Qempirical, long nObservations, long nErrors) {
|
||||
if ( nObservations == 0 )
|
||||
return 0.0;
|
||||
|
||||
// the binomial code requires ints as input (because it does caching). This should theoretically be fine because
|
||||
// there is plenty of precision in 2^31 observations, but we need to make sure that we don't have overflow
|
||||
// before casting down to an int.
|
||||
if ( nObservations > MAX_NUMBER_OF_OBSERVATIONS ) {
|
||||
// we need to decrease nErrors by the same fraction that we are decreasing nObservations
|
||||
final double fraction = (double)MAX_NUMBER_OF_OBSERVATIONS / (double)nObservations;
|
||||
nErrors = Math.round((double)nErrors * fraction);
|
||||
nObservations = MAX_NUMBER_OF_OBSERVATIONS;
|
||||
}
|
||||
|
||||
// this is just a straight binomial PDF
|
||||
double log10Prob = MathUtils.log10BinomialProbability((int)nObservations, (int)nErrors, QualityUtils.qualToErrorProbLog10(Qempirical));
|
||||
if ( Double.isInfinite(log10Prob) || Double.isNaN(log10Prob) )
|
||||
log10Prob = -Double.MAX_VALUE;
|
||||
|
||||
//if ( DEBUG )
|
||||
// System.out.println(String.format("Qemp = %f, log10Likelihood = %f", Qempirical, log10Prob));
|
||||
|
||||
return log10Prob;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,582 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.math.MathException;
|
||||
import org.apache.commons.math.stat.inference.ChiSquareTestImpl;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* A tree of recal datum, where each contains a set of sub datum representing sub-states of the higher level one
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 07/27/12
|
||||
*/
|
||||
public class RecalDatumNode<T extends RecalDatum> {
|
||||
private final static double SMALLEST_CHI2_PVALUE = 1e-300;
|
||||
protected static final Logger logger = Logger.getLogger(RecalDatumNode.class);
|
||||
|
||||
/**
|
||||
* fixedPenalty is this value if it's considered fixed
|
||||
*/
|
||||
private final static double UNINITIALIZED = Double.NEGATIVE_INFINITY;
|
||||
|
||||
private final T recalDatum;
|
||||
private double fixedPenalty = UNINITIALIZED;
|
||||
private final Set<RecalDatumNode<T>> subnodes;
|
||||
|
||||
@Requires({"recalDatum != null"})
|
||||
public RecalDatumNode(final T recalDatum) {
|
||||
this(recalDatum, new HashSet<RecalDatumNode<T>>());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return recalDatum.toString();
|
||||
}
|
||||
|
||||
@Requires({"recalDatum != null", "subnodes != null"})
|
||||
public RecalDatumNode(final T recalDatum, final Set<RecalDatumNode<T>> subnodes) {
|
||||
this(recalDatum, UNINITIALIZED, subnodes);
|
||||
}
|
||||
|
||||
@Requires({"recalDatum != null"})
|
||||
protected RecalDatumNode(final T recalDatum, final double fixedPenalty) {
|
||||
this(recalDatum, fixedPenalty, new HashSet<RecalDatumNode<T>>());
|
||||
}
|
||||
|
||||
@Requires({"recalDatum != null", "subnodes != null"})
|
||||
protected RecalDatumNode(final T recalDatum, final double fixedPenalty, final Set<RecalDatumNode<T>> subnodes) {
|
||||
this.recalDatum = recalDatum;
|
||||
this.fixedPenalty = fixedPenalty;
|
||||
this.subnodes = new HashSet<RecalDatumNode<T>>(subnodes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the recal data associated with this node
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public T getRecalDatum() {
|
||||
return recalDatum;
|
||||
}
|
||||
|
||||
/**
|
||||
* The set of all subnodes of this tree. May be modified.
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public Set<RecalDatumNode<T>> getSubnodes() {
|
||||
return subnodes;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the fixed penalty, if set, or else the the calculated penalty for this node
|
||||
* @return
|
||||
*/
|
||||
public double getPenalty() {
|
||||
if ( fixedPenalty != UNINITIALIZED )
|
||||
return fixedPenalty;
|
||||
else
|
||||
return calcPenalty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the fixed penalty for this node to a fresh calculation from calcPenalty
|
||||
*
|
||||
* This is important in the case where you want to compute the penalty from a full
|
||||
* tree and then chop the tree up afterwards while considering the previous penalties.
|
||||
* If you don't call this function then manipulating the tree may result in the
|
||||
* penalty functions changing with changes in the tree.
|
||||
*
|
||||
* @param doEntireTree recurse into all subnodes?
|
||||
* @return the fixed penalty for this node
|
||||
*/
|
||||
public double calcAndSetFixedPenalty(final boolean doEntireTree) {
|
||||
fixedPenalty = calcPenalty();
|
||||
if ( doEntireTree )
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
sub.calcAndSetFixedPenalty(doEntireTree);
|
||||
return fixedPenalty;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add node to the set of subnodes of this node
|
||||
* @param sub
|
||||
*/
|
||||
@Requires("sub != null")
|
||||
public void addSubnode(final RecalDatumNode<T> sub) {
|
||||
subnodes.add(sub);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this a leaf node (i.e., has no subnodes)?
|
||||
* @return
|
||||
*/
|
||||
public boolean isLeaf() {
|
||||
return subnodes.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this node immediately above only leaf nodes?
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public boolean isAboveOnlyLeaves() {
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
if ( ! sub.isLeaf() )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* What's the immediate number of subnodes from this node?
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int getNumSubnodes() {
|
||||
return subnodes.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* Total penalty is the sum of leaf node penalties
|
||||
*
|
||||
* This algorithm assumes that penalties have been fixed before pruning, as leaf nodes by
|
||||
* definition have 0 penalty unless they represent a pruned tree with underlying -- but now
|
||||
* pruned -- subtrees
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public double totalPenalty() {
|
||||
if ( isLeaf() )
|
||||
return getPenalty();
|
||||
else {
|
||||
double sum = 0.0;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
sum += sub.totalPenalty();
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum penalty among all nodes
|
||||
* @return
|
||||
*/
|
||||
public double maxPenalty(final boolean leafOnly) {
|
||||
double max = ! leafOnly || isLeaf() ? getPenalty() : Double.MIN_VALUE;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
max = Math.max(max, sub.maxPenalty(leafOnly));
|
||||
return max;
|
||||
}
|
||||
|
||||
/**
|
||||
* The minimum penalty among all nodes
|
||||
* @return
|
||||
*/
|
||||
public double minPenalty(final boolean leafOnly) {
|
||||
double min = ! leafOnly || isLeaf() ? getPenalty() : Double.MAX_VALUE;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
min = Math.min(min, sub.minPenalty(leafOnly));
|
||||
return min;
|
||||
}
|
||||
|
||||
/**
|
||||
* What's the longest branch from this node to any leaf?
|
||||
* @return
|
||||
*/
|
||||
public int maxDepth() {
|
||||
int subMax = 0;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
subMax = Math.max(subMax, sub.maxDepth());
|
||||
return subMax + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* What's the shortest branch from this node to any leaf? Includes this node
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result > 0")
|
||||
public int minDepth() {
|
||||
if ( isLeaf() )
|
||||
return 1;
|
||||
else {
|
||||
int subMin = Integer.MAX_VALUE;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
subMin = Math.min(subMin, sub.minDepth());
|
||||
return subMin + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the number of nodes, including this one, reachable from this node
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result > 0")
|
||||
public int size() {
|
||||
int size = 1;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
size += sub.size();
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Count the number of leaf nodes reachable from this node
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public int numLeaves() {
|
||||
if ( isLeaf() )
|
||||
return 1;
|
||||
else {
|
||||
int size = 0;
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
size += sub.numLeaves();
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the phred-scaled p-value for a chi^2 test for independent among subnodes of this node.
|
||||
*
|
||||
* The chi^2 value indicates the degree of independence of the implied error rates among the
|
||||
* immediate subnodes
|
||||
*
|
||||
* @return the phred-scaled p-value for chi2 penalty, or 0.0 if it cannot be calculated
|
||||
*/
|
||||
private double calcPenalty() {
|
||||
if ( isLeaf() || freeToMerge() )
|
||||
return 0.0;
|
||||
else if ( subnodes.size() == 1 )
|
||||
// only one value, so its free to merge away
|
||||
return 0.0;
|
||||
else {
|
||||
final long[][] counts = new long[subnodes.size()][2];
|
||||
|
||||
int i = 0;
|
||||
for ( final RecalDatumNode<T> subnode : subnodes ) {
|
||||
// use the yates correction to help avoid all zeros => NaN
|
||||
counts[i][0] = Math.round(subnode.getRecalDatum().getNumMismatches()) + 1L;
|
||||
counts[i][1] = subnode.getRecalDatum().getNumObservations() + 2L;
|
||||
i++;
|
||||
}
|
||||
|
||||
try {
|
||||
final double chi2PValue = new ChiSquareTestImpl().chiSquareTest(counts);
|
||||
final double penalty = -10.0 * Math.log10(Math.max(chi2PValue, SMALLEST_CHI2_PVALUE));
|
||||
|
||||
// make sure things are reasonable and fail early if not
|
||||
if (Double.isInfinite(penalty) || Double.isNaN(penalty))
|
||||
throw new ReviewedGATKException("chi2 value is " + chi2PValue + " at " + getRecalDatum());
|
||||
|
||||
return penalty;
|
||||
} catch ( MathException e ) {
|
||||
throw new ReviewedGATKException("Failed in calculating chi2 value", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this node free to merge because its rounded Q score is the same as all nodes below
|
||||
* @return
|
||||
*/
|
||||
private boolean freeToMerge() {
|
||||
if ( isLeaf() ) // leaves are free to merge
|
||||
return true;
|
||||
else {
|
||||
final byte myQual = getRecalDatum().getEmpiricalQualityAsByte();
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
if ( sub.getRecalDatum().getEmpiricalQualityAsByte() != myQual )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the penalty of this interval, given the overall error rate for the interval
|
||||
*
|
||||
* If the globalErrorRate is e, this value is:
|
||||
*
|
||||
* sum_i |log10(e_i) - log10(e)| * nObservations_i
|
||||
*
|
||||
* each the index i applies to all leaves of the tree accessible from this interval
|
||||
* (found recursively from subnodes as necessary)
|
||||
*
|
||||
* @param globalErrorRate overall error rate in real space against which we calculate the penalty
|
||||
* @return the cost of approximating the bins in this interval with the globalErrorRate
|
||||
*/
|
||||
@Requires("globalErrorRate >= 0.0")
|
||||
@Ensures("result >= 0.0")
|
||||
private double calcPenaltyLog10(final double globalErrorRate) {
|
||||
if ( globalErrorRate == 0.0 ) // there were no observations, so there's no penalty
|
||||
return 0.0;
|
||||
|
||||
if ( isLeaf() ) {
|
||||
// this is leave node
|
||||
return (Math.abs(Math.log10(recalDatum.getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * (double)recalDatum.getNumObservations();
|
||||
// TODO -- how we can generalize this calculation?
|
||||
// if ( this.qEnd <= minInterestingQual )
|
||||
// // It's free to merge up quality scores below the smallest interesting one
|
||||
// return 0;
|
||||
// else {
|
||||
// return (Math.abs(Math.log10(getEmpiricalErrorRate()) - Math.log10(globalErrorRate))) * getNumObservations();
|
||||
// }
|
||||
} else {
|
||||
double sum = 0;
|
||||
for ( final RecalDatumNode<T> hrd : subnodes)
|
||||
sum += hrd.calcPenaltyLog10(globalErrorRate);
|
||||
return sum;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a freshly allocated tree prunes to have no more than maxDepth from the root to any leaf
|
||||
*
|
||||
* @param maxDepth
|
||||
* @return
|
||||
*/
|
||||
public RecalDatumNode<T> pruneToDepth(final int maxDepth) {
|
||||
if ( maxDepth < 1 )
|
||||
throw new IllegalArgumentException("maxDepth < 1");
|
||||
else {
|
||||
final Set<RecalDatumNode<T>> subPruned = new HashSet<RecalDatumNode<T>>(getNumSubnodes());
|
||||
if ( maxDepth > 1 )
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
subPruned.add(sub.pruneToDepth(maxDepth - 1));
|
||||
return new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, subPruned);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a freshly allocated tree with to no more than maxElements in order of penalty
|
||||
*
|
||||
* Note that nodes must have fixed penalties to this algorithm will fail.
|
||||
*
|
||||
* @param maxElements
|
||||
* @return
|
||||
*/
|
||||
public RecalDatumNode<T> pruneByPenalty(final int maxElements) {
|
||||
RecalDatumNode<T> root = this;
|
||||
|
||||
while ( root.size() > maxElements ) {
|
||||
// remove the lowest penalty element, and continue
|
||||
root = root.removeLowestPenaltyNode();
|
||||
}
|
||||
|
||||
// our size is below the target, so we are good, return
|
||||
return root;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a freshly allocated tree where all mergable nodes with < maxPenalty are merged
|
||||
*
|
||||
* Note that nodes must have fixed penalties to this algorithm will fail.
|
||||
*
|
||||
* @param maxPenaltyIn the maximum penalty we are allowed to incur for a merge
|
||||
* @param applyBonferroniCorrection if true, we will adjust penalty by the phred-scaled bonferroni correction
|
||||
* for the size of the initial tree. That is, if there are 10 nodes in the
|
||||
* tree and maxPenalty is 20 we will actually enforce 10^-2 / 10 = 10^-3 = 30
|
||||
* penalty for multiple testing
|
||||
* @return
|
||||
*/
|
||||
public RecalDatumNode<T> pruneToNoMoreThanPenalty(final double maxPenaltyIn, final boolean applyBonferroniCorrection) {
|
||||
RecalDatumNode<T> root = this;
|
||||
|
||||
final double bonferroniCorrection = 10 * Math.log10(this.size());
|
||||
final double maxPenalty = applyBonferroniCorrection ? maxPenaltyIn + bonferroniCorrection : maxPenaltyIn;
|
||||
|
||||
if ( applyBonferroniCorrection )
|
||||
logger.info(String.format("Applying Bonferroni correction for %d nodes = %.2f to initial penalty %.2f for total " +
|
||||
"corrected max penalty of %.2f", this.size(), bonferroniCorrection, maxPenaltyIn, maxPenalty));
|
||||
|
||||
while ( true ) {
|
||||
final Pair<RecalDatumNode<T>, Double> minPenaltyNode = root.getMinPenaltyAboveLeafNode();
|
||||
|
||||
if ( minPenaltyNode == null || minPenaltyNode.getSecond() > maxPenalty ) {
|
||||
// nothing to merge, or the best candidate is above our max allowed
|
||||
if ( minPenaltyNode == null ) {
|
||||
if ( logger.isDebugEnabled() ) logger.debug("Stopping because no candidates could be found");
|
||||
} else {
|
||||
if ( logger.isDebugEnabled() ) logger.debug("Stopping because node " + minPenaltyNode.getFirst() + " has penalty " + minPenaltyNode.getSecond() + " > max " + maxPenalty);
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
// remove the lowest penalty element, and continue
|
||||
if ( logger.isDebugEnabled() ) logger.debug("Removing node " + minPenaltyNode.getFirst() + " with penalty " + minPenaltyNode.getSecond());
|
||||
root = root.removeLowestPenaltyNode();
|
||||
}
|
||||
}
|
||||
|
||||
// no more candidates exist with penalty < maxPenalty
|
||||
return root;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Find the lowest penalty above leaf node in the tree, and return a tree without it
|
||||
*
|
||||
* Note this excludes the current (root) node
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private RecalDatumNode<T> removeLowestPenaltyNode() {
|
||||
final Pair<RecalDatumNode<T>, Double> nodeToRemove = getMinPenaltyAboveLeafNode();
|
||||
if ( logger.isDebugEnabled() )
|
||||
logger.debug("Removing " + nodeToRemove.getFirst() + " with penalty " + nodeToRemove.getSecond());
|
||||
|
||||
final Pair<RecalDatumNode<T>, Boolean> result = removeNode(nodeToRemove.getFirst());
|
||||
|
||||
if ( ! result.getSecond() )
|
||||
throw new IllegalStateException("Never removed any node!");
|
||||
|
||||
final RecalDatumNode<T> oneRemoved = result.getFirst();
|
||||
if ( oneRemoved == null )
|
||||
throw new IllegalStateException("Removed our root node, wow, didn't expect that");
|
||||
return oneRemoved;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds in the tree the node with the lowest penalty whose subnodes are all leaves
|
||||
*
|
||||
* @return the node and its penalty, or null if no such node exists
|
||||
*/
|
||||
private Pair<RecalDatumNode<T>, Double> getMinPenaltyAboveLeafNode() {
|
||||
if ( isLeaf() )
|
||||
// not allowed to remove leafs directly
|
||||
return null;
|
||||
if ( isAboveOnlyLeaves() )
|
||||
// we only consider removing nodes above all leaves
|
||||
return new Pair<RecalDatumNode<T>, Double>(this, getPenalty());
|
||||
else {
|
||||
// just recurse, taking the result with the min penalty of all subnodes
|
||||
Pair<RecalDatumNode<T>, Double> minNode = null;
|
||||
for ( final RecalDatumNode<T> sub : subnodes ) {
|
||||
final Pair<RecalDatumNode<T>, Double> subFind = sub.getMinPenaltyAboveLeafNode();
|
||||
if ( subFind != null && (minNode == null || subFind.getSecond() < minNode.getSecond()) ) {
|
||||
minNode = subFind;
|
||||
}
|
||||
}
|
||||
return minNode;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a freshly allocated tree without the node nodeToRemove
|
||||
*
|
||||
* @param nodeToRemove
|
||||
* @return
|
||||
*/
|
||||
private Pair<RecalDatumNode<T>, Boolean> removeNode(final RecalDatumNode<T> nodeToRemove) {
|
||||
if ( this == nodeToRemove ) {
|
||||
if ( isLeaf() )
|
||||
throw new IllegalStateException("Trying to remove a leaf node from the tree! " + this + " " + nodeToRemove);
|
||||
// node is the thing we are going to remove, but without any subnodes
|
||||
final RecalDatumNode<T> node = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty);
|
||||
return new Pair<RecalDatumNode<T>, Boolean>(node, true);
|
||||
} else {
|
||||
// did we remove something in a sub branch?
|
||||
boolean removedSomething = false;
|
||||
|
||||
// our sub nodes with the penalty node removed
|
||||
final Set<RecalDatumNode<T>> sub = new HashSet<RecalDatumNode<T>>(getNumSubnodes());
|
||||
|
||||
for ( final RecalDatumNode<T> sub1 : subnodes ) {
|
||||
if ( removedSomething ) {
|
||||
// already removed something, just add sub1 back to sub
|
||||
sub.add(sub1);
|
||||
} else {
|
||||
// haven't removed anything yet, so try
|
||||
final Pair<RecalDatumNode<T>, Boolean> maybeRemoved = sub1.removeNode(nodeToRemove);
|
||||
removedSomething = maybeRemoved.getSecond();
|
||||
sub.add(maybeRemoved.getFirst());
|
||||
}
|
||||
}
|
||||
|
||||
final RecalDatumNode<T> node = new RecalDatumNode<T>(getRecalDatum(), fixedPenalty, sub);
|
||||
return new Pair<RecalDatumNode<T>, Boolean>(node, removedSomething);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a collection of all of the data in the leaf nodes of this tree
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public Collection<T> getAllLeaves() {
|
||||
final LinkedList<T> list = new LinkedList<T>();
|
||||
getAllLeavesRec(list);
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helpful recursive function for getAllLeaves()
|
||||
*
|
||||
* @param list the destination for the list of leaves
|
||||
*/
|
||||
private void getAllLeavesRec(final LinkedList<T> list) {
|
||||
if ( isLeaf() )
|
||||
list.add(getRecalDatum());
|
||||
else {
|
||||
for ( final RecalDatumNode<T> sub : subnodes )
|
||||
sub.getAllLeavesRec(list);
|
||||
}
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,419 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import htsjdk.tribble.Feature;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReportTable;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.exceptions.GATKException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 27, 2009
|
||||
*
|
||||
* A collection of the arguments that are used for BQSR. Used to be common to both CovariateCounterWalker and TableRecalibrationWalker.
|
||||
* This set of arguments will also be passed to the constructor of every Covariate when it is instantiated.
|
||||
*/
|
||||
|
||||
public class RecalibrationArgumentCollection implements Cloneable {
|
||||
|
||||
/**
|
||||
* This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference,
|
||||
* so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.)
|
||||
* for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
|
||||
* Please note however that the statistics reported by the tool will not accurately reflect those sites skipped by the -XL argument.
|
||||
*/
|
||||
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
|
||||
public List<RodBinding<Feature>> knownSites = Collections.emptyList();
|
||||
|
||||
/**
|
||||
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. Use '/dev/stdout' to print to standard out.
|
||||
*/
|
||||
@Gather(BQSRGatherer.class)
|
||||
@Output(doc = "The output recalibration table file to create", required = true)
|
||||
public File RECAL_TABLE_FILE = null;
|
||||
public PrintStream RECAL_TABLE;
|
||||
|
||||
/**
|
||||
* Note that the --list argument requires a fully resolved and correct command-line to work.
|
||||
*/
|
||||
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
|
||||
public boolean LIST_ONLY = false;
|
||||
|
||||
/**
|
||||
* Note that the ReadGroup and QualityScore covariates are required and do not need to be specified.
|
||||
* Also, unless --no_standard_covs is specified, the Cycle and Context covariates are standard and are included by default.
|
||||
* Use the --list argument to see the available covariates.
|
||||
*/
|
||||
@Argument(fullName = "covariate", shortName = "cov", doc = "One or more covariates to be used in the recalibration. Can be specified multiple times", required = false)
|
||||
public String[] COVARIATES = null;
|
||||
|
||||
/**
|
||||
* The Cycle and Context covariates are standard and are included by default unless this argument is provided.
|
||||
* Note that the ReadGroup and QualityScore covariates are required and cannot be excluded.
|
||||
*/
|
||||
@Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false)
|
||||
public boolean DO_NOT_USE_STANDARD_COVARIATES = false;
|
||||
|
||||
/**
|
||||
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
|
||||
public boolean RUN_WITHOUT_DBSNP = false;
|
||||
|
||||
/**
|
||||
* BaseRecalibrator accepts a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
||||
* reads which have had the reference inserted because of color space inconsistencies.
|
||||
*/
|
||||
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
|
||||
public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
|
||||
|
||||
/**
|
||||
* BaseRecalibrator accepts a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
|
||||
* no calls in the color space tag. Unfortunately because of the reference inserted bases mentioned above, reads with no calls in
|
||||
* their color space tag can not be recalibrated.
|
||||
*/
|
||||
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
|
||||
public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
||||
|
||||
/**
|
||||
* The context covariate will use a context of this size to calculate its covariate value for base mismatches. Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
|
||||
*/
|
||||
@Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "Size of the k-mer context to be used for base mismatches", required = false)
|
||||
public int MISMATCHES_CONTEXT_SIZE = 2;
|
||||
|
||||
/**
|
||||
* The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions. Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
|
||||
*/
|
||||
@Argument(fullName = "indels_context_size", shortName = "ics", doc = "Size of the k-mer context to be used for base insertions and deletions", required = false)
|
||||
public int INDELS_CONTEXT_SIZE = 3;
|
||||
|
||||
/**
|
||||
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
|
||||
* This argument is ignored if the Cycle covariate is not used.
|
||||
*/
|
||||
@Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "The maximum cycle value permitted for the Cycle covariate", required = false)
|
||||
public int MAXIMUM_CYCLE_VALUE = 500;
|
||||
|
||||
/**
|
||||
* A default base quality to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read. A negative value turns it off. [default is off]
|
||||
*/
|
||||
@Argument(fullName = "mismatches_default_quality", shortName = "mdq", doc = "default quality for the base mismatches covariate", required = false)
|
||||
public byte MISMATCHES_DEFAULT_QUALITY = -1;
|
||||
|
||||
/**
|
||||
* A default base quality to use as a prior (reported quality) in the insertion covariate model. This parameter is used for all reads without insertion quality scores for each base. [default is on]
|
||||
*/
|
||||
@Argument(fullName = "insertions_default_quality", shortName = "idq", doc = "default quality for the base insertions covariate", required = false)
|
||||
public byte INSERTIONS_DEFAULT_QUALITY = 45;
|
||||
|
||||
/**
|
||||
* A default base quality to use as a prior (reported quality) in the deletion covariate model. This value will replace all base qualities in the read. A negative value turns it off. [default is on]
|
||||
*/
|
||||
@Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
|
||||
public byte DELETIONS_DEFAULT_QUALITY = 45;
|
||||
|
||||
/**
|
||||
* Reads with low quality bases on either tail (beginning or end) will not be considered in the context. This parameter defines the quality below which (inclusive) a tail is considered low quality
|
||||
*/
|
||||
@Argument(fullName = "low_quality_tail", shortName = "lqt", doc = "minimum quality for the bases in the tail of the reads to be considered", required = false)
|
||||
public byte LOW_QUAL_TAIL = 2;
|
||||
|
||||
/**
|
||||
* BQSR generates a quantization table for quick quantization later by subsequent tools. BQSR does not quantize the base qualities, this is done by the engine with the -qq or -BQSR options.
|
||||
* This parameter tells BQSR the number of levels of quantization to use to build the quantization table.
|
||||
*/
|
||||
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
|
||||
public int QUANTIZING_LEVELS = 16;
|
||||
|
||||
/**
|
||||
* The tag name for the binary tag covariate (if using it)
|
||||
*/
|
||||
@Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
|
||||
public String BINARY_TAG_NAME = null;
|
||||
|
||||
/**
|
||||
* whether GATK report tables should have rows in sorted order, starting from leftmost column
|
||||
*/
|
||||
@Argument(fullName = "sort_by_all_columns", shortName = "sortAllCols", doc = "Sort the rows in the tables of reports", required = false)
|
||||
public Boolean SORT_BY_ALL_COLUMNS = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
/////////////////////////////
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String DEFAULT_PLATFORM = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String FORCE_PLATFORM = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "force_readgroup", shortName = "fRG", required = false, doc = "If provided, the read group of EVERY read will be forced to be the provided String.")
|
||||
public String FORCE_READGROUP = null;
|
||||
|
||||
@Hidden
|
||||
@Output(fullName = "recal_table_update_log", shortName = "recal_table_update_log", required = false, doc = "If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only", defaultToStdout = false)
|
||||
public PrintStream RECAL_TABLE_UPDATE_LOG = null;
|
||||
|
||||
/**
|
||||
* The repeat covariate will use a context of this size to calculate its covariate value for base insertions and deletions
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "max_str_unit_length", shortName = "maxstr", doc = "Max size of the k-mer context to be used for repeat covariates", required = false)
|
||||
public int MAX_STR_UNIT_LENGTH = 8;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "max_repeat_length", shortName = "maxrep", doc = "Max number of repetitions to be used for repeat covariates", required = false)
|
||||
public int MAX_REPEAT_LENGTH = 20;
|
||||
|
||||
|
||||
public File existingRecalibrationReport = null;
|
||||
|
||||
public GATKReportTable generateReportTable(final String covariateNames) {
|
||||
GATKReportTable argumentsTable;
|
||||
if(SORT_BY_ALL_COLUMNS) {
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||
}
|
||||
argumentsTable.addColumn("Argument");
|
||||
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
argumentsTable.addRowID("covariate", true);
|
||||
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames);
|
||||
argumentsTable.addRowID("no_standard_covs", true);
|
||||
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
||||
argumentsTable.addRowID("run_without_dbsnp", true);
|
||||
argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
||||
argumentsTable.addRowID("solid_recal_mode", true);
|
||||
argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
|
||||
argumentsTable.addRowID("solid_nocall_strategy", true);
|
||||
argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
|
||||
argumentsTable.addRowID("mismatches_context_size", true);
|
||||
argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("indels_context_size", true);
|
||||
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("mismatches_default_quality", true);
|
||||
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("deletions_default_quality", true);
|
||||
argumentsTable.set("deletions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("insertions_default_quality", true);
|
||||
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("maximum_cycle_value", true);
|
||||
argumentsTable.set("maximum_cycle_value", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MAXIMUM_CYCLE_VALUE);
|
||||
argumentsTable.addRowID("low_quality_tail", true);
|
||||
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
|
||||
argumentsTable.addRowID("default_platform", true);
|
||||
argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
|
||||
argumentsTable.addRowID("force_platform", true);
|
||||
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
|
||||
argumentsTable.addRowID("quantizing_levels", true);
|
||||
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.addRowID("recalibration_report", true);
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
|
||||
argumentsTable.addRowID("binary_tag_name", true);
|
||||
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
return argumentsTable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a map with the arguments that differ between this an
|
||||
* another {@link RecalibrationArgumentCollection} instance.
|
||||
* <p/>
|
||||
* The key is the name of that argument in the report file. The value is a message
|
||||
* that explains the difference to the end user.
|
||||
* <p/>
|
||||
* Thus, a empty map indicates that there is no differences between both argument collection that
|
||||
* is relevant to report comparison.
|
||||
* <p/>
|
||||
* This method should not throw any exception.
|
||||
*
|
||||
* @param other the argument-collection to compare against.
|
||||
* @param thisRole the name used to refer to this RAC report that makes sense to the end user.
|
||||
* @param otherRole the name used to refer to the other RAC report that makes sense to the end user.
|
||||
*
|
||||
* @return never <code>null</code>, but a zero-size collection if there are no differences.
|
||||
*/
|
||||
@Requires("other != null && thisRole != null && otherRole != null && !thisRole.equalsIgnoreCase(otherRole)")
|
||||
public Map<String,? extends CharSequence> compareReportArguments(final RecalibrationArgumentCollection other,final String thisRole, final String otherRole) {
|
||||
final Map<String,String> result = new LinkedHashMap<>(15);
|
||||
compareRequestedCovariates(result, other, thisRole, otherRole);
|
||||
compareSimpleReportArgument(result,"no_standard_covs", DO_NOT_USE_STANDARD_COVARIATES, other.DO_NOT_USE_STANDARD_COVARIATES, thisRole, otherRole);
|
||||
compareSimpleReportArgument(result,"run_without_dbsnp",RUN_WITHOUT_DBSNP,other.RUN_WITHOUT_DBSNP,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"solid_recal_mode", SOLID_RECAL_MODE, other.SOLID_RECAL_MODE,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"solid_nocall_strategy", SOLID_NOCALL_STRATEGY, other.SOLID_NOCALL_STRATEGY,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"mismatches_context_size", MISMATCHES_CONTEXT_SIZE,other.MISMATCHES_CONTEXT_SIZE,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"mismatches_default_quality", MISMATCHES_DEFAULT_QUALITY, other.MISMATCHES_DEFAULT_QUALITY,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"deletions_default_quality", DELETIONS_DEFAULT_QUALITY, other.DELETIONS_DEFAULT_QUALITY,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"insertions_default_quality", INSERTIONS_DEFAULT_QUALITY, other.INSERTIONS_DEFAULT_QUALITY,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"maximum_cycle_value", MAXIMUM_CYCLE_VALUE, other.MAXIMUM_CYCLE_VALUE,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"low_quality_tail", LOW_QUAL_TAIL, other.LOW_QUAL_TAIL,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"default_platform", DEFAULT_PLATFORM, other.DEFAULT_PLATFORM,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"force_platform", FORCE_PLATFORM, other.FORCE_PLATFORM,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"quantizing_levels", QUANTIZING_LEVELS, other.QUANTIZING_LEVELS,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"binary_tag_name", BINARY_TAG_NAME, other.BINARY_TAG_NAME,thisRole,otherRole);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compares the covariate report lists.
|
||||
*
|
||||
* @param diffs map where to annotate the difference.
|
||||
* @param other the argument collection to compare against.
|
||||
* @param thisRole the name for this argument collection that makes sense to the user.
|
||||
* @param otherRole the name for the other argument collection that makes sense to the end user.
|
||||
*
|
||||
* @return <code>true</code> if a difference was found.
|
||||
*/
|
||||
@Requires("diffs != null && other != null && thisRole != null && otherRole != null")
|
||||
private boolean compareRequestedCovariates(final Map<String,String> diffs,
|
||||
final RecalibrationArgumentCollection other, final String thisRole, final String otherRole) {
|
||||
|
||||
final Set<String> beforeNames = new HashSet<>(this.COVARIATES.length);
|
||||
final Set<String> afterNames = new HashSet<>(other.COVARIATES.length);
|
||||
Utils.addAll(beforeNames, this.COVARIATES);
|
||||
Utils.addAll(afterNames,other.COVARIATES);
|
||||
final Set<String> intersect = new HashSet<>(Math.min(beforeNames.size(),afterNames.size()));
|
||||
intersect.addAll(beforeNames);
|
||||
intersect.retainAll(afterNames);
|
||||
|
||||
String diffMessage = null;
|
||||
if (intersect.size() == 0) { // In practice this is not possible due to required covariates but...
|
||||
diffMessage = String.format("There are no common covariates between '%s' and '%s'"
|
||||
+ " recalibrator reports. Covariates in '%s': {%s}. Covariates in '%s': {%s}.",thisRole,otherRole,
|
||||
thisRole,Utils.join(", ",this.COVARIATES),
|
||||
otherRole,Utils.join(",",other.COVARIATES));
|
||||
} else if (intersect.size() != beforeNames.size() || intersect.size() != afterNames.size()) {
|
||||
beforeNames.removeAll(intersect);
|
||||
afterNames.removeAll(intersect);
|
||||
diffMessage = String.format("There are differences in the set of covariates requested in the"
|
||||
+ " '%s' and '%s' recalibrator reports. "
|
||||
+ " Exclusive to '%s': {%s}. Exclusive to '%s': {%s}.",thisRole,otherRole,
|
||||
thisRole,Utils.join(", ",beforeNames),
|
||||
otherRole,Utils.join(", ",afterNames));
|
||||
}
|
||||
if (diffMessage != null) {
|
||||
diffs.put("covariate",diffMessage);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Annotates a map with any difference encountered in a simple value report argument that differs between this an
|
||||
* another {@link RecalibrationArgumentCollection} instance.
|
||||
* <p/>
|
||||
* The key of the new entry would be the name of that argument in the report file. The value is a message
|
||||
* that explains the difference to the end user.
|
||||
* <p/>
|
||||
*
|
||||
* <p/>
|
||||
* This method should not return any exception.
|
||||
*
|
||||
* @param diffs where to annotate the differences.
|
||||
* @param name the name of the report argument to compare.
|
||||
* @param thisValue this argument collection value for that argument.
|
||||
* @param otherValue the other collection value for that argument.
|
||||
* @param thisRole the name used to refer to this RAC report that makes sense to the end user.
|
||||
* @param otherRole the name used to refer to the other RAC report that makes sense to the end user.
|
||||
*
|
||||
* @type T the argument Object value type.
|
||||
*
|
||||
* @return <code>true</code> if a difference has been spotted, thus <code>diff</code> has been modified.
|
||||
*/
|
||||
private <T> boolean compareSimpleReportArgument(final Map<String,String> diffs,
|
||||
final String name, final T thisValue, final T otherValue, final String thisRole, final String otherRole) {
|
||||
if (thisValue == null && otherValue == null) {
|
||||
return false;
|
||||
} else if (thisValue != null && thisValue.equals(otherValue)) {
|
||||
return false;
|
||||
} else {
|
||||
diffs.put(name,
|
||||
String.format("differences between '%s' {%s} and '%s' {%s}.",
|
||||
thisRole,thisValue == null ? "" : thisValue,
|
||||
otherRole,otherValue == null ? "" : otherValue));
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a shallow copy of this argument collection.
|
||||
*
|
||||
* @return never <code>null</code>.
|
||||
*/
|
||||
@Override
|
||||
public RecalibrationArgumentCollection clone() {
|
||||
try {
|
||||
return (RecalibrationArgumentCollection) super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new GATKException("Unreachable code clone not supported thrown when the class "
|
||||
+ this.getClass().getName() + " is cloneable ",e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,425 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import org.broadinstitute.gatk.utils.report.GATKReport;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReportTable;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.recalibration.EventType;
|
||||
import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* This class has all the static functionality for reading a recalibration report file into memory.
|
||||
*
|
||||
* @author carneiro
|
||||
* @since 3/26/12
|
||||
*/
|
||||
public class RecalibrationReport {
|
||||
private QuantizationInfo quantizationInfo; // histogram containing the counts for qual quantization (calculated after recalibration is done)
|
||||
private final RecalibrationTables recalibrationTables; // quick access reference to the tables
|
||||
private final Covariate[] requestedCovariates; // list of all covariates to be used in this calculation
|
||||
private final HashMap<String, Integer> optionalCovariateIndexes;
|
||||
|
||||
private final GATKReportTable argumentTable; // keep the argument table untouched just for output purposes
|
||||
private final RecalibrationArgumentCollection RAC; // necessary for quantizing qualities with the same parameter
|
||||
|
||||
private final int[] tempRGarray = new int[2];
|
||||
private final int[] tempQUALarray = new int[3];
|
||||
private final int[] tempCOVarray = new int[4];
|
||||
|
||||
public RecalibrationReport(final File recalFile) {
|
||||
this(recalFile, getReadGroups(recalFile));
|
||||
}
|
||||
|
||||
public RecalibrationReport(final File recalFile, final SortedSet<String> allReadGroups) {
|
||||
final GATKReport report = new GATKReport(recalFile);
|
||||
|
||||
argumentTable = report.getTable(RecalUtils.ARGUMENT_REPORT_TABLE_TITLE);
|
||||
RAC = initializeArgumentCollectionTable(argumentTable);
|
||||
|
||||
GATKReportTable quantizedTable = report.getTable(RecalUtils.QUANTIZED_REPORT_TABLE_TITLE);
|
||||
quantizationInfo = initializeQuantizationTable(quantizedTable);
|
||||
|
||||
Pair<ArrayList<Covariate>, ArrayList<Covariate>> covariates = RecalUtils.initializeCovariates(RAC); // initialize the required and optional covariates
|
||||
ArrayList<Covariate> requiredCovariates = covariates.getFirst();
|
||||
ArrayList<Covariate> optionalCovariates = covariates.getSecond();
|
||||
requestedCovariates = new Covariate[requiredCovariates.size() + optionalCovariates.size()];
|
||||
optionalCovariateIndexes = new HashMap<String, Integer>(optionalCovariates.size());
|
||||
int covariateIndex = 0;
|
||||
for (final Covariate covariate : requiredCovariates)
|
||||
requestedCovariates[covariateIndex++] = covariate;
|
||||
for (final Covariate covariate : optionalCovariates) {
|
||||
requestedCovariates[covariateIndex] = covariate;
|
||||
final String covariateName = covariate.getClass().getSimpleName().split("Covariate")[0]; // get the name of the covariate (without the "covariate" part of it) so we can match with the GATKReport
|
||||
optionalCovariateIndexes.put(covariateName, covariateIndex-2);
|
||||
covariateIndex++;
|
||||
}
|
||||
|
||||
for (Covariate cov : requestedCovariates)
|
||||
cov.initialize(RAC); // initialize any covariate member variables using the shared argument collection
|
||||
|
||||
recalibrationTables = new RecalibrationTables(requestedCovariates, allReadGroups.size());
|
||||
|
||||
initializeReadGroupCovariates(allReadGroups);
|
||||
|
||||
parseReadGroupTable(report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE), recalibrationTables.getReadGroupTable());
|
||||
|
||||
parseQualityScoreTable(report.getTable(RecalUtils.QUALITY_SCORE_REPORT_TABLE_TITLE), recalibrationTables.getQualityScoreTable());
|
||||
|
||||
parseAllCovariatesTable(report.getTable(RecalUtils.ALL_COVARIATES_REPORT_TABLE_TITLE), recalibrationTables);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the unique read groups in the recal file
|
||||
*
|
||||
* @param recalFile the recal file as a GATK Report
|
||||
* @return the unique read groups
|
||||
*/
|
||||
public static SortedSet<String> getReadGroups(final File recalFile) {
|
||||
return getReadGroups(new GATKReport(recalFile));
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the unique read groups in the table
|
||||
*
|
||||
* @param report the GATKReport containing the table with RecalUtils.READGROUP_REPORT_TABLE_TITLE
|
||||
* @return the unique read groups
|
||||
*/
|
||||
private static SortedSet<String> getReadGroups(final GATKReport report) {
|
||||
final GATKReportTable reportTable = report.getTable(RecalUtils.READGROUP_REPORT_TABLE_TITLE);
|
||||
final SortedSet<String> readGroups = new TreeSet<String>();
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ )
|
||||
readGroups.add(reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME).toString());
|
||||
return readGroups;
|
||||
}
|
||||
|
||||
/**
|
||||
* Combines two recalibration reports by adding all observations and errors
|
||||
*
|
||||
* Note: This method DOES NOT recalculate the empirical qualities and quantized qualities. You have to recalculate
|
||||
* them after combining. The reason for not calculating it is because this function is intended for combining a
|
||||
* series of recalibration reports, and it only makes sense to calculate the empirical qualities and quantized
|
||||
* qualities after all the recalibration reports have been combined. Having the user recalculate when appropriate,
|
||||
* makes this method faster
|
||||
*
|
||||
* Note2: The empirical quality reported, however, is recalculated given its simplicity.
|
||||
*
|
||||
* @param other the recalibration report to combine with this one
|
||||
*/
|
||||
public void combine(final RecalibrationReport other) {
|
||||
for ( int tableIndex = 0; tableIndex < recalibrationTables.numTables(); tableIndex++ ) {
|
||||
final NestedIntegerArray<RecalDatum> myTable = recalibrationTables.getTable(tableIndex);
|
||||
final NestedIntegerArray<RecalDatum> otherTable = other.recalibrationTables.getTable(tableIndex);
|
||||
RecalUtils.combineTables(myTable, otherTable);
|
||||
}
|
||||
}
|
||||
|
||||
public QuantizationInfo getQuantizationInfo() {
|
||||
return quantizationInfo;
|
||||
}
|
||||
|
||||
public RecalibrationTables getRecalibrationTables() {
|
||||
return recalibrationTables;
|
||||
}
|
||||
|
||||
public Covariate[] getRequestedCovariates() {
|
||||
return requestedCovariates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize read group keys using the shared list of all the read groups.
|
||||
*
|
||||
* By using the same sorted set of read groups across all recalibration reports, even if
|
||||
* one report is missing a read group, all the reports use the same read group keys.
|
||||
*
|
||||
* @param allReadGroups The list of all possible read groups
|
||||
*/
|
||||
private void initializeReadGroupCovariates(final SortedSet<String> allReadGroups) {
|
||||
for (String readGroup: allReadGroups) {
|
||||
requestedCovariates[0].keyFromValue(readGroup);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles the list of keys for the Covariates table and uses the shared parsing utility to produce the actual table
|
||||
*
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @param recalibrationTables the recalibration tables
|
||||
\ */
|
||||
private void parseAllCovariatesTable(final GATKReportTable reportTable, final RecalibrationTables recalibrationTables) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
|
||||
tempCOVarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempCOVarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
|
||||
final String covName = (String)reportTable.get(i, RecalUtils.COVARIATE_NAME_COLUMN_NAME);
|
||||
final int covIndex = optionalCovariateIndexes.get(covName);
|
||||
final Object covValue = reportTable.get(i, RecalUtils.COVARIATE_VALUE_COLUMN_NAME);
|
||||
tempCOVarray[2] = requestedCovariates[RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + covIndex].keyFromValue(covValue);
|
||||
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempCOVarray[3] = event.ordinal();
|
||||
|
||||
recalibrationTables.getTable(RecalibrationTables.TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + covIndex).put(getRecalDatum(reportTable, i, false), tempCOVarray);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Compiles the list of keys for the QualityScore table and uses the shared parsing utility to produce the actual table
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @param qualTable the map representing this table
|
||||
*/
|
||||
private void parseQualityScoreTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> qualTable) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
|
||||
tempQUALarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final Object qual = reportTable.get(i, RecalUtils.QUALITY_SCORE_COLUMN_NAME);
|
||||
tempQUALarray[1] = requestedCovariates[1].keyFromValue(qual);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempQUALarray[2] = event.ordinal();
|
||||
|
||||
qualTable.put(getRecalDatum(reportTable, i, false), tempQUALarray);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compiles the list of keys for the ReadGroup table and uses the shared parsing utility to produce the actual table
|
||||
*
|
||||
* @param reportTable the GATKReport table containing data for this table
|
||||
* @param rgTable the map representing this table
|
||||
*/
|
||||
private void parseReadGroupTable(final GATKReportTable reportTable, final NestedIntegerArray<RecalDatum> rgTable) {
|
||||
for ( int i = 0; i < reportTable.getNumRows(); i++ ) {
|
||||
final Object rg = reportTable.get(i, RecalUtils.READGROUP_COLUMN_NAME);
|
||||
tempRGarray[0] = requestedCovariates[0].keyFromValue(rg);
|
||||
final EventType event = EventType.eventFrom((String)reportTable.get(i, RecalUtils.EVENT_TYPE_COLUMN_NAME));
|
||||
tempRGarray[1] = event.ordinal();
|
||||
|
||||
rgTable.put(getRecalDatum(reportTable, i, true), tempRGarray);
|
||||
}
|
||||
}
|
||||
|
||||
private double asDouble(final Object o) {
|
||||
if ( o instanceof Double )
|
||||
return (Double)o;
|
||||
else if ( o instanceof Integer )
|
||||
return (Integer)o;
|
||||
else if ( o instanceof Long )
|
||||
return (Long)o;
|
||||
else
|
||||
throw new ReviewedGATKException("Object " + o + " is expected to be either a double, long or integer but it's not either: " + o.getClass());
|
||||
}
|
||||
|
||||
private long asLong(final Object o) {
|
||||
if ( o instanceof Long )
|
||||
return (Long)o;
|
||||
else if ( o instanceof Integer )
|
||||
return ((Integer)o).longValue();
|
||||
else if ( o instanceof Double )
|
||||
return ((Double)o).longValue();
|
||||
else
|
||||
throw new ReviewedGATKException("Object " + o + " is expected to be a long but it's not: " + o.getClass());
|
||||
}
|
||||
|
||||
private RecalDatum getRecalDatum(final GATKReportTable reportTable, final int row, final boolean hasEstimatedQReportedColumn) {
|
||||
final long nObservations = asLong(reportTable.get(row, RecalUtils.NUMBER_OBSERVATIONS_COLUMN_NAME));
|
||||
final double nErrors = asDouble(reportTable.get(row, RecalUtils.NUMBER_ERRORS_COLUMN_NAME));
|
||||
//final double empiricalQuality = asDouble(reportTable.get(row, RecalUtils.EMPIRICAL_QUALITY_COLUMN_NAME));
|
||||
|
||||
// the estimatedQreported column only exists in the ReadGroup table
|
||||
final double estimatedQReported = hasEstimatedQReportedColumn ?
|
||||
(Double) reportTable.get(row, RecalUtils.ESTIMATED_Q_REPORTED_COLUMN_NAME) : // we get it if we are in the read group table
|
||||
Byte.parseByte((String) reportTable.get(row, RecalUtils.QUALITY_SCORE_COLUMN_NAME)); // or we use the reported quality if we are in any other table
|
||||
|
||||
final RecalDatum datum = new RecalDatum(nObservations, nErrors, (byte)1);
|
||||
datum.setEstimatedQReported(estimatedQReported);
|
||||
//datum.setEmpiricalQuality(empiricalQuality); // don't set the value here because we will want to recompute with a different conditional Q score prior value
|
||||
return datum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the quantization table from the GATK Report and turns it into a map of original => quantized quality scores
|
||||
*
|
||||
* @param table the GATKReportTable containing the quantization mappings
|
||||
* @return an ArrayList with the quantization mappings from 0 to MAX_SAM_QUAL_SCORE
|
||||
*/
|
||||
private QuantizationInfo initializeQuantizationTable(GATKReportTable table) {
|
||||
final Byte[] quals = new Byte[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
|
||||
final Long[] counts = new Long[QualityUtils.MAX_SAM_QUAL_SCORE + 1];
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
final byte originalQual = (byte)i;
|
||||
final Object quantizedObject = table.get(i, RecalUtils.QUANTIZED_VALUE_COLUMN_NAME);
|
||||
final Object countObject = table.get(i, RecalUtils.QUANTIZED_COUNT_COLUMN_NAME);
|
||||
final byte quantizedQual = Byte.parseByte(quantizedObject.toString());
|
||||
final long quantizedCount = Long.parseLong(countObject.toString());
|
||||
quals[originalQual] = quantizedQual;
|
||||
counts[originalQual] = quantizedCount;
|
||||
}
|
||||
return new QuantizationInfo(Arrays.asList(quals), Arrays.asList(counts));
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the arguments table from the GATK Report and creates a RAC object with the proper initialization values
|
||||
*
|
||||
* @param table the GATKReportTable containing the arguments and its corresponding values
|
||||
* @return a RAC object properly initialized with all the objects in the table
|
||||
*/
|
||||
private RecalibrationArgumentCollection initializeArgumentCollectionTable(GATKReportTable table) {
|
||||
final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
||||
|
||||
for ( int i = 0; i < table.getNumRows(); i++ ) {
|
||||
final String argument = table.get(i, "Argument").toString();
|
||||
Object value = table.get(i, RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
if (value.equals("null"))
|
||||
value = null; // generic translation of null values that were printed out as strings | todo -- add this capability to the GATKReport
|
||||
|
||||
if (argument.equals("covariate") && value != null)
|
||||
RAC.COVARIATES = value.toString().split(",");
|
||||
|
||||
else if (argument.equals("standard_covs"))
|
||||
RAC.DO_NOT_USE_STANDARD_COVARIATES = Boolean.parseBoolean((String) value);
|
||||
|
||||
else if (argument.equals("solid_recal_mode"))
|
||||
RAC.SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.recalModeFromString((String) value);
|
||||
|
||||
else if (argument.equals("solid_nocall_strategy"))
|
||||
RAC.SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.nocallStrategyFromString((String) value);
|
||||
|
||||
else if (argument.equals("mismatches_context_size"))
|
||||
RAC.MISMATCHES_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("indels_context_size"))
|
||||
RAC.INDELS_CONTEXT_SIZE = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("mismatches_default_quality"))
|
||||
RAC.MISMATCHES_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
||||
|
||||
else if (argument.equals("insertions_default_quality"))
|
||||
RAC.INSERTIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
||||
|
||||
else if (argument.equals("deletions_default_quality"))
|
||||
RAC.DELETIONS_DEFAULT_QUALITY = Byte.parseByte((String) value);
|
||||
|
||||
else if (argument.equals("maximum_cycle_value"))
|
||||
RAC.MAXIMUM_CYCLE_VALUE = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("low_quality_tail"))
|
||||
RAC.LOW_QUAL_TAIL = Byte.parseByte((String) value);
|
||||
|
||||
else if (argument.equals("default_platform"))
|
||||
RAC.DEFAULT_PLATFORM = (String) value;
|
||||
|
||||
else if (argument.equals("force_platform"))
|
||||
RAC.FORCE_PLATFORM = (String) value;
|
||||
|
||||
else if (argument.equals("quantizing_levels"))
|
||||
RAC.QUANTIZING_LEVELS = Integer.parseInt((String) value);
|
||||
|
||||
else if (argument.equals("recalibration_report"))
|
||||
RAC.existingRecalibrationReport = (value == null) ? null : new File((String) value);
|
||||
|
||||
else if (argument.equals("binary_tag_name"))
|
||||
RAC.BINARY_TAG_NAME = (value == null) ? null : (String) value;
|
||||
|
||||
else if (argument.equals("sort_by_all_columns"))
|
||||
RAC.SORT_BY_ALL_COLUMNS = Boolean.parseBoolean((String) value);
|
||||
}
|
||||
|
||||
return RAC;
|
||||
}
|
||||
|
||||
/**
|
||||
* this functionality avoids recalculating the empirical qualities, estimated reported quality
|
||||
* and quantization of the quality scores during every call of combine(). Very useful for the BQSRGatherer.
|
||||
*/
|
||||
public void calculateQuantizedQualities() {
|
||||
quantizationInfo = new QuantizationInfo(recalibrationTables, RAC.QUANTIZING_LEVELS);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates the recalibration report. Report can then be written to a stream via GATKReport.print(PrintStream).
|
||||
*
|
||||
* @return newly created recalibration report
|
||||
*/
|
||||
public GATKReport createGATKReport() {
|
||||
return RecalUtils.createRecalibrationGATKReport(argumentTable, quantizationInfo, recalibrationTables, requestedCovariates, RAC.SORT_BY_ALL_COLUMNS);
|
||||
}
|
||||
|
||||
public RecalibrationArgumentCollection getRAC() {
|
||||
return RAC;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @deprecated use {@link #getRequestedCovariates()} instead.
|
||||
*/
|
||||
@Deprecated
|
||||
public Covariate[] getCovariates() {
|
||||
return requestedCovariates;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the report has no data
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
return recalibrationTables.isEmpty();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import org.broadinstitute.gatk.utils.collections.LoggingNestedIntegerArray;
|
||||
import org.broadinstitute.gatk.utils.recalibration.EventType;
|
||||
import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.gatk.utils.collections.NestedIntegerArray;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Utility class to facilitate on-the-fly base quality score recalibration.
|
||||
*
|
||||
* User: ebanks
|
||||
* Date: 6/20/12
|
||||
*/
|
||||
|
||||
public final class RecalibrationTables {
|
||||
public enum TableType {
|
||||
READ_GROUP_TABLE,
|
||||
QUALITY_SCORE_TABLE,
|
||||
OPTIONAL_COVARIATE_TABLES_START;
|
||||
}
|
||||
|
||||
private final ArrayList<NestedIntegerArray<RecalDatum>> tables;
|
||||
private final int qualDimension;
|
||||
private final int eventDimension = EventType.values().length;
|
||||
private final int numReadGroups;
|
||||
private final PrintStream log;
|
||||
|
||||
public RecalibrationTables(final Covariate[] covariates) {
|
||||
this(covariates, covariates[TableType.READ_GROUP_TABLE.ordinal()].maximumKeyValue() + 1, null);
|
||||
}
|
||||
|
||||
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups) {
|
||||
this(covariates, numReadGroups, null);
|
||||
}
|
||||
|
||||
public RecalibrationTables(final Covariate[] covariates, final int numReadGroups, final PrintStream log) {
|
||||
tables = new ArrayList<NestedIntegerArray<RecalDatum>>(covariates.length);
|
||||
for ( int i = 0; i < covariates.length; i++ )
|
||||
tables.add(i, null); // initialize so we can set below
|
||||
|
||||
qualDimension = covariates[TableType.QUALITY_SCORE_TABLE.ordinal()].maximumKeyValue() + 1;
|
||||
this.numReadGroups = numReadGroups;
|
||||
this.log = log;
|
||||
|
||||
tables.set(TableType.READ_GROUP_TABLE.ordinal(),
|
||||
log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, eventDimension) :
|
||||
new LoggingNestedIntegerArray<RecalDatum>(log, "READ_GROUP_TABLE", numReadGroups, eventDimension));
|
||||
|
||||
tables.set(TableType.QUALITY_SCORE_TABLE.ordinal(), makeQualityScoreTable());
|
||||
|
||||
for (int i = TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal(); i < covariates.length; i++)
|
||||
tables.set(i,
|
||||
log == null ? new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension) :
|
||||
new LoggingNestedIntegerArray<RecalDatum>(log, String.format("OPTIONAL_COVARIATE_TABLE_%d", i - TableType.OPTIONAL_COVARIATE_TABLES_START.ordinal() + 1),
|
||||
numReadGroups, qualDimension, covariates[i].maximumKeyValue()+1, eventDimension));
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public NestedIntegerArray<RecalDatum> getReadGroupTable() {
|
||||
return getTable(TableType.READ_GROUP_TABLE.ordinal());
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public NestedIntegerArray<RecalDatum> getQualityScoreTable() {
|
||||
return getTable(TableType.QUALITY_SCORE_TABLE.ordinal());
|
||||
}
|
||||
|
||||
@Ensures("result != null")
|
||||
public NestedIntegerArray<RecalDatum> getTable(final int index) {
|
||||
return tables.get(index);
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public int numTables() {
|
||||
return tables.size();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if all the tables contain no RecalDatums
|
||||
*/
|
||||
public boolean isEmpty() {
|
||||
for( final NestedIntegerArray<RecalDatum> table : tables ) {
|
||||
if( !table.getAllValues().isEmpty() ) { return false; }
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allocate a new quality score table, based on requested parameters
|
||||
* in this set of tables, without any data in it. The return result
|
||||
* of this table is suitable for acting as a thread-local cache
|
||||
* for quality score values
|
||||
* @return a newly allocated, empty read group x quality score table
|
||||
*/
|
||||
public NestedIntegerArray<RecalDatum> makeQualityScoreTable() {
|
||||
return log == null
|
||||
? new NestedIntegerArray<RecalDatum>(numReadGroups, qualDimension, eventDimension)
|
||||
: new LoggingNestedIntegerArray<RecalDatum>(log, "QUALITY_SCORE_TABLE", numReadGroups, qualDimension, eventDimension);
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge all of the tables from toMerge into into this set of tables
|
||||
*/
|
||||
public void combine(final RecalibrationTables toMerge) {
|
||||
if ( numTables() != toMerge.numTables() )
|
||||
throw new IllegalArgumentException("Attempting to merge RecalibrationTables with different sizes");
|
||||
|
||||
for ( int i = 0; i < numTables(); i++ ) {
|
||||
final NestedIntegerArray<RecalDatum> myTable = this.getTable(i);
|
||||
final NestedIntegerArray<RecalDatum> otherTable = toMerge.getTable(i);
|
||||
RecalUtils.combineTables(myTable, otherTable);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,304 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.clipping.ClippingRepresentation;
|
||||
import org.broadinstitute.gatk.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.engine.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 9/26/11
|
||||
*/
|
||||
|
||||
public class ContextCovariate implements StandardCovariate {
|
||||
private final static Logger logger = Logger.getLogger(ContextCovariate.class);
|
||||
|
||||
|
||||
|
||||
private int mismatchesContextSize;
|
||||
private int indelsContextSize;
|
||||
|
||||
private int mismatchesKeyMask;
|
||||
private int indelsKeyMask;
|
||||
|
||||
private static final int LENGTH_BITS = 4;
|
||||
private static final int LENGTH_MASK = 15;
|
||||
|
||||
// the maximum context size (number of bases) permitted; we need to keep the leftmost base free so that values are
|
||||
// not negative and we reserve 4 more bits to represent the length of the context; it takes 2 bits to encode one base.
|
||||
static final private int MAX_DNA_CONTEXT = 13;
|
||||
private byte LOW_QUAL_TAIL;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
mismatchesContextSize = RAC.MISMATCHES_CONTEXT_SIZE;
|
||||
indelsContextSize = RAC.INDELS_CONTEXT_SIZE;
|
||||
|
||||
logger.info("\t\tContext sizes: base substitution model " + mismatchesContextSize + ", indel substitution model " + indelsContextSize);
|
||||
|
||||
if (mismatchesContextSize > MAX_DNA_CONTEXT)
|
||||
throw new UserException.BadArgumentValue("mismatches_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, mismatchesContextSize));
|
||||
if (indelsContextSize > MAX_DNA_CONTEXT)
|
||||
throw new UserException.BadArgumentValue("indels_context_size", String.format("context size cannot be bigger than %d, but was %d", MAX_DNA_CONTEXT, indelsContextSize));
|
||||
|
||||
LOW_QUAL_TAIL = RAC.LOW_QUAL_TAIL;
|
||||
|
||||
if (mismatchesContextSize <= 0 || indelsContextSize <= 0)
|
||||
throw new UserException(String.format("Context size must be positive, if you don't want to use the context covariate, just turn it off instead. Mismatches: %d Indels: %d", mismatchesContextSize, indelsContextSize));
|
||||
|
||||
mismatchesKeyMask = createMask(mismatchesContextSize);
|
||||
indelsKeyMask = createMask(indelsContextSize);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
|
||||
// store the original bases and then write Ns over low quality ones
|
||||
final byte[] originalBases = read.getReadBases().clone();
|
||||
// Write N's over the low quality tail of the reads to avoid adding them into the context
|
||||
final GATKSAMRecord clippedRead = ReadClipper.clipLowQualEnds(read, LOW_QUAL_TAIL, ClippingRepresentation.WRITE_NS);
|
||||
|
||||
final boolean negativeStrand = clippedRead.getReadNegativeStrandFlag();
|
||||
byte[] bases = clippedRead.getReadBases();
|
||||
if (negativeStrand)
|
||||
bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
final ArrayList<Integer> mismatchKeys = contextWith(bases, mismatchesContextSize, mismatchesKeyMask);
|
||||
final ArrayList<Integer> indelKeys = contextWith(bases, indelsContextSize, indelsKeyMask);
|
||||
|
||||
final int readLength = bases.length;
|
||||
|
||||
// this is necessary to ensure that we don't keep historical data in the ReadCovariates values
|
||||
// since the context covariate may not span the entire set of values in read covariates
|
||||
// due to the clipping of the low quality bases
|
||||
if ( readLength != originalBases.length ) {
|
||||
// don't both zeroing out if we are going to overwrite the whole array
|
||||
for ( int i = 0; i < originalBases.length; i++ )
|
||||
// this base has been clipped off, so zero out the covariate values here
|
||||
values.addCovariate(0, 0, 0, i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
final int readOffset = (negativeStrand ? readLength - i - 1 : i);
|
||||
final int indelKey = indelKeys.get(i);
|
||||
values.addCovariate(mismatchKeys.get(i), indelKey, indelKey, readOffset);
|
||||
}
|
||||
|
||||
// put the original bases back in
|
||||
read.setReadBases(originalBases);
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Object getValue(final String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final int key) {
|
||||
if (key == -1) // this can only happen in test routines because we do not propagate null keys to the csv file
|
||||
return null;
|
||||
|
||||
return contextFromKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int keyFromValue(final Object value) {
|
||||
return keyFromContext((String) value);
|
||||
}
|
||||
|
||||
private static int createMask(final int contextSize) {
|
||||
int mask = 0;
|
||||
// create 2*contextSize worth of bits
|
||||
for (int i = 0; i < contextSize; i++)
|
||||
mask = (mask << 2) | 3;
|
||||
// shift 4 bits to mask out the bits used to encode the length
|
||||
return mask << LENGTH_BITS;
|
||||
}
|
||||
|
||||
/**
|
||||
* calculates the context of a base independent of the covariate mode (mismatch, insertion or deletion)
|
||||
*
|
||||
* @param bases the bases in the read to build the context from
|
||||
* @param contextSize context size to use building the context
|
||||
* @param mask mask for pulling out just the context bits
|
||||
*/
|
||||
private static ArrayList<Integer> contextWith(final byte[] bases, final int contextSize, final int mask) {
|
||||
|
||||
final int readLength = bases.length;
|
||||
final ArrayList<Integer> keys = new ArrayList<Integer>(readLength);
|
||||
|
||||
// the first contextSize-1 bases will not have enough previous context
|
||||
for (int i = 1; i < contextSize && i <= readLength; i++)
|
||||
keys.add(-1);
|
||||
|
||||
if (readLength < contextSize)
|
||||
return keys;
|
||||
|
||||
final int newBaseOffset = 2 * (contextSize - 1) + LENGTH_BITS;
|
||||
|
||||
// get (and add) the key for the context starting at the first base
|
||||
int currentKey = keyFromContext(bases, 0, contextSize);
|
||||
keys.add(currentKey);
|
||||
|
||||
// if the first key was -1 then there was an N in the context; figure out how many more consecutive contexts it affects
|
||||
int currentNPenalty = 0;
|
||||
if (currentKey == -1) {
|
||||
currentKey = 0;
|
||||
currentNPenalty = contextSize - 1;
|
||||
int offset = newBaseOffset;
|
||||
while (bases[currentNPenalty] != 'N') {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentNPenalty]);
|
||||
currentKey |= (baseIndex << offset);
|
||||
offset -= 2;
|
||||
currentNPenalty--;
|
||||
}
|
||||
}
|
||||
|
||||
for (int currentIndex = contextSize; currentIndex < readLength; currentIndex++) {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(bases[currentIndex]);
|
||||
if (baseIndex == -1) { // ignore non-ACGT bases
|
||||
currentNPenalty = contextSize;
|
||||
currentKey = 0; // reset the key
|
||||
} else {
|
||||
// push this base's contribution onto the key: shift everything 2 bits, mask out the non-context bits, and add the new base and the length in
|
||||
currentKey = (currentKey >> 2) & mask;
|
||||
currentKey |= (baseIndex << newBaseOffset);
|
||||
currentKey |= contextSize;
|
||||
}
|
||||
|
||||
if (currentNPenalty == 0) {
|
||||
keys.add(currentKey);
|
||||
} else {
|
||||
currentNPenalty--;
|
||||
keys.add(-1);
|
||||
}
|
||||
}
|
||||
|
||||
return keys;
|
||||
}
|
||||
|
||||
public static int keyFromContext(final String dna) {
|
||||
return keyFromContext(dna.getBytes(), 0, dna.length());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a int representation of a given dna string.
|
||||
*
|
||||
* @param dna the dna sequence
|
||||
* @param start the start position in the byte array (inclusive)
|
||||
* @param end the end position in the array (exclusive)
|
||||
* @return the key representing the dna sequence
|
||||
*/
|
||||
private static int keyFromContext(final byte[] dna, final int start, final int end) {
|
||||
|
||||
int key = end - start;
|
||||
int bitOffset = LENGTH_BITS;
|
||||
for (int i = start; i < end; i++) {
|
||||
final int baseIndex = BaseUtils.simpleBaseToBaseIndex(dna[i]);
|
||||
if (baseIndex == -1) // ignore non-ACGT bases
|
||||
return -1;
|
||||
key |= (baseIndex << bitOffset);
|
||||
bitOffset += 2;
|
||||
}
|
||||
return key;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a key into the dna string representation.
|
||||
*
|
||||
* @param key the key representing the dna sequence
|
||||
* @return the dna sequence represented by the key
|
||||
*/
|
||||
public static String contextFromKey(final int key) {
|
||||
if (key < 0)
|
||||
throw new ReviewedGATKException("dna conversion cannot handle negative numbers. Possible overflow?");
|
||||
|
||||
final int length = key & LENGTH_MASK; // the first bits represent the length (in bp) of the context
|
||||
int mask = 48; // use the mask to pull out bases
|
||||
int offset = LENGTH_BITS;
|
||||
|
||||
StringBuilder dna = new StringBuilder();
|
||||
for (int i = 0; i < length; i++) {
|
||||
final int baseIndex = (key & mask) >> offset;
|
||||
dna.append((char)BaseUtils.baseIndexToSimpleBase(baseIndex));
|
||||
mask = mask << 2; // move the mask over to the next 2 bits
|
||||
offset += 2;
|
||||
}
|
||||
|
||||
return dna.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
// the maximum value is T (11 in binary) for each base in the context
|
||||
int length = Math.max(mismatchesContextSize, indelsContextSize); // the length of the context
|
||||
int key = length;
|
||||
int bitOffset = LENGTH_BITS;
|
||||
for (int i = 0; i <length ; i++) {
|
||||
key |= (3 << bitOffset);
|
||||
bitOffset += 2;
|
||||
}
|
||||
return key;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.gatk.engine.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*
|
||||
* The Covariate interface. A Covariate is a feature used in the recalibration that can be picked out of the read.
|
||||
* In general most error checking and adjustments to the data are done before the call to the covariates getValue methods in order to speed up the code.
|
||||
* This unfortunately muddies the code, but most of these corrections can be done per read while the covariates get called per base, resulting in a big speed up.
|
||||
*/
|
||||
|
||||
public interface Covariate {
|
||||
|
||||
/**
|
||||
* Initialize any member variables using the command-line arguments passed to the walker
|
||||
*
|
||||
* @param RAC the recalibration argument collection
|
||||
*/
|
||||
public void initialize(final RecalibrationArgumentCollection RAC);
|
||||
|
||||
/**
|
||||
* Calculates covariate values for all positions in the read.
|
||||
*
|
||||
* @param read the read to calculate the covariates on.
|
||||
* @param values the object to record the covariate values for every base in the read.
|
||||
*/
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values);
|
||||
|
||||
/**
|
||||
* Used to get the covariate's value from input (Recalibration Report) file during on-the-fly recalibration
|
||||
*
|
||||
* @param str the key in string type (read from the csv)
|
||||
* @return the key in it's correct type.
|
||||
*/
|
||||
public Object getValue(final String str);
|
||||
|
||||
/**
|
||||
* Converts the internal representation of the key to String format for file output.
|
||||
*
|
||||
* @param key the long representation of the key
|
||||
* @return a string representation of the key
|
||||
*/
|
||||
public String formatKey(final int key);
|
||||
|
||||
/**
|
||||
* Converts an Object key into a long key using only the lowest numberOfBits() bits
|
||||
*
|
||||
* Only necessary for on-the-fly recalibration when you have the object, but need to store it in memory in long format. For counting covariates
|
||||
* the getValues method already returns all values in long format.
|
||||
*
|
||||
* @param value the object corresponding to the covariate
|
||||
* @return a long representation of the object
|
||||
*/
|
||||
public int keyFromValue(final Object value);
|
||||
|
||||
/**
|
||||
* Returns the maximum value possible for any key representing this covariate
|
||||
*
|
||||
* @return the maximum value possible for any key representing this covariate
|
||||
*/
|
||||
public int maximumKeyValue();
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,286 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.gatk.engine.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.NGSPlatform;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*
|
||||
* The Cycle covariate.
|
||||
* For Solexa the cycle is simply the position in the read (counting backwards if it is a negative strand read)
|
||||
* For 454 the cycle is the TACG flow cycle, that is, each flow grabs all the TACG's in order in a single cycle
|
||||
* For example, for the read: AAACCCCGAAATTTTTACTG
|
||||
* the cycle would be 11111111222333333344
|
||||
* For SOLiD the cycle is a more complicated mixture of ligation cycle and primer round
|
||||
*/
|
||||
|
||||
public class CycleCovariate implements StandardCovariate {
|
||||
|
||||
private int MAXIMUM_CYCLE_VALUE;
|
||||
public static final int CUSHION_FOR_INDELS = 4;
|
||||
private String default_platform = null;
|
||||
|
||||
private static final EnumSet<NGSPlatform> DISCRETE_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.ILLUMINA, NGSPlatform.SOLID, NGSPlatform.PACBIO, NGSPlatform.COMPLETE_GENOMICS);
|
||||
private static final EnumSet<NGSPlatform> FLOW_CYCLE_PLATFORMS = EnumSet.of(NGSPlatform.LS454, NGSPlatform.ION_TORRENT);
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
this.MAXIMUM_CYCLE_VALUE = RAC.MAXIMUM_CYCLE_VALUE;
|
||||
|
||||
if (RAC.DEFAULT_PLATFORM != null && !NGSPlatform.isKnown(RAC.DEFAULT_PLATFORM))
|
||||
throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform.");
|
||||
|
||||
if (RAC.DEFAULT_PLATFORM != null)
|
||||
default_platform = RAC.DEFAULT_PLATFORM;
|
||||
}
|
||||
|
||||
// Used to pick out the covariate's value from attributes of the read
|
||||
@Override
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final int readLength = read.getReadLength();
|
||||
final NGSPlatform ngsPlatform = default_platform == null ? read.getNGSPlatform() : NGSPlatform.fromReadGroupPL(default_platform);
|
||||
|
||||
// Discrete cycle platforms
|
||||
if (DISCRETE_CYCLE_PLATFORMS.contains(ngsPlatform)) {
|
||||
final int readOrderFactor = read.getReadPairedFlag() && read.getSecondOfPairFlag() ? -1 : 1;
|
||||
final int increment;
|
||||
int cycle;
|
||||
if (read.getReadNegativeStrandFlag()) {
|
||||
cycle = readLength * readOrderFactor;
|
||||
increment = -1 * readOrderFactor;
|
||||
}
|
||||
else {
|
||||
cycle = readOrderFactor;
|
||||
increment = readOrderFactor;
|
||||
}
|
||||
|
||||
final int MAX_CYCLE_FOR_INDELS = readLength - CUSHION_FOR_INDELS - 1;
|
||||
for (int i = 0; i < readLength; i++) {
|
||||
final int substitutionKey = keyFromCycle(cycle);
|
||||
final int indelKey = (i < CUSHION_FOR_INDELS || i > MAX_CYCLE_FOR_INDELS) ? -1 : substitutionKey;
|
||||
values.addCovariate(substitutionKey, indelKey, indelKey, i);
|
||||
cycle += increment;
|
||||
}
|
||||
}
|
||||
|
||||
// Flow cycle platforms
|
||||
else if (FLOW_CYCLE_PLATFORMS.contains(ngsPlatform)) {
|
||||
|
||||
final byte[] bases = read.getReadBases();
|
||||
|
||||
// Differentiate between first and second of pair.
|
||||
// The sequencing machine cycle keeps incrementing for the second read in a pair. So it is possible for a read group
|
||||
// to have an error affecting quality at a particular cycle on the first of pair which carries over to the second of pair.
|
||||
// Therefore the cycle covariate must differentiate between first and second of pair reads.
|
||||
// This effect can not be corrected by pulling out the first of pair and second of pair flags into a separate covariate because
|
||||
// the current sequential model would consider the effects independently instead of jointly.
|
||||
final boolean multiplyByNegative1 = read.getReadPairedFlag() && read.getSecondOfPairFlag();
|
||||
|
||||
int cycle = multiplyByNegative1 ? -1 : 1; // todo -- check if this is the right behavior for mate paired reads in flow cycle platforms.
|
||||
|
||||
// BUGBUG: Consider looking at degradation of base quality scores in homopolymer runs to detect when the cycle incremented even though the nucleotide didn't change
|
||||
// For example, AAAAAAA was probably read in two flow cycles but here we count it as one
|
||||
if (!read.getReadNegativeStrandFlag()) { // Forward direction
|
||||
int iii = 0;
|
||||
while (iii < readLength) {
|
||||
while (iii < readLength && bases[iii] == (byte) 'T') {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'A') {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'C') {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
while (iii < readLength && bases[iii] == (byte) 'G') {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
if (iii < readLength) {
|
||||
if (multiplyByNegative1)
|
||||
cycle--;
|
||||
else
|
||||
cycle++;
|
||||
}
|
||||
if (iii < readLength && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii++;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
else { // Negative direction
|
||||
int iii = readLength - 1;
|
||||
while (iii >= 0) {
|
||||
while (iii >= 0 && bases[iii] == (byte) 'T') {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'A') {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'C') {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
while (iii >= 0 && bases[iii] == (byte) 'G') {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
if (iii >= 0) {
|
||||
if (multiplyByNegative1)
|
||||
cycle--;
|
||||
else
|
||||
cycle++;
|
||||
}
|
||||
if (iii >= 0 && !BaseUtils.isRegularBase(bases[iii])) {
|
||||
final int key = keyFromCycle(cycle);
|
||||
values.addCovariate(key, key, key, iii);
|
||||
iii--;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown platforms
|
||||
else {
|
||||
throw new UserException("The platform (" + read.getReadGroup().getPlatform()
|
||||
+ ") associated with read group " + read.getReadGroup()
|
||||
+ " is not a recognized platform. Allowable options are " + NGSPlatform.knownPlatformsString());
|
||||
}
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Object getValue(final String str) {
|
||||
return Integer.parseInt(str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final int key) {
|
||||
int cycle = key >> 1; // shift so we can remove the "sign" bit
|
||||
if ( (key & 1) != 0 ) // is the last bit set?
|
||||
cycle *= -1; // then the cycle is negative
|
||||
return String.format("%d", cycle);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int keyFromValue(final Object value) {
|
||||
return (value instanceof String) ? keyFromCycle(Integer.parseInt((String) value)) : keyFromCycle((Integer) value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
return (MAXIMUM_CYCLE_VALUE << 1) + 1;
|
||||
}
|
||||
|
||||
private int keyFromCycle(final int cycle) {
|
||||
// no negative values because values must fit into the first few bits of the long
|
||||
int result = Math.abs(cycle);
|
||||
if ( result > MAXIMUM_CYCLE_VALUE )
|
||||
throw new UserException("The maximum allowed value for the cycle is " + MAXIMUM_CYCLE_VALUE + ", but a larger cycle (" + result + ") was detected. Please use the --maximum_cycle_value argument to increase this value (at the expense of requiring more memory to run)");
|
||||
|
||||
result = result << 1; // shift so we can add the "sign" bit
|
||||
if ( cycle < 0 )
|
||||
result++; // negative cycles get the lower-most bit set
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Input</h3>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Output</h3>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Examples</h3>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public interface ExperimentalCovariate extends Covariate {}
|
||||
|
|
@ -0,0 +1,129 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.gatk.engine.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 3, 2009
|
||||
*
|
||||
* The Reported Quality Score covariate.
|
||||
*/
|
||||
|
||||
public class QualityScoreCovariate implements RequiredCovariate {
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {}
|
||||
|
||||
@Override
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final byte[] baseQualities = read.getBaseQualities();
|
||||
final byte[] baseInsertionQualities = read.getBaseInsertionQualities();
|
||||
final byte[] baseDeletionQualities = read.getBaseDeletionQualities();
|
||||
|
||||
for (int i = 0; i < baseQualities.length; i++) {
|
||||
values.addCovariate((int)baseQualities[i], (int)baseInsertionQualities[i], (int)baseDeletionQualities[i], i);
|
||||
}
|
||||
}
|
||||
|
||||
// Used to get the covariate's value from input csv file during on-the-fly recalibration
|
||||
@Override
|
||||
public final Object getValue(final String str) {
|
||||
return Byte.parseByte(str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String formatKey(final int key) {
|
||||
return String.format("%d", key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int keyFromValue(final Object value) {
|
||||
return (value instanceof String) ? (int)Byte.parseByte((String) value) : (int)(Byte) value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int maximumKeyValue() {
|
||||
return QualityUtils.MAX_SAM_QUAL_SCORE;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMReadGroupRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Oct 30, 2009
|
||||
*
|
||||
* The Read Group covariate.
|
||||
*/
|
||||
|
||||
public class ReadGroupCovariate implements RequiredCovariate {
|
||||
|
||||
private final HashMap<String, Integer> readGroupLookupTable = new HashMap<String, Integer>();
|
||||
private final HashMap<Integer, String> readGroupReverseLookupTable = new HashMap<Integer, String>();
|
||||
private int nextId = 0;
|
||||
private String forceReadGroup;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
forceReadGroup = RAC.FORCE_READGROUP;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
final String readGroupId = readGroupValueFromRG(read.getReadGroup());
|
||||
final int key = keyForReadGroup(readGroupId);
|
||||
|
||||
final int l = read.getReadLength();
|
||||
for (int i = 0; i < l; i++)
|
||||
values.addCovariate(key, key, key, i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Object getValue(final String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String formatKey(final int key) {
|
||||
// This method is synchronized so that we don't attempt to do a get()
|
||||
// from the reverse lookup table while that table is being updated
|
||||
return readGroupReverseLookupTable.get(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int keyFromValue(final Object value) {
|
||||
return keyForReadGroup((String) value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the mapping from read group names to integer key values for all read groups in this covariate
|
||||
* @return a set of mappings from read group names -> integer key values
|
||||
*/
|
||||
public Set<Map.Entry<String, Integer>> getKeyMap() {
|
||||
return readGroupLookupTable.entrySet();
|
||||
}
|
||||
|
||||
private int keyForReadGroup(final String readGroupId) {
|
||||
// Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR),
|
||||
// synchronize only the table updates.
|
||||
|
||||
// Before entering the synchronized block, check to see if this read group is not in our tables.
|
||||
// If it's not, either we will have to insert it, OR another thread will insert it first.
|
||||
// This preliminary check avoids doing any synchronization most of the time.
|
||||
if ( ! readGroupLookupTable.containsKey(readGroupId) ) {
|
||||
|
||||
synchronized ( this ) {
|
||||
|
||||
// Now we need to make sure the key is STILL not there, since another thread may have come along
|
||||
// and inserted it while we were waiting to enter this synchronized block!
|
||||
if ( ! readGroupLookupTable.containsKey(readGroupId) ) {
|
||||
readGroupLookupTable.put(readGroupId, nextId);
|
||||
readGroupReverseLookupTable.put(nextId, readGroupId);
|
||||
nextId++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return readGroupLookupTable.get(readGroupId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int maximumKeyValue() {
|
||||
// Synchronized so that we don't query table size while the tables are being updated
|
||||
return readGroupLookupTable.size() - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* If the sample has a PU tag annotation, return that. If not, return the read group id.
|
||||
*
|
||||
* @param rg the read group record
|
||||
* @return platform unit or readgroup id
|
||||
*/
|
||||
private String readGroupValueFromRG(final GATKSAMReadGroupRecord rg) {
|
||||
if ( forceReadGroup != null )
|
||||
return forceReadGroup;
|
||||
|
||||
final String platformUnit = rg.getPlatformUnit();
|
||||
return platformUnit == null ? rg.getId() : platformUnit;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,285 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.gatk.engine.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.gatk.engine.recalibration.ReadCovariates;
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.gatk.utils.BaseUtils;
import org.broadinstitute.gatk.utils.collections.Pair;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
public abstract class RepeatCovariate implements ExperimentalCovariate {
|
||||
protected int MAX_REPEAT_LENGTH;
|
||||
protected int MAX_STR_UNIT_LENGTH;
|
||||
private final HashMap<String, Integer> repeatLookupTable = new HashMap<String, Integer>();
|
||||
private final HashMap<Integer, String> repeatReverseLookupTable = new HashMap<Integer, String>();
|
||||
private int nextId = 0;
|
||||
|
||||
// Initialize any member variables using the command-line arguments passed to the walkers
|
||||
@Override
|
||||
public void initialize(final RecalibrationArgumentCollection RAC) {
|
||||
MAX_STR_UNIT_LENGTH = RAC.MAX_STR_UNIT_LENGTH;
|
||||
MAX_REPEAT_LENGTH = RAC.MAX_REPEAT_LENGTH;
|
||||
}
|
||||
|
||||
public void initialize(final int MAX_STR_UNIT_LENGTH, final int MAX_REPEAT_LENGTH) {
|
||||
this.MAX_STR_UNIT_LENGTH = MAX_STR_UNIT_LENGTH;
|
||||
this.MAX_REPEAT_LENGTH = MAX_REPEAT_LENGTH;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
|
||||
// store the original bases and then write Ns over low quality ones
|
||||
final byte[] originalBases = read.getReadBases().clone();
|
||||
|
||||
final boolean negativeStrand = read.getReadNegativeStrandFlag();
|
||||
byte[] bases = read.getReadBases();
|
||||
if (negativeStrand)
|
||||
bases = BaseUtils.simpleReverseComplement(bases);
|
||||
|
||||
// don't record reads with N's
|
||||
if (!BaseUtils.isAllRegularBases(bases))
|
||||
return;
|
||||
|
||||
for (int i = 0; i < bases.length; i++) {
|
||||
final Pair<byte[], Integer> res = findTandemRepeatUnits(bases, i);
|
||||
// to merge repeat unit and repeat length to get covariate value:
|
||||
final String repeatID = getCovariateValueFromUnitAndLength(res.first, res.second);
|
||||
final int key = keyForRepeat(repeatID);
|
||||
|
||||
final int readOffset = (negativeStrand ? bases.length - i - 1 : i);
|
||||
values.addCovariate(key, key, key, readOffset);
|
||||
}
|
||||
|
||||
// put the original bases back in
|
||||
read.setReadBases(originalBases);
|
||||
|
||||
}
|
||||
|
||||
public Pair<byte[], Integer> findTandemRepeatUnits(byte[] readBases, int offset) {
|
||||
int maxBW = 0;
|
||||
byte[] bestBWRepeatUnit = new byte[]{readBases[offset]};
|
||||
for (int str = 1; str <= MAX_STR_UNIT_LENGTH; str++) {
|
||||
// fix repeat unit length
|
||||
//edge case: if candidate tandem repeat unit falls beyond edge of read, skip
|
||||
if (offset+1-str < 0)
|
||||
break;
|
||||
|
||||
// get backward repeat unit and # repeats
|
||||
byte[] backwardRepeatUnit = Arrays.copyOfRange(readBases, offset - str + 1, offset + 1);
|
||||
maxBW = GATKVariantContextUtils.findNumberOfRepetitions(backwardRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
if (maxBW > 1) {
|
||||
bestBWRepeatUnit = backwardRepeatUnit.clone();
|
||||
break;
|
||||
}
|
||||
}
|
||||
byte[] bestRepeatUnit = bestBWRepeatUnit;
|
||||
int maxRL = maxBW;
|
||||
|
||||
if (offset < readBases.length-1) {
|
||||
byte[] bestFWRepeatUnit = new byte[]{readBases[offset+1]};
|
||||
int maxFW = 0;
|
||||
for (int str = 1; str <= MAX_STR_UNIT_LENGTH; str++) {
|
||||
// fix repeat unit length
|
||||
//edge case: if candidate tandem repeat unit falls beyond edge of read, skip
|
||||
if (offset+str+1 > readBases.length)
|
||||
break;
|
||||
|
||||
// get forward repeat unit and # repeats
|
||||
byte[] forwardRepeatUnit = Arrays.copyOfRange(readBases, offset +1, offset+str+1);
|
||||
maxFW = GATKVariantContextUtils.findNumberOfRepetitions(forwardRepeatUnit, Arrays.copyOfRange(readBases, offset + 1, readBases.length), true);
|
||||
if (maxFW > 1) {
|
||||
bestFWRepeatUnit = forwardRepeatUnit.clone();
|
||||
break;
|
||||
}
|
||||
}
|
||||
// if FW repeat unit = BW repeat unit it means we're in the middle of a tandem repeat - add FW and BW components
|
||||
if (Arrays.equals(bestFWRepeatUnit, bestBWRepeatUnit)) {
|
||||
maxRL = maxBW + maxFW;
|
||||
bestRepeatUnit = bestFWRepeatUnit; // arbitrary
|
||||
}
|
||||
else {
|
||||
// tandem repeat starting forward from current offset.
|
||||
// It could be the case that best BW unit was differnet from FW unit, but that BW still contains FW unit.
|
||||
// For example, TTCTT(C) CCC - at (C) place, best BW unit is (TTC)2, best FW unit is (C)3.
|
||||
// but correct representation at that place might be (C)4.
|
||||
// Hence, if the FW and BW units don't match, check if BW unit can still be a part of FW unit and add
|
||||
// representations to total
|
||||
maxBW = GATKVariantContextUtils.findNumberOfRepetitions(bestFWRepeatUnit, Arrays.copyOfRange(readBases, 0, offset + 1), false);
|
||||
maxRL = maxFW + maxBW;
|
||||
bestRepeatUnit = bestFWRepeatUnit;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
if(maxRL > MAX_REPEAT_LENGTH) { maxRL = MAX_REPEAT_LENGTH; }
|
||||
return new Pair<byte[], Integer>(bestRepeatUnit, maxRL);
|
||||
|
||||
}
|
||||
@Override
|
||||
public final Object getValue(final String str) {
|
||||
return str;
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized String formatKey(final int key) {
|
||||
// This method is synchronized so that we don't attempt to do a get()
|
||||
// from the reverse lookup table while that table is being updated
|
||||
return repeatReverseLookupTable.get(key);
|
||||
}
|
||||
|
||||
@Requires({"repeatLength>=0", "repeatFromUnitAndLength != null"})
|
||||
@Ensures("result != null")
|
||||
protected abstract String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength);
|
||||
|
||||
|
||||
@Override
|
||||
public int keyFromValue(final Object value) {
|
||||
return keyForRepeat((String) value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the mapping from read group names to integer key values for all read groups in this covariate
|
||||
* @return a set of mappings from read group names -> integer key values
|
||||
*/
|
||||
public Set<Map.Entry<String, Integer>> getKeyMap() {
|
||||
return repeatLookupTable.entrySet();
|
||||
}
|
||||
|
||||
private int keyForRepeat(final String repeatID) {
|
||||
// Rather than synchronize this entire method (which would be VERY expensive for walkers like the BQSR),
|
||||
// synchronize only the table updates.
|
||||
|
||||
// Before entering the synchronized block, check to see if this read group is not in our tables.
|
||||
// If it's not, either we will have to insert it, OR another thread will insert it first.
|
||||
// This preliminary check avoids doing any synchronization most of the time.
|
||||
if ( ! repeatLookupTable.containsKey(repeatID) ) {
|
||||
|
||||
synchronized ( this ) {
|
||||
|
||||
// Now we need to make sure the key is STILL not there, since another thread may have come along
|
||||
// and inserted it while we were waiting to enter this synchronized block!
|
||||
if ( ! repeatLookupTable.containsKey(repeatID) ) {
|
||||
repeatLookupTable.put(repeatID, nextId);
|
||||
repeatReverseLookupTable.put(nextId, repeatID);
|
||||
nextId++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return repeatLookupTable.get(repeatID);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Splits repeat unit and num repetitions from covariate value.
|
||||
* For example, if value if "ATG4" it returns (ATG,4)
|
||||
* @param value Covariate value
|
||||
* @return Split pair
|
||||
*/
|
||||
@Requires("value != null")
|
||||
@Ensures({"result.first != null","result.second>=0"})
|
||||
public static Pair<String,Integer> getRUandNRfromCovariate(final String value) {
|
||||
|
||||
int k = 0;
|
||||
for ( k=0; k < value.length(); k++ ) {
|
||||
if (!BaseUtils.isRegularBase(value.getBytes()[k]))
|
||||
break;
|
||||
}
|
||||
Integer nr = Integer.valueOf(value.substring(k,value.length())); // will throw NumberFormatException if format illegal
|
||||
if (k == value.length() || nr <= 0)
|
||||
throw new IllegalStateException("Covariate is not of form (Repeat Unit) + Integer");
|
||||
|
||||
return new Pair<String,Integer>(value.substring(0,k), nr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets bases from tandem repeat representation (Repeat Unit),(Number of Repeats).
|
||||
* For example, (AGC),3 returns AGCAGCAGC
|
||||
* @param repeatUnit Tandem repeat unit
|
||||
* @param numRepeats Number of repeats
|
||||
* @return Expanded String
|
||||
*/
|
||||
@Requires({"numRepeats > 0","repeatUnit != null"})
|
||||
@Ensures("result != null")
|
||||
public static String getBasesFromRUandNR(final String repeatUnit, final int numRepeats) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (int i=0; i < numRepeats; i++)
|
||||
sb.append(repeatUnit);
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
// version given covariate key
|
||||
public static String getBasesFromRUandNR(final String covariateValue) {
|
||||
Pair<String,Integer> pair = getRUandNRfromCovariate(covariateValue);
|
||||
return getBasesFromRUandNR(pair.getFirst(), pair.getSecond());
|
||||
}
|
||||
|
||||
@Override
|
||||
public abstract int maximumKeyValue();
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,74 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
|
||||
public class RepeatLengthCovariate extends RepeatCovariate {
|
||||
|
||||
@Requires({"repeatLength>=0", "repeatFromUnitAndLength != null"})
|
||||
@Ensures("result != null")
|
||||
protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) {
|
||||
return String.format("%d",repeatLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int maximumKeyValue() {
|
||||
// Synchronized so that we don't query table size while the tables are being updated
|
||||
//return repeatLookupTable.size() - 1;
|
||||
// max possible values of covariate: for repeat unit, length is up to MAX_STR_UNIT_LENGTH,
|
||||
// so we have 4^MAX_STR_UNIT_LENGTH * MAX_REPEAT_LENGTH possible values
|
||||
return (1+MAX_REPEAT_LENGTH);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
|
||||
|
||||
public class RepeatUnitAndLengthCovariate extends RepeatCovariate {
|
||||
|
||||
@Requires({"repeatLength>=0", "repeatFromUnitAndLength != null"})
|
||||
@Ensures("result != null")
|
||||
protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) {
|
||||
return new String(repeatFromUnitAndLength) + String.format("%d",repeatLength);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized int maximumKeyValue() {
|
||||
// Synchronized so that we don't query table size while the tables are being updated
|
||||
//return repeatLookupTable.size() - 1;
|
||||
// max possible values of covariate: for repeat unit, length is up to MAX_STR_UNIT_LENGTH,
|
||||
// so we have 4^MAX_STR_UNIT_LENGTH * MAX_REPEAT_LENGTH possible values
|
||||
return (1<<(2*MAX_STR_UNIT_LENGTH)) * MAX_REPEAT_LENGTH +1;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 11/3/12
|
||||
*/
|
||||
|
||||
public class RepeatUnitCovariate extends RepeatCovariate {
|
||||
|
||||
protected String getCovariateValueFromUnitAndLength(final byte[] repeatFromUnitAndLength, final int repeatLength) {
|
||||
return new String(repeatFromUnitAndLength);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public synchronized int maximumKeyValue() {
|
||||
// Synchronized so that we don't query table size while the tables are being updated
|
||||
//return repeatLookupTable.size() - 1;
|
||||
// max possible values of covariate: for repeat unit, length is up to MAX_STR_UNIT_LENGTH,
|
||||
// so we have 4^MAX_STR_UNIT_LENGTH * MAX_REPEAT_LENGTH possible values
|
||||
return (1<<(2*MAX_STR_UNIT_LENGTH)) +1;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Input</h3>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Output</h3>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Examples</h3>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public interface RequiredCovariate extends Covariate {
    // Intentionally empty: this type adds nothing to what Covariate already declares.
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.engine.recalibration.covariates;
|
||||
|
||||
/**
|
||||
* Marker interface extending {@link Covariate}; it declares no members of its own.
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Input</h3>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Output</h3>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h3>Examples</h3>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public interface StandardCovariate extends Covariate {
    // Intentionally empty: this type adds nothing to what Covariate already declares.
}
|
||||
|
|
@ -58,9 +58,9 @@ import htsjdk.variant.vcf.VCFConstants;
|
|||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineCount;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeAlleleCounts;
|
||||
|
|
|
|||
|
|
@ -52,9 +52,9 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
|
|
@ -65,6 +65,7 @@ import htsjdk.variant.vcf.VCFHeaderLine;
|
|||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.ChromosomeCountConstants;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
|
||||
|
|
|
|||
|
|
@ -52,23 +52,15 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import cern.jet.math.Arithmetic;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.MostLikelyAllele;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
|
|
|||
|
|
@ -51,19 +51,15 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
|
|||
|
|
@ -51,15 +51,14 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
|
|
|
|||
|
|
@ -51,10 +51,10 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
@ -70,7 +70,6 @@ import org.broadinstitute.gatk.utils.pileup.PileupElement;
|
|||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.gatk.utils.sam.AlignmentUtils;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -52,13 +52,12 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import htsjdk.tribble.util.popgen.HardyWeinbergCalculation;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.WorkInProgressAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
|
|||
|
|
@ -52,9 +52,9 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
|
|
|
|||
|
|
@ -51,16 +51,16 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.samples.Trio;
|
||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.MendelianViolation;
|
||||
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
|
|||
|
|
@ -51,10 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.samples.Sample;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.samples.Trio;
|
||||
import org.broadinstitute.gatk.engine.walkers.Walker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
|
|
@ -62,7 +61,7 @@ import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAn
|
|||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.gatk.utils.MendelianViolation;
|
||||
import org.broadinstitute.gatk.engine.samples.MendelianViolation;
|
||||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
|
|
|||
|
|
@ -51,10 +51,10 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
@ -189,7 +189,7 @@ public class QualByDepth extends InfoFieldAnnotation implements StandardAnnotati
|
|||
if ( QD < MAX_QD_BEFORE_FIXING ) {
|
||||
return QD;
|
||||
} else {
|
||||
return IDEAL_HIGH_QD + GenomeAnalysisEngine.getRandomGenerator().nextGaussian() * JITTER_SIGMA;
|
||||
return IDEAL_HIGH_QD + Utils.getRandomGenerator().nextGaussian() * JITTER_SIGMA;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
|
|||
|
|
@ -52,9 +52,9 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
|
|
|
|||
|
|
@ -55,12 +55,11 @@ import htsjdk.variant.variantcontext.Allele;
|
|||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.vcf.VCFFormatHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFHeaderLine;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
|
|
|
|||
|
|
@ -51,12 +51,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import htsjdk.variant.variantcontext.Genotype;
|
||||
import htsjdk.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.StandardAnnotation;
|
||||
|
|
|
|||
|
|
@ -51,12 +51,11 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.samples.Sample;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.RodRequiringAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
|
|
|
|||
|
|
@ -51,9 +51,9 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
|
|
|
|||
|
|
@ -56,17 +56,17 @@ import org.broadinstitute.gatk.utils.commandline.Argument;
|
|||
import org.broadinstitute.gatk.utils.commandline.Input;
|
||||
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.walkers.RodWalker;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.utils.recalibration.RecalUtils;
|
||||
import org.broadinstitute.gatk.utils.recalibration.RecalibrationReport;
|
||||
import org.broadinstitute.gatk.utils.recalibration.BaseRecalibration;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalUtils;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalibrationReport;
|
||||
import org.broadinstitute.gatk.engine.recalibration.BaseRecalibration;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
|
|
|
|||
|
|
@ -1,139 +0,0 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.bqsr;
|
||||
|
||||
import org.apache.commons.collections.CollectionUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.commandline.Gatherer;
|
||||
import org.broadinstitute.gatk.engine.report.GATKReport;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.recalibration.RecalibrationReport;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* User: carneiro
|
||||
* Date: 3/29/11
|
||||
*/
|
||||
|
||||
|
||||
public class BQSRGatherer extends Gatherer {
|
||||
|
||||
private static final Logger logger = Logger.getLogger(BQSRGatherer.class);
|
||||
private static final String EMPTY_INPUT_LIST = "list of inputs files is empty or there is no usable data in any input file";
|
||||
private static final String MISSING_OUTPUT_FILE = "missing output file name";
|
||||
private static final String MISSING_READ_GROUPS = "Missing read group(s)";
|
||||
|
||||
@Override
|
||||
public void gather(final List<File> inputs, final File output) {
|
||||
final PrintStream outputFile;
|
||||
try {
|
||||
outputFile = new PrintStream(output);
|
||||
} catch(FileNotFoundException e) {
|
||||
throw new UserException.MissingArgument("output", MISSING_OUTPUT_FILE);
|
||||
}
|
||||
final GATKReport report = gatherReport(inputs);
|
||||
report.print(outputFile);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gathers the input recalibration reports into a single report.
|
||||
*
|
||||
* @param inputs Input recalibration GATK reports
|
||||
* @return gathered recalibration GATK report
|
||||
*/
|
||||
public static GATKReport gatherReport(final List<File> inputs) {
|
||||
final SortedSet<String> allReadGroups = new TreeSet<String>();
|
||||
final LinkedHashMap<File, Set<String>> inputReadGroups = new LinkedHashMap<File, Set<String>>();
|
||||
|
||||
// Get the read groups from each input report
|
||||
for (final File input : inputs) {
|
||||
final Set<String> readGroups = RecalibrationReport.getReadGroups(input);
|
||||
inputReadGroups.put(input, readGroups);
|
||||
allReadGroups.addAll(readGroups);
|
||||
}
|
||||
|
||||
// Log the read groups that are missing from specific inputs
|
||||
for (Map.Entry<File, Set<String>> entry: inputReadGroups.entrySet()) {
|
||||
final File input = entry.getKey();
|
||||
final Set<String> readGroups = entry.getValue();
|
||||
if (allReadGroups.size() != readGroups.size()) {
|
||||
// Since this is not completely unexpected, more than debug, but less than a proper warning.
|
||||
logger.info(MISSING_READ_GROUPS + ": " + input.getAbsolutePath());
|
||||
for (final Object readGroup: CollectionUtils.subtract(allReadGroups, readGroups)) {
|
||||
logger.info(" " + readGroup);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
RecalibrationReport generalReport = null;
|
||||
for (File input : inputs) {
|
||||
final RecalibrationReport inputReport = new RecalibrationReport(input, allReadGroups);
|
||||
if( inputReport.isEmpty() ) { continue; }
|
||||
|
||||
if (generalReport == null)
|
||||
generalReport = inputReport;
|
||||
else
|
||||
generalReport.combine(inputReport);
|
||||
}
|
||||
if (generalReport == null)
|
||||
throw new ReviewedGATKException(EMPTY_INPUT_LIST);
|
||||
|
||||
generalReport.calculateQuantizedQualities();
|
||||
|
||||
return generalReport.createGATKReport();
|
||||
}
|
||||
}
|
||||
|
|
@ -55,15 +55,16 @@ import htsjdk.samtools.reference.IndexedFastaSequenceFile;
|
|||
import htsjdk.samtools.CigarElement;
|
||||
import htsjdk.samtools.SAMFileHeader;
|
||||
import htsjdk.tribble.Feature;
|
||||
import org.broadinstitute.gatk.engine.recalibration.*;
|
||||
import org.broadinstitute.gatk.engine.walkers.*;
|
||||
import org.broadinstitute.gatk.utils.commandline.Advanced;
|
||||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.filters.*;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.baq.BAQ;
|
||||
|
|
@ -74,7 +75,7 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.utils.recalibration.*;
|
||||
import org.broadinstitute.gatk.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ import com.google.java.contract.Ensures;
|
|||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.recalibration.EventType;
|
||||
import org.broadinstitute.gatk.utils.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.gatk.engine.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,420 +0,0 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.bqsr;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import htsjdk.tribble.Feature;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.engine.report.GATKReportTable;
|
||||
import org.broadinstitute.gatk.utils.Utils;
|
||||
import org.broadinstitute.gatk.utils.exceptions.GATKException;
|
||||
import org.broadinstitute.gatk.utils.recalibration.RecalUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Nov 27, 2009
|
||||
*
|
||||
* A collection of the arguments that are used for BQSR. Used to be common to both CovariateCounterWalker and TableRecalibrationWalker.
|
||||
* This set of arguments will also be passed to the constructor of every Covariate when it is instantiated.
|
||||
*/
|
||||
|
||||
public class RecalibrationArgumentCollection implements Cloneable {
|
||||
|
||||
/**
|
||||
* This algorithm treats every reference mismatch as an indication of error. However, real genetic variation is expected to mismatch the reference,
|
||||
* so it is critical that a database of known polymorphic sites is given to the tool in order to skip over those sites. This tool accepts any number of RodBindings (VCF, Bed, etc.)
|
||||
* for use as this database. For users wishing to exclude an interval list of known variation simply use -XL my.interval.list to skip over processing those sites.
|
||||
* Please note however that the statistics reported by the tool will not accurately reflected those sites skipped by the -XL argument.
|
||||
*/
|
||||
@Input(fullName = "knownSites", shortName = "knownSites", doc = "A database of known polymorphic sites to skip over in the recalibration algorithm", required = false)
|
||||
public List<RodBinding<Feature>> knownSites = Collections.emptyList();
|
||||
|
||||
/**
|
||||
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate. Use '/dev/stdout' to print to standard out.
|
||||
*/
|
||||
@Gather(BQSRGatherer.class)
|
||||
@Output(doc = "The output recalibration table file to create", required = true)
|
||||
public File RECAL_TABLE_FILE = null;
|
||||
public PrintStream RECAL_TABLE;
|
||||
|
||||
/**
|
||||
* Note that the --list argument requires a fully resolved and correct command-line to work.
|
||||
*/
|
||||
@Argument(fullName = "list", shortName = "ls", doc = "List the available covariates and exit", required = false)
|
||||
public boolean LIST_ONLY = false;
|
||||
|
||||
/**
|
||||
* Note that the ReadGroup and QualityScore covariates are required and do not need to be specified.
|
||||
* Also, unless --no_standard_covs is specified, the Cycle and Context covariates are standard and are included by default.
|
||||
* Use the --list argument to see the available covariates.
|
||||
*/
|
||||
@Argument(fullName = "covariate", shortName = "cov", doc = "One or more covariates to be used in the recalibration. Can be specified multiple times", required = false)
|
||||
public String[] COVARIATES = null;
|
||||
|
||||
/*
|
||||
* The Cycle and Context covariates are standard and are included by default unless this argument is provided.
|
||||
* Note that the ReadGroup and QualityScore covariates are required and cannot be excluded.
|
||||
*/
|
||||
@Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false)
|
||||
public boolean DO_NOT_USE_STANDARD_COVARIATES = false;
|
||||
|
||||
/**
|
||||
* This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
|
||||
public boolean RUN_WITHOUT_DBSNP = false;
|
||||
|
||||
/**
|
||||
* BaseRecalibrator accepts a --solid_recal_mode <MODE> flag which governs how the recalibrator handles the
|
||||
* reads which have had the reference inserted because of color space inconsistencies.
|
||||
*/
|
||||
@Argument(fullName = "solid_recal_mode", shortName = "sMode", required = false, doc = "How should we recalibrate solid bases in which the reference was inserted? Options = DO_NOTHING, SET_Q_ZERO, SET_Q_ZERO_BASE_N, or REMOVE_REF_BIAS")
|
||||
public RecalUtils.SOLID_RECAL_MODE SOLID_RECAL_MODE = RecalUtils.SOLID_RECAL_MODE.SET_Q_ZERO;
|
||||
|
||||
/**
|
||||
* BaseRecalibrator accepts a --solid_nocall_strategy <MODE> flag which governs how the recalibrator handles
|
||||
* no calls in the color space tag. Unfortunately because of the reference inserted bases mentioned above, reads with no calls in
|
||||
* their color space tag can not be recalibrated.
|
||||
*/
|
||||
@Argument(fullName = "solid_nocall_strategy", shortName = "solid_nocall_strategy", doc = "Defines the behavior of the recalibrator when it encounters no calls in the color space. Options = THROW_EXCEPTION, LEAVE_READ_UNRECALIBRATED, or PURGE_READ", required = false)
|
||||
public RecalUtils.SOLID_NOCALL_STRATEGY SOLID_NOCALL_STRATEGY = RecalUtils.SOLID_NOCALL_STRATEGY.THROW_EXCEPTION;
|
||||
|
||||
/**
|
||||
* The context covariate will use a context of this size to calculate its covariate value for base mismatches. Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
|
||||
*/
|
||||
@Argument(fullName = "mismatches_context_size", shortName = "mcs", doc = "Size of the k-mer context to be used for base mismatches", required = false)
|
||||
public int MISMATCHES_CONTEXT_SIZE = 2;
|
||||
|
||||
/**
|
||||
* The context covariate will use a context of this size to calculate its covariate value for base insertions and deletions. Must be between 1 and 13 (inclusive). Note that higher values will increase runtime and required java heap size.
|
||||
*/
|
||||
@Argument(fullName = "indels_context_size", shortName = "ics", doc = "Size of the k-mer context to be used for base insertions and deletions", required = false)
|
||||
public int INDELS_CONTEXT_SIZE = 3;
|
||||
|
||||
/**
|
||||
* The cycle covariate will generate an error if it encounters a cycle greater than this value.
|
||||
* This argument is ignored if the Cycle covariate is not used.
|
||||
*/
|
||||
@Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "The maximum cycle value permitted for the Cycle covariate", required = false)
|
||||
public int MAXIMUM_CYCLE_VALUE = 500;
|
||||
|
||||
/**
|
||||
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off. [default is off]
|
||||
*/
|
||||
@Argument(fullName = "mismatches_default_quality", shortName = "mdq", doc = "default quality for the base mismatches covariate", required = false)
|
||||
public byte MISMATCHES_DEFAULT_QUALITY = -1;
|
||||
|
||||
/**
|
||||
* A default base qualities to use as a prior (reported quality) in the insertion covariate model. This parameter is used for all reads without insertion quality scores for each base. [default is on]
|
||||
*/
|
||||
@Argument(fullName = "insertions_default_quality", shortName = "idq", doc = "default quality for the base insertions covariate", required = false)
|
||||
public byte INSERTIONS_DEFAULT_QUALITY = 45;
|
||||
|
||||
/**
|
||||
* A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off. [default is on]
|
||||
*/
|
||||
@Argument(fullName = "deletions_default_quality", shortName = "ddq", doc = "default quality for the base deletions covariate", required = false)
|
||||
public byte DELETIONS_DEFAULT_QUALITY = 45;
|
||||
|
||||
/**
|
||||
* Reads with low quality bases on either tail (beginning or end) will not be considered in the context. This parameter defines the quality below which (inclusive) a tail is considered low quality
|
||||
*/
|
||||
@Argument(fullName = "low_quality_tail", shortName = "lqt", doc = "minimum quality for the bases in the tail of the reads to be considered", required = false)
|
||||
public byte LOW_QUAL_TAIL = 2;
|
||||
|
||||
/**
|
||||
* BQSR generates a quantization table for quick quantization later by subsequent tools. BQSR does not quantize the base qualities, this is done by the engine with the -qq or -BQSR options.
|
||||
* This parameter tells BQSR the number of levels of quantization to use to build the quantization table.
|
||||
*/
|
||||
@Argument(fullName = "quantizing_levels", shortName = "ql", required = false, doc = "number of distinct quality scores in the quantized output")
|
||||
public int QUANTIZING_LEVELS = 16;
|
||||
|
||||
/**
|
||||
* The tag name for the binary tag covariate (if using it)
|
||||
*/
|
||||
@Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
|
||||
public String BINARY_TAG_NAME = null;
|
||||
|
||||
/*
|
||||
* whether GATK report tables should have rows in sorted order, starting from leftmost column
|
||||
*/
|
||||
@Argument(fullName = "sort_by_all_columns", shortName = "sortAllCols", doc = "Sort the rows in the tables of reports", required = false)
|
||||
public Boolean SORT_BY_ALL_COLUMNS = false;
|
||||
|
||||
/////////////////////////////
|
||||
// Debugging-only Arguments
|
||||
/////////////////////////////
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String DEFAULT_PLATFORM = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
|
||||
public String FORCE_PLATFORM = null;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "force_readgroup", shortName = "fRG", required = false, doc = "If provided, the read group of EVERY read will be forced to be the provided String.")
|
||||
public String FORCE_READGROUP = null;
|
||||
|
||||
@Hidden
|
||||
@Output(fullName = "recal_table_update_log", shortName = "recal_table_update_log", required = false, doc = "If provided, log all updates to the recalibration tables to the given file. For debugging/testing purposes only", defaultToStdout = false)
|
||||
public PrintStream RECAL_TABLE_UPDATE_LOG = null;
|
||||
|
||||
/**
|
||||
* The repeat covariate will use a context of this size to calculate it's covariate value for base insertions and deletions
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "max_str_unit_length", shortName = "maxstr", doc = "Max size of the k-mer context to be used for repeat covariates", required = false)
|
||||
public int MAX_STR_UNIT_LENGTH = 8;
|
||||
|
||||
@Hidden
|
||||
@Argument(fullName = "max_repeat_length", shortName = "maxrep", doc = "Max number of repetitions to be used for repeat covariates", required = false)
|
||||
public int MAX_REPEAT_LENGTH = 20;
|
||||
|
||||
|
||||
public File existingRecalibrationReport = null;
|
||||
|
||||
public GATKReportTable generateReportTable(final String covariateNames) {
|
||||
GATKReportTable argumentsTable;
|
||||
if(SORT_BY_ALL_COLUMNS) {
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2, GATKReportTable.TableSortingWay.SORT_BY_COLUMN);
|
||||
} else {
|
||||
argumentsTable = new GATKReportTable("Arguments", "Recalibration argument collection values used in this run", 2);
|
||||
}
|
||||
argumentsTable.addColumn("Argument");
|
||||
argumentsTable.addColumn(RecalUtils.ARGUMENT_VALUE_COLUMN_NAME);
|
||||
argumentsTable.addRowID("covariate", true);
|
||||
argumentsTable.set("covariate", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, covariateNames);
|
||||
argumentsTable.addRowID("no_standard_covs", true);
|
||||
argumentsTable.set("no_standard_covs", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DO_NOT_USE_STANDARD_COVARIATES);
|
||||
argumentsTable.addRowID("run_without_dbsnp", true);
|
||||
argumentsTable.set("run_without_dbsnp", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, RUN_WITHOUT_DBSNP);
|
||||
argumentsTable.addRowID("solid_recal_mode", true);
|
||||
argumentsTable.set("solid_recal_mode", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_RECAL_MODE);
|
||||
argumentsTable.addRowID("solid_nocall_strategy", true);
|
||||
argumentsTable.set("solid_nocall_strategy", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, SOLID_NOCALL_STRATEGY);
|
||||
argumentsTable.addRowID("mismatches_context_size", true);
|
||||
argumentsTable.set("mismatches_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("indels_context_size", true);
|
||||
argumentsTable.set("indels_context_size", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INDELS_CONTEXT_SIZE);
|
||||
argumentsTable.addRowID("mismatches_default_quality", true);
|
||||
argumentsTable.set("mismatches_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MISMATCHES_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("deletions_default_quality", true);
|
||||
argumentsTable.set("deletions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DELETIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("insertions_default_quality", true);
|
||||
argumentsTable.set("insertions_default_quality", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, INSERTIONS_DEFAULT_QUALITY);
|
||||
argumentsTable.addRowID("maximum_cycle_value", true);
|
||||
argumentsTable.set("maximum_cycle_value", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, MAXIMUM_CYCLE_VALUE);
|
||||
argumentsTable.addRowID("low_quality_tail", true);
|
||||
argumentsTable.set("low_quality_tail", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, LOW_QUAL_TAIL);
|
||||
argumentsTable.addRowID("default_platform", true);
|
||||
argumentsTable.set("default_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, DEFAULT_PLATFORM);
|
||||
argumentsTable.addRowID("force_platform", true);
|
||||
argumentsTable.set("force_platform", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, FORCE_PLATFORM);
|
||||
argumentsTable.addRowID("quantizing_levels", true);
|
||||
argumentsTable.set("quantizing_levels", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, QUANTIZING_LEVELS);
|
||||
argumentsTable.addRowID("recalibration_report", true);
|
||||
argumentsTable.set("recalibration_report", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, existingRecalibrationReport == null ? "null" : existingRecalibrationReport.getAbsolutePath());
|
||||
argumentsTable.addRowID("binary_tag_name", true);
|
||||
argumentsTable.set("binary_tag_name", RecalUtils.ARGUMENT_VALUE_COLUMN_NAME, BINARY_TAG_NAME == null ? "null" : BINARY_TAG_NAME);
|
||||
return argumentsTable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a map with the arguments that differ between this an
|
||||
* another {@link RecalibrationArgumentCollection} instance.
|
||||
* <p/>
|
||||
* The key is the name of that argument in the report file. The value is a message
|
||||
* that explains the difference to the end user.
|
||||
* <p/>
|
||||
* Thus, a empty map indicates that there is no differences between both argument collection that
|
||||
* is relevant to report comparison.
|
||||
* <p/>
|
||||
* This method should not throw any exception.
|
||||
*
|
||||
* @param other the argument-collection to compare against.
|
||||
* @param thisRole the name used to refer to this RAC report that makes sense to the end user.
|
||||
* @param otherRole the name used to refer to the other RAC report that makes sense to the end user.
|
||||
*
|
||||
* @return never <code>null</code>, but a zero-size collection if there are no differences.
|
||||
*/
|
||||
@Requires("other != null && thisRole != null && otherRole != null && !thisRole.equalsIgnoreCase(otherRole)")
|
||||
Map<String,? extends CharSequence> compareReportArguments(final RecalibrationArgumentCollection other,final String thisRole, final String otherRole) {
|
||||
final Map<String,String> result = new LinkedHashMap<>(15);
|
||||
compareRequestedCovariates(result, other, thisRole, otherRole);
|
||||
compareSimpleReportArgument(result,"no_standard_covs", DO_NOT_USE_STANDARD_COVARIATES, other.DO_NOT_USE_STANDARD_COVARIATES, thisRole, otherRole);
|
||||
compareSimpleReportArgument(result,"run_without_dbsnp",RUN_WITHOUT_DBSNP,other.RUN_WITHOUT_DBSNP,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"solid_recal_mode", SOLID_RECAL_MODE, other.SOLID_RECAL_MODE,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"solid_nocall_strategy", SOLID_NOCALL_STRATEGY, other.SOLID_NOCALL_STRATEGY,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"mismatches_context_size", MISMATCHES_CONTEXT_SIZE,other.MISMATCHES_CONTEXT_SIZE,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"mismatches_default_quality", MISMATCHES_DEFAULT_QUALITY, other.MISMATCHES_DEFAULT_QUALITY,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"deletions_default_quality", DELETIONS_DEFAULT_QUALITY, other.DELETIONS_DEFAULT_QUALITY,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"insertions_default_quality", INSERTIONS_DEFAULT_QUALITY, other.INSERTIONS_DEFAULT_QUALITY,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"maximum_cycle_value", MAXIMUM_CYCLE_VALUE, other.MAXIMUM_CYCLE_VALUE,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"low_quality_tail", LOW_QUAL_TAIL, other.LOW_QUAL_TAIL,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"default_platform", DEFAULT_PLATFORM, other.DEFAULT_PLATFORM,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"force_platform", FORCE_PLATFORM, other.FORCE_PLATFORM,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"quantizing_levels", QUANTIZING_LEVELS, other.QUANTIZING_LEVELS,thisRole,otherRole);
|
||||
compareSimpleReportArgument(result,"binary_tag_name", BINARY_TAG_NAME, other.BINARY_TAG_NAME,thisRole,otherRole);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compares the covariate report lists.
|
||||
*
|
||||
* @param diffs map where to annotate the difference.
|
||||
* @param other the argument collection to compare against.
|
||||
* @param thisRole the name for this argument collection that makes sense to the user.
|
||||
* @param otherRole the name for the other argument collection that makes sense to the end user.
|
||||
*
|
||||
* @return <code>true</code> if a difference was found.
|
||||
*/
|
||||
@Requires("diffs != null && other != null && thisRole != null && otherRole != null")
|
||||
private boolean compareRequestedCovariates(final Map<String,String> diffs,
|
||||
final RecalibrationArgumentCollection other, final String thisRole, final String otherRole) {
|
||||
|
||||
final Set<String> beforeNames = new HashSet<>(this.COVARIATES.length);
|
||||
final Set<String> afterNames = new HashSet<>(other.COVARIATES.length);
|
||||
Utils.addAll(beforeNames, this.COVARIATES);
|
||||
Utils.addAll(afterNames,other.COVARIATES);
|
||||
final Set<String> intersect = new HashSet<>(Math.min(beforeNames.size(),afterNames.size()));
|
||||
intersect.addAll(beforeNames);
|
||||
intersect.retainAll(afterNames);
|
||||
|
||||
String diffMessage = null;
|
||||
if (intersect.size() == 0) { // In practice this is not possible due to required covariates but...
|
||||
diffMessage = String.format("There are no common covariates between '%s' and '%s'"
|
||||
+ " recalibrator reports. Covariates in '%s': {%s}. Covariates in '%s': {%s}.",thisRole,otherRole,
|
||||
thisRole,Utils.join(", ",this.COVARIATES),
|
||||
otherRole,Utils.join(",",other.COVARIATES));
|
||||
} else if (intersect.size() != beforeNames.size() || intersect.size() != afterNames.size()) {
|
||||
beforeNames.removeAll(intersect);
|
||||
afterNames.removeAll(intersect);
|
||||
diffMessage = String.format("There are differences in the set of covariates requested in the"
|
||||
+ " '%s' and '%s' recalibrator reports. "
|
||||
+ " Exclusive to '%s': {%s}. Exclusive to '%s': {%s}.",thisRole,otherRole,
|
||||
thisRole,Utils.join(", ",beforeNames),
|
||||
otherRole,Utils.join(", ",afterNames));
|
||||
}
|
||||
if (diffMessage != null) {
|
||||
diffs.put("covariate",diffMessage);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Annotates a map with any difference encountered in a simple value report argument that differs between this an
|
||||
* another {@link RecalibrationArgumentCollection} instance.
|
||||
* <p/>
|
||||
* The key of the new entry would be the name of that argument in the report file. The value is a message
|
||||
* that explains the difference to the end user.
|
||||
* <p/>
|
||||
*
|
||||
* <p/>
|
||||
* This method should not return any exception.
|
||||
*
|
||||
* @param diffs where to annotate the differences.
|
||||
* @param name the name of the report argument to compare.
|
||||
* @param thisValue this argument collection value for that argument.
|
||||
* @param otherValue the other collection value for that argument.
|
||||
* @param thisRole the name used to refer to this RAC report that makes sense to the end user.
|
||||
* @param otherRole the name used to refer to the other RAC report that makes sense to the end user.
|
||||
*
|
||||
* @type T the argument Object value type.
|
||||
*
|
||||
* @return <code>true</code> if a difference has been spotted, thus <code>diff</code> has been modified.
|
||||
*/
|
||||
private <T> boolean compareSimpleReportArgument(final Map<String,String> diffs,
|
||||
final String name, final T thisValue, final T otherValue, final String thisRole, final String otherRole) {
|
||||
if (thisValue == null && otherValue == null) {
|
||||
return false;
|
||||
} else if (thisValue != null && thisValue.equals(otherValue)) {
|
||||
return false;
|
||||
} else {
|
||||
diffs.put(name,
|
||||
String.format("differences between '%s' {%s} and '%s' {%s}.",
|
||||
thisRole,thisValue == null ? "" : thisValue,
|
||||
otherRole,otherValue == null ? "" : otherValue));
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a shallow copy of this argument collection.
|
||||
*
|
||||
* @return never <code>null</code>.
|
||||
*/
|
||||
@Override
|
||||
public RecalibrationArgumentCollection clone() {
|
||||
try {
|
||||
return (RecalibrationArgumentCollection) super.clone();
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new GATKException("Unreachable code clone not supported thrown when the class "
|
||||
+ this.getClass().getName() + " is cloneable ",e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -52,9 +52,13 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.bqsr;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.gatk.engine.recalibration.ReadCovariates;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalDatum;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalUtils;
|
||||
import org.broadinstitute.gatk.engine.recalibration.RecalibrationTables;
|
||||
import org.broadinstitute.gatk.utils.collections.NestedIntegerArray;
|
||||
import org.broadinstitute.gatk.utils.recalibration.*;
|
||||
import org.broadinstitute.gatk.utils.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.gatk.engine.recalibration.covariates.Covariate;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
|
|||
|
|
@ -54,10 +54,10 @@ package org.broadinstitute.gatk.tools.walkers.diagnostics;
|
|||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.report.GATKReport;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReport;
|
||||
import org.broadinstitute.gatk.engine.walkers.LocusWalker;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
|
|
|
|||
|
|
@ -54,9 +54,9 @@ package org.broadinstitute.gatk.tools.walkers.diagnostics;
|
|||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.walkers.ActiveRegionTraversalParameters;
|
||||
import org.broadinstitute.gatk.engine.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.gatk.engine.walkers.PartitionBy;
|
||||
|
|
|
|||
|
|
@ -56,12 +56,11 @@ import org.broadinstitute.gatk.engine.walkers.*;
|
|||
import org.broadinstitute.gatk.utils.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.downsampling.DownsampleType;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.SampleUtils;
|
||||
import org.broadinstitute.gatk.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.gatk.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
|
|
@ -70,6 +69,7 @@ import org.broadinstitute.gatk.utils.help.HelpConstants;
|
|||
import htsjdk.variant.variantcontext.*;
|
||||
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
||||
import htsjdk.variant.vcf.*;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.*;
|
||||
|
|
@ -154,7 +154,7 @@ public class DiagnoseTargets extends LocusWalker<Long, Long> {
|
|||
intervalListIterator = new PeekableIterator<GenomeLoc>(getToolkit().getIntervals().iterator());
|
||||
|
||||
// get all of the unique sample names for the VCF Header
|
||||
samples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
samples = ReadUtils.getSAMFileSamples(getToolkit().getSAMFileHeader());
|
||||
vcfWriter.writeHeader(new VCFHeader(getHeaderInfo(), samples));
|
||||
|
||||
// pre load all the statistics classes because it is costly to operate on the JVM and we only want to do it once.
|
||||
|
|
|
|||
|
|
@ -51,8 +51,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.diagnostics.diagnosetargets;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
import org.broadinstitute.gatk.utils.pileup.ReadBackedPileup;
|
||||
|
|
|
|||
|
|
@ -56,11 +56,11 @@ import org.broadinstitute.gatk.utils.commandline.Argument;
|
|||
import org.broadinstitute.gatk.utils.commandline.Gather;
|
||||
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.report.GATKReport;
|
||||
import org.broadinstitute.gatk.engine.report.GATKReportGatherer;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReport;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReportGatherer;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocSortedSet;
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleListPermutation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
|
|
|
|||
|
|
@ -52,11 +52,10 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.clipping.ReadClipper;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.gatk.utils.collections.Pair;
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.tools.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
|
|
|
|||
|
|
@ -52,10 +52,10 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.ExactACset;
|
||||
import org.broadinstitute.gatk.tools.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
|
|
|
|||
|
|
@ -52,10 +52,10 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
|
|
|
|||
|
|
@ -77,10 +77,10 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
|
|||
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.*;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
||||
|
|
|
|||
|
|
@ -52,10 +52,10 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.exceptions.ReviewedGATKException;
|
||||
|
|
|
|||
|
|
@ -52,7 +52,10 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleListUtils;
|
||||
|
||||
/**
|
||||
* Encapsulates the data used to make the genotype calls.
|
||||
|
|
|
|||
|
|
@ -58,11 +58,12 @@ import htsjdk.variant.vcf.VCFConstants;
|
|||
import htsjdk.variant.vcf.VCFHeaderLineType;
|
||||
import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculator;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
|
||||
|
|
|
|||
|
|
@ -53,6 +53,8 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
|
|||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
|
||||
/**
|
||||
* Common interface for genotyping models.
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
|
||||
/**
|
||||
* {@link PloidyModel} implementation tailored to work with a homogeneous constant ploidy
|
||||
* across samples and positions.
|
||||
|
|
|
|||
|
|
@ -52,10 +52,10 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.indels.PairHMMIndelErrorModel;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
|
|
|
|||
|
|
@ -53,6 +53,9 @@ package org.broadinstitute.gatk.tools.walkers.genotyper;
|
|||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.GenotypeLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleListPermutation;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleListUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
|
|||
|
|
@ -51,6 +51,8 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
|
||||
/**
|
||||
* Information about the number of chromosome per sample at a given location.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -52,10 +52,10 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,231 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorImplementation;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.utils.collections.DefaultHashMap;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Modifier;
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: 8/20/12
|
||||
* A collection of arguments that are common to the various callers.
|
||||
* This is pulled out so that every caller isn't exposed to the arguments from every other caller.
|
||||
*/
|
||||
|
||||
public class StandardCallerArgumentCollection implements Cloneable {
|
||||
|
||||
@ArgumentCollection
|
||||
public GenotypeCalculationArgumentCollection genotypeArgs = new GenotypeCalculationArgumentCollection();
|
||||
|
||||
@Argument(fullName = "genotyping_mode", shortName = "gt_mode", doc = "Specifies how to determine the alternate alleles to use for genotyping", required = false)
|
||||
public GenotypingOutputMode genotypingOutputMode = GenotypingOutputMode.DISCOVERY;
|
||||
|
||||
/**
|
||||
* When the UnifiedGenotyper is put into GENOTYPE_GIVEN_ALLELES mode it will genotype the samples using only the alleles provide in this rod binding
|
||||
*/
|
||||
@Input(fullName="alleles", shortName = "alleles", doc="The set of alleles at which to genotype when --genotyping_mode is GENOTYPE_GIVEN_ALLELES", required=false)
|
||||
public RodBinding<VariantContext> alleles;
|
||||
|
||||
/**
|
||||
* If this fraction is greater is than zero, the caller will aggressively attempt to remove contamination through biased down-sampling of reads.
|
||||
* Basically, it will ignore the contamination fraction of reads for each alternate allele. So if the pileup contains N total bases, then we
|
||||
* will try to remove (N * contamination fraction) bases for each alternate allele.
|
||||
*/
|
||||
@Argument(fullName = "contamination_fraction_to_filter", shortName = "contamination", doc = "Fraction of contamination in sequencing data (for all samples) to aggressively remove", required = false)
|
||||
public double CONTAMINATION_FRACTION = DEFAULT_CONTAMINATION_FRACTION;
|
||||
public static final double DEFAULT_CONTAMINATION_FRACTION = 0.0;
|
||||
|
||||
/**
|
||||
* This argument specifies a file with two columns "sample" and "contamination" specifying the contamination level for those samples.
|
||||
* Samples that do not appear in this file will be processed with CONTAMINATION_FRACTION.
|
||||
**/
|
||||
@Advanced
|
||||
@Argument(fullName = "contamination_fraction_per_sample_file", shortName = "contaminationFile", doc = "Tab-separated File containing fraction of contamination in sequencing data (per sample) to aggressively remove. Format should be \"<SampleID><TAB><Contamination>\" (Contamination is double) per line; No header.", required = false)
|
||||
public File CONTAMINATION_FRACTION_FILE = null;
|
||||
|
||||
/**
|
||||
* Indicates whether there is some sample contamination present.
|
||||
*/
|
||||
private boolean sampleContaminationWasLoaded = false;
|
||||
|
||||
/**
|
||||
*
|
||||
* @return an _Immutable_ copy of the Sample-Contamination Map, defaulting to CONTAMINATION_FRACTION so that if the sample isn't in the map map(sample)==CONTAMINATION_FRACTION
|
||||
*/
|
||||
public Map<String,Double> getSampleContamination(){
|
||||
//make sure that the default value is set up right
|
||||
sampleContamination.setDefaultValue(CONTAMINATION_FRACTION);
|
||||
if (!Double.isNaN(CONTAMINATION_FRACTION) && CONTAMINATION_FRACTION > 0.0)
|
||||
sampleContaminationWasLoaded = true;
|
||||
return Collections.unmodifiableMap(sampleContamination);
|
||||
}
|
||||
|
||||
public void setSampleContamination(DefaultHashMap<String, Double> sampleContamination) {
|
||||
this.sampleContamination.clear();
|
||||
this.sampleContaminationWasLoaded = !Double.isNaN(CONTAMINATION_FRACTION) && CONTAMINATION_FRACTION > 0.0;
|
||||
if (!sampleContaminationWasLoaded)
|
||||
for (final Double d : sampleContamination.values())
|
||||
if (!Double.isNaN(d) && d > 0.0) {
|
||||
sampleContaminationWasLoaded = true;
|
||||
break;
|
||||
}
|
||||
this.sampleContamination.putAll(sampleContamination);
|
||||
this.sampleContamination.setDefaultValue(CONTAMINATION_FRACTION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if there is some sample contamination present, false otherwise.
|
||||
* @return {@code true} iff there is some sample contamination
|
||||
*/
|
||||
public boolean isSampleContaminationPresent() {
|
||||
return (!Double.isNaN(CONTAMINATION_FRACTION) && CONTAMINATION_FRACTION > 0.0) || sampleContaminationWasLoaded;
|
||||
}
|
||||
|
||||
//Needs to be here because it uses CONTAMINATION_FRACTION
|
||||
private DefaultHashMap<String,Double> sampleContamination = new DefaultHashMap<String,Double>(CONTAMINATION_FRACTION);
|
||||
|
||||
/**
|
||||
* Controls the model used to calculate the probability that a site is variant plus the various sample genotypes in the data at a given locus.
|
||||
*/
|
||||
@Hidden
|
||||
@Argument(fullName = "p_nonref_model", shortName = "pnrm", doc = "Non-reference probability calculation model to employ", required = false)
|
||||
public AFCalculatorImplementation requestedAlleleFrequencyCalculationModel;
|
||||
|
||||
@Hidden
|
||||
@Argument(shortName = "logExactCalls", doc="x", required=false)
|
||||
public File exactCallsLog = null;
|
||||
|
||||
@Argument(fullName = "output_mode", shortName = "out_mode", doc = "Specifies which type of calls we should output", required = false)
|
||||
public OutputMode outputMode = OutputMode.EMIT_VARIANTS_ONLY;
|
||||
|
||||
/**
|
||||
* Advanced, experimental argument: if SNP likelihood model is specified, and if EMIT_ALL_SITES output mode is set, when we set this argument then we will also emit PLs at all sites.
|
||||
* This will give a measure of reference confidence and a measure of which alt alleles are more plausible (if any).
|
||||
* WARNINGS:
|
||||
* - This feature will inflate VCF file size considerably.
|
||||
* - All SNP ALT alleles will be emitted with corresponding 10 PL values.
|
||||
* - An error will be emitted if EMIT_ALL_SITES is not set, or if anything other than diploid SNP model is used
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "allSitePLs", shortName = "allSitePLs", doc = "Annotate all sites with PLs", required = false)
|
||||
public boolean annotateAllSitesWithPLs = false;
|
||||
|
||||
/**
|
||||
* Creates a Standard caller argument collection with default values.
|
||||
*/
|
||||
public StandardCallerArgumentCollection() { }
|
||||
|
||||
/**
|
||||
* "Casts" a caller argument collection into another type.
|
||||
*
|
||||
* <p>Common fields values are copied across</p>
|
||||
* @param clazz the class of the result.
|
||||
* @param <T> result argument collection class.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public <T extends StandardCallerArgumentCollection> T cloneTo(final Class<T> clazz) {
|
||||
// short cut: just use regular clone if it happens to be the same class.
|
||||
if (clazz == getClass())
|
||||
return (T) clone();
|
||||
try {
|
||||
final T result = clazz.newInstance();
|
||||
for (final Field field : getClass().getFields()) {
|
||||
// just copy common fields.
|
||||
if (!field.getDeclaringClass().isAssignableFrom(clazz))
|
||||
continue;
|
||||
final int fieldModifiers = field.getModifiers();
|
||||
if ((fieldModifiers & UNCOPYABLE_MODIFIER_MASK) != 0) continue;
|
||||
//Use the clone() method if appropriate
|
||||
if (Cloneable.class.isAssignableFrom(field.getType())) {
|
||||
Method clone = field.getType().getMethod("clone");
|
||||
field.set(result, clone.invoke(field.get(this)));
|
||||
} else
|
||||
field.set(result,field.get(this));
|
||||
}
|
||||
return result;
|
||||
} catch (final Exception ex) {
|
||||
throw new IllegalStateException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a copy of this configuration.
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
@Override
|
||||
public StandardCallerArgumentCollection clone() {
|
||||
try {
|
||||
StandardCallerArgumentCollection cloned = (StandardCallerArgumentCollection) super.clone();
|
||||
cloned.genotypeArgs = genotypeArgs.clone();
|
||||
return cloned;
|
||||
} catch (CloneNotSupportedException e) {
|
||||
throw new IllegalStateException("unreachable code");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Holds a modifiers mask that identifies those fields that cannot be copied between
|
||||
* StandardCallerArgumentCollections.
|
||||
*/
|
||||
private final int UNCOPYABLE_MODIFIER_MASK = Modifier.PRIVATE | Modifier.STATIC | Modifier.FINAL;
|
||||
}
|
||||
|
|
@ -52,7 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.genotyper;
|
||||
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.pairhmm.PairHMM;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
|
||||
|
|
|
|||
|
|
@ -58,25 +58,27 @@ import htsjdk.variant.vcf.*;
|
|||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.downsampling.AlleleBiasedDownsamplingUtils;
|
||||
import org.broadinstitute.gatk.engine.downsampling.DownsampleType;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils;
|
||||
import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
|
||||
import org.broadinstitute.gatk.engine.filters.BadMateFilter;
|
||||
import org.broadinstitute.gatk.engine.filters.MappingQualityUnavailableFilter;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.genotyper.IndexedSampleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.walkers.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.FixedAFCalculatorProvider;
|
||||
import org.broadinstitute.gatk.utils.SampleUtils;
|
||||
import org.broadinstitute.gatk.utils.baq.BAQ;
|
||||
import org.broadinstitute.gatk.utils.commandline.*;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
import org.broadinstitute.gatk.utils.sam.ReadUtils;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -267,7 +269,7 @@ public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, Unif
|
|||
sampleNameSet = Collections.singleton(GenotypeLikelihoodsCalculationModel.DUMMY_SAMPLE_NAME);
|
||||
} else {
|
||||
// get all of the unique sample names
|
||||
sampleNameSet = SampleUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
|
||||
sampleNameSet = ReadUtils.getSAMFileSamples(toolkit.getSAMFileHeader());
|
||||
if ( UAC.referenceSampleName != null )
|
||||
sampleNameSet.remove(UAC.referenceSampleName);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,10 +56,11 @@ import htsjdk.variant.variantcontext.GenotypesContext;
|
|||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculationResult;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
||||
import org.broadinstitute.gatk.utils.BaseUtils;
|
||||
|
|
|
|||
|
|
@ -54,8 +54,8 @@ package org.broadinstitute.gatk.tools.walkers.genotyper.afcalc;
|
|||
import org.apache.log4j.ConsoleAppender;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.apache.log4j.TTCCLayout;
|
||||
import org.broadinstitute.gatk.engine.report.GATKReport;
|
||||
import org.broadinstitute.gatk.engine.report.GATKReportTable;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReport;
|
||||
import org.broadinstitute.gatk.utils.report.GATKReportTable;
|
||||
import org.broadinstitute.gatk.utils.GenomeLocParser;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.SimpleTimer;
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.arguments.GenotypeCalculationArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.StandardCallerArgumentCollection;
|
||||
|
||||
/**
|
||||
* A single fixed instance AF calculator provider.
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@
|
|||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.SeqGraph;
|
||||
import org.broadinstitute.gatk.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
|
|
|
|||
|
|
@ -53,9 +53,9 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
|||
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedAlleleList;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.IndexedAlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.MultiSampleEdge;
|
||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.Path;
|
||||
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.graphs.Route;
|
||||
|
|
|
|||
|
|
@ -59,16 +59,18 @@ import htsjdk.variant.vcf.*;
|
|||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.gatk.engine.arguments.DbsnpArgumentCollection;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.downsampling.AlleleBiasedDownsamplingUtils;
|
||||
import org.broadinstitute.gatk.engine.downsampling.DownsampleType;
|
||||
import org.broadinstitute.gatk.engine.downsampling.DownsamplingUtils;
|
||||
import org.broadinstitute.gatk.engine.io.stubs.SAMFileWriterStub;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.downsampling.AlleleBiasedDownsamplingUtils;
|
||||
import org.broadinstitute.gatk.utils.downsampling.DownsampleType;
|
||||
import org.broadinstitute.gatk.utils.downsampling.DownsamplingUtils;
|
||||
import org.broadinstitute.gatk.engine.filters.BadMateFilter;
|
||||
import org.broadinstitute.gatk.engine.io.GATKSAMFileWriter;
|
||||
import org.broadinstitute.gatk.utils.genotyper.*;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMFileWriter;
|
||||
import org.broadinstitute.gatk.engine.iterators.ReadTransformer;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.walkers.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.VariantAnnotatorEngine;
|
||||
import org.broadinstitute.gatk.tools.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
|
|
@ -88,12 +90,9 @@ import org.broadinstitute.gatk.utils.exceptions.UserException;
|
|||
import org.broadinstitute.gatk.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.gatk.utils.fragments.FragmentCollection;
|
||||
import org.broadinstitute.gatk.utils.fragments.FragmentUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.gga.GenotypingGivenAllelesUtils;
|
||||
import org.broadinstitute.gatk.utils.gvcf.GVCFWriter;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.gatk.utils.haplotype.LDMerger;
|
||||
import org.broadinstitute.gatk.utils.haplotype.MergeVariantsAcrossHaplotypes;
|
||||
import org.broadinstitute.gatk.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
|
||||
import org.broadinstitute.gatk.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.gatk.utils.help.HelpConstants;
|
||||
|
|
@ -1167,7 +1166,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
} catch ( final Exception e ) {
|
||||
// Capture any exception that might be thrown, and write out the assembly failure BAM if requested
|
||||
if ( captureAssemblyFailureBAM ) {
|
||||
final SAMFileWriter writer = ReadUtils.createSAMFileWriter("assemblyFailure.bam", getToolkit());
|
||||
final SAMFileWriter writer = SAMFileWriterStub.createSAMFileWriter("assemblyFailure.bam", getToolkit());
|
||||
for ( final GATKSAMRecord read : activeRegion.getReads() ) {
|
||||
writer.addAlignment(read);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@
|
|||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import org.broadinstitute.gatk.engine.arguments.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.StandardCallerArgumentCollection;
|
||||
import org.broadinstitute.gatk.utils.commandline.Advanced;
|
||||
import org.broadinstitute.gatk.utils.commandline.Argument;
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,10 @@ package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import htsjdk.variant.variantcontext.*;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.IndexedAlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.*;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.afcalc.AFCalculatorProvider;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
|
|
@ -64,7 +67,6 @@ import org.broadinstitute.gatk.utils.collections.Pair;
|
|||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.haplotype.EventMap;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.gatk.utils.haplotype.MergeVariantsAcrossHaplotypes;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils;
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,204 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleListUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleListUtils;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Computes the likelihood based probability that haplotypes for first and second variant contexts
|
||||
* only appear in their fully linked form (x11 and x22) given a set of haplotypes where they might occur
|
||||
* and read likelihoods per sample
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 3/29/13
|
||||
* Time: 9:23 AM
|
||||
*/
|
||||
public class HaplotypeLDCalculator {
|
||||
private final List<Haplotype> haplotypes;
|
||||
private final ReadLikelihoods<Haplotype> readLikelihoods;
|
||||
private List<Map<Haplotype, Double>> haplotypeLikelihoodsPerSample = null;
|
||||
|
||||
// linear contingency table with table[0] == [0][0], table[1] = [0][1], table[2] = [1][0], table[3] = [1][1]
|
||||
private final double[] table = new double[4];
|
||||
|
||||
/**
|
||||
* For testing
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
protected HaplotypeLDCalculator() {
|
||||
haplotypes = Collections.emptyList();
|
||||
final AlleleList<Haplotype> alleleList = AlleleListUtils.emptyList();
|
||||
readLikelihoods = new ReadLikelihoods<>(SampleListUtils.emptyList(),
|
||||
alleleList, Collections.EMPTY_MAP);
|
||||
}
|
||||
|
||||
public HaplotypeLDCalculator(final List<Haplotype> haplotypes, final ReadLikelihoods<Haplotype> haplotypeReadMap) {
|
||||
this.haplotypes = haplotypes;
|
||||
this.readLikelihoods = haplotypeReadMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct the cached list of summed haplotype likelihoods per sample if it
|
||||
* hasn't already been computed. This data structure is lazily created but only
|
||||
* needs to be made once when we make 1 merge decision as the data doesn't change
|
||||
* no matter how many calls to computeProbOfBeingPhased
|
||||
*/
|
||||
private void buildHaplotypeLikelihoodsPerSampleIfNecessary() {
|
||||
if ( haplotypeLikelihoodsPerSample == null ) {
|
||||
// do the lazy computation
|
||||
final Set<String> samples = new LinkedHashSet<>(readLikelihoods.samples());
|
||||
haplotypeLikelihoodsPerSample = new LinkedList<>();
|
||||
for( final String sample : samples ) {
|
||||
final Map<Haplotype, Double> map = new HashMap<>(haplotypes.size());
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
// count up the co-occurrences of the events for the R^2 calculation
|
||||
final double haplotypeLikelihood = PairHMMLikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, readLikelihoods, Collections.singletonList(h), false)[0][0];
|
||||
map.put(h, haplotypeLikelihood);
|
||||
}
|
||||
haplotypeLikelihoodsPerSample.add(map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the likelihood based probability that the haplotypes for first and second are only x11 and x22
|
||||
*
|
||||
* As opposed to the hypothesis that all four haplotypes (x11, x12, x21, and x22) exist in the population
|
||||
*
|
||||
* @param first a non-null VariantContext
|
||||
* @param second a non-null VariantContext
|
||||
* @return the probability that only x11 and x22 exist among the samples
|
||||
*/
|
||||
protected double computeProbOfBeingPhased(final VariantContext first, final VariantContext second) {
|
||||
buildHaplotypeLikelihoodsPerSampleIfNecessary();
|
||||
|
||||
Arrays.fill(table, Double.NEGATIVE_INFINITY);
|
||||
|
||||
for ( final Map<Haplotype, Double> entry : haplotypeLikelihoodsPerSample ) {
|
||||
for ( final Map.Entry<Haplotype, Double> haplotypeLikelihood : entry.entrySet() ) {
|
||||
final Haplotype h = haplotypeLikelihood.getKey();
|
||||
// count up the co-occurrences of the events for the R^2 calculation
|
||||
final VariantContext thisHapVC = h.getEventMap().get(first.getStart());
|
||||
final VariantContext nextHapVC = h.getEventMap().get(second.getStart()); // TODO -- add function to take a VC
|
||||
final int i = thisHapVC == null ? 0 : 1;
|
||||
final int j = nextHapVC == null ? 0 : 1;
|
||||
final int index = 2 * i + j;
|
||||
table[index] = MathUtils.approximateLog10SumLog10(table[index], haplotypeLikelihood.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
return pPhased(table);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute probability that two variants are in phase with each other and that no
|
||||
* compound hets exist in the population.
|
||||
*
|
||||
* Implemented as a likelihood ratio test of the hypothesis:
|
||||
*
|
||||
* x11 and x22 are the only haplotypes in the population
|
||||
*
|
||||
* vs.
|
||||
*
|
||||
* all four haplotype combinations (x11, x12, x21, and x22) all exist in the population.
|
||||
*
|
||||
* Now, since we have to have both variants in the population, we exclude the x11 & x11 state. So the
|
||||
* p of having just x11 and x22 is P(x11 & x22) + p(x22 & x22).
|
||||
*
|
||||
* Alternatively, we might have any configuration that gives us both 1 and 2 alts, which are:
|
||||
*
|
||||
* - P(x11 & x12 & x21) -- we have hom-ref and both hets
|
||||
* - P(x22 & x12 & x21) -- we have hom-alt and both hets
|
||||
* - P(x22 & x12) -- one haplotype is 22 and the other is het 12
|
||||
* - P(x22 & x21) -- one haplotype is 22 and the other is het 21
|
||||
*
|
||||
* The probability is just p11_22 / (p11_22 + p hets)
|
||||
*
|
||||
* @param table linear contingency table with table[0] == [0][0], table[1] = [0][1], table[2] = [1][0], table[3] = [1][1]
|
||||
* doesn't have to be normalized as this function does the normalization internally
|
||||
* @return the real space probability that the data is phased
|
||||
*/
|
||||
@Requires("table.length == 4")
|
||||
protected double pPhased( double[] table ) {
|
||||
final double[] normTable = MathUtils.normalizeFromLog10(table, true);
|
||||
|
||||
final double x11 = normTable[0], x12 = normTable[1], x21 = normTable[2], x22 = normTable[3];
|
||||
|
||||
// probability that we are only x11 && x22
|
||||
final double p11_22 = MathUtils.approximateLog10SumLog10(x11 + x22, x22 + x22);
|
||||
|
||||
// probability of having any of the other pairs
|
||||
final double p11_12_21 = MathUtils.approximateLog10SumLog10(x11 + x12, x11 + x21, x12 + x21);
|
||||
final double p22_12_21 = MathUtils.approximateLog10SumLog10(x22 + x12, x22 + x21, x12 + x21);
|
||||
final double p22_12 = x22 + x12;
|
||||
final double p22_21 = x22 + x21;
|
||||
final double pOthers = MathUtils.approximateLog10SumLog10(new double[]{p11_12_21, p22_12_21, p22_12, p22_21});
|
||||
|
||||
// probability of being phased is the ratio p11_22 / (p11_22 + pOthers), which in log space is just a subtraction
|
||||
final double log10phased = p11_22 - (MathUtils.approximateLog10SumLog10(p11_22, pOthers));
|
||||
|
||||
return Math.pow(10.0, log10phased);
|
||||
}
|
||||
|
||||
protected double pPhasedTest( final double x11, final double x12, final double x21, final double x22 ) {
|
||||
return pPhased(new double[]{x11, x12, x21, x22});
|
||||
}
|
||||
}
|
||||
|
|
@ -56,9 +56,9 @@ import org.broadinstitute.gatk.utils.commandline.Input;
|
|||
import org.broadinstitute.gatk.utils.commandline.Output;
|
||||
import org.broadinstitute.gatk.utils.commandline.RodBinding;
|
||||
import org.broadinstitute.gatk.engine.CommandLineGATK;
|
||||
import org.broadinstitute.gatk.engine.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.engine.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.engine.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.utils.contexts.AlignmentContext;
|
||||
import org.broadinstitute.gatk.utils.contexts.ReferenceContext;
|
||||
import org.broadinstitute.gatk.utils.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.gatk.engine.walkers.Reference;
|
||||
import org.broadinstitute.gatk.engine.walkers.RodWalker;
|
||||
import org.broadinstitute.gatk.engine.walkers.Window;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,314 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import htsjdk.variant.variantcontext.VariantContext;
|
||||
import htsjdk.variant.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* Merges VariantContexts in a series of haplotypes according to their pairwise LD
|
||||
*
|
||||
* User: depristo
|
||||
* Date: 3/28/13
|
||||
* Time: 6:17 PM
|
||||
*/
|
||||
public class LDMerger extends MergeVariantsAcrossHaplotypes {
|
||||
private final static Logger logger = Logger.getLogger(LDMerger.class);
|
||||
|
||||
private final boolean DEBUG;
|
||||
private final int minSamplesToMergeSNPs;
|
||||
private final int minSamplesToMergeOtherEvents;
|
||||
|
||||
public LDMerger(boolean DEBUG, int minSamplesToMergeSNPs, int minSamplesToMergeOtherEvents) {
|
||||
super();
|
||||
this.DEBUG = DEBUG;
|
||||
this.minSamplesToMergeSNPs = minSamplesToMergeSNPs;
|
||||
this.minSamplesToMergeOtherEvents = minSamplesToMergeOtherEvents;
|
||||
}
|
||||
|
||||
protected LDMerger() {
|
||||
this(false, 1, 1);
|
||||
}
|
||||
|
||||
// TODO -- should be class arguments and static variables in HC
|
||||
protected final static int MAX_DISTANCE_BETWEEN_SNPS_TO_MERGE = 6;
|
||||
protected final static int MAX_DISTANCE_BETWEEN_OTHER_EVENTS_TO_MERGE = 25;
|
||||
|
||||
/**
|
||||
* We require 99% confidence that only the phased haplotypes exist in the population to merge the records
|
||||
*/
|
||||
protected final static double MERGE_EVENTS_PROB_PHASED_THRESHOLD = 0.99;
|
||||
|
||||
/**
|
||||
* Merge as many events among the haplotypes as possible based on pairwise LD among variants
|
||||
*
|
||||
* @param haplotypes a list of haplotypes whose events we want to merge
|
||||
* @param readLikelihoods map from sample name -> read likelihoods for each haplotype
|
||||
* @param startPosKeySet a set of starting positions of all events among the haplotypes
|
||||
* @param ref the reference bases
|
||||
* @param refLoc the span of the reference bases
|
||||
*/
|
||||
@Override
|
||||
public boolean merge( final List<Haplotype> haplotypes,
|
||||
final ReadLikelihoods<Haplotype> readLikelihoods,
|
||||
final TreeSet<Integer> startPosKeySet,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc ) {
|
||||
if ( haplotypes == null ) throw new IllegalArgumentException("haplotypes cannot be null");
|
||||
if ( readLikelihoods == null ) throw new IllegalArgumentException("readLikelihoods cannot be null");
|
||||
if ( startPosKeySet == null ) throw new IllegalArgumentException("startPosKeySet cannot be null");
|
||||
if ( ref == null ) throw new IllegalArgumentException("ref cannot be null");
|
||||
if ( refLoc == null ) throw new IllegalArgumentException("refLoc cannot be null");
|
||||
if ( refLoc.size() != ref.length ) throw new IllegalArgumentException("refLoc size " + refLoc.size() + " != ref.length " + ref.length + " at " + refLoc);
|
||||
|
||||
if( startPosKeySet.size() <= 1 ) { return false; }
|
||||
|
||||
final int nSamples = readLikelihoods.sampleCount();
|
||||
final HaplotypeLDCalculator r2Calculator = new HaplotypeLDCalculator(haplotypes, readLikelihoods);
|
||||
boolean somethingWasMerged = false;
|
||||
boolean mapWasUpdated = true;
|
||||
while( mapWasUpdated ) {
|
||||
mapWasUpdated = mergeConsecutiveEventsBasedOnLDOnce(haplotypes, r2Calculator, nSamples, startPosKeySet, ref, refLoc);
|
||||
somethingWasMerged |= mapWasUpdated;
|
||||
}
|
||||
return somethingWasMerged;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge the next pair of events, if possible
|
||||
*
|
||||
* @param haplotypes a list of haplotypes whose events we want to merge
|
||||
* @param ldCalculator calculates R^2 for pairs of events on demand
|
||||
* @param startPosKeySet a set of starting positions of all events among the haplotypes
|
||||
* @param ref the reference bases
|
||||
* @param refLoc the span of the reference bases
|
||||
* @return true if something was merged, false otherwise
|
||||
*/
|
||||
protected boolean mergeConsecutiveEventsBasedOnLDOnce( final List<Haplotype> haplotypes,
|
||||
final HaplotypeLDCalculator ldCalculator,
|
||||
final int nSamples,
|
||||
final TreeSet<Integer> startPosKeySet,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc ) {
|
||||
// loop over the set of start locations and consider pairs that start near each other
|
||||
final Iterator<Integer> iter = startPosKeySet.iterator();
|
||||
int thisStart = iter.next();
|
||||
while( iter.hasNext() ) {
|
||||
final int nextStart = iter.next();
|
||||
final LDMergeData toMerge = getPairOfEventsToMerge(haplotypes, thisStart, nextStart);
|
||||
|
||||
if ( toMerge.canBeMerged(nSamples) ) {
|
||||
final double pPhased = ldCalculator.computeProbOfBeingPhased(toMerge.firstVC, toMerge.secondVC);
|
||||
|
||||
if( DEBUG ) {
|
||||
logger.info("Found consecutive biallelic events with R^2 = " + String.format("%.4f", pPhased));
|
||||
logger.info("-- " + toMerge.firstVC);
|
||||
logger.info("-- " + toMerge.secondVC);
|
||||
}
|
||||
|
||||
if( pPhased > MERGE_EVENTS_PROB_PHASED_THRESHOLD) {
|
||||
final VariantContext mergedVC = createMergedVariantContext(toMerge.firstVC, toMerge.secondVC, ref, refLoc);
|
||||
// if for some reason the merging resulting in a bad allele, mergedVC will be null, and we will just remove first and second
|
||||
replaceVariantContextsInMap(haplotypes, startPosKeySet, mergedVC, toMerge.firstVC, toMerge.secondVC);
|
||||
return true; // break out of tree set iteration since it was just updated, start over from the beginning and keep merging events
|
||||
}
|
||||
}
|
||||
|
||||
thisStart = nextStart;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Info about potential LD merge of two variant contexts
|
||||
*/
|
||||
private class LDMergeData {
|
||||
VariantContext firstVC = null, secondVC = null;
|
||||
boolean canBeMerged = true;
|
||||
|
||||
/** Tell this object that it cant be merged for some reason */
|
||||
public LDMergeData cantBeMerged() {
|
||||
canBeMerged = false;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Can these two events be merged
|
||||
* @param nSamples the number of samples we're considering
|
||||
* @return true if we can merge our two variant contexts
|
||||
*/
|
||||
public boolean canBeMerged(final int nSamples) {
|
||||
if ( ! canBeMerged || firstVC == null || secondVC == null )
|
||||
return false;
|
||||
|
||||
final int distance = secondVC.getStart() - firstVC.getEnd();
|
||||
if ( firstVC.isSNP() && secondVC.isSNP() ) {
|
||||
return nSamples >= minSamplesToMergeSNPs && distance <= MAX_DISTANCE_BETWEEN_SNPS_TO_MERGE;
|
||||
} else {
|
||||
return nSamples >= minSamplesToMergeOtherEvents && distance <= MAX_DISTANCE_BETWEEN_OTHER_EVENTS_TO_MERGE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the information about the potential merge of two events starting at thisStart and nextStart
|
||||
* @param haplotypes our haplotypes
|
||||
* @param thisStart the starting position of the first event to merge
|
||||
* @param nextStart the starting position of the next event to merge
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
private LDMergeData getPairOfEventsToMerge(final List<Haplotype> haplotypes, final int thisStart, final int nextStart) {
|
||||
final LDMergeData mergeData = new LDMergeData();
|
||||
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
// only make complex substitutions out of consecutive biallelic sites
|
||||
final VariantContext thisHapVC = h.getEventMap().get(thisStart);
|
||||
if( thisHapVC != null && !thisHapVC.isSymbolic() ) { // something was found at this location on this haplotype
|
||||
if( mergeData.firstVC == null ) {
|
||||
mergeData.firstVC = thisHapVC;
|
||||
} else if( !thisHapVC.hasSameAllelesAs( mergeData.firstVC) ) {
|
||||
return mergeData.cantBeMerged();
|
||||
}
|
||||
}
|
||||
final VariantContext nextHapVC = h.getEventMap().get(nextStart);
|
||||
if( nextHapVC != null && !nextHapVC.isSymbolic() ) { // something was found at the next location on this haplotype
|
||||
if( mergeData.secondVC == null ) {
|
||||
mergeData.secondVC = nextHapVC;
|
||||
} else if( !nextHapVC.hasSameAllelesAs( mergeData.secondVC) ) {
|
||||
return mergeData.cantBeMerged();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// don't try to merge overlapping events
|
||||
if ( mergeData.firstVC != null && mergeData.secondVC != null && mergeData.firstVC.getEnd() >= mergeData.secondVC.getStart() )
|
||||
return mergeData.cantBeMerged();
|
||||
|
||||
return mergeData;
|
||||
}
|
||||
|
||||
// BUGBUG: make this merge function more general
|
||||
protected VariantContext createMergedVariantContext( final VariantContext thisVC, final VariantContext nextVC, final byte[] ref, final GenomeLoc refLoc ) {
|
||||
final int thisStart = thisVC.getStart();
|
||||
final int nextStart = nextVC.getStart();
|
||||
byte[] refBases = new byte[]{};
|
||||
byte[] altBases = new byte[]{};
|
||||
refBases = ArrayUtils.addAll(refBases, thisVC.getReference().getBases());
|
||||
altBases = ArrayUtils.addAll(altBases, thisVC.getAlternateAllele(0).getBases());
|
||||
int locus;
|
||||
for( locus = thisStart + refBases.length; locus < nextStart; locus++ ) {
|
||||
final byte refByte = ref[locus - refLoc.getStart()];
|
||||
refBases = ArrayUtils.add(refBases, refByte);
|
||||
altBases = ArrayUtils.add(altBases, refByte);
|
||||
}
|
||||
refBases = ArrayUtils.addAll(refBases, ArrayUtils.subarray(nextVC.getReference().getBases(), locus > nextStart ? 1 : 0, nextVC.getReference().getBases().length)); // special case of deletion including the padding base of consecutive indel
|
||||
altBases = ArrayUtils.addAll(altBases, nextVC.getAlternateAllele(0).getBases());
|
||||
|
||||
int iii = 0;
|
||||
if( refBases.length == altBases.length ) { // insertion + deletion of same length creates an MNP --> trim common prefix bases off the beginning of the allele
|
||||
while( iii < refBases.length && refBases[iii] == altBases[iii] ) { iii++; }
|
||||
if ( iii == refBases.length ) {
|
||||
// we've become a null allele, such as with CA/C + A/AA -> CA/CA => after trimming there's nothing left
|
||||
// so return a null variant context so we can eliminate the variants from consideration
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
final Allele refAllele = Allele.create( ArrayUtils.subarray(refBases, iii, refBases.length), true );
|
||||
final Allele altAllele = Allele.create( ArrayUtils.subarray(altBases, iii, altBases.length), false );
|
||||
return new VariantContextBuilder("merged", thisVC.getChr(), thisVC.getStart() + iii, nextVC.getEnd(), Arrays.asList(refAllele, altAllele)).make();
|
||||
}
|
||||
|
||||
/**
|
||||
* Update the event maps in all haplotypes to replace a replacement of update1 and 2 with replacement
|
||||
*
|
||||
* @param haplotypes the haplotypes whose event maps we need to update
|
||||
* @param startPosKeySet a sorted set of start positions that we must update
|
||||
* @param replacement a VariantContext to replace update1 and update2 with. Can be null, indicating that we just want to remove update1 and update2
|
||||
* @param update1 the first VC we want to update
|
||||
* @param update2 the second VC we want to update
|
||||
*/
|
||||
private void replaceVariantContextsInMap(final List<Haplotype> haplotypes,
|
||||
final TreeSet<Integer> startPosKeySet,
|
||||
final VariantContext replacement,
|
||||
final VariantContext update1, final VariantContext update2) {
|
||||
// remove the old event from the eventMap on every haplotype and the start pos key set, replace with merged event
|
||||
for( final Haplotype h : haplotypes ) {
|
||||
// if we had both events, add replacement. In some cases the haplotype may not have both
|
||||
// events but they were still merged because the haplotype isn't a particularly informative
|
||||
// haplotype in any case. The order of operations here is important because we are modifying the map
|
||||
final boolean shouldAdd = h.getEventMap().containsKey(update1.getStart()) && h.getEventMap().containsKey(update2.getStart());
|
||||
h.getEventMap().remove(update1.getStart());
|
||||
h.getEventMap().remove(update2.getStart());
|
||||
if ( shouldAdd && replacement != null ) {
|
||||
h.getEventMap().addVC(replacement, false); // cannot merge we other events at the same position
|
||||
}
|
||||
}
|
||||
|
||||
startPosKeySet.remove(update1.getStart());
|
||||
startPosKeySet.remove(update2.getStart());
|
||||
if ( replacement != null ) startPosKeySet.add(replacement.getStart());
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE
|
||||
* SOFTWARE LICENSE AGREEMENT
|
||||
* FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 415 Main Street, Cambridge, MA 02142 (“BROAD”) and the LICENSEE and is effective at the date the downloading is completed (“EFFECTIVE DATE”).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK3 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute.org/gatk on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. LICENSEE hereby automatically grants to BROAD a non-exclusive, royalty-free, irrevocable license to any LICENSEE bug fixes or modifications to the PROGRAM with unlimited rights to sublicense and/or distribute. LICENSEE agrees to provide any such modifications and bug fixes to BROAD promptly upon their creation.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. PHONE-HOME FEATURE
|
||||
* LICENSEE expressly acknowledges that the PROGRAM contains an embedded automatic reporting system (“PHONE-HOME”) which is enabled by default upon download. Unless LICENSEE requests disablement of PHONE-HOME, LICENSEE agrees that BROAD may collect limited information transmitted by PHONE-HOME regarding LICENSEE and its use of the PROGRAM. Such information shall include LICENSEE’S user identification, version number of the PROGRAM and tools being run, mode of analysis employed, and any error reports generated during run-time. Collection of such information is used by BROAD solely to monitor usage rates, fulfill reporting requirements to BROAD funding agencies, drive improvements to the PROGRAM, and facilitate adjustments to PROGRAM-related documentation.
|
||||
*
|
||||
* 4. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012-2014 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK3 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 5. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 6. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 7. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 8. MISCELLANEOUS
|
||||
* 8.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 8.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 8.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 8.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 8.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 8.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 8.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.gatk.tools.walkers.haplotypecaller;
|
||||
|
||||
import org.broadinstitute.gatk.utils.GenomeLoc;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* Baseclass for code that wants to merge variants together in the haplotype caller
|
||||
*
|
||||
* This root class is basically a no-op, and can be used to not do any merging
|
||||
*/
|
||||
public class MergeVariantsAcrossHaplotypes {
|
||||
/**
|
||||
* Merge variants across the haplotypes, updating the haplotype event maps and startPos set as appropriate
|
||||
*
|
||||
* @param haplotypes a list of haplotypes whose events we want to merge
|
||||
* @param readLikelihoods map from sample name -> read likelihoods for each haplotype
|
||||
* @param startPosKeySet a set of starting positions of all events among the haplotypes
|
||||
* @param ref the reference bases
|
||||
* @param refLoc the span of the reference bases
|
||||
* @return true if anything was merged
|
||||
*/
|
||||
public boolean merge( final List<Haplotype> haplotypes,
|
||||
final ReadLikelihoods<Haplotype> readLikelihoods,
|
||||
final TreeSet<Integer> startPosKeySet,
|
||||
final byte[] ref,
|
||||
final GenomeLoc refLoc ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -56,17 +56,17 @@ import com.google.java.contract.Requires;
|
|||
import htsjdk.samtools.SAMUtils;
|
||||
import htsjdk.variant.variantcontext.Allele;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.IndexedAlleleList;
|
||||
import org.broadinstitute.gatk.tools.walkers.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.AlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.IndexedAlleleList;
|
||||
import org.broadinstitute.gatk.utils.genotyper.SampleList;
|
||||
import org.broadinstitute.gatk.utils.MathUtils;
|
||||
import org.broadinstitute.gatk.utils.QualityUtils;
|
||||
import org.broadinstitute.gatk.utils.exceptions.UserException;
|
||||
import org.broadinstitute.gatk.utils.genotyper.ReadLikelihoods;
|
||||
import org.broadinstitute.gatk.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.gatk.utils.pairhmm.*;
|
||||
import org.broadinstitute.gatk.utils.recalibration.covariates.RepeatCovariate;
|
||||
import org.broadinstitute.gatk.utils.recalibration.covariates.RepeatLengthCovariate;
|
||||
import org.broadinstitute.gatk.engine.recalibration.covariates.RepeatCovariate;
|
||||
import org.broadinstitute.gatk.engine.recalibration.covariates.RepeatLengthCovariate;
|
||||
import org.broadinstitute.gatk.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -78,8 +78,6 @@ import java.util.*;
|
|||
public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalculationEngine {
|
||||
private final static Logger logger = Logger.getLogger(PairHMMLikelihoodCalculationEngine.class);
|
||||
|
||||
public static final byte BASE_QUALITY_SCORE_THRESHOLD = (byte) 18; // Base quals less than this value are squashed down to min possible qual
|
||||
|
||||
private final byte constantGCP;
|
||||
|
||||
private final double log10globalReadMismappingRate;
|
||||
|
|
@ -189,7 +187,7 @@ public class PairHMMLikelihoodCalculationEngine implements ReadLikelihoodCalcula
|
|||
private void capMinimumReadQualities(GATKSAMRecord read, byte[] readQuals, byte[] readInsQuals, byte[] readDelQuals) {
|
||||
for( int kkk = 0; kkk < readQuals.length; kkk++ ) {
|
||||
readQuals[kkk] = (byte) Math.min( 0xff & readQuals[kkk], read.getMappingQuality()); // cap base quality by mapping quality, as in UG
|
||||
readQuals[kkk] = ( readQuals[kkk] < BASE_QUALITY_SCORE_THRESHOLD ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
readQuals[kkk] = ( readQuals[kkk] < PairHMM.BASE_QUALITY_SCORE_THRESHOLD ? QualityUtils.MIN_USABLE_Q_SCORE : readQuals[kkk] );
|
||||
readInsQuals[kkk] = ( readInsQuals[kkk] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : readInsQuals[kkk] );
|
||||
readDelQuals[kkk] = ( readDelQuals[kkk] < QualityUtils.MIN_USABLE_Q_SCORE ? QualityUtils.MIN_USABLE_Q_SCORE : readDelQuals[kkk] );
|
||||
}
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue