Merge remote-tracking branch 'unstable/master'
This commit is contained in:
commit
72492bb875
|
|
@ -103,14 +103,14 @@ for arg in "${@}" ; do
|
|||
mvn_args="${mvn_args} -Dsting.packagetests.enabled=true"
|
||||
mvn_args="${mvn_args} -Dsting.packagecommittests.skipped=false"
|
||||
|
||||
# TODO: This runs only the pipeline tests (full, non-dry run), but not the commit tests for Queue.
|
||||
# TODO: This runs only the queue tests (full, non-dry run), but not the commit tests for Queue.
|
||||
elif [[ "${arg}" == "queuefull.binary.release.tests" ]] ; then
|
||||
local_repo="sitetemprepo"
|
||||
mvn_args="install -Dmaven.repo.local=${local_repo} && mvn verify"
|
||||
mvn_args="${mvn_args} -Dmaven.repo.local=${local_repo}"
|
||||
mvn_args="${mvn_args} -Dsting.packagetests.enabled=true"
|
||||
mvn_args="${mvn_args} -Dsting.packagepipelinetests.skipped=false"
|
||||
mvn_args="${mvn_args} -Dsting.pipelinetests.run=true"
|
||||
mvn_args="${mvn_args} -Dsting.packagequeuetests.skipped=false"
|
||||
mvn_args="${mvn_args} -Dsting.queuetests.run=true"
|
||||
|
||||
elif [[ "${arg}" == "committests" ]] ; then
|
||||
mvn_args="verify -Dsting.committests.skipped=false"
|
||||
|
|
@ -130,11 +130,11 @@ for arg in "${@}" ; do
|
|||
elif [[ "${arg}" == "knowledgebasetest" ]] ; then
|
||||
mvn_args="verify -Dsting.knowledgebasetests.skipped=false"
|
||||
|
||||
elif [[ "${arg}" == "pipelinetest" ]] ; then
|
||||
mvn_args="verify -Dsting.pipelinetests.skipped=false"
|
||||
elif [[ "${arg}" == "queuetest" ]] ; then
|
||||
mvn_args="verify -Dsting.queuetests.skipped=false"
|
||||
|
||||
elif [[ "${arg}" == "pipelinetestrun" ]] ; then
|
||||
mvn_args="verify -Dsting.pipelinetests.skipped=false -Dsting.pipelinetests.run=true"
|
||||
elif [[ "${arg}" == "queuetestrun" ]] ; then
|
||||
mvn_args="verify -Dsting.queuetests.skipped=false -Dsting.queuetests.run=true"
|
||||
|
||||
elif [[ "${arg}" == "fasttest" ]] ; then
|
||||
mvn_args="verify -Dsting.committests.skipped=false -pl private/gatk-private -am -Dresource.bundle.skip=true"
|
||||
|
|
|
|||
28
pom.xml
28
pom.xml
|
|
@ -13,7 +13,7 @@
|
|||
<parent>
|
||||
<groupId>org.broadinstitute.sting</groupId>
|
||||
<artifactId>sting-root</artifactId>
|
||||
<version>3.0</version>
|
||||
<version>3.1</version>
|
||||
<relativePath>public/sting-root</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
@ -48,7 +48,7 @@
|
|||
<sting.packagecommittests.skipped>true</sting.packagecommittests.skipped>
|
||||
<sting.packageunittests.skipped>${sting.packagecommittests.skipped}</sting.packageunittests.skipped>
|
||||
<sting.packageintegrationtests.skipped>${sting.packagecommittests.skipped}</sting.packageintegrationtests.skipped>
|
||||
<sting.packagepipelinetests.skipped>${sting.packagecommittests.skipped}</sting.packagepipelinetests.skipped>
|
||||
<sting.packagequeuetests.skipped>${sting.packagecommittests.skipped}</sting.packagequeuetests.skipped>
|
||||
<sting.packagelargescaletests.skipped>true</sting.packagelargescaletests.skipped>
|
||||
<sting.packageknowledgebasetests.skipped>true</sting.packageknowledgebasetests.skipped>
|
||||
|
||||
|
|
@ -62,7 +62,7 @@
|
|||
<sting.serialcommittests.skipped>true</sting.serialcommittests.skipped>
|
||||
<sting.serialunittests.skipped>${sting.serialcommittests.skipped}</sting.serialunittests.skipped>
|
||||
<sting.serialintegrationtests.skipped>${sting.serialcommittests.skipped}</sting.serialintegrationtests.skipped>
|
||||
<sting.serialpipelinetests.skipped>${sting.serialcommittests.skipped}</sting.serialpipelinetests.skipped>
|
||||
<sting.serialqueuetests.skipped>${sting.serialcommittests.skipped}</sting.serialqueuetests.skipped>
|
||||
<sting.seriallargescaletests.skipped>true</sting.seriallargescaletests.skipped>
|
||||
<sting.serialknowledgebasetests.skipped>true</sting.serialknowledgebasetests.skipped>
|
||||
</properties>
|
||||
|
|
@ -340,6 +340,18 @@
|
|||
<exclude>org.broadinstitute.sting:*:tar.bz2:example-resources</exclude>
|
||||
</excludes>
|
||||
</artifactSet>
|
||||
<filters>
|
||||
<!--
|
||||
NOTE: Removing cofoja's annotation service to allow "javac -cp GenomeAnalysisTK.jar ..." without
|
||||
needing an additional -proc:none argument. Using *:* to catch shaded GATK in Queue package.
|
||||
-->
|
||||
<filter>
|
||||
<artifact>*:*</artifact>
|
||||
<excludes>
|
||||
<exclude>META-INF/services/javax.annotation.processing.Processor</exclude>
|
||||
</excludes>
|
||||
</filter>
|
||||
</filters>
|
||||
<transformers>
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
|
||||
<manifestEntries>
|
||||
|
|
@ -561,7 +573,7 @@
|
|||
<sting.packagetests.artifactId>${sting.packagetests.artifactId}</sting.packagetests.artifactId>
|
||||
<sting.packagetests.testClasses>${project.build.testOutputDirectory}</sting.packagetests.testClasses>
|
||||
<sting.packagetests.basedir>${project.basedir}</sting.packagetests.basedir>
|
||||
<sting.pipelinetests.run>${sting.pipelinetests.run}</sting.pipelinetests.run>
|
||||
<sting.queuetests.run>${sting.queuetests.run}</sting.queuetests.run>
|
||||
<maven.surefire.debug>${maven.surefire.debug}</maven.surefire.debug>
|
||||
<maven.failsafe.debug>${maven.failsafe.debug}</maven.failsafe.debug>
|
||||
</properties>
|
||||
|
|
@ -613,7 +625,7 @@
|
|||
</configuration>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>package-pipelinetests</id>
|
||||
<id>package-queuetests</id>
|
||||
<goals>
|
||||
<goal>integration-test</goal>
|
||||
<goal>verify</goal>
|
||||
|
|
@ -622,11 +634,11 @@
|
|||
<goals>
|
||||
<goal>verify</goal>
|
||||
</goals>
|
||||
<reportsDirectory>${project.build.directory}/invoker-reports/pipeline/${it.test}</reportsDirectory>
|
||||
<skipInvocation>${sting.packagepipelinetests.skipped}</skipInvocation>
|
||||
<reportsDirectory>${project.build.directory}/invoker-reports/queuetest/${it.test}</reportsDirectory>
|
||||
<skipInvocation>${sting.packagequeuetests.skipped}</skipInvocation>
|
||||
<properties>
|
||||
<integrationtests.profile.enabled>true</integrationtests.profile.enabled>
|
||||
<sting.packagepipelinetests.skipped>${sting.packagepipelinetests.skipped}</sting.packagepipelinetests.skipped>
|
||||
<sting.packagequeuetests.skipped>${sting.packagequeuetests.skipped}</sting.packagequeuetests.skipped>
|
||||
</properties>
|
||||
</configuration>
|
||||
</execution>
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<groupId>org.broadinstitute.sting</groupId>
|
||||
<artifactId>sting-aggregator</artifactId>
|
||||
<version>3.0</version>
|
||||
<version>3.1</version>
|
||||
<relativePath>../..</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
@ -99,7 +99,7 @@
|
|||
<id>package-knowledgebasetests</id>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>package-pipelinetests</id>
|
||||
<id>package-queuetests</id>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
|
|
|||
|
|
@ -53,13 +53,10 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
|
|
@ -81,7 +78,7 @@ import java.util.*;
|
|||
* <h3>Caveat</h3>
|
||||
* <p>The Fisher Strand test may not be calculated for certain complex indel cases or for multi-allelic sites.</p>
|
||||
*/
|
||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
public class FisherStrand extends StrandBiasTest implements StandardAnnotation, ActiveRegionBasedAnnotation {
|
||||
private final static boolean ENABLE_DEBUGGING = false;
|
||||
private final static Logger logger = Logger.getLogger(FisherStrand.class);
|
||||
|
||||
|
|
@ -100,7 +97,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
return null;
|
||||
|
||||
if ( vc.hasGenotypes() ) {
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes() );
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
|
||||
if ( tableFromPerSampleAnnotations != null ) {
|
||||
return pValueForBestTable(tableFromPerSampleAnnotations, null);
|
||||
}
|
||||
|
|
@ -116,8 +113,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
|
||||
// either SNP with no alignment context, or indels: per-read likelihood map needed
|
||||
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc);
|
||||
// logger.info("VC " + vc);
|
||||
// printTable(table, 0.0);
|
||||
//logger.info("VC " + vc);
|
||||
//printTable(table, 0.0);
|
||||
return pValueForBestTable(table, null);
|
||||
}
|
||||
else
|
||||
|
|
@ -126,45 +123,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the FisherStrand table by retrieving the per-sample strand bias annotation and adding them together
|
||||
* @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
|
||||
* @return the table used for the FisherStrand p-value calculation, will be null if none of the genotypes contain the per-sample SB annotation
|
||||
*/
|
||||
private int[][] getTableFromSamples( final GenotypesContext genotypes ) {
|
||||
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
|
||||
|
||||
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
|
||||
boolean foundData = false;
|
||||
|
||||
for( final Genotype g : genotypes ) {
|
||||
if( g.isNoCall() || ! g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) )
|
||||
continue;
|
||||
|
||||
foundData = true;
|
||||
final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME);
|
||||
final int[] data = encodeSBBS(sbbsString);
|
||||
if ( passesMinimumThreshold(data) ) {
|
||||
for( int index = 0; index < sbArray.length; index++ ) {
|
||||
sbArray[index] += data[index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ( foundData ? decodeSBBS(sbArray) : null );
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this strand data array pass the minimum threshold for inclusion?
|
||||
*
|
||||
* @param data the array
|
||||
* @return true if it passes the minimum threshold, false otherwise
|
||||
*/
|
||||
private static boolean passesMinimumThreshold(final int[] data) {
|
||||
// the ref and alt totals must each be greater than MIN_COUNT
|
||||
return data[0] + data[1] > MIN_COUNT && data[2] + data[3] > MIN_COUNT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
|
||||
*
|
||||
|
|
@ -190,7 +148,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
* @param pValue
|
||||
* @return a hash map from FS -> phred-scaled pValue
|
||||
*/
|
||||
private Map<String, Object> annotationForOneTable(final double pValue) {
|
||||
protected Map<String, Object> annotationForOneTable(final double pValue) {
|
||||
final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(Math.max(pValue, MIN_PVALUE))); // prevent INFINITYs
|
||||
return Collections.singletonMap(FS, value);
|
||||
}
|
||||
|
|
@ -218,36 +176,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
list.add(table[1][1]);
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to parse the genotype annotation into the SB annotation array
|
||||
* @param string the string that is returned by genotype.getAnnotation("SB")
|
||||
* @return the array used by the per-sample Strand Bias annotation
|
||||
*/
|
||||
private static int[] encodeSBBS( final String string ) {
|
||||
final int[] array = new int[4];
|
||||
final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
|
||||
for( int index = 0; index < 4; index++ ) {
|
||||
array[index] = Integer.parseInt(tokenizer.nextToken());
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to turn the SB annotation array into the FisherStrand table
|
||||
* @param array the array used by the per-sample Strand Bias annotation
|
||||
* @return the table used by the FisherStrand annotation
|
||||
*/
|
||||
private static int[][] decodeSBBS( final int[] array ) {
|
||||
if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); }
|
||||
final int[][] table = new int[2][2];
|
||||
table[0][0] = array[0];
|
||||
table[0][1] = array[1];
|
||||
table[1][0] = array[2];
|
||||
table[1][1] = array[3];
|
||||
return table;
|
||||
}
|
||||
|
||||
private Double pValueForContingencyTable(int[][] originalTable) {
|
||||
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
|
||||
|
||||
|
|
@ -419,7 +347,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
final GATKSAMRecord read = el.getKey();
|
||||
updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt);
|
||||
}
|
||||
if ( passesMinimumThreshold(myTable) )
|
||||
if ( passesMinimumThreshold(myTable, MIN_COUNT) )
|
||||
copyToMainTable(myTable, table);
|
||||
}
|
||||
|
||||
|
|
@ -464,7 +392,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
|
|||
|
||||
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt);
|
||||
}
|
||||
if ( passesMinimumThreshold(myTable) )
|
||||
|
||||
if ( passesMinimumThreshold( myTable, MIN_COUNT ) )
|
||||
copyToMainTable(myTable, table);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,128 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.GenotypesContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Class of tests to detect strand bias.
|
||||
*/
|
||||
public abstract class StrandBiasTest extends InfoFieldAnnotation {
|
||||
/**
|
||||
* Create the contingency table by retrieving the per-sample strand bias annotation and adding them together
|
||||
* @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
|
||||
* @param minCount minimum threshold for the sample strand bias counts for each ref and alt.
|
||||
* If both ref and alt counts are above minCount the whole sample strand bias is added to the resulting table
|
||||
* @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation
|
||||
*/
|
||||
protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) {
|
||||
if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
|
||||
|
||||
final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
|
||||
boolean foundData = false;
|
||||
|
||||
for( final Genotype g : genotypes ) {
|
||||
if( g.isNoCall() || ! g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) )
|
||||
continue;
|
||||
|
||||
foundData = true;
|
||||
final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME);
|
||||
final int[] data = encodeSBBS(sbbsString);
|
||||
if ( passesMinimumThreshold(data, minCount) ) {
|
||||
for( int index = 0; index < sbArray.length; index++ ) {
|
||||
sbArray[index] += data[index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ( foundData ? decodeSBBS(sbArray) : null );
|
||||
}
|
||||
/**
|
||||
* Does this strand data array pass the minimum threshold for inclusion?
|
||||
*
|
||||
* @param data the array
|
||||
* @minCount The minimum threshold of counts in the array
|
||||
* @return true if it passes the minimum threshold, false otherwise
|
||||
*/
|
||||
protected static boolean passesMinimumThreshold(final int[] data, final int minCount) {
|
||||
// the ref and alt totals must each be greater than MIN_COUNT
|
||||
return data[0] + data[1] > minCount && data[2] + data[3] > minCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to parse the genotype annotation into the SB annotation array
|
||||
* @param string the string that is returned by genotype.getAnnotation("SB")
|
||||
* @return the array used by the per-sample Strand Bias annotation
|
||||
*/
|
||||
private static int[] encodeSBBS( final String string ) {
|
||||
final int[] array = new int[4];
|
||||
final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
|
||||
for( int index = 0; index < 4; index++ ) {
|
||||
array[index] = Integer.parseInt(tokenizer.nextToken());
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to turn the SB annotation array into a contingency table
|
||||
* @param array the array used by the per-sample Strand Bias annotation
|
||||
* @return the table used by the StrandOddsRatio annotation
|
||||
*/
|
||||
private static int[][] decodeSBBS( final int[] array ) {
|
||||
if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); }
|
||||
final int[][] table = new int[2][2];
|
||||
table[0][0] = array[0];
|
||||
table[0][1] = array[1];
|
||||
table[1][0] = array[2];
|
||||
table[1][1] = array[3];
|
||||
return table;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,150 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
import org.broadinstitute.variant.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Symmetric Odds Ratio to detect strand bias
|
||||
*
|
||||
* <p> Odds Ratios in the 2x2 contingency table below are R = (X[0][0] * X[1][1]) / (X[0][1] * X[1][0]) and its inverse
|
||||
* + strand - strand
|
||||
* Ref X[0][0] X[0][1]
|
||||
 * Alt            X[1][0]         X[1][1]
|
||||
* The sum R + 1/R is used to detect a difference in strand bias for ref and for alt (the sum makes it symmetric):
|
||||
* A high value is indicative of large difference where one entry is very small compared to the others.
|
||||
* </p>
|
||||
*/
|
||||
public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBasedAnnotation {
|
||||
private final static double AUGMENTATION_CONSTANT = 0.1;
|
||||
private static final int MIN_COUNT = 0;
|
||||
|
||||
private static final String SOR = "SOR";
|
||||
|
||||
public Map<String, Object> annotate(final RefMetaDataTracker tracker,
|
||||
final AnnotatorCompatible walker,
|
||||
final ReferenceContext ref,
|
||||
final Map<String,AlignmentContext> stratifiedContexts,
|
||||
final VariantContext vc,
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedPerReadAlleleLikelihoodMap) {
|
||||
if ( !vc.isVariant() )
|
||||
return null;
|
||||
|
||||
if ( vc.hasGenotypes() ) {
|
||||
final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
|
||||
if ( tableFromPerSampleAnnotations != null ) {
|
||||
final double ratio = symmetricOddsRatio(tableFromPerSampleAnnotations);
|
||||
return annotationForOneTable(ratio);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the symmetric odds ratio of a table after augmentation.
|
||||
* Augmentation avoids quotient by zero.
|
||||
*
|
||||
* @param originalTable The table before augmentation
|
||||
* @return the symmetric odds ratio
|
||||
*/
|
||||
final protected double symmetricOddsRatio(final int[][] originalTable) {
|
||||
final double[][] augmentedTable = augmentContingencyTable(originalTable);
|
||||
|
||||
double ratio = 0;
|
||||
|
||||
ratio += (augmentedTable[0][0] / augmentedTable[0][1]) * (augmentedTable[1][1] / augmentedTable[1][0]);
|
||||
ratio += (augmentedTable[0][1] / augmentedTable[0][0]) * (augmentedTable[1][0] / augmentedTable[1][1]);
|
||||
|
||||
return ratio;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adds the small value AUGMENTATION_CONSTANT to all the entries of the table.
|
||||
*
|
||||
* @param table the table to augment
|
||||
* @return the augmented table
|
||||
*/
|
||||
private static double[][] augmentContingencyTable(final int[][] table) {
|
||||
double[][] augmentedTable = new double[2][2];
|
||||
for ( int i = 0; i < 2; i++ ) {
|
||||
for ( int j = 0; j < 2; j++ )
|
||||
augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT;
|
||||
}
|
||||
|
||||
return augmentedTable;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an annotation result given a ratio
|
||||
*
|
||||
* @param ratio the symmetric odds ratio of the contingency table
|
||||
* @return a hash map from SOR
|
||||
*/
|
||||
protected Map<String, Object> annotationForOneTable(final double ratio) {
|
||||
final Object value = String.format("%.3f", ratio);
|
||||
return Collections.singletonMap(SOR, value);
|
||||
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Collections.singletonList(new VCFInfoHeaderLine(SOR, 1, VCFHeaderLineType.Float, "Symmetric Odds Ratio of 2x2 contingency table to detect strand bias"));
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Collections.singletonList(SOR);
|
||||
}
|
||||
}
|
||||
|
|
@ -155,4 +155,8 @@ public class GraphBasedLikelihoodCalculationEngine implements LikelihoodCalculat
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -289,6 +289,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
@Argument(fullName="dontRecoverDanglingTails", shortName="dontRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false)
|
||||
protected boolean dontRecoverDanglingTails = false;
|
||||
|
||||
@Advanced
|
||||
@Argument(fullName="consensus", shortName="consensus", doc="In 1000G consensus mode. Inject all provided alleles to the assembly graph but don't forcibly genotype all of them.", required = false)
|
||||
protected boolean consensusMode = false;
|
||||
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
// general advanced arguments to control haplotype caller behavior
|
||||
// -----------------------------------------------------------------------------------------------
|
||||
|
|
@ -575,7 +579,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
|
||||
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
|
||||
// HC GGA mode depends critically on EMIT_ALL_SITES being set for the UG engine
|
||||
UAC.OutputMode = SCAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
|
||||
UAC.OutputMode = SCAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES)
|
||||
? UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES : UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
|
||||
|
||||
|
|
@ -598,6 +602,10 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
|
||||
}
|
||||
|
||||
if( SCAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) && consensusMode ) {
|
||||
throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
|
||||
}
|
||||
|
||||
// initialize the output VCF header
|
||||
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
|
||||
|
||||
|
|
@ -878,7 +886,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
regionForGenotyping.getLocation(),
|
||||
getToolkit().getGenomeLocParser(),
|
||||
metaDataTracker,
|
||||
activeAllelesToGenotype, emitReferenceConfidence() );
|
||||
( consensusMode ? Collections.<VariantContext>emptyList() : activeAllelesToGenotype ),
|
||||
emitReferenceConfidence() );
|
||||
|
||||
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
|
||||
if ( bamWriter != null ) {
|
||||
|
|
@ -1051,7 +1060,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
referenceConfidenceModel.close();
|
||||
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed
|
||||
//TODO (open & close) at the walker, not the engine.
|
||||
//likelihoodCalculationEngine.close();
|
||||
likelihoodCalculationEngine.close();
|
||||
logger.info("Ran local assembly on " + result + " active regions");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -89,4 +89,6 @@ public interface LikelihoodCalculationEngine {
|
|||
*/
|
||||
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods(AssemblyResultSet assemblyResultSet,
|
||||
Map<String, List<GATKSAMRecord>> perSampleReadList);
|
||||
|
||||
public void close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -213,16 +213,18 @@ public abstract class LocalAssemblyEngine {
|
|||
final Map<SeqGraph,AssemblyResult> assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
|
||||
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
|
||||
final Set<Haplotype> returnHaplotypes = new LinkedHashSet<>();
|
||||
returnHaplotypes.add( refHaplotype );
|
||||
|
||||
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
|
||||
final ArrayList<KBestHaplotypeFinder> finders = new ArrayList<>(graphs.size());
|
||||
|
||||
for( final SeqGraph graph : graphs ) {
|
||||
final SeqVertex source = graph.getReferenceSourceVertex();
|
||||
final SeqVertex sink = graph.getReferenceSinkVertex();
|
||||
if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph);
|
||||
final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph,source,sink);
|
||||
finders.add(haplotypeFinder);
|
||||
final Iterator<KBestHaplotype> bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
|
||||
|
||||
while (bestHaplotypes.hasNext()) {
|
||||
final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
|
||||
final Haplotype h = kBestHaplotype.haplotype();
|
||||
|
|
@ -256,9 +258,19 @@ public abstract class LocalAssemblyEngine {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
if ( returnHaplotypes.size() < returnHaplotypes.size() )
|
||||
logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
|
||||
// Make sure that the ref haplotype is amongst the return haplotypes and calculate its score as
|
||||
// the first returned by any finder.
|
||||
if (!returnHaplotypes.contains(refHaplotype)) {
|
||||
double refScore = Double.NaN;
|
||||
for (final KBestHaplotypeFinder finder : finders) {
|
||||
final double candidate = finder.score(refHaplotype);
|
||||
if (Double.isNaN(candidate)) continue;
|
||||
refScore = candidate;
|
||||
break;
|
||||
}
|
||||
refHaplotype.setScore(refScore);
|
||||
returnHaplotypes.add(refHaplotype);
|
||||
}
|
||||
|
||||
if( debug ) {
|
||||
if( returnHaplotypes.size() > 1 ) {
|
||||
|
|
|
|||
|
|
@ -90,6 +90,18 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
return new LoglessPairHMM();
|
||||
else
|
||||
return new CnyPairHMM();
|
||||
case VECTOR_LOGLESS_CACHING:
|
||||
try
|
||||
{
|
||||
return new VectorLoglessPairHMM();
|
||||
}
|
||||
catch(UnsatisfiedLinkError ule)
|
||||
{
|
||||
logger.debug("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
|
||||
return new LoglessPairHMM();
|
||||
}
|
||||
case DEBUG_VECTOR_LOGLESS_CACHING:
|
||||
return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING);
|
||||
case ARRAY_LOGLESS:
|
||||
if (noFpga || !CnyPairHMM.isAvailable())
|
||||
return new ArrayLoglessPairHMM();
|
||||
|
|
@ -162,10 +174,13 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if ( likelihoodsStream != null ) likelihoodsStream.close();
|
||||
pairHMMThreadLocal.get().close();
|
||||
}
|
||||
|
||||
|
||||
private void writeDebugLikelihoods(final GATKSAMRecord processedRead, final Haplotype haplotype, final double log10l){
|
||||
if ( WRITE_LIKELIHOODS_TO_FILE ) {
|
||||
likelihoodsStream.printf("%s %s %s %s %s %s %f%n",
|
||||
|
|
@ -316,8 +331,8 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
int X_METRIC_LENGTH = 0;
|
||||
for( final Map.Entry<String, List<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
|
||||
for( final GATKSAMRecord read : sample.getValue() ) {
|
||||
final int readLength = read.getReadLength();
|
||||
if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
|
||||
final int readLength = read.getReadLength();
|
||||
if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
|
||||
}
|
||||
}
|
||||
int Y_METRIC_LENGTH = 0;
|
||||
|
|
@ -327,7 +342,12 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
}
|
||||
|
||||
// initialize arrays to hold the probabilities of being in the match, insertion and deletion cases
|
||||
pairHMMThreadLocal.get().initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
|
||||
pairHMMThreadLocal.get().initialize(haplotypes, perSampleReadList, X_METRIC_LENGTH, Y_METRIC_LENGTH);
|
||||
}
|
||||
|
||||
private void finalizePairHMM()
|
||||
{
|
||||
pairHMMThreadLocal.get().finalizeRegion();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -341,12 +361,14 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
// Add likelihoods for each sample's reads to our stratifiedReadMap
|
||||
final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = new LinkedHashMap<>();
|
||||
for( final Map.Entry<String, List<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
|
||||
// evaluate the likelihood of the reads given those haplotypes
|
||||
// evaluate the likelihood of the reads given those haplotypes
|
||||
final PerReadAlleleLikelihoodMap map = computeReadLikelihoods(haplotypes, sampleEntry.getValue());
|
||||
|
||||
map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE);
|
||||
stratifiedReadMap.put(sampleEntry.getKey(), map);
|
||||
}
|
||||
//Used mostly by the JNI implementation(s) to free arrays
|
||||
finalizePairHMM();
|
||||
|
||||
return stratifiedReadMap;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -79,4 +79,9 @@ public class RandomLikelihoodCalculationEngine implements LikelihoodCalculationE
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,21 +45,21 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.PriorityQueue;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* K-best sub-haplotype finder that selects the best solutions out of a collection of sub-haplotype finders.
|
||||
*
|
||||
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
||||
*/
|
||||
class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||
class AggregatedSubHaplotypeFinder<F extends KBestSubHaplotypeFinder> implements KBestSubHaplotypeFinder {
|
||||
|
||||
/**
|
||||
* Collection of subFinders that provided the actual solutions.
|
||||
*/
|
||||
private final Collection<? extends KBestSubHaplotypeFinder> subFinders;
|
||||
protected final Collection<F> subFinders;
|
||||
|
||||
/**
|
||||
* Flag indicating whether the sub-finders have been processed or not.
|
||||
|
|
@ -89,17 +89,53 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
|||
* Creates a new aggregated sub-haplotype finder given its sub-finders.
|
||||
* @param finders set of sub-finders.
|
||||
*/
|
||||
public AggregatedSubHaplotypeFinder(final Collection<? extends KBestSubHaplotypeFinder> finders) {
|
||||
public AggregatedSubHaplotypeFinder(final Collection<F> finders) {
|
||||
if (finders == null) throw new IllegalArgumentException("finder collection cannot be null");
|
||||
this.subFinders = finders;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String id() {
|
||||
final StringBuilder resultBuilder = new StringBuilder();
|
||||
for (final KBestSubHaplotypeFinder subFinder : subFinders)
|
||||
resultBuilder.append(subFinder.id());
|
||||
return resultBuilder.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String label() {
|
||||
return "<OR>";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||
final int subFinderCount = subFinders.size();
|
||||
final String edgeCost = String.format("%.2f",-Math.log10((double) subFinderCount));
|
||||
final Set<Pair<? extends KBestSubHaplotypeFinder,String>> result = new LinkedHashSet<>(subFinderCount);
|
||||
for (final KBestSubHaplotypeFinder subFinder : subFinders)
|
||||
result.add(new Pair<>(subFinder,edgeCost));
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCount() {
|
||||
processSubFindersIfNeeded();
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double score(final byte[] bases, final int offset, final int length) {
|
||||
if (bases == null) throw new IllegalArgumentException("bases cannot be null");
|
||||
if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
|
||||
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
|
||||
if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
|
||||
for (final KBestSubHaplotypeFinder subFinder : subFinders) {
|
||||
final double score = subFinder.score(bases,offset,length);
|
||||
if (!Double.isNaN(score)) return score;
|
||||
}
|
||||
return Double.NaN;
|
||||
}
|
||||
|
||||
private void processSubFindersIfNeeded() {
|
||||
if (processedSubFinders) return;
|
||||
|
||||
|
|
@ -144,6 +180,11 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
|||
return rankedSubHaplotype.get(k);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Custom implementation of {@link KBestHaplotype} to encapsulate sub-finder results.
|
||||
*/
|
||||
|
|
@ -167,7 +208,7 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int score() {
|
||||
public double score() {
|
||||
return result.score();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ import com.google.java.contract.Requires;
|
|||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.jgrapht.EdgeFactory;
|
||||
import org.jgrapht.alg.CycleDetector;
|
||||
import org.jgrapht.graph.DefaultDirectedGraph;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -146,6 +147,39 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
|
|||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert this kmer graph to a simple sequence graph.
|
||||
*
|
||||
* Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
|
||||
* graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
|
||||
*
|
||||
* @return a newly allocated SequenceGraph
|
||||
*/
|
||||
public SeqGraph convertToSequenceGraph() {
|
||||
|
||||
final SeqGraph seqGraph = new SeqGraph(kmerSize);
|
||||
final Map<V, SeqVertex> vertexMap = new HashMap<>();
|
||||
|
||||
|
||||
// create all of the equivalent seq graph vertices
|
||||
for ( final V dv : vertexSet() ) {
|
||||
final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
|
||||
sv.setAdditionalInfo(dv.additionalInfo());
|
||||
vertexMap.put(dv, sv);
|
||||
seqGraph.addVertex(sv);
|
||||
}
|
||||
|
||||
// walk through the nodes and connect them to their equivalent seq vertices
|
||||
for( final E e : edgeSet() ) {
|
||||
final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
|
||||
final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
|
||||
//logger.info("Adding edge " + seqInV + " -> " + seqOutV);
|
||||
seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));
|
||||
}
|
||||
|
||||
return seqGraph;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pull out the additional sequence implied by traversing this node in the graph
|
||||
* @param v the vertex from which to pull out the additional base sequence
|
||||
|
|
@ -712,4 +746,13 @@ public class BaseGraph<V extends BaseVertex, E extends BaseEdge> extends Default
|
|||
if (!containsVertex(vertex)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks for the presence of directed cycles in the graph.
|
||||
*
|
||||
* @return {@code true} if the graph has cycles, {@code false} otherwise.
|
||||
*/
|
||||
public boolean hasCycles() {
|
||||
return new CycleDetector<>(this).detectCycles();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -122,7 +122,7 @@ public class CommonSuffixSplitter {
|
|||
} else {
|
||||
incomingTarget = prefixV;
|
||||
graph.addVertex(prefixV);
|
||||
graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 0));
|
||||
graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 1));
|
||||
edgesToRemove.add(out);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,11 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Represents a trivial k-best sub haplotype finder with no solutions.
|
||||
*
|
||||
|
|
@ -65,6 +70,21 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
|||
protected DeadEndKBestSubHaplotypeFinder() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public String id() {
|
||||
return "<DEAD>";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String label() {
|
||||
return "<DEAD>";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCount() {
|
||||
return 0;
|
||||
|
|
@ -77,4 +97,18 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
|||
else
|
||||
throw new IllegalArgumentException("k cannot be equal or greater to the haplotype count");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double score(final byte[] bases, final int offset, final int length) {
|
||||
if (bases == null) throw new IllegalArgumentException("bases cannot be null");
|
||||
if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
|
||||
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
|
||||
if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
|
||||
return Double.NaN;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,12 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Trivial k-best sub-haplotype finder where the source and sink vertex are the same one.
|
||||
*
|
||||
|
|
@ -67,6 +73,21 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
|
|||
singleHaplotypePath = new MyBestHaplotypePath(graph,vertex);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String id() {
|
||||
return "v" + singleHaplotypePath.head().getId();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String label() {
|
||||
return singleHaplotypePath.head().getSequenceString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getCount() {
|
||||
return 1;
|
||||
|
|
@ -81,6 +102,24 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
|
|||
return singleHaplotypePath;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
return singleHaplotypePath.isReference();
|
||||
}
|
||||
|
||||
@Override
|
||||
public double score(final byte[] bases, final int offset, final int length) {
|
||||
if (bases == null) throw new IllegalArgumentException("bases cannot be null");
|
||||
if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
|
||||
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
|
||||
if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
|
||||
final byte[] vertexBases = singleHaplotypePath.head().getSequence();
|
||||
if (length != vertexBases.length)
|
||||
return Double.NaN;
|
||||
else
|
||||
return Utils.equalRange(bases, offset, vertexBases, 0, length)? 0 : Double.NaN;
|
||||
}
|
||||
|
||||
/**
|
||||
* Custom extension of {@link KBestHaplotype} that implements the single solution behaviour.
|
||||
*/
|
||||
|
|
@ -120,7 +159,7 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int score() {
|
||||
public double score() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
|
|||
*
|
||||
* @return 0 or greater.
|
||||
*/
|
||||
public abstract int score();
|
||||
public abstract double score();
|
||||
|
||||
/**
|
||||
* Indicates whether this result is the reference haplotype.
|
||||
|
|
@ -122,6 +122,8 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
|
|||
public Haplotype haplotype() {
|
||||
if (haplotype != null) return haplotype;
|
||||
haplotype = new Haplotype(bases(),isReference());
|
||||
if (score() > 0)
|
||||
throw new IllegalStateException("score cannot be greater than 0: " + score());
|
||||
haplotype.setScore(score());
|
||||
return haplotype;
|
||||
}
|
||||
|
|
@ -152,7 +154,35 @@ public abstract class KBestHaplotype implements Comparable<KBestHaplotype> {
|
|||
*/
|
||||
public int compareTo(final KBestHaplotype other) {
|
||||
if (other == null) throw new IllegalArgumentException("the other object cannot be null");
|
||||
return - 1 * (score() - other.score());
|
||||
return - Double.compare(score(), other.score());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return haplotype().hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object other) {
|
||||
return other == null ? false: (other instanceof KBestHaplotype ? equals((KBestHaplotype)other) : false);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return haplotype().toString() + " Score = " + score();
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether both solutions are equal.
|
||||
* <p>
|
||||
* Both solutions are considered equal when the underlying haplotypes are equal. The path on the respective
|
||||
* graph might deffer though.
|
||||
* </p>
|
||||
*
|
||||
* @return {@code true} iff both haplotypes are the same (considering the ref state).
|
||||
*/
|
||||
protected boolean equals(final KBestHaplotype other) {
|
||||
return haplotype().equals(other.haplotype(),false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -45,8 +45,13 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
import org.jgrapht.alg.CycleDetector;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -233,7 +238,7 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
|
|||
}
|
||||
|
||||
@Override
|
||||
public KBestHaplotype get(int index) {
|
||||
public KBestHaplotype get(final int index) {
|
||||
if (index < 0 || index >= size())
|
||||
throw new IndexOutOfBoundsException();
|
||||
return topFinder.getKBest(index);
|
||||
|
|
@ -305,28 +310,28 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
|
|||
/**
|
||||
* Creates a finder from a vertex.
|
||||
*
|
||||
* @param source the source vertex for the finder.
|
||||
* @param vertex the source vertex for the finder.
|
||||
*
|
||||
* @return never {@code null}, perhaps a finder that returns no haplotypes though.
|
||||
*/
|
||||
protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex source) {
|
||||
KBestSubHaplotypeFinder node = finderByVertex.get(source);
|
||||
if (node == null) {
|
||||
if (sinks.contains(source))
|
||||
node = new EmptyPathHaplotypeFinderNode(graph,source);
|
||||
protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex vertex) {
|
||||
KBestSubHaplotypeFinder finder = finderByVertex.get(vertex);
|
||||
if (finder == null) {
|
||||
if (sinks.contains(vertex))
|
||||
finder = new EmptyPathHaplotypeFinderNode(graph,vertex);
|
||||
else {
|
||||
final Set<BaseEdge> outgoingEdges = graph.outgoingEdgesOf(source);
|
||||
final Set<BaseEdge> outgoingEdges = graph.outgoingEdgesOf(vertex);
|
||||
if (outgoingEdges.isEmpty())
|
||||
node = DeadEndKBestSubHaplotypeFinder.INSTANCE;
|
||||
finder = DeadEndKBestSubHaplotypeFinder.INSTANCE;
|
||||
else {
|
||||
final Map<BaseEdge,KBestSubHaplotypeFinder> undeadChildren = createChildrenFinders(outgoingEdges);
|
||||
node = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
|
||||
new RecursiveSubHaplotypeFinder(source,undeadChildren);
|
||||
finder = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
|
||||
new RecursiveSubHaplotypeFinder(graph,vertex,undeadChildren);
|
||||
}
|
||||
}
|
||||
finderByVertex.put(source, node);
|
||||
finderByVertex.put(vertex, finder);
|
||||
}
|
||||
return node;
|
||||
return finder;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -340,7 +345,7 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
|
|||
* @return never {@code null}, perhaps an empty map if there is no children with valid paths to any sink for this
|
||||
* finder.
|
||||
*/
|
||||
private Map<BaseEdge, KBestSubHaplotypeFinder> createChildrenFinders(Set<BaseEdge> baseEdges) {
|
||||
private Map<BaseEdge, KBestSubHaplotypeFinder> createChildrenFinders(final Set<BaseEdge> baseEdges) {
|
||||
final Map<BaseEdge,KBestSubHaplotypeFinder> result = new LinkedHashMap<>(baseEdges.size());
|
||||
for (final BaseEdge edge : baseEdges) {
|
||||
final KBestSubHaplotypeFinder targetFinder = createVertexFinder(graph.getEdgeTarget(edge));
|
||||
|
|
@ -349,4 +354,156 @@ public class KBestHaplotypeFinder extends AbstractList<KBestHaplotype> implement
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a DOT representation of search graph.
|
||||
*
|
||||
* @param out character stream printer where to print the DOT representation to.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code out} is {@code null}.
|
||||
*/
|
||||
public void printDOT(final PrintWriter out) {
|
||||
if (out == null)
|
||||
throw new IllegalArgumentException("the out writer cannot be null");
|
||||
out.println("digraph {");
|
||||
out.println(" rankdir = LR");
|
||||
out.println(" node [shape=box, margin=0.01]");
|
||||
out.println(" subgraph cluster_dummy { style = invis; x [label=\"\",shape=none,margin=0] }");
|
||||
final StringBuilder referenceCluster = new StringBuilder(1000);
|
||||
|
||||
referenceCluster.append(" subgraph cluster_ref {\n");
|
||||
referenceCluster.append(" node [penwidth=2]\n");
|
||||
for (final KBestSubHaplotypeFinder finder : finderByVertex.values() ) {
|
||||
final String id = finder.id();
|
||||
final String line = String.format(" %s [label=<%s>]",id,finder.label());
|
||||
if (finder.isReference())
|
||||
referenceCluster.append(" ").append(line).append('\n');
|
||||
else
|
||||
out.println(line);
|
||||
}
|
||||
referenceCluster.append(" }");
|
||||
out.println(referenceCluster.toString());
|
||||
|
||||
for (final KBestSubHaplotypeFinder finder : finderByVertex.values())
|
||||
for (final Pair<? extends KBestSubHaplotypeFinder,String> subFinderLabel : finder.subFinderLabels()) {
|
||||
final KBestSubHaplotypeFinder subFinder = subFinderLabel.getFirst();
|
||||
|
||||
final String edgeLabel = subFinderLabel.getSecond();
|
||||
out.println(String.format(" %s -> %s [label=%s]",finder.id(),subFinder.id(),edgeLabel));
|
||||
}
|
||||
out.println("}");
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a DOT representation of search graph.
|
||||
*
|
||||
* @param file file where to print the DOT representation to.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code file} is {@code null}.
|
||||
* @throws FileNotFoundException if {@code file} cannot be created or written.
|
||||
* @throws IllegalStateException if there was some trouble when writing the DOT representation.
|
||||
*/
|
||||
public void printDOT(final File file) throws FileNotFoundException {
|
||||
if (file == null)
|
||||
throw new IllegalArgumentException("the output file cannot be null");
|
||||
final PrintWriter out = new PrintWriter(file);
|
||||
printDOT(out);
|
||||
if (out.checkError())
|
||||
throw new IllegalStateException("error occurred while writing k-best haplotype search graph into file '"
|
||||
+ file.getAbsolutePath() + "'");
|
||||
out.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Print a DOT representation of search graph.
|
||||
*
|
||||
* @param fileName name of the file where to print the DOT representation to.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code fileName} is {@code null}.
|
||||
* @throws FileNotFoundException if no file named {@code fileName} cannot be created or written.
|
||||
* @throws IllegalStateException if there was some trouble when writing the DOT representation.
|
||||
*/
|
||||
@SuppressWarnings("unused") // Available for debugging purposes.
|
||||
public void printDOTFile(final String fileName) throws FileNotFoundException {
|
||||
printDOT(new File(fileName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the score of a give sequence of bases
|
||||
*
|
||||
* @param bases the base sequence.
|
||||
*
|
||||
* @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible
|
||||
* throw this finder.
|
||||
*/
|
||||
public double score(final byte[] bases) {
|
||||
return topFinder.score(bases,0,bases.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the score of a give sequence of bases
|
||||
*
|
||||
* @param haplotype the haplotype.
|
||||
*
|
||||
* @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible
|
||||
* throw this finder.
|
||||
*/
|
||||
public double score(final Haplotype haplotype) {
|
||||
return score(haplotype.getBases());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a unique list of haplotypes solutions.
|
||||
* <p>
|
||||
* The result will not contain more than one haplotype with the same base sequence. The solution of the best
|
||||
* score is returned.
|
||||
* </p>
|
||||
* <p>
|
||||
* This makes sense when there are more than one possible path through the graph to create the same haplotype.
|
||||
* </p>
|
||||
* <p>
|
||||
* The resulting list is sorted by the score with more likely haplotype search results first.
|
||||
* </p>
|
||||
*
|
||||
* @param maxSize maximum number of unique results to return.
|
||||
*
|
||||
* @throws IllegalArgumentException if {@code maxSize} is negative.
|
||||
*
|
||||
* @return never {@code null}, perhaps an empty list.
|
||||
*/
|
||||
public List<KBestHaplotype> unique(final int maxSize) {
|
||||
if (maxSize < 0) throw new IllegalArgumentException("maxSize cannot be negative");
|
||||
final int requiredCapacity = Math.min(maxSize,size());
|
||||
final Set<Haplotype> haplotypes = new HashSet<>(requiredCapacity);
|
||||
int resultSize = 0;
|
||||
final List<KBestHaplotype> result = new ArrayList<>(requiredCapacity);
|
||||
for (final KBestHaplotype kbh : this) {
|
||||
if (haplotypes.add(kbh.haplotype())) {
|
||||
result.add(kbh);
|
||||
if (resultSize == maxSize) break;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a unique list of haplotypes solutions.
|
||||
*
|
||||
* <p>
|
||||
* The result will not contain more than one haplotype with the same base sequence. The solution of the best
|
||||
* score is returned.
|
||||
* </p>
|
||||
* <p>
|
||||
* This makes sense when there are more than one possible path through the graph to create the same haplotype.
|
||||
* </p>
|
||||
* <p>
|
||||
* The resulting list is sorted by the score with more likely haplotype search results first.
|
||||
* </p>
|
||||
*
|
||||
* @return never {@code null}, perhaps an empty list.
|
||||
*/
|
||||
public List<KBestHaplotype> unique() {
|
||||
return unique(size());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,6 +45,10 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Common interface for K-Best sub-haplotype finders.
|
||||
*
|
||||
|
|
@ -52,6 +56,29 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
|||
*/
|
||||
interface KBestSubHaplotypeFinder {
|
||||
|
||||
/**
|
||||
* Return an unique id for this sub-haplotype finder to be used when outputting diagrams.
|
||||
*
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public String id();
|
||||
|
||||
/**
|
||||
* Returns a label with human readable representation of this finder.
|
||||
*
|
||||
* <p>This is used when generating a diagram to illustrate the search space and costs</p>
|
||||
*
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public String label();
|
||||
|
||||
/**
|
||||
* Returns the set of subfinder from this finder together with a label for the connection with the current finder.
|
||||
*
|
||||
* <p>The label is used when generating a diagram to illustrate the search space and costs</p>
|
||||
*/
|
||||
public Set<Pair<? extends KBestSubHaplotypeFinder,String>> subFinderLabels();
|
||||
|
||||
/**
|
||||
* Returns the total number of possible sub-haplotypes.
|
||||
* @return 0 or greater.
|
||||
|
|
@ -67,5 +94,22 @@ interface KBestSubHaplotypeFinder {
|
|||
*
|
||||
* @return never {@code null}.
|
||||
*/
|
||||
public abstract KBestHaplotype getKBest(int k);
|
||||
public KBestHaplotype getKBest(int k);
|
||||
|
||||
/**
|
||||
* Checks whether the top vertex for this finder is a reference haplotype vertex.
|
||||
*
|
||||
* @return {@code true} iff the top vertex for this finder is a reference vertex.
|
||||
*/
|
||||
public boolean isReference();
|
||||
|
||||
/**
|
||||
* Calculate the score of a sequence of bases.
|
||||
*
|
||||
* @param bases array containing the query base sequence.
|
||||
* @param offset first position of the query base sequence in {@code bases} .
|
||||
* @param length length of the query base sequence.
|
||||
* @return {@link Double#NaN} if there is no score for this sequence, otherwise a valid score value.
|
||||
*/
|
||||
public double score(byte[] bases, int offset, int length);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,20 +49,24 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
|||
import java.util.PriorityQueue;
|
||||
|
||||
/**
|
||||
* edge class for connecting nodes in the graph that tracks some per-sample information
|
||||
*
|
||||
* Edge class for connecting nodes in the graph that tracks some per-sample information.
|
||||
* <p>
|
||||
* This class extends BaseEdge with the additional functionality of tracking the maximum
|
||||
* multiplicity seen within any single sample. The workflow for using this class is:
|
||||
*
|
||||
* MultiSampleEdge e = new MultiSampleEdge(ref, 1)
|
||||
* e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1
|
||||
* e.getPruningMultiplicity() // = 1
|
||||
* e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
|
||||
* e.getPruningMultiplicity() // = 2
|
||||
* e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2
|
||||
* e.getPruningMultiplicity() // = 2
|
||||
* e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
|
||||
* e.getPruningMultiplicity() // = 3
|
||||
* </p>
|
||||
* <pre>
|
||||
* {@code
|
||||
* MultiSampleEdge e = new MultiSampleEdge(ref, 1)
|
||||
* e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1
|
||||
* e.getPruningMultiplicity() // = 1
|
||||
* e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
|
||||
* e.getPruningMultiplicity() // = 2
|
||||
* e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2
|
||||
* e.getPruningMultiplicity() // = 2
|
||||
* e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
|
||||
* e.getPruningMultiplicity() // = 3
|
||||
* }
|
||||
* </pre>
|
||||
*/
|
||||
public class MultiSampleEdge extends BaseEdge {
|
||||
private int currentSingleSampleMultiplicity;
|
||||
|
|
|
|||
|
|
@ -45,9 +45,10 @@
|
|||
*/
|
||||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* General recursive sub-haplotype finder.
|
||||
|
|
@ -67,7 +68,11 @@ import java.util.Map;
|
|||
*
|
||||
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
|
||||
*/
|
||||
class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
||||
class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder<RecursiveSubHaplotypeFinder.EdgeSubHaplotypeFinder> {
|
||||
|
||||
|
||||
private final SeqVertex vertex;
|
||||
private final boolean isReference;
|
||||
|
||||
/**
|
||||
* Creates a recursive sub-haplotype finder give the target graph, first vertex and all possible outgoing edges
|
||||
|
|
@ -80,20 +85,83 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
|||
* @param vertex first vertex for all sub-haplotype solutions provided by this finder
|
||||
* @param children map from outgoing edge to the corresponding sub-sub-haplotype finder.
|
||||
*/
|
||||
public RecursiveSubHaplotypeFinder(final SeqVertex vertex,
|
||||
public RecursiveSubHaplotypeFinder(final SeqGraph graph, final SeqVertex vertex,
|
||||
final Map<BaseEdge, KBestSubHaplotypeFinder> children) {
|
||||
super(createChildFinderCollection(vertex, children));
|
||||
this.vertex = vertex;
|
||||
this.isReference = graph.isReferenceNode(vertex);
|
||||
}
|
||||
|
||||
private static Collection<EdgeSubHaplotypeFinder> createChildFinderCollection(final SeqVertex vertex, final Map<BaseEdge,KBestSubHaplotypeFinder> finders) {
|
||||
/**
|
||||
* Wraps the descendant vertices finders in order to take advantage of the {@link AggregatedSubHaplotypeFinder}
|
||||
* common code.
|
||||
* <p>
|
||||
* Automatically calibrates the edge score (cost) so that it takes into account the total across all edges.
|
||||
* </p>
|
||||
*
|
||||
* @param vertex the parent vertex.
|
||||
* @param finders the child vertices indexed by the connecting edge.
|
||||
* @return never {@code null} but potentially an empty collection if there is child returning some sub-haplotype
|
||||
* solution.
|
||||
*/
|
||||
private static Collection<EdgeSubHaplotypeFinder> createChildFinderCollection(final SeqVertex vertex,
|
||||
final Map<BaseEdge,KBestSubHaplotypeFinder> finders) {
|
||||
if (finders == null) throw new IllegalArgumentException("the edge to child map cannot be null");
|
||||
final Collection<EdgeSubHaplotypeFinder> result = new ArrayList<>(finders.size());
|
||||
for (final Map.Entry<BaseEdge,KBestSubHaplotypeFinder> e : finders.entrySet())
|
||||
result.add(new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue()));
|
||||
final ArrayList<EdgeSubHaplotypeFinder> result = new ArrayList<>(finders.size());
|
||||
for (final Map.Entry<BaseEdge,KBestSubHaplotypeFinder> e : finders.entrySet()) {
|
||||
final EdgeSubHaplotypeFinder subFinder = new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue());
|
||||
if (subFinder.getCount() == 0) continue;
|
||||
result.add(subFinder);
|
||||
}
|
||||
if (result.size() == 0)
|
||||
return Collections.emptySet();
|
||||
else if (result.size() == 1) // no calibration needed, by default edgeScore is 0.
|
||||
return Collections.singleton(result.get(0));
|
||||
else {
|
||||
double totalEdgeMultiplicityAcrossEdges = 0;
|
||||
for (final EdgeSubHaplotypeFinder finder : result)
|
||||
totalEdgeMultiplicityAcrossEdges += Math.max(0.5,finder.edge.getMultiplicity());
|
||||
final double log10TotalEdgeMultiplicityAcrossEdges = Math.log10(totalEdgeMultiplicityAcrossEdges);
|
||||
for (final EdgeSubHaplotypeFinder finder : result)
|
||||
finder.calibrateEdgeScore(log10TotalEdgeMultiplicityAcrossEdges);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
return isReference;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String label() {
|
||||
return vertex.getSequenceString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||
final Set<Pair<? extends KBestSubHaplotypeFinder,String>> result = new LinkedHashSet<>(subFinders.size());
|
||||
for (final EdgeSubHaplotypeFinder subFinder : subFinders)
|
||||
result.add(new Pair<>(subFinder,simplifyZeros(String.format("%.4f", subFinder.edgeScore))));
|
||||
return result;
|
||||
}
|
||||
|
||||
private static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||
/**
|
||||
* Removes zeros decimal positions from edge-labels.
|
||||
*
|
||||
* @param edgeLabel the original label to reformat.
|
||||
* @return never {@code null}, the reformatted label.
|
||||
*/
|
||||
private String simplifyZeros(final String edgeLabel) {
|
||||
if (edgeLabel.equals("0.000") || edgeLabel.equals("-0.000") )
|
||||
return "0.";
|
||||
int i = edgeLabel.length() - 1;
|
||||
while (edgeLabel.charAt(i) == '0')
|
||||
i--;
|
||||
return (i == edgeLabel.length() - 1) ? edgeLabel : edgeLabel.substring(0,i);
|
||||
}
|
||||
|
||||
protected static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
|
||||
|
||||
private final KBestSubHaplotypeFinder childFinder;
|
||||
|
||||
|
|
@ -101,10 +169,32 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
|||
|
||||
private final BaseEdge edge;
|
||||
|
||||
private double edgeScore = 0;
|
||||
|
||||
private EdgeSubHaplotypeFinder(final SeqVertex vertex, final BaseEdge edge, final KBestSubHaplotypeFinder childFinder) {
|
||||
this.childFinder = childFinder;
|
||||
this.edge = edge;
|
||||
this.vertex = vertex;
|
||||
this.edgeScore = 0;
|
||||
}
|
||||
|
||||
private void calibrateEdgeScore(final double log10TotalMultiplicityAcrossOutgoingEdges) {
|
||||
edgeScore = Math.log10(Math.max(edge.getMultiplicity(),0.5)) - log10TotalMultiplicityAcrossOutgoingEdges;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String id() {
|
||||
return childFinder.id();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String label() {
|
||||
return childFinder.label();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<Pair<? extends KBestSubHaplotypeFinder, String>> subFinderLabels() {
|
||||
return childFinder.subFinderLabels();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -114,8 +204,31 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
|||
|
||||
@Override
|
||||
public KBestHaplotype getKBest(int k) {
|
||||
return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k));
|
||||
return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k),edgeScore);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isReference() {
|
||||
return childFinder.isReference();
|
||||
}
|
||||
|
||||
@Override
|
||||
public double score(final byte[] bases, final int offset, final int length) {
|
||||
if (length == 0)
|
||||
return 0;
|
||||
final byte[] vertexSequence = vertex.getSequence();
|
||||
if (length < vertexSequence.length) // query is not long enough to have any score.
|
||||
return Double.NaN;
|
||||
else if (!Utils.equalRange(vertexSequence,0,bases,offset,vertexSequence.length))
|
||||
return Double.NaN;
|
||||
else
|
||||
return edgeScore + childFinder.score(bases,offset + vertexSequence.length,length - vertexSequence.length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String id() {
|
||||
return "v" + vertex.getId();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -129,13 +242,14 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
|||
*/
|
||||
private static class ChildKBestSubHaplotype extends KBestHaplotype {
|
||||
|
||||
private final int score;
|
||||
private final double score;
|
||||
private final KBestHaplotype child;
|
||||
private final SeqVertex vertex;
|
||||
private final boolean isReference;
|
||||
|
||||
public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child) {
|
||||
this.score = edge.getMultiplicity() + child.score();
|
||||
|
||||
public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child, final double edgeScore) {
|
||||
this.score = edgeScore + child.score();
|
||||
this.vertex = vertex;
|
||||
this.child = child;
|
||||
this.isReference = edge.isRef() && child.isReference();
|
||||
|
|
@ -147,7 +261,7 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int score() {
|
||||
public double score() {
|
||||
return score;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -291,16 +291,9 @@ public class SeqGraph extends BaseGraph<SeqVertex, BaseEdge> {
|
|||
final SeqVertex addedVertex = mergeLinearChainVertices(linearChain);
|
||||
addVertex(addedVertex);
|
||||
|
||||
final Set<BaseEdge> inEdges = incomingEdgesOf(first);
|
||||
final Set<BaseEdge> outEdges = outgoingEdgesOf(last);
|
||||
|
||||
final int nEdges = inEdges.size() + outEdges.size();
|
||||
int sharedWeightAmongEdges = nEdges == 0 ? 0 : sumEdgeWeightAlongChain(linearChain) / nEdges;
|
||||
final BaseEdge inc = new BaseEdge(false, sharedWeightAmongEdges); // template to make .add function call easy
|
||||
|
||||
// update the incoming and outgoing edges to point to the new vertex
|
||||
for( final BaseEdge edge : outEdges ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy().add(inc)); }
|
||||
for( final BaseEdge edge : inEdges ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy().add(inc)); }
|
||||
for( final BaseEdge edge : outgoingEdgesOf(last) ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy()); }
|
||||
for( final BaseEdge edge : incomingEdgesOf(first) ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy()); }
|
||||
|
||||
removeAllVertices(linearChain);
|
||||
return true;
|
||||
|
|
@ -313,29 +306,6 @@ public class SeqGraph extends BaseGraph<SeqVertex, BaseEdge> {
|
|||
return new SeqVertex( seqsCat );
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sum of the edge weights on a linear chain of at least 2 elements
|
||||
*
|
||||
* @param chain a linear chain of vertices with at least 2 vertices
|
||||
* @return the sum of the multiplicities along all edges connecting vertices within the chain
|
||||
*/
|
||||
@Requires({"chain != null", "chain.size() >= 2"})
|
||||
private int sumEdgeWeightAlongChain(final LinkedList<SeqVertex> chain) {
|
||||
int sum = 0;
|
||||
SeqVertex prev = null;
|
||||
|
||||
for ( final SeqVertex v : chain ) {
|
||||
if ( prev != null ) {
|
||||
final BaseEdge e = getEdge(prev, v);
|
||||
if ( e == null ) throw new IllegalStateException("Something wrong with the linear chain, got a null edge between " + prev + " and " + v);
|
||||
sum += e.getMultiplicity();
|
||||
}
|
||||
prev = v;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
/**
|
||||
* Base class for transformation operations that need to iterate over proposed vertices, where
|
||||
* each proposed vertex is a seed vertex for a potential transformation.
|
||||
|
|
|
|||
|
|
@ -247,12 +247,12 @@ public class SharedVertexSequenceSplitter {
|
|||
|
||||
if ( needPrefixNode ) {
|
||||
outer.addVertex(prefixV);
|
||||
if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 0));
|
||||
if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 1));
|
||||
}
|
||||
|
||||
if ( needSuffixNode ) {
|
||||
outer.addVertex(suffixV);
|
||||
if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 0));
|
||||
if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 1));
|
||||
}
|
||||
|
||||
if ( topForConnect != null ) {
|
||||
|
|
|
|||
|
|
@ -52,7 +52,6 @@ import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer;
|
|||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.jgrapht.alg.CycleDetector;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -88,8 +87,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
|||
/**
|
||||
*
|
||||
*/
|
||||
|
||||
final boolean debugGraphTransformations;
|
||||
private final boolean debugGraphTransformations;
|
||||
final byte minBaseQualityToUseInAssembly;
|
||||
|
||||
protected boolean increaseCountsBackwards = true;
|
||||
|
|
@ -319,13 +317,6 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
|||
removeAllVertices(verticesToRemove);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return true if the graph has cycles, false otherwise
|
||||
*/
|
||||
public boolean hasCycles() {
|
||||
return new CycleDetector<>(this).detectCycles();
|
||||
}
|
||||
|
||||
/**
|
||||
* Does the graph not have enough complexity? We define low complexity as a situation where the number
|
||||
* of non-unique kmers is more than 20% of the total number of kmers.
|
||||
|
|
@ -419,39 +410,10 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
|||
return counter.getKmersWithCountsAtLeast(2);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert this kmer graph to a simple sequence graph.
|
||||
*
|
||||
* Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
|
||||
* graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
|
||||
*
|
||||
* @return a newly allocated SequenceGraph
|
||||
*/
|
||||
// TODO -- should override base class method
|
||||
@Override
|
||||
public SeqGraph convertToSequenceGraph() {
|
||||
buildGraphIfNecessary();
|
||||
|
||||
final SeqGraph seqGraph = new SeqGraph(kmerSize);
|
||||
final Map<MultiDeBruijnVertex, SeqVertex> vertexMap = new HashMap<>();
|
||||
|
||||
|
||||
// create all of the equivalent seq graph vertices
|
||||
for ( final MultiDeBruijnVertex dv : vertexSet() ) {
|
||||
final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
|
||||
sv.setAdditionalInfo(dv.additionalInfo());
|
||||
vertexMap.put(dv, sv);
|
||||
seqGraph.addVertex(sv);
|
||||
}
|
||||
|
||||
// walk through the nodes and connect them to their equivalent seq vertices
|
||||
for( final MultiSampleEdge e : edgeSet() ) {
|
||||
final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
|
||||
final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
|
||||
//logger.info("Adding edge " + seqInV + " -> " + seqOutV);
|
||||
seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));
|
||||
}
|
||||
|
||||
return seqGraph;
|
||||
return super.convertToSequenceGraph();
|
||||
}
|
||||
|
||||
private void increaseCountsInMatchedKmers(final SequenceForKmers seqForKmers,
|
||||
|
|
@ -749,15 +711,15 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
|
|||
}
|
||||
|
||||
private static String pathElementId(final String element) {
|
||||
final int parentesysPos = element.indexOf('(');
|
||||
final int openBracketPosition = element.indexOf('(');
|
||||
|
||||
if (parentesysPos == -1)
|
||||
if (openBracketPosition == -1)
|
||||
return null;
|
||||
|
||||
final int closeParentesysPos = element.lastIndexOf(')');
|
||||
if (closeParentesysPos == -1)
|
||||
final int closeBracketPosition = element.lastIndexOf(')');
|
||||
if (closeBracketPosition == -1)
|
||||
throw new IllegalArgumentException("non-closed id parantesys found in element: " + element);
|
||||
final String result = element.substring(parentesysPos + 1,closeParentesysPos).trim();
|
||||
final String result = element.substring(openBracketPosition + 1,closeBracketPosition).trim();
|
||||
if (result.isEmpty())
|
||||
throw new IllegalArgumentException("empty id found in element: " + element);
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -247,7 +247,7 @@ public class VariantDataManager {
|
|||
logger.warn( "WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable." );
|
||||
} else if( trainingData.size() > VRAC.MAX_NUM_TRAINING_DATA ) {
|
||||
logger.warn( "WARNING: Very large training set detected. Downsampling to " + VRAC.MAX_NUM_TRAINING_DATA + " training variants." );
|
||||
Collections.shuffle(trainingData);
|
||||
Collections.shuffle(trainingData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
return trainingData.subList(0, VRAC.MAX_NUM_TRAINING_DATA);
|
||||
}
|
||||
return trainingData;
|
||||
|
|
@ -295,13 +295,13 @@ public class VariantDataManager {
|
|||
|
||||
public List<VariantDatum> getRandomDataForPlotting( final int numToAdd, final List<VariantDatum> trainingData, final List<VariantDatum> antiTrainingData, final List<VariantDatum> evaluationData ) {
|
||||
final List<VariantDatum> returnData = new ExpandingArrayList<>();
|
||||
Collections.shuffle(trainingData);
|
||||
Collections.shuffle(antiTrainingData);
|
||||
Collections.shuffle(evaluationData);
|
||||
Collections.shuffle(trainingData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
Collections.shuffle(antiTrainingData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
Collections.shuffle(evaluationData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
returnData.addAll(trainingData.subList(0, Math.min(numToAdd, trainingData.size())));
|
||||
returnData.addAll(antiTrainingData.subList(0, Math.min(numToAdd, antiTrainingData.size())));
|
||||
returnData.addAll(evaluationData.subList(0, Math.min(numToAdd, evaluationData.size())));
|
||||
Collections.shuffle(returnData);
|
||||
Collections.shuffle(returnData, GenomeAnalysisEngine.getRandomGenerator());
|
||||
return returnData;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,6 +50,7 @@ import org.broadinstitute.sting.commandline.ArgumentCollection;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
|
|
@ -59,6 +60,8 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.PairHMMLikelihoodCalculationEngine;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
|
||||
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
|
@ -148,6 +151,7 @@ import java.util.*;
|
|||
* </pre>
|
||||
*
|
||||
*/
|
||||
@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
|
||||
public class CalculateGenotypePosteriors extends RodWalker<Integer,Integer> {
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -134,6 +134,9 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
|||
@Output(doc="File to which the combined gVCF should be written")
|
||||
protected VariantContextWriter vcfWriter = null;
|
||||
|
||||
@Argument(fullName="convertToBasePairResolution", shortName="bpResolution", doc = "If specified, convert banded gVCFs to all-sites gVCFs", required=false)
|
||||
protected boolean USE_BP_RESOLUTION = false;
|
||||
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
public void initialize() {
|
||||
|
|
@ -176,7 +179,7 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
|||
previousState.VCs.addAll(startingStates.VCs);
|
||||
}
|
||||
|
||||
if ( containsEndingContext(previousState.VCs, currentPos) ) {
|
||||
if ( USE_BP_RESOLUTION || containsEndingContext(previousState.VCs, currentPos) ) {
|
||||
endPreviousStates(previousState, currentPos, startingStates.refBases.length > 1 ? startingStates.refBases[1] : (byte)'N');
|
||||
}
|
||||
|
||||
|
|
@ -289,7 +292,8 @@ public class CombineGVCFs extends RodWalker<CombineGVCFs.PositionalState, Combin
|
|||
|
||||
// attributes
|
||||
final Map<String, Object> attrs = new HashMap<>(1);
|
||||
attrs.put(VCFConstants.END_KEY, Integer.toString(end));
|
||||
if ( !USE_BP_RESOLUTION )
|
||||
attrs.put(VCFConstants.END_KEY, Integer.toString(end));
|
||||
|
||||
// genotypes
|
||||
final GenotypesContext genotypes = GenotypesContext.create();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,517 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pairhmm;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.io.File;
|
||||
import java.io.FileWriter;
|
||||
import java.io.BufferedWriter;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin, carneiro
|
||||
* Date: 10/16/12
|
||||
*/
|
||||
public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
||||
|
||||
private static final boolean dumpSandboxOnly = false; //simulates ifdef
|
||||
private static final boolean debug = false; //simulates ifdef
|
||||
private static final boolean verify = !dumpSandboxOnly && (debug || true); //simulates ifdef
|
||||
private static final boolean debug0_1 = false; //simulates ifdef
|
||||
private static final boolean debug1 = false; //simulates ifdef
|
||||
private static final boolean debug2 = false;
|
||||
private static final boolean debug3 = false;
|
||||
|
||||
//Debugging stats
|
||||
private int numCalls = 0;
|
||||
private int numComputeLikelihoodCalls = 0;
|
||||
protected HashMap<String, BufferedWriter> filenameToWriter = new HashMap<String, BufferedWriter>();
|
||||
|
||||
private JNILoglessPairHMM jniPairHMM = null;
|
||||
public DebugJNILoglessPairHMM(final PairHMM.HMM_IMPLEMENTATION hmmType) {
|
||||
super();
|
||||
switch(hmmType) {
|
||||
case VECTOR_LOGLESS_CACHING:
|
||||
jniPairHMM = new VectorLoglessPairHMM();
|
||||
break;
|
||||
default:
|
||||
throw new UserException.BadArgumentValue("pairHMM","Specified JNIPairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are VECTOR_LOGLESS_CACHING");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
jniPairHMM.close();
|
||||
debugClose();
|
||||
}
|
||||
|
||||
//Used only when testing parts of the compute kernel
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void initialize( final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
if(verify)
|
||||
super.initialize(readMaxLength, haplotypeMaxLength);
|
||||
if(debug3)
|
||||
{
|
||||
System.out.println("Java: alloc initialized readMaxLength : "+readMaxLength+" haplotypeMaxLength : "+haplotypeMaxLength);
|
||||
debugDump("lengths_java.txt", String.format("%d %d\n",readMaxLength, haplotypeMaxLength),
|
||||
true);
|
||||
}
|
||||
if(debug2)
|
||||
jniInitialize(readMaxLength, haplotypeMaxLength);
|
||||
}
|
||||
|
||||
private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = null;
|
||||
//Used to transfer data to JNI
|
||||
//Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList,
|
||||
final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
if(verify)
|
||||
{
|
||||
super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
||||
jniPairHMM.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
||||
haplotypeToHaplotypeListIdxMap = jniPairHMM.getHaplotypeToHaplotypeListIdxMap();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void finalizeRegion()
|
||||
{
|
||||
if(!dumpSandboxOnly)
|
||||
jniPairHMM.finalizeRegion();
|
||||
}
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public PerReadAlleleLikelihoodMap computeLikelihoods( final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap ) {
|
||||
// (re)initialize the pairHMM only if necessary
|
||||
final int readMaxLength = verify ? findMaxReadLength(reads) : 0;
|
||||
final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0;
|
||||
if(verify)
|
||||
{
|
||||
if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength)
|
||||
{ initialize(readMaxLength, haplotypeMaxLength); }
|
||||
if ( ! initialized )
|
||||
throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode");
|
||||
}
|
||||
int readListSize = reads.size();
|
||||
int numHaplotypes = alleleHaplotypeMap.size();
|
||||
int numTestcases = readListSize*numHaplotypes;
|
||||
if(debug0_1)
|
||||
System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
|
||||
int idx = 0;
|
||||
for(GATKSAMRecord read : reads)
|
||||
{
|
||||
byte [] overallGCP = GCPArrayMap.get(read);
|
||||
if(debug0_1)
|
||||
System.out.println("Java read length "+read.getReadBases().length);
|
||||
if(debug3)
|
||||
{
|
||||
for(int i=0;i<read.getReadBases().length;++i)
|
||||
{
|
||||
debugDump("reads_java.txt",String.format("%d\n",(int)read.getReadBases()[i]),true);
|
||||
debugDump("reads_java.txt",String.format("%d\n",(int)read.getBaseQualities()[i]),true);
|
||||
debugDump("reads_java.txt",String.format("%d\n",(int)read.getBaseInsertionQualities()[i]),true);
|
||||
debugDump("reads_java.txt",String.format("%d\n",(int)read.getBaseDeletionQualities()[i]),true);
|
||||
debugDump("reads_java.txt",String.format("%d\n",(int)overallGCP[i]),true);
|
||||
}
|
||||
}
|
||||
++idx;
|
||||
}
|
||||
|
||||
if(verify)
|
||||
{
|
||||
idx = 0;
|
||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
|
||||
{
|
||||
byte[] haplotypeBases = currEntry.getValue().getBases();
|
||||
if(debug0_1)
|
||||
System.out.println("Java haplotype length "+haplotypeBases.length);
|
||||
if(debug3)
|
||||
{
|
||||
for(int i=0;i<haplotypeBases.length;++i)
|
||||
debugDump("haplotype_bases_java.txt",String.format("%d\n",(int)haplotypeBases[i]),true);
|
||||
}
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
double[] likelihoodArray = null;
|
||||
PerReadAlleleLikelihoodMap likelihoodMap = null;
|
||||
if(verify)
|
||||
{
|
||||
jniPairHMM.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||
likelihoodArray = jniPairHMM.getLikelihoodArray();
|
||||
//to compare values
|
||||
likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||
}
|
||||
else
|
||||
{
|
||||
likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
likelihoodArray = new double[numTestcases];
|
||||
for(int i=0;i<numTestcases;++i)
|
||||
likelihoodArray[i] = -0.5;
|
||||
}
|
||||
if(verify || dumpSandboxOnly)
|
||||
{
|
||||
boolean toDump = dumpSandboxOnly ? true : false;
|
||||
if(verify)
|
||||
{
|
||||
//re-order values in likelihoodArray
|
||||
double[] tmpArray = new double[numHaplotypes];
|
||||
idx = 0;
|
||||
int idxInsideHaplotypeList = 0;
|
||||
int readIdx = 0;
|
||||
for(GATKSAMRecord read : reads)
|
||||
{
|
||||
for(int j=0;j<numHaplotypes;++j)
|
||||
tmpArray[j] = likelihoodArray[readIdx+j];
|
||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
|
||||
{
|
||||
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
|
||||
likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
|
||||
++idx;
|
||||
}
|
||||
readIdx += numHaplotypes;
|
||||
}
|
||||
//for floating point values, no exact equality
|
||||
//check whether numbers are close in terms of abs_error or relative_error
|
||||
//For very large values, relative_error is relevant
|
||||
//For very small values, abs_error is relevant
|
||||
for(int i=0;i<likelihoodArray.length;++i)
|
||||
{
|
||||
double abs_error = Math.abs(likelihoodArray[i] - mLikelihoodArray[i]);
|
||||
double relative_error = 0;
|
||||
if(mLikelihoodArray[i] == 0)
|
||||
relative_error = 0;
|
||||
else
|
||||
relative_error = Math.abs(abs_error/mLikelihoodArray[i]);
|
||||
if(abs_error > 1e-5 && relative_error > 1e-5)
|
||||
{
|
||||
toDump = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
//if numbers are not close, then dump out the data that produced the inconsistency
|
||||
if(toDump)
|
||||
{
|
||||
idx = 0;
|
||||
System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
|
||||
boolean firstLine = true;
|
||||
for(GATKSAMRecord read : reads)
|
||||
{
|
||||
byte [] overallGCP = GCPArrayMap.get(read);
|
||||
byte[] tmpByteArray = new byte[read.getReadBases().length];
|
||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
|
||||
{
|
||||
byte[] haplotypeBases = currEntry.getValue().getBases();
|
||||
debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true);
|
||||
debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true);
|
||||
for(int k=0;k<read.getReadBases().length;++k)
|
||||
tmpByteArray[k] = (byte)((int)((read.getBaseQualities())[k]) + 33);
|
||||
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||
for(int k=0;k<read.getReadBases().length;++k)
|
||||
tmpByteArray[k] = (byte)((int)((read.getBaseInsertionQualities())[k]) + 33);
|
||||
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||
for(int k=0;k<read.getReadBases().length;++k)
|
||||
tmpByteArray[k] = (byte)((int)((read.getBaseDeletionQualities())[k]) + 33);
|
||||
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||
for(int k=0;k<read.getReadBases().length;++k)
|
||||
tmpByteArray[k] = (byte)((int)(overallGCP[k]) + 33);
|
||||
debugDump("debug_dump.txt",new String(tmpByteArray),true);
|
||||
if(firstLine)
|
||||
{
|
||||
debugDump("debug_dump.txt",String.format(" %d %d\n",readListSize, numHaplotypes), true);
|
||||
firstLine = false;
|
||||
}
|
||||
else
|
||||
debugDump("debug_dump.txt","\n",true);
|
||||
if(verify)
|
||||
debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
|
||||
else
|
||||
if(dumpSandboxOnly)
|
||||
likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]);
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
debugClose();
|
||||
}
|
||||
++numComputeLikelihoodCalls;
|
||||
//if(numComputeLikelihoodCalls == 5)
|
||||
//jniPairHMM.close();
|
||||
//System.exit(0);
|
||||
return likelihoodMap;
|
||||
}
|
||||
|
||||
//Used to test parts of the compute kernel separately
|
||||
private native void jniInitialize( final int readMaxLength, final int haplotypeMaxLength);
|
||||
private native static void jniInitializeProbabilities( final double[][] transition, final byte[] insertionGOP,
|
||||
final byte[] deletionGOP, final byte[] overallGCP);
|
||||
private native double jniInitializePriorsAndUpdateCells( boolean doInitialization, final int paddedReadLength,
|
||||
final int paddedHaplotypeLength, final byte[] readBases, final byte[] haplotypeBases, final byte[] readQuals,
|
||||
final int hapStartIndex);
|
||||
private native double jniSubComputeReadLikelihoodGivenHaplotypeLog10( final int readLength, final int haplotypeLength,
|
||||
final byte[] readBases, final byte[] haplotypeBases, final byte[] readQuals, final byte[] insertionGOP,
|
||||
final byte[] deletionGOP, final byte[] overallGCP, final int hapStartIndex);
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public double subComputeReadLikelihoodGivenHaplotypeLog10( final byte[] haplotypeBases, final byte[] readBases,
|
||||
final byte[] readQuals, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP,
|
||||
final int hapStartIndex, final boolean recacheReadValues, final int nextHapStartIndex) {
|
||||
//System.out.println("#### START STACK TRACE ####");
|
||||
//for (StackTraceElement ste : Thread.currentThread().getStackTrace()) {
|
||||
//System.out.println(ste);
|
||||
//}
|
||||
//System.out.println("#### END STACK TRACE ####");
|
||||
//
|
||||
if(debug1)
|
||||
jniSubComputeReadLikelihoodGivenHaplotypeLog10(readBases.length, haplotypeBases.length,
|
||||
readBases, haplotypeBases, readQuals,
|
||||
insertionGOP, deletionGOP, overallGCP,
|
||||
hapStartIndex);
|
||||
|
||||
boolean doInitialization = (previousHaplotypeBases == null || previousHaplotypeBases.length != haplotypeBases.length);
|
||||
if (doInitialization) {
|
||||
final double initialValue = INITIAL_CONDITION / haplotypeBases.length;
|
||||
// set the initial value (free deletions in the beginning) for the first row in the deletion matrix
|
||||
for( int j = 0; j < paddedHaplotypeLength; j++ ) {
|
||||
deletionMatrix[0][j] = initialValue;
|
||||
}
|
||||
}
|
||||
|
||||
if ( ! constantsAreInitialized || recacheReadValues ) {
|
||||
initializeProbabilities(transition, insertionGOP, deletionGOP, overallGCP);
|
||||
if(debug3)
|
||||
{
|
||||
System.out.println("Java: initializeProbabilities lengths : "+insertionGOP.length+" padded "+paddedReadLength+" "+paddedHaplotypeLength);
|
||||
for(int i=0;i<insertionGOP.length;++i)
|
||||
for(int j=0;j<6;++j)
|
||||
debugDump("transitions_java.txt",String.format("%e\n",transition[i+1][j]),true);
|
||||
}
|
||||
if(debug2)
|
||||
jniInitializeProbabilities(transition, insertionGOP, deletionGOP, overallGCP);
|
||||
|
||||
// note that we initialized the constants
|
||||
constantsAreInitialized = true;
|
||||
}
|
||||
|
||||
if(debug3)
|
||||
System.out.println("Java: initializePriors : lengths "+readBases.length+" "+haplotypeBases.length+" padded "+paddedReadLength+" "+paddedHaplotypeLength + " doNotUseTristateCorrection "+doNotUseTristateCorrection);
|
||||
initializePriors(haplotypeBases, readBases, readQuals, hapStartIndex);
|
||||
|
||||
for (int i = 1; i < paddedReadLength; i++) {
|
||||
// +1 here is because hapStartIndex is 0-based, but our matrices are 1 based
|
||||
for (int j = hapStartIndex+1; j < paddedHaplotypeLength; j++) {
|
||||
updateCell(i, j, prior[i][j], transition[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// final probability is the log10 sum of the last element in the Match and Insertion state arrays
|
||||
// this way we ignore all paths that ended in deletions! (huge)
|
||||
// but we have to sum all the paths ending in the M and I matrices, because they're no longer extended.
|
||||
final int endI = paddedReadLength - 1;
|
||||
double finalSumProbabilities = 0.0;
|
||||
for (int j = 1; j < paddedHaplotypeLength; j++) {
|
||||
finalSumProbabilities += matchMatrix[endI][j] + insertionMatrix[endI][j];
|
||||
}
|
||||
if(debug2)
|
||||
jniInitializePriorsAndUpdateCells(doInitialization, paddedReadLength, paddedHaplotypeLength,
|
||||
readBases, haplotypeBases, readQuals,
|
||||
hapStartIndex);
|
||||
if(debug)
|
||||
debugDump("return_values_java.txt",String.format("%e\n",Math.log10(finalSumProbabilities) - INITIAL_CONDITION_LOG10),true);
|
||||
++numCalls;
|
||||
//if(numCalls == 100)
|
||||
//{
|
||||
//debugClose();
|
||||
//System.exit(0);
|
||||
//}
|
||||
return Math.log10(finalSumProbabilities) - INITIAL_CONDITION_LOG10;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the matrix that holds all the constants related to the editing
|
||||
* distance between the read and the haplotype.
|
||||
*
|
||||
* @param haplotypeBases the bases of the haplotype
|
||||
* @param readBases the bases of the read
|
||||
* @param readQuals the base quality scores of the read
|
||||
* @param startIndex where to start updating the distanceMatrix (in case this read is similar to the previous read)
|
||||
*/
|
||||
protected void initializePriors(final byte[] haplotypeBases, final byte[] readBases, final byte[] readQuals, final int startIndex) {
|
||||
|
||||
// initialize the pBaseReadLog10 matrix for all combinations of read x haplotype bases
|
||||
// Abusing the fact that java initializes arrays with 0.0, so no need to fill in rows and columns below 2.
|
||||
|
||||
if(debug3)
|
||||
System.out.println("hapStartIndex "+startIndex);
|
||||
|
||||
for (int i = 0; i < readBases.length; i++) {
|
||||
final byte x = readBases[i];
|
||||
final byte qual = readQuals[i];
|
||||
for (int j = startIndex; j < haplotypeBases.length; j++) {
|
||||
final byte y = haplotypeBases[j];
|
||||
prior[i+1][j+1] = ( x == y || x == (byte) 'N' || y == (byte) 'N' ?
|
||||
QualityUtils.qualToProb(qual) : (QualityUtils.qualToErrorProb(qual) / (doNotUseTristateCorrection ? 1.0 : TRISTATE_CORRECTION)) );
|
||||
if(debug3)
|
||||
debugDump("priors_java.txt",String.format("%e\n",prior[i+1][j+1]),true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes the matrix that holds all the constants related to quality scores.
|
||||
*
|
||||
* @param insertionGOP insertion quality scores of the read
|
||||
* @param deletionGOP deletion quality scores of the read
|
||||
* @param overallGCP overall gap continuation penalty
|
||||
*/
|
||||
@Requires({
|
||||
"insertionGOP != null",
|
||||
"deletionGOP != null",
|
||||
"overallGCP != null"
|
||||
})
|
||||
@Ensures("constantsAreInitialized")
|
||||
protected static void initializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP) {
|
||||
PairHMMModel.qualToTransProbs(transition,insertionGOP,deletionGOP,overallGCP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Updates a cell in the HMM matrix
|
||||
*
|
||||
* The read and haplotype indices are offset by one because the state arrays have an extra column to hold the
|
||||
* initial conditions
|
||||
|
||||
* @param indI row index in the matrices to update
|
||||
* @param indJ column index in the matrices to update
|
||||
* @param prior the likelihood editing distance matrix for the read x haplotype
|
||||
* @param transition an array with the six transition relevant to this location
|
||||
*/
|
||||
protected void updateCell( final int indI, final int indJ, final double prior, final double[] transition) {
|
||||
|
||||
matchMatrix[indI][indJ] = prior * ( matchMatrix[indI - 1][indJ - 1] * transition[matchToMatch] +
|
||||
insertionMatrix[indI - 1][indJ - 1] * transition[indelToMatch] +
|
||||
deletionMatrix[indI - 1][indJ - 1] * transition[indelToMatch] );
|
||||
insertionMatrix[indI][indJ] = matchMatrix[indI - 1][indJ] * transition[matchToInsertion] + insertionMatrix[indI - 1][indJ] * transition[insertionToInsertion];
|
||||
deletionMatrix[indI][indJ] = matchMatrix[indI][indJ - 1] * transition[matchToDeletion] + deletionMatrix[indI][indJ - 1] * transition[deletionToDeletion];
|
||||
if(debug3)
|
||||
{
|
||||
debugDump("matrices_java.txt",String.format("%e\n",matchMatrix[indI][indJ]),true);
|
||||
debugDump("matrices_java.txt",String.format("%e\n",insertionMatrix[indI][indJ]),true);
|
||||
debugDump("matrices_java.txt",String.format("%e\n",deletionMatrix[indI][indJ]),true);
|
||||
}
|
||||
}
|
||||
|
||||
protected void debugDump( String filename, String s, boolean toAppend ) {
|
||||
try {
|
||||
File file = new File(filename);
|
||||
if (!file.exists())
|
||||
file.createNewFile();
|
||||
BufferedWriter currWriter = filenameToWriter.get(filename);
|
||||
if(currWriter == null)
|
||||
{
|
||||
FileWriter fw = new FileWriter(file, toAppend);
|
||||
currWriter = new BufferedWriter(fw);
|
||||
filenameToWriter.put(filename, currWriter);
|
||||
}
|
||||
currWriter.write(s);
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
protected void debugClose() {
|
||||
for(Map.Entry<String, BufferedWriter> currEntry : filenameToWriter.entrySet()) {
|
||||
BufferedWriter currWriter = currEntry.getValue();
|
||||
try
|
||||
{
|
||||
currWriter.flush();
|
||||
currWriter.close();
|
||||
}
|
||||
catch(IOException e)
|
||||
{
|
||||
e.printStackTrace();
|
||||
|
||||
}
|
||||
}
|
||||
filenameToWriter.clear();
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pairhmm;
|
||||
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin, carneiro
|
||||
* Date: 10/16/12
|
||||
*/
|
||||
public abstract class JNILoglessPairHMM extends LoglessPairHMM {
|
||||
public abstract HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap();
|
||||
protected long setupTime = 0;
|
||||
|
||||
}
|
||||
|
|
@ -66,7 +66,7 @@ public class LoglessPairHMM extends N2MemoryPairHMM {
|
|||
protected static final double TRISTATE_CORRECTION = 3.0;
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -0,0 +1,340 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.pairhmm;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
||||
import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
|
||||
import org.broadinstitute.sting.utils.haplotype.Haplotype;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.variant.variantcontext.Allele;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
//For loading library from jar
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
|
||||
* Created with IntelliJ IDEA.
|
||||
* User: rpoplin, carneiro
|
||||
* Date: 10/16/12
|
||||
*/
|
||||
public class VectorLoglessPairHMM extends JNILoglessPairHMM {
|
||||
|
||||
//For machine capabilities
|
||||
public static final long sse41Mask = 1;
|
||||
public static final long sse42Mask = 2;
|
||||
public static final long avxMask = 4;
|
||||
public static final long enableAll = 0xFFFFFFFFFFFFFFFFl;
|
||||
|
||||
//Used to copy references to byteArrays to JNI from reads
|
||||
protected class JNIReadDataHolderClass {
|
||||
public byte[] readBases = null;
|
||||
public byte[] readQuals = null;
|
||||
public byte[] insertionGOP = null;
|
||||
public byte[] deletionGOP = null;
|
||||
public byte[] overallGCP = null;
|
||||
}
|
||||
|
||||
//Used to copy references to byteArrays to JNI from haplotypes
|
||||
protected class JNIHaplotypeDataHolderClass {
|
||||
public byte[] haplotypeBases = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return 64-bit mask representing machine capabilities
|
||||
* Bit 0 is LSB, bit 63 MSB
|
||||
* Bit 0 represents sse4.1 availability
|
||||
* Bit 1 represents sse4.2 availability
|
||||
* Bit 2 represents AVX availability
|
||||
*/
|
||||
public native long jniGetMachineType();
|
||||
|
||||
/**
|
||||
* Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM.
|
||||
* C++ codegets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not
|
||||
* change per JVM session
|
||||
* @param readDataHolderClass class type of JNIReadDataHolderClass
|
||||
* @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass
|
||||
* @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask
|
||||
* */
|
||||
private native void jniInitializeClassFieldsAndMachineMask(Class<?> readDataHolderClass, Class<?> haplotypeDataHolderClass, long mask);
|
||||
|
||||
private static Boolean isVectorLoglessPairHMMLibraryLoaded = false;
|
||||
//The constructor is called only once inside PairHMMLikelihoodCalculationEngine
|
||||
public VectorLoglessPairHMM() {
|
||||
super();
|
||||
|
||||
logger.warn("WARNING: the VectorLoglessPairHMM is an experimental implementation still under active development. " +
|
||||
"Use at your own risk!");
|
||||
|
||||
synchronized(isVectorLoglessPairHMMLibraryLoaded) {
|
||||
//Load the library and initialize the FieldIDs
|
||||
if(!isVectorLoglessPairHMMLibraryLoaded) {
|
||||
try
|
||||
{
|
||||
//Try loading from Java's library path first
|
||||
//Useful if someone builds his/her own library and wants to override the bundled
|
||||
//implementation without modifying the Java code
|
||||
System.loadLibrary("VectorLoglessPairHMM");
|
||||
logger.info("libVectorLoglessPairHMM found in JVM library path");
|
||||
}
|
||||
catch(UnsatisfiedLinkError ule)
|
||||
{
|
||||
//Could not load from Java's library path - try unpacking from jar
|
||||
try
|
||||
{
|
||||
logger.debug("libVectorLoglessPairHMM not found in JVM library path - trying to unpack from StingUtils.jar");
|
||||
loadLibraryFromJar("/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so");
|
||||
logger.debug("libVectorLoglessPairHMM unpacked successfully from StingUtils.jar");
|
||||
}
|
||||
catch(IOException ioe)
|
||||
{
|
||||
//Throw the UnsatisfiedLinkError to make it clear to the user what failed
|
||||
throw ule;
|
||||
}
|
||||
}
|
||||
|
||||
isVectorLoglessPairHMMLibraryLoaded = true;
|
||||
jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
|
||||
//Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize
|
||||
//Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype
|
||||
private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = new HashMap<Haplotype,Integer>();
|
||||
private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray;
|
||||
@Override
|
||||
public HashMap<Haplotype, Integer> getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; }
|
||||
|
||||
//Used to transfer data to JNI
|
||||
//Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList,
|
||||
final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
int numHaplotypes = haplotypes.size();
|
||||
mHaplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
|
||||
int idx = 0;
|
||||
haplotypeToHaplotypeListIdxMap.clear();
|
||||
for(final Haplotype currHaplotype : haplotypes)
|
||||
{
|
||||
mHaplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
|
||||
mHaplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases();
|
||||
haplotypeToHaplotypeListIdxMap.put(currHaplotype, idx);
|
||||
++idx;
|
||||
}
|
||||
jniInitializeHaplotypes(numHaplotypes, mHaplotypeDataArray);
|
||||
}
|
||||
/**
|
||||
* Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
|
||||
* accessing Java memory directly, still important to release memory from C++
|
||||
*/
|
||||
private native void jniFinalizeRegion();
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public void finalizeRegion()
|
||||
{
|
||||
jniFinalizeRegion();
|
||||
}
|
||||
|
||||
/**
|
||||
* Real compute kernel
|
||||
*/
|
||||
private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray,
|
||||
JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse);
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public PerReadAlleleLikelihoodMap computeLikelihoods( final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap ) {
|
||||
if(doProfiling)
|
||||
startTime = System.nanoTime();
|
||||
int readListSize = reads.size();
|
||||
int numHaplotypes = alleleHaplotypeMap.size();
|
||||
int numTestcases = readListSize*numHaplotypes;
|
||||
JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
|
||||
int idx = 0;
|
||||
for(GATKSAMRecord read : reads)
|
||||
{
|
||||
readDataArray[idx] = new JNIReadDataHolderClass();
|
||||
readDataArray[idx].readBases = read.getReadBases();
|
||||
readDataArray[idx].readQuals = read.getBaseQualities();
|
||||
readDataArray[idx].insertionGOP = read.getBaseInsertionQualities();
|
||||
readDataArray[idx].deletionGOP = read.getBaseDeletionQualities();
|
||||
readDataArray[idx].overallGCP = GCPArrayMap.get(read);
|
||||
++idx;
|
||||
}
|
||||
|
||||
mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results
|
||||
if(doProfiling)
|
||||
setupTime += (System.nanoTime() - startTime);
|
||||
//for(reads)
|
||||
// for(haplotypes)
|
||||
// compute_full_prob()
|
||||
jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12);
|
||||
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
idx = 0;
|
||||
int idxInsideHaplotypeList = 0;
|
||||
int readIdx = 0;
|
||||
for(GATKSAMRecord read : reads)
|
||||
{
|
||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
|
||||
{
|
||||
//Since the order of haplotypes in the List<Haplotype> and alleleHaplotypeMap is different,
|
||||
//get idx of current haplotype in the list and use this idx to get the right likelihoodValue
|
||||
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
|
||||
likelihoodMap.add(read, currEntry.getKey(), mLikelihoodArray[readIdx + idxInsideHaplotypeList]);
|
||||
++idx;
|
||||
}
|
||||
readIdx += numHaplotypes;
|
||||
}
|
||||
if(doProfiling)
|
||||
computeTime += (System.nanoTime() - startTime);
|
||||
return likelihoodMap;
|
||||
}
|
||||
|
||||
/**
|
||||
* Print final profiling information from native code
|
||||
*/
|
||||
public native void jniClose();
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9));
|
||||
super.close();
|
||||
jniClose();
|
||||
}
|
||||
|
||||
//Copied from http://frommyplayground.com/how-to-load-native-jni-library-from-jar
|
||||
/**
|
||||
* Loads library from current JAR archive
|
||||
*
|
||||
* The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after exiting.
|
||||
* Method uses String as filename because the pathname is "abstract", not system-dependent.
|
||||
*
|
||||
* @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext
|
||||
* @throws IOException If temporary file creation or read/write operation fails
|
||||
* @throws IllegalArgumentException If source file (param path) does not exist
|
||||
* @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters (restriction of {@see File#createTempFile(java.lang.String, java.lang.String)}).
|
||||
*/
|
||||
public static void loadLibraryFromJar(String path) throws IOException {
|
||||
|
||||
if (!path.startsWith("/")) {
|
||||
throw new IllegalArgumentException("The path to be absolute (start with '/').");
|
||||
}
|
||||
|
||||
// Obtain filename from path
|
||||
String[] parts = path.split("/");
|
||||
String filename = (parts.length > 1) ? parts[parts.length - 1] : null;
|
||||
|
||||
// Split filename to prexif and suffix (extension)
|
||||
String prefix = "";
|
||||
String suffix = null;
|
||||
if (filename != null) {
|
||||
parts = filename.split("\\.", 2);
|
||||
prefix = parts[0];
|
||||
suffix = (parts.length > 1) ? "."+parts[parts.length - 1] : null; // Thanks, davs! :-)
|
||||
}
|
||||
|
||||
// Check if the filename is okay
|
||||
if (filename == null || prefix.length() < 3) {
|
||||
throw new IllegalArgumentException("The filename has to be at least 3 characters long.");
|
||||
}
|
||||
|
||||
// Prepare temporary file
|
||||
File temp = File.createTempFile(prefix, suffix);
|
||||
//System.out.println("Temp lib file "+temp.getAbsolutePath());
|
||||
temp.deleteOnExit();
|
||||
|
||||
if (!temp.exists()) {
|
||||
throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist.");
|
||||
}
|
||||
|
||||
// Prepare buffer for data copying
|
||||
byte[] buffer = new byte[1024];
|
||||
int readBytes;
|
||||
|
||||
// Open and check input stream
|
||||
InputStream is = VectorLoglessPairHMM.class.getResourceAsStream(path);
|
||||
if (is == null) {
|
||||
throw new FileNotFoundException("File " + path + " was not found inside JAR.");
|
||||
}
|
||||
|
||||
// Open output stream and copy data between source file in JAR and the temporary file
|
||||
OutputStream os = new FileOutputStream(temp);
|
||||
try {
|
||||
while ((readBytes = is.read(buffer)) != -1) {
|
||||
os.write(buffer, 0, readBytes);
|
||||
}
|
||||
} finally {
|
||||
// If read/write fails, close streams safely before throwing an exception
|
||||
os.close();
|
||||
is.close();
|
||||
}
|
||||
|
||||
// Finally, load the library
|
||||
System.load(temp.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
|
|
@ -46,6 +46,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.MannWhitneyU;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
|
|
@ -75,9 +76,9 @@ public class RankSumUnitTest {
|
|||
makeDistribution(distribution20_40, 40, skew, observations/2);
|
||||
|
||||
// shuffle the observations
|
||||
Collections.shuffle(distribution20);
|
||||
Collections.shuffle(distribution30);
|
||||
Collections.shuffle(distribution20_40);
|
||||
Collections.shuffle(distribution20, GenomeAnalysisEngine.getRandomGenerator());
|
||||
Collections.shuffle(distribution30, GenomeAnalysisEngine.getRandomGenerator());
|
||||
Collections.shuffle(distribution20_40, GenomeAnalysisEngine.getRandomGenerator());
|
||||
}
|
||||
|
||||
private static void makeDistribution(final List<Integer> result, final int target, final int skew, final int numObservations) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* By downloading the PROGRAM you agree to the following terms of use:
|
||||
*
|
||||
* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
|
||||
*
|
||||
* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
|
||||
*
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
|
||||
* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
|
||||
* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
|
||||
*
|
||||
* 1. DEFINITIONS
|
||||
* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
|
||||
*
|
||||
* 2. LICENSE
|
||||
* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
|
||||
* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
|
||||
* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
|
||||
* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
|
||||
*
|
||||
* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
|
||||
* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
|
||||
* Copyright 2012 Broad Institute, Inc.
|
||||
* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
|
||||
* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
|
||||
*
|
||||
* 4. INDEMNIFICATION
|
||||
* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
|
||||
*
|
||||
* 5. NO REPRESENTATIONS OR WARRANTIES
|
||||
* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
|
||||
* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
|
||||
*
|
||||
* 6. ASSIGNMENT
|
||||
* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
|
||||
*
|
||||
* 7. MISCELLANEOUS
|
||||
* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
|
||||
* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
|
||||
* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
|
||||
* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
|
||||
* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
|
||||
* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
|
||||
* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.lang.Integer;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by haasb on 3/5/14.
|
||||
*/
|
||||
public class StrandOddsRatioUnitTest {
|
||||
private static double DELTA_PRECISION = 0.001;
|
||||
|
||||
@DataProvider(name = "UsingSOR")
|
||||
public Object[][] makeUsingSORData() {
|
||||
List<Object[]> tests = new ArrayList<>();
|
||||
tests.add(new Object[]{0, 0, 0, 0, 2.0});
|
||||
tests.add(new Object[]{100000, 100000, 100000, 100000, 2.0} );
|
||||
tests.add(new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, 2.0} );
|
||||
|
||||
tests.add(new Object[]{0, 0, 100000, 100000, 2.0});
|
||||
tests.add(new Object[]{0, 0, Integer.MAX_VALUE, Integer.MAX_VALUE, 2.0});
|
||||
|
||||
tests.add(new Object[]{100000,100000,100000,0, 1000001.000001});
|
||||
tests.add(new Object[]{100,100,100,0, 1001.000999});
|
||||
tests.add(new Object[]{Integer.MAX_VALUE,Integer.MAX_VALUE,Integer.MAX_VALUE,0, 21474836471.0});
|
||||
|
||||
tests.add(new Object[]{13736,9047,41,1433, 52.95947});
|
||||
tests.add(new Object[]{66, 14, 64, 4, 3.63482});
|
||||
tests.add(new Object[]{351169, 306836, 153739, 2379, 56.48043});
|
||||
tests.add(new Object[]{116449, 131216, 289, 16957, 52.07302});
|
||||
tests.add(new Object[]{137, 159, 9, 23, 2.64460});
|
||||
tests.add(new Object[]{129, 90, 21, 20, 2.09757});
|
||||
tests.add(new Object[]{14054, 9160, 16, 7827, 745.89657});
|
||||
tests.add(new Object[]{32803, 9184, 32117, 3283, 3.10399});
|
||||
tests.add(new Object[]{2068, 6796, 1133, 0, 37235.43791});
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "UsingSOR")
|
||||
public void testUsingSOR(final int refpos, final int refneg, final int altpos, final int altneg, double expectedOddsRatio ) {
|
||||
int[][] contingencyTable = new int[2][2];
|
||||
contingencyTable[0][0] = refpos;
|
||||
contingencyTable[0][1] = refneg;
|
||||
contingencyTable[1][0] = altpos;
|
||||
contingencyTable[1][1] = altneg;
|
||||
final double ratio = new StrandOddsRatio().symmetricOddsRatio(contingencyTable);
|
||||
Assert.assertEquals(ratio, expectedOddsRatio, DELTA_PRECISION, "Pass");
|
||||
}
|
||||
}
|
||||
|
|
@ -47,38 +47,63 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class FastaAlternateReferenceIntegrationTest extends WalkerTest {
|
||||
|
||||
@Test
|
||||
public void testIntervals() {
|
||||
public void testReferenceOnly() {
|
||||
|
||||
String md5_1 = "328d2d52cedfdc52da7d1abff487633d";
|
||||
|
||||
WalkerTestSpec spec1a = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s",
|
||||
1,
|
||||
Arrays.asList(md5_1));
|
||||
executeTest("testFastaReference", spec1a);
|
||||
|
||||
WalkerTestSpec spec1b = new WalkerTestSpec(
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s",
|
||||
1,
|
||||
Arrays.asList(md5_1));
|
||||
executeTest("testFastaReference", spec1b);
|
||||
Arrays.asList("328d2d52cedfdc52da7d1abff487633d"));
|
||||
executeTest("test FastaReference", spec);
|
||||
}
|
||||
|
||||
WalkerTestSpec spec2 = new WalkerTestSpec(
|
||||
@Test
|
||||
public void testIndelsAndSnpMask() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s",
|
||||
1,
|
||||
Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65"));
|
||||
executeTest("testFastaAlternateReferenceIndels", spec2);
|
||||
executeTest("test indels", spec);
|
||||
}
|
||||
|
||||
WalkerTestSpec spec3 = new WalkerTestSpec(
|
||||
@Test
|
||||
public void testSnps() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500 -L 1:10,029,200-10,029,500 -o %s",
|
||||
1,
|
||||
Arrays.asList("8b6cd2e20c381f9819aab2d270f5e641"));
|
||||
executeTest("testFastaAlternateReferenceSnps", spec3);
|
||||
executeTest("test SNPs", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadIupacInput() {
|
||||
|
||||
// cannot use 'expectedExceptions = UserException.BadInput.class' because it technically gets thrown as a RuntimeException by the engine
|
||||
try {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b36KGReference + " --useIUPAC -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500 -L 1:10,029,200-10,029,500 -o %s",
|
||||
1,
|
||||
Arrays.asList("FAILFAILFAILFAILFAILFAILFAILFAIL"));
|
||||
executeTest("test bad input", spec);
|
||||
} catch (Exception e) {} // do nothing
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testIupac() {
|
||||
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T FastaAlternateReferenceMaker -R " + b37KGReference + " --useIUPAC -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf -L 20:61050-66380 -o %s",
|
||||
1,
|
||||
Arrays.asList("5feb2a576ff2ed1745a007eaa36448b3"));
|
||||
executeTest("test iupac", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -94,6 +94,19 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa
|
|||
@Test
|
||||
public void testHaplotypeCallerMultiSampleGGAMultiAllelic() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337",
|
||||
"f50e0b35e2240b19b1b8b6dfa0cf9796");
|
||||
"5ac3bfe1da1d411b52a98ef3debbd318");
|
||||
}
|
||||
|
||||
private void HCTestComplexConsensusMode(String bam, String args, String md5) {
|
||||
final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -consensus -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -alleles " + validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf";
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
|
||||
executeTest("testHaplotypeCallerComplexConsensusMode: args=" + args, spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHaplotypeCallerMultiSampleConsensusModeComplex() {
|
||||
HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337",
|
||||
"61972c7c0d378e756f3b4d99aed9d0cf");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,8 +68,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest {
|
|||
|
||||
// this functionality can be adapted to provide input data for whatever you might want in your data
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7c16aa8e35de9f418533efac3bae6551"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e1e193d70187774f9740d475e0f1cc1"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "96fea2caf0a40df3feb268e8b14da670"});
|
||||
tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "19efc8020f31d1b68d80c50df0629e50"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"});
|
||||
tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"});
|
||||
|
|
|
|||
|
|
@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("0998be22d7af4372247f5a0338f9446b"));
|
||||
Arrays.asList("7c3254ead383e2b9a51b242f6de2a5b2"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestDBSNPAnnotationWGSGraphBased() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1,
|
||||
Arrays.asList("1aeed297a3cb41940d83eac499a2ce07"));
|
||||
Arrays.asList("eda8f91091fe462205d687ec49fc61e7"));
|
||||
executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestAggressivePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("f426f4c2986e1dea8f3f55951ef8e013"));
|
||||
Arrays.asList("73c52372a1a80f052ea2b728ee17bf22"));
|
||||
executeTest("HC calling with aggressive indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
public void HCTestConservativePcrIndelModelWGS() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1,
|
||||
Arrays.asList("dcb38cb9280f2c3059a09d323db1c633"));
|
||||
Arrays.asList("4e10d49b8af23d5ef3a28cb702d10a4b"));
|
||||
executeTest("HC calling with conservative indel error modeling on WGS intervals", spec);
|
||||
}
|
||||
|
||||
|
|
@ -298,4 +298,25 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
|
|||
spec.disableShadowBCF();
|
||||
executeTest("testGraphBasedNoSuchEdgeBugFix", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testLackSensitivityDueToBadHaplotypeSelectionFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||
b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list",
|
||||
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e2e6647f7c96e91aeead7301017dc800"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() {
|
||||
final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ",
|
||||
hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list",
|
||||
HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER);
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cbda30145523bf05e0413157f1a00b3e"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,7 +47,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.haplotypeBAMWriter.HaplotypeBAMWriter;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
|
|
@ -61,7 +60,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest {
|
|||
List<Object[]> tests = new ArrayList<>();
|
||||
|
||||
for ( final int nct : Arrays.asList(1, 2, 4) ) {
|
||||
tests.add(new Object[]{nct, "1f463bf3a06c401006858bc446ecea54"});
|
||||
tests.add(new Object[]{nct, "fd9324a574f9204f7308fc1af422fdcc"});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
|
|
|
|||
|
|
@ -52,10 +52,11 @@ import org.testng.annotations.DataProvider;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class CommonSuffixMergerUnitTest extends BaseTest {
|
||||
private final static boolean PRINT_GRAPHS = true;
|
||||
private final static boolean PRINT_GRAPHS = false;
|
||||
|
||||
@DataProvider(name = "CompleteCycleData")
|
||||
public Object[][] makeCompleteCycleData() {
|
||||
|
|
@ -134,11 +135,35 @@ public class CommonSuffixMergerUnitTest extends BaseTest {
|
|||
return toUse.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares KBestHaplotype solutions, first by the haplotype base sequence and the by their score.
|
||||
*/
|
||||
private static final Comparator<KBestHaplotype> KBESTHAPLOTYPE_COMPARATOR = new Comparator<KBestHaplotype>() {
|
||||
|
||||
/**
|
||||
* Compares KBestHaplotype solutions, first by the haplotype base sequence and the by their score.
|
||||
*
|
||||
* @return {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public int compare(final KBestHaplotype o1,final KBestHaplotype o2) {
|
||||
final int baseCmp = o1.haplotype().getBaseString().compareTo(o2.haplotype().getBaseString());
|
||||
if (baseCmp != 0)
|
||||
return baseCmp;
|
||||
return - Double.compare(o1.score(),o2.score());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
public static void assertSameHaplotypes(final String name, final SeqGraph actual, final SeqGraph original) {
|
||||
final KBestHaplotypeFinder originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks());
|
||||
final KBestHaplotypeFinder actualKBestHaplotypes = new KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks());
|
||||
final List<KBestHaplotype> sortedOriginalKBestHaplotypes = new ArrayList<>(originalKBestHaplotypes);
|
||||
Collections.sort(sortedOriginalKBestHaplotypes, KBESTHAPLOTYPE_COMPARATOR);
|
||||
final List<KBestHaplotype> sortedActualKBestHaplotypes = new ArrayList<>(actualKBestHaplotypes);
|
||||
Collections.sort(sortedActualKBestHaplotypes, KBESTHAPLOTYPE_COMPARATOR);
|
||||
try {
|
||||
final Set<String> haplotypes = new HashSet<String>();
|
||||
final List<KBestHaplotype> originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks());
|
||||
final List<KBestHaplotype> actualKBestHaplotypes = new KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks());
|
||||
|
||||
for (final KBestHaplotype kbh : originalKBestHaplotypes)
|
||||
haplotypes.add(new String(kbh.bases()));
|
||||
|
|
@ -148,14 +173,16 @@ public class CommonSuffixMergerUnitTest extends BaseTest {
|
|||
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
|
||||
}
|
||||
|
||||
if ( actualKBestHaplotypes.size() == originalKBestHaplotypes.size() ) {
|
||||
for ( int i = 0; i < originalKBestHaplotypes.size(); i++ ) {
|
||||
Assert.assertTrue(actualKBestHaplotypes.get(i).haplotype().getBaseString().equals(originalKBestHaplotypes.get(i).haplotype().getBaseString()), "Paths not equal " + actualKBestHaplotypes.get(i).haplotype() + " vs. original " + originalKBestHaplotypes.get(i).haplotype());
|
||||
}
|
||||
}
|
||||
Assert.assertEquals(sortedActualKBestHaplotypes,sortedOriginalKBestHaplotypes);
|
||||
} catch ( AssertionError e ) {
|
||||
if ( PRINT_GRAPHS ) original.printGraph(new File(String.format("%s.original.dot", name, actual.vertexSet().size())), 0);
|
||||
if ( PRINT_GRAPHS ) actual.printGraph(new File(String.format("%s.actual.dot", name, actual.vertexSet().size())), 0);
|
||||
try {
|
||||
if ( PRINT_GRAPHS ) originalKBestHaplotypes.printDOTFile(String.format("%s.original.finder.dot",name));
|
||||
if ( PRINT_GRAPHS ) actualKBestHaplotypes.printDOTFile(String.format("%s.actual.finder.dot",name));
|
||||
} catch (IOException e2) {
|
||||
// do nothing.
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -65,7 +65,6 @@ import java.util.*;
|
|||
* User: rpoplin
|
||||
* Date: 1/31/13
|
||||
*/
|
||||
|
||||
public class KBestHaplotypeFinderUnitTest extends BaseTest {
|
||||
|
||||
@DataProvider(name = "BasicPathFindingData")
|
||||
|
|
@ -113,11 +112,11 @@ public class KBestHaplotypeFinderUnitTest extends BaseTest {
|
|||
final int expectedNumOfPaths = nStartNodes * nBranchesPerBubble * nEndNodes;
|
||||
Assert.assertEquals(paths.size(), expectedNumOfPaths, "Didn't find the expected number of paths");
|
||||
|
||||
int lastScore = Integer.MAX_VALUE;
|
||||
double lastScore = 0;
|
||||
for ( final KBestHaplotype kbh : paths ) {
|
||||
final Path<SeqVertex,BaseEdge> path = kbh.path();
|
||||
Assert.assertTrue(path.getScore() <= lastScore, "Paths out of order. Path " + path + " has score above previous " + lastScore);
|
||||
lastScore = path.getScore();
|
||||
Assert.assertTrue(kbh.score() <= lastScore, "Paths out of order. Path " + path + " has score " + path.getScore() + " above previous " + lastScore);
|
||||
lastScore = kbh.score();
|
||||
}
|
||||
|
||||
// get the best path, and make sure it's the same as our optimal path overall
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.testng.Assert;
|
||||
|
|
@ -226,28 +227,34 @@ public class SharedVertexSequenceSplitterUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
final Set<String> haplotypes = new HashSet<>();
|
||||
final List<KBestHaplotype> originalPaths = new KBestHaplotypeFinder((SeqGraph) graph.clone(),graph.getSources(),graph.getSinks());
|
||||
final KBestHaplotypeFinder originalPaths = new KBestHaplotypeFinder((SeqGraph) graph.clone(),graph.getSources(),graph.getSinks());
|
||||
for ( final KBestHaplotype path : originalPaths )
|
||||
haplotypes.add(new String(path.bases()));
|
||||
|
||||
final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, v);
|
||||
splitter.split();
|
||||
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".original.dot"), 0);
|
||||
if ( PRINT_GRAPHS ) splitter.splitGraph.printGraph(new File(Utils.join("_", strings) + ".split.dot"), 0);
|
||||
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".original.dot"), 0);
|
||||
if ( PRINT_GRAPHS ) splitter.splitGraph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".split.dot"), 0);
|
||||
splitter.updateGraph(top, bot);
|
||||
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".updated.dot"), 0);
|
||||
if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".updated.dot"), 0);
|
||||
|
||||
final List<KBestHaplotype> splitPaths = new KBestHaplotypeFinder(graph,graph.getSources(),graph.getSinks());
|
||||
final KBestHaplotypeFinder splitPaths = new KBestHaplotypeFinder(graph,graph.getSources(),graph.getSinks());
|
||||
for ( final KBestHaplotype path : splitPaths ) {
|
||||
final String h = new String(path.bases());
|
||||
Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h);
|
||||
}
|
||||
|
||||
if ( splitPaths.size() == originalPaths.size() ) {
|
||||
for ( int i = 0; i < originalPaths.size(); i++ ) {
|
||||
Assert.assertTrue(splitPaths.get(i).path().equalScoreAndSequence(originalPaths.get(i).path()), "Paths not equal " + splitPaths.get(i) + " vs. original " + originalPaths.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
final List<byte[]> sortedOriginalPaths = new ArrayList<>(originalPaths.size());
|
||||
for (final KBestHaplotype kbh : originalPaths.unique())
|
||||
sortedOriginalPaths.add(kbh.bases());
|
||||
Collections.sort(sortedOriginalPaths, BaseUtils.BASES_COMPARATOR);
|
||||
final List<byte[]> sortedSplitPaths = new ArrayList<>(splitPaths.size());
|
||||
for (final KBestHaplotype kbh : splitPaths.unique())
|
||||
sortedSplitPaths.add(kbh.bases());
|
||||
Collections.sort(sortedSplitPaths, BaseUtils.BASES_COMPARATOR);
|
||||
|
||||
Assert.assertEquals(sortedSplitPaths,sortedOriginalPaths,Utils.join("_", strings) + "_" + hasTop + "_" + hasBot);
|
||||
}
|
||||
|
||||
@DataProvider(name = "MeetsMinSequenceData")
|
||||
|
|
|
|||
|
|
@ -60,8 +60,8 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf";
|
||||
private static final String baseCommandPrefix = "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 ";
|
||||
private static final String baseCommand = baseCommandPrefix + "-o %s ";
|
||||
private static final String base_md5 = "a102dd55451799e5f053c784b762087e";
|
||||
private static final String base_md5_with_SW_or_VCF = "06b8eefcbd785e929027feaa22bb060d";
|
||||
private static final String base_md5 = "458588d68c8ea7e54443ea722604b265";
|
||||
private static final String base_md5_with_SW_or_VCF = "d5ed91bd5b2023c69078a0fc00268d3c";
|
||||
|
||||
@Test
|
||||
public void testDefaults() {
|
||||
|
|
@ -84,7 +84,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec1 = new WalkerTestSpec(
|
||||
baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels,
|
||||
1,
|
||||
Arrays.asList("1b24b0f2a20aed1adc726d1b296a3192"));
|
||||
Arrays.asList("a1b9396f4d5b65f7ae6e0062daf363a3"));
|
||||
executeTest("realigner known indels only from VCF", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
public void testLods() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "-LOD 60", base_md5 );
|
||||
e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "4bf28d3c0337682d439257874377a681" );
|
||||
e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "dea9bd14323b33348d9cf28e256415f2" );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
|
|
@ -117,7 +117,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 -o %s",
|
||||
1,
|
||||
Arrays.asList("f4f6c3b2a2be0306a0ecd3def334bafe"));
|
||||
Arrays.asList("b91c0bf803247f703dc1cb6ccdc4f18f"));
|
||||
executeTest("realigner long run", spec);
|
||||
}
|
||||
|
||||
|
|
@ -126,7 +126,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseCommand + "--noOriginalAlignmentTags --consensusDeterminationModel USE_SW",
|
||||
1,
|
||||
Arrays.asList("71fb521f8febfe2dc683fc636e28ae7d"));
|
||||
Arrays.asList("041e2254f271261fb46dc3878cf638f6"));
|
||||
executeTest("realigner no output tags", spec);
|
||||
}
|
||||
|
||||
|
|
@ -148,7 +148,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testMaxReadsInMemory() {
|
||||
HashMap<String, String> e = new HashMap<String, String>();
|
||||
e.put( "--maxReadsInMemory 10000", base_md5 );
|
||||
e.put( "--maxReadsInMemory 10000", "0108cd5950f1a4eb90209c3dca8f9e11" );
|
||||
e.put( "--maxReadsInMemory 40000", base_md5 );
|
||||
|
||||
for ( Map.Entry<String, String> entry : e.entrySet() ) {
|
||||
|
|
|
|||
|
|
@ -165,4 +165,12 @@ public class CombineGVCFsIntegrationTest extends WalkerTest {
|
|||
spec.disableShadowBCF();
|
||||
executeTest("testMD5s", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBasepairResolution() throws Exception {
|
||||
final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution");
|
||||
final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a068fb2c35cdd14df1e8f1f92f4114b4"));
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testBasepairResolution", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<groupId>org.broadinstitute.sting</groupId>
|
||||
<artifactId>sting-root</artifactId>
|
||||
<version>3.0</version>
|
||||
<version>3.1</version>
|
||||
<relativePath>../public/sting-root</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,71 @@
|
|||
Implementation overview:
|
||||
Created a new Java class called VectorLoglessPairHMM which extends LoglessPairHMM and
|
||||
overrides functions from both LoglessPairHMM and PairHMM.
|
||||
1. Constructor: Call base class constructors. Then, load the native library located in this
|
||||
directory and call an init function (with suffix 'jniInitializeClassFieldsAndMachineMask') in the
|
||||
library to determine fields ids for the members of classes JNIReadDataHolder and
|
||||
JNIHaplotypeDataHolders. The native code stores the field ids (struct offsets) for the classes and
|
||||
re-uses them for subsequent computations. Optionally, the user can disable the vector
|
||||
implementation, by using the 'mask' argument (see comments for a more detailed explanation).
|
||||
2. When the library is loaded, it invokes the constructor of the class LoadTimeInitializer (because
|
||||
a global variable g_load_time_initializer is declared in the library). This constructor
|
||||
(LoadTimeInitializer.cc) can be used to perform various initializations. Currently, it initializes
|
||||
two global function pointers to point to the function implementation that is supported on the
|
||||
machine (AVX/SSE/un-vectorized) on which the program is being run. The two pointers are for float
|
||||
and double respectively. The global function pointers are declared in utils.cc and are assigned in
|
||||
the function initialize_function_pointers() defined in utils.cc and invoked from the constructor of
|
||||
LoadTimeInitializer.
|
||||
Other initializations in LoadTimeInitializer:
|
||||
* ConvertChar::init - sets some masks for the vector implementation
|
||||
* FTZ for performance
|
||||
* stat counters = 0
|
||||
* debug structs (which are never used in non-debug mode)
|
||||
This initialization is done only once for the whole program.
|
||||
3. initialize(): To initialize the region for PairHMM. Pass haplotype bases to native code through
|
||||
the JNIHaplotypeDataHolder class. Since the haplotype list is common across multiple samples in
|
||||
computeReadLikelihoods(), we can pass the haplotype bases to the native code once and re-use across
|
||||
multiple samples.
|
||||
4. computeLikelihoods(): Copies array references for readBases/quals etc to array of
|
||||
JNIReadDataHolder objects. Invokes the JNI function to perform the computation and updates the
|
||||
likelihoodMap.
|
||||
The JNI function copies the byte array references into an array of testcase structs and invokes the
|
||||
compute_full_prob function through the function pointers initialized earlier.
|
||||
The primary native function called is
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods. It uses
|
||||
standard JNI calls to get and return data from/to the Java class VectorLoglessPairHMM. The last
|
||||
argument to the function is the maximum number of OpenMP threads to use while computing PairHMM in
|
||||
C++. This option is set when the native function call is made from JNILoglessPairHMM
|
||||
computeLikelihoods - currently it is set to 12 (no logical reason).
|
||||
Note: OpenMP has been disabled for now - insufficient #testcases per call to computeLikelihoods() to
|
||||
justify multi-threading.
|
||||
5. finalizeRegion(): Releases the haplotype arrays initialized in step 3 - should be called at the
|
||||
end of every region (line 351 in PairHMMLikelihoodCalculationEngine).
|
||||
|
||||
Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java.
|
||||
|
||||
Compiling:
|
||||
Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to support all AVX
|
||||
intrinsics.
|
||||
This native library is called libVectorLoglessPairHMM.so
|
||||
Using Maven:
|
||||
Type 'mvn install' in this directory - this will build the library (by invoking 'make') and copy the
|
||||
native library to the directory
|
||||
${sting-utils.basedir}/src/main/resources/org/broadinstitute/sting/utils/pairhmm
|
||||
The GATK maven build process (when run) will bundle the library into the StingUtils jar file from
|
||||
the copied directory.
|
||||
Simple build:
|
||||
cd src/main/c++
|
||||
make
|
||||
|
||||
Running:
|
||||
The default implementation of PairHMM is now VECTOR_LOGLESS_CACHING in HaplotypeCaller.java. To use
|
||||
the Java version, use the command line argument "--pair_hmm_implementation LOGLESS_CACHING". (see
|
||||
run.sh in src/main/c++).
|
||||
The native library is bundled with the StingUtils jar file. When HaplotypeCaller is invoked, then
|
||||
the library is unpacked from the jar file, copied to the /tmp directory (with a unique id) and
|
||||
loaded by the Java class VectorLoglessPairHMM in the constructor (if it has not been loaded
|
||||
already).
|
||||
The default library can be overridden by using the -Djava.library.path argument (see
|
||||
src/main/c++/run.sh for an example) for the JVM to pass the path to the library. If the library
|
||||
libVectorLoglessPairHMM.so can be found in java.library.path, then it is loaded and the 'packed'
|
||||
library is not used.
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>org.broadinstitute.sting</groupId>
|
||||
<artifactId>sting-root</artifactId>
|
||||
<version>2.8-SNAPSHOT</version>
|
||||
<relativePath>../../public/sting-root</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>VectorPairHMM</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
<name>Vectorized PairHMM native libraries</name>
|
||||
|
||||
<description>Builds a GNU/Linux x86_64 library of VectorPairHMM using icc (Intel C++ compiler). During install, copies it into sting-utils. Neither tested nor expected to work on any other platform.</description>
|
||||
|
||||
<properties>
|
||||
<sourceEncoding>UTF-8</sourceEncoding>
|
||||
<project.build.sourceEncoding>${sourceEncoding}</project.build.sourceEncoding>
|
||||
<project.reporting.outputEncoding>${sourceEncoding}</project.reporting.outputEncoding>
|
||||
<sting.basedir>${project.basedir}/../..</sting.basedir>
|
||||
<sting-utils.basedir>${sting.basedir}/public/sting-utils</sting-utils.basedir>
|
||||
<!-- Where to place the library in sting-utils -->
|
||||
<pairhmm.resources.directory>${sting-utils.basedir}/src/main/resources/org/broadinstitute/sting/utils/pairhmm</pairhmm.resources.directory>
|
||||
</properties>
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- Print out the architecture - works only on GNU/Linux x86_64 systems -->
|
||||
<!-- Neither tested nor expected to work on any other platform. -->
|
||||
<!-- Requires icc (Intel C++ compiler) to be in your PATH. -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-enforcer-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>display-info</goal>
|
||||
</goals>
|
||||
<phase>validate</phase>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!-- Run make -->
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>exec-maven-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>exec</goal>
|
||||
</goals>
|
||||
<phase>compile</phase>
|
||||
<configuration>
|
||||
<executable>make</executable>
|
||||
<workingDirectory>src/main/c++</workingDirectory>
|
||||
<environmentVariables>
|
||||
<JRE_HOME>${java.home}</JRE_HOME>
|
||||
<OUTPUT_DIR>${project.build.directory}</OUTPUT_DIR>
|
||||
</environmentVariables>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!-- Don't actually install this artifact into the user's repo -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-install-plugin</artifactId>
|
||||
<configuration>
|
||||
<skip>true</skip>
|
||||
</configuration>
|
||||
</plugin>
|
||||
|
||||
<!-- Copy the built library into sting-utils -->
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-resources-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>default-install</id>
|
||||
<goals>
|
||||
<goal>copy-resources</goal>
|
||||
</goals>
|
||||
<phase>install</phase>
|
||||
<configuration>
|
||||
<outputDirectory>${pairhmm.resources.directory}</outputDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>${project.build.directory}</directory>
|
||||
<includes>
|
||||
<include>**/*</include>
|
||||
</includes>
|
||||
</resource>
|
||||
</resources>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
||||
<!-- pom.xml cleanup, always sort according to the definitions in sting-root -->
|
||||
<plugin>
|
||||
<groupId>com.google.code.sortpom</groupId>
|
||||
<artifactId>maven-sortpom-plugin</artifactId>
|
||||
<configuration>
|
||||
<createBackupFile>false</createBackupFile>
|
||||
<predefinedSortOrder>custom_1</predefinedSortOrder>
|
||||
<lineSeparator>\n</lineSeparator>
|
||||
<encoding>${sourceEncoding}</encoding>
|
||||
<keepBlankLines>true</keepBlankLines>
|
||||
<sortDependencies>scope</sortDependencies>
|
||||
<nrOfIndentSpace>4</nrOfIndentSpace>
|
||||
<expandEmptyElements>false</expandEmptyElements>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
.svn
|
||||
*.o
|
||||
*.so
|
||||
tests
|
||||
.deps
|
||||
hmm_Mohammad
|
||||
pairhmm-template-main
|
||||
*.swp
|
||||
*.class
|
||||
checker
|
||||
reformat
|
||||
subdir_checkout.sh
|
||||
avx/
|
||||
sse/
|
||||
triplicate.sh
|
||||
|
||||
|
|
@ -0,0 +1,206 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "LoadTimeInitializer.h"
|
||||
#include "utils.h"
|
||||
using namespace std;
|
||||
char* LoadTimeInitializerStatsNames[] =
|
||||
{
|
||||
"num_regions",
|
||||
"num_reads",
|
||||
"num_haplotypes",
|
||||
"num_testcases",
|
||||
"num_double_invocations",
|
||||
"haplotype_length",
|
||||
"readlength",
|
||||
"product_read_length_haplotype_length",
|
||||
"dummy"
|
||||
};
|
||||
|
||||
LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded
|
||||
{
|
||||
ConvertChar::init();
|
||||
#ifndef DISABLE_FTZ
|
||||
//Very important to get good performance on Intel processors
|
||||
//Function: enabling FTZ converts denormals to 0 in hardware
|
||||
//Denormals cause microcode to insert uops into the core causing big slowdown
|
||||
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
|
||||
cout << "FTZ enabled - may decrease accuracy if denormal numbers encountered\n";
|
||||
#else
|
||||
cout << "FTZ is not set - may slow down performance if denormal numbers encountered\n";
|
||||
#endif
|
||||
//Profiling: times for compute and transfer (either bytes copied or pointers copied)
|
||||
m_compute_time = 0;
|
||||
m_data_transfer_time = 0;
|
||||
m_bytes_copied = 0;
|
||||
|
||||
//Initialize profiling counters
|
||||
for(unsigned i=0;i<TOTAL_NUMBER_STATS;++i)
|
||||
{
|
||||
m_sum_stats[i] = 0;
|
||||
m_sum_square_stats[i] = 0;
|
||||
m_max_stats[i] = 0;
|
||||
m_min_stats[i] = 0xFFFFFFFFFFFFFFFFull;
|
||||
}
|
||||
|
||||
//for debug dump
|
||||
m_filename_to_fptr.clear();
|
||||
m_written_files_set.clear();
|
||||
|
||||
//Common buffer - 8MB
|
||||
unsigned size = 1024*1024;
|
||||
m_buffer = new uint64_t[size];
|
||||
m_buffer_size = size*sizeof(uint64_t);
|
||||
|
||||
initialize_function_pointers();
|
||||
|
||||
//Initialize static members of class
|
||||
Context<float>::initializeStaticMembers();
|
||||
Context<double>::initializeStaticMembers();
|
||||
|
||||
cout.flush();
|
||||
}
|
||||
|
||||
void LoadTimeInitializer::print_profiling()
|
||||
{
|
||||
double mean = 0;
|
||||
double variance = 0;
|
||||
uint64_t denominator = 1;
|
||||
cout << "Time spent in compute_testcases "<<m_compute_time*1e-9<<"\n";
|
||||
cout << "Time spent in data transfer (Java <--> C++) "<<m_data_transfer_time*1e-9<<"\n";
|
||||
|
||||
cout << "\nHC input stats\nstat_name,sum,sum_square,mean,variance,min,max\n";
|
||||
for(unsigned i=0;i<TOTAL_NUMBER_STATS;++i)
|
||||
{
|
||||
cout << LoadTimeInitializerStatsNames[i];
|
||||
cout << "," << m_sum_stats[i];
|
||||
cout << "," << std::scientific << m_sum_square_stats[i];
|
||||
denominator = 1;
|
||||
switch(i)
|
||||
{
|
||||
case NUM_READS_IDX:
|
||||
case NUM_HAPLOTYPES_IDX:
|
||||
case NUM_TESTCASES_IDX:
|
||||
denominator = m_sum_stats[NUM_REGIONS_IDX];
|
||||
break;
|
||||
case HAPLOTYPE_LENGTH_IDX:
|
||||
denominator = m_sum_stats[NUM_HAPLOTYPES_IDX];
|
||||
break;
|
||||
case READ_LENGTH_IDX:
|
||||
denominator = m_sum_stats[NUM_READS_IDX];
|
||||
break;
|
||||
case PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX:
|
||||
denominator = m_sum_stats[NUM_TESTCASES_IDX];
|
||||
break;
|
||||
default:
|
||||
denominator = 1;
|
||||
break;
|
||||
}
|
||||
mean = ((double)m_sum_stats[i])/denominator;
|
||||
cout << "," << std::scientific << mean;
|
||||
variance = (m_sum_square_stats[i]/denominator) - (mean*mean); //E(X^2)-(E(X))^2
|
||||
cout << "," << std::scientific << variance;
|
||||
cout << "," << m_min_stats[i];
|
||||
cout << "," << m_max_stats[i];
|
||||
cout << "\n";
|
||||
}
|
||||
cout << "\n";
|
||||
cout.flush();
|
||||
}
|
||||
|
||||
void LoadTimeInitializer::debug_dump(string filename, string s, bool to_append, bool add_newline)
|
||||
{
|
||||
map<string, ofstream*>::iterator mI = m_filename_to_fptr.find(filename);
|
||||
ofstream* fptr = 0;
|
||||
if(mI == m_filename_to_fptr.end())
|
||||
{
|
||||
m_filename_to_fptr[filename] = new ofstream();
|
||||
fptr = m_filename_to_fptr[filename];
|
||||
//File never seen before
|
||||
if(m_written_files_set.find(filename) == m_written_files_set.end())
|
||||
{
|
||||
to_append = false;
|
||||
m_written_files_set.insert(filename);
|
||||
}
|
||||
fptr->open(filename.c_str(), to_append ? ios::app : ios::out);
|
||||
assert(fptr->is_open());
|
||||
}
|
||||
else
|
||||
fptr = (*mI).second;
|
||||
//ofstream fptr;
|
||||
//fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
|
||||
(*fptr) << s;
|
||||
if(add_newline)
|
||||
(*fptr) << "\n";
|
||||
//fptr.close();
|
||||
}
|
||||
void LoadTimeInitializer::debug_close()
|
||||
{
|
||||
for(map<string,ofstream*>::iterator mB = m_filename_to_fptr.begin(), mE = m_filename_to_fptr.end();
|
||||
mB != mE;mB++)
|
||||
{
|
||||
(*mB).second->close();
|
||||
delete (*mB).second;
|
||||
}
|
||||
m_filename_to_fptr.clear();
|
||||
}
|
||||
|
||||
void LoadTimeInitializer::dump_sandbox(testcase& tc, unsigned tc_idx, unsigned numReads, unsigned numHaplotypes)
|
||||
{
|
||||
unsigned haplotypeLength = tc.haplen;
|
||||
unsigned readLength = tc.rslen;
|
||||
ofstream& dumpFptr = m_sandbox_fptr;
|
||||
for(unsigned k=0;k<haplotypeLength;++k)
|
||||
dumpFptr<<(char)(tc.hap[k]);
|
||||
dumpFptr<<" ";
|
||||
for(unsigned k=0;k<readLength;++k)
|
||||
dumpFptr<<(char)(tc.rs[k]);
|
||||
dumpFptr<<" ";
|
||||
for(unsigned k=0;k<readLength;++k)
|
||||
dumpFptr<<(char)(tc.q[k]+33);
|
||||
dumpFptr<<" ";
|
||||
for(unsigned k=0;k<readLength;++k)
|
||||
dumpFptr<<(char)(tc.i[k]+33);
|
||||
dumpFptr<<" ";
|
||||
for(unsigned k=0;k<readLength;++k)
|
||||
dumpFptr<<(char)(tc.d[k]+33);
|
||||
dumpFptr<<" ";
|
||||
for(unsigned k=0;k<readLength;++k)
|
||||
dumpFptr<<(char)(tc.c[k]+33);
|
||||
if(tc_idx == 0) //new region
|
||||
dumpFptr << " "<< numReads << " "<<numHaplotypes;
|
||||
dumpFptr<<"\n";
|
||||
}
|
||||
|
||||
void LoadTimeInitializer::update_stat(LoadTimeInitializerStatsEnum stat_idx, uint64_t value)
|
||||
{
|
||||
m_sum_stats[stat_idx] += value;
|
||||
double v = value;
|
||||
m_sum_square_stats[stat_idx] += (v*v);
|
||||
m_max_stats[stat_idx] = std::max(m_max_stats[stat_idx], value);
|
||||
m_min_stats[stat_idx] = std::min(m_min_stats[stat_idx], value);
|
||||
}
|
||||
|
|
@ -0,0 +1,94 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef LOAD_TIME_INITIALIZER_H
|
||||
#define LOAD_TIME_INITIALIZER_H
|
||||
#include "headers.h"
|
||||
#include <jni.h>
|
||||
#include "template.h"
|
||||
|
||||
enum LoadTimeInitializerStatsEnum
|
||||
{
|
||||
NUM_REGIONS_IDX=0,
|
||||
NUM_READS_IDX,
|
||||
NUM_HAPLOTYPES_IDX,
|
||||
NUM_TESTCASES_IDX,
|
||||
NUM_DOUBLE_INVOCATIONS_IDX,
|
||||
HAPLOTYPE_LENGTH_IDX,
|
||||
READ_LENGTH_IDX,
|
||||
PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX,
|
||||
TOTAL_NUMBER_STATS
|
||||
};
|
||||
extern char* LoadTimeInitializerStatsNames[];
|
||||
|
||||
class LoadTimeInitializer
|
||||
{
|
||||
public:
|
||||
LoadTimeInitializer(); //will be called when library is loaded
|
||||
~LoadTimeInitializer()
|
||||
{
|
||||
delete[] m_buffer;
|
||||
}
|
||||
void print_profiling();
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||
void debug_close();
|
||||
|
||||
void dump_sandbox(testcase& tc, unsigned tc_idx, unsigned numReads, unsigned numHaplotypes);
|
||||
void open_sandbox() { m_sandbox_fptr.open("sandbox.txt", std::ios::app); }
|
||||
void close_sandbox() { m_sandbox_fptr.close(); }
|
||||
|
||||
jfieldID m_readBasesFID;
|
||||
jfieldID m_readQualsFID;
|
||||
jfieldID m_insertionGOPFID;
|
||||
jfieldID m_deletionGOPFID;
|
||||
jfieldID m_overallGCPFID;
|
||||
jfieldID m_haplotypeBasesFID;
|
||||
//profiling - update stats
|
||||
void update_stat(LoadTimeInitializerStatsEnum stat_idx, uint64_t value);
|
||||
//timing in nanoseconds
|
||||
uint64_t m_compute_time;
|
||||
uint64_t m_data_transfer_time;
|
||||
//bytes copied
|
||||
uint64_t m_bytes_copied;
|
||||
unsigned get_buffer_size() { return m_buffer_size; }
|
||||
char* get_buffer() { return (char*)m_buffer; }
|
||||
private:
|
||||
std::map<std::string, std::ofstream*> m_filename_to_fptr;
|
||||
std::set<std::string> m_written_files_set;
|
||||
std::ofstream m_sandbox_fptr;
|
||||
//used to compute various stats
|
||||
uint64_t m_sum_stats[TOTAL_NUMBER_STATS];
|
||||
double m_sum_square_stats[TOTAL_NUMBER_STATS];
|
||||
uint64_t m_min_stats[TOTAL_NUMBER_STATS];
|
||||
uint64_t m_max_stats[TOTAL_NUMBER_STATS];
|
||||
unsigned m_buffer_size;
|
||||
uint64_t* m_buffer;
|
||||
};
|
||||
extern LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
#define SIZE_PER_TESTCASE 6*10000
|
||||
#define SIZE_PER_BUFFER 10000
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
#Copyright (c) 2012 The Broad Institute
|
||||
|
||||
#Permission is hereby granted, free of charge, to any person
|
||||
#obtaining a copy of this software and associated documentation
|
||||
#files (the "Software"), to deal in the Software without
|
||||
#restriction, including without limitation the rights to use,
|
||||
#copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
#copies of the Software, and to permit persons to whom the
|
||||
#Software is furnished to do so, subject to the following
|
||||
#conditions:
|
||||
|
||||
#The above copyright notice and this permission notice shall be
|
||||
#included in all copies or substantial portions of the Software.
|
||||
|
||||
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
#EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
#OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
#NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
#WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
#FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
#THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
|
||||
|
||||
#OMPCFLAGS=-fopenmp
|
||||
#OMPLFLAGS=-fopenmp #-openmp-link static
|
||||
|
||||
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
|
||||
JRE_HOME?=/opt/jdk1.7.0_25/jre
|
||||
JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JRE_HOME}/../include -I${JRE_HOME}/../include/linux
|
||||
|
||||
COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
CC=icc
|
||||
CXX=icc
|
||||
|
||||
LDFLAGS=-lm -lrt $(OMPLDFLAGS)
|
||||
ifdef DISABLE_FTZ
|
||||
COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz
|
||||
endif
|
||||
|
||||
PAPI_DIR=/home/karthikg/softwares/papi-5.3.0
|
||||
ifdef USE_PAPI
|
||||
ifeq ($(USE_PAPI),1)
|
||||
COMMON_COMPILATION_FLAGS+=-I$(PAPI_DIR)/include -DUSE_PAPI
|
||||
LDFLAGS+=-L$(PAPI_DIR)/lib -lpapi
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef DISABLE_FTZ
|
||||
COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz
|
||||
endif
|
||||
|
||||
BIN=libVectorLoglessPairHMM.so pairhmm-template-main checker
|
||||
#BIN=checker
|
||||
|
||||
DEPDIR=.deps
|
||||
DF=$(DEPDIR)/$(*).d
|
||||
|
||||
#Common across libJNI and sandbox
|
||||
COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc LoadTimeInitializer.cc
|
||||
#Part of libJNI
|
||||
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc Sandbox.cc $(COMMON_SOURCES)
|
||||
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
|
||||
LIBOBJECTS=$(LIBSOURCES:.cc=.o)
|
||||
COMMON_OBJECTS=$(COMMON_SOURCES:.cc=.o)
|
||||
|
||||
|
||||
#No vectorization for these files
|
||||
NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc LoadTimeInitializer.cc Sandbox.cc
|
||||
#Use -xAVX for these files
|
||||
AVX_SOURCES=avx_function_instantiations.cc
|
||||
#Use -xSSE4.2 for these files
|
||||
SSE_SOURCES=sse_function_instantiations.cc
|
||||
|
||||
NO_VECTOR_OBJECTS=$(NO_VECTOR_SOURCES:.cc=.o)
|
||||
AVX_OBJECTS=$(AVX_SOURCES:.cc=.o)
|
||||
SSE_OBJECTS=$(SSE_SOURCES:.cc=.o)
|
||||
$(NO_VECTOR_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS)
|
||||
$(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xAVX
|
||||
$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xSSE4.2
|
||||
OBJECTS=$(NO_VECTOR_OBJECTS) $(AVX_OBJECTS) $(SSE_OBJECTS)
|
||||
|
||||
all: $(BIN) Sandbox.class copied_lib
|
||||
|
||||
-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))
|
||||
|
||||
checker: pairhmm-1-base.o $(COMMON_OBJECTS)
|
||||
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
|
||||
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
libVectorLoglessPairHMM.so: $(LIBOBJECTS)
|
||||
$(CXX) $(OMPLFLAGS) -shared -static-intel -o $@ $(LIBOBJECTS) ${LDFLAGS}
|
||||
|
||||
|
||||
$(OBJECTS): %.o: %.cc
|
||||
@mkdir -p $(DEPDIR)
|
||||
$(CXX) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $<
|
||||
|
||||
Sandbox.class: Sandbox.java
|
||||
javac Sandbox.java
|
||||
|
||||
copied_lib: libVectorLoglessPairHMM.so
|
||||
ifdef OUTPUT_DIR
|
||||
mkdir -p $(OUTPUT_DIR)
|
||||
rsync -a libVectorLoglessPairHMM.so $(OUTPUT_DIR)/
|
||||
endif
|
||||
|
||||
clean:
|
||||
rm -rf $(BIN) *.o $(DEPDIR) *.class
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "Sandbox.h"
|
||||
#include "org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h"
|
||||
#include "utils.h"
|
||||
#include "jni_common.h"
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniGetMachineType
|
||||
* Signature: ()J
|
||||
*/
|
||||
JNIEXPORT jlong JNICALL Java_Sandbox_jniGetMachineType
|
||||
(JNIEnv * env, jobject thisObj)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniInitializeClassFieldsAndMachineMask
|
||||
* Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniInitializeClassFieldsAndMachineMask
|
||||
(JNIEnv* env, jobject thisObject, jclass readDataHolderClass, jclass haplotypeDataHolderClass, jlong mask)
|
||||
{
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask(env, thisObject, readDataHolderClass,
|
||||
haplotypeDataHolderClass, mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniInitializeHaplotypes
|
||||
* Signature: (I[LSandbox/JNIHaplotypeDataHolderClass;)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniInitializeHaplotypes
|
||||
(JNIEnv * env, jobject thisObject, jint numHaplotypes, jobjectArray haplotypeDataArray)
|
||||
{
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeHaplotypes(env, thisObject, numHaplotypes, haplotypeDataArray);
|
||||
}
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniFinalizeRegion
|
||||
* Signature: ()V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniFinalizeRegion
|
||||
(JNIEnv * env, jobject thisObject)
|
||||
{
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniFinalizeRegion(env, thisObject);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniComputeLikelihoods
|
||||
* Signature: (II[LSandbox/JNIReadDataHolderClass;[LSandbox/JNIHaplotypeDataHolderClass;[DI)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniComputeLikelihoods
|
||||
(JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes,
|
||||
jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse)
|
||||
{
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods(env, thisObject,
|
||||
numReads, numHaplotypes, readDataArray, haplotypeDataArray, likelihoodArray, maxNumThreadsToUse);
|
||||
}
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniClose
|
||||
* Signature: ()V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniClose
|
||||
(JNIEnv* env, jobject thisObject)
|
||||
{ Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniClose(env, thisObject); }
|
||||
|
||||
JNIEXPORT void JNICALL Java_Sandbox_doEverythingNative
|
||||
(JNIEnv* env, jobject thisObject, jstring fileNameString)
|
||||
{
|
||||
const char* fileName = env->GetStringUTFChars(fileNameString, 0);
|
||||
char local_array[800];
|
||||
strncpy(local_array, fileName, 200);
|
||||
env->ReleaseStringUTFChars(fileNameString, fileName);
|
||||
do_compute(local_array, true, 10000, false);
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/* DO NOT EDIT THIS FILE - it is machine generated */
|
||||
#include <jni.h>
|
||||
/* Header for class Sandbox */
|
||||
|
||||
#ifndef _Included_Sandbox
|
||||
#define _Included_Sandbox
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#undef Sandbox_enableAll
|
||||
#define Sandbox_enableAll -1LL
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniGetMachineType
|
||||
* Signature: ()J
|
||||
*/
|
||||
JNIEXPORT jlong JNICALL Java_Sandbox_jniGetMachineType
|
||||
(JNIEnv *, jobject);
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniInitializeClassFieldsAndMachineMask
|
||||
* Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniInitializeClassFieldsAndMachineMask
|
||||
(JNIEnv *, jobject, jclass, jclass, jlong);
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniInitializeHaplotypes
|
||||
* Signature: (I[LSandbox/JNIHaplotypeDataHolderClass;)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniInitializeHaplotypes
|
||||
(JNIEnv *, jobject, jint, jobjectArray);
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniFinalizeRegion
|
||||
* Signature: ()V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniFinalizeRegion
|
||||
(JNIEnv *, jobject);
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniComputeLikelihoods
|
||||
* Signature: (II[LSandbox/JNIReadDataHolderClass;[LSandbox/JNIHaplotypeDataHolderClass;[DI)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniComputeLikelihoods
|
||||
(JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint);
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: jniClose
|
||||
* Signature: ()V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_jniClose
|
||||
(JNIEnv *, jobject);
|
||||
|
||||
/*
|
||||
* Class: Sandbox
|
||||
* Method: doEverythingNative
|
||||
* Signature: ([B)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_Sandbox_doEverythingNative
|
||||
(JNIEnv *, jobject, jstring);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,306 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.vectorpairhmm;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.io.File;
|
||||
import java.util.Scanner;
|
||||
import java.io.IOException;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
public class Sandbox {
|
||||
|
||||
private long setupTime = 0;
|
||||
private long computeTime = 0;
|
||||
//Used to copy references to byteArrays to JNI from reads
|
||||
protected class JNIReadDataHolderClass {
|
||||
public byte[] readBases = null;
|
||||
public byte[] readQuals = null;
|
||||
public byte[] insertionGOP = null;
|
||||
public byte[] deletionGOP = null;
|
||||
public byte[] overallGCP = null;
|
||||
}
|
||||
|
||||
//Used to copy references to byteArrays to JNI from haplotypes
|
||||
protected class JNIHaplotypeDataHolderClass {
|
||||
public byte[] haplotypeBases = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return 64-bit mask representing machine capabilities
|
||||
* Bit 0 is LSB, bit 63 MSB
|
||||
* Bit 0 represents sse4.2 availability
|
||||
* Bit 1 represents AVX availability
|
||||
*/
|
||||
public native long jniGetMachineType();
|
||||
public static final long enableAll = 0xFFFFFFFFFFFFFFFFl;
|
||||
|
||||
|
||||
/**
|
||||
* Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM.
|
||||
* C++ codegets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not
|
||||
* change per JVM session
|
||||
* @param readDataHolderClass class type of JNIReadDataHolderClass
|
||||
* @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass
|
||||
* @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask
|
||||
* */
|
||||
private native void jniInitializeClassFieldsAndMachineMask(Class<?> readDataHolderClass, Class<?> haplotypeDataHolderClass, long mask);
|
||||
|
||||
private static Boolean isVectorLoglessPairHMMLibraryLoaded = false;
|
||||
//The constructor is called only once inside PairHMMLikelihoodCalculationEngine
|
||||
public Sandbox() {
|
||||
synchronized(isVectorLoglessPairHMMLibraryLoaded) {
|
||||
//Load the library and initialize the FieldIDs
|
||||
if(!isVectorLoglessPairHMMLibraryLoaded) {
|
||||
System.loadLibrary("VectorLoglessPairHMM");
|
||||
isVectorLoglessPairHMMLibraryLoaded = true;
|
||||
jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
|
||||
private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray = null;
|
||||
|
||||
//Used to transfer data to JNI
|
||||
//Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
|
||||
public void initialize(final List<JNIHaplotypeDataHolderClass> haplotypes) {
|
||||
int numHaplotypes = haplotypes.size();
|
||||
mHaplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
|
||||
int idx = 0;
|
||||
for(final JNIHaplotypeDataHolderClass currHaplotype : haplotypes)
|
||||
{
|
||||
mHaplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
|
||||
mHaplotypeDataArray[idx].haplotypeBases = currHaplotype.haplotypeBases;
|
||||
++idx;
|
||||
}
|
||||
jniInitializeHaplotypes(numHaplotypes, mHaplotypeDataArray);
|
||||
}
|
||||
/**
|
||||
* Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
|
||||
* accessing Java memory directly, still important to release memory from C++
|
||||
*/
|
||||
private native void jniFinalizeRegion();
|
||||
|
||||
|
||||
public void finalizeRegion()
|
||||
{
|
||||
jniFinalizeRegion();
|
||||
}
|
||||
|
||||
/**
|
||||
* Real compute kernel
|
||||
*/
|
||||
private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray,
|
||||
JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse);
|
||||
|
||||
public void computeLikelihoods(final List<JNIReadDataHolderClass> reads, final List<JNIHaplotypeDataHolderClass> haplotypes) {
|
||||
//System.out.println("Region : "+reads.size()+" x "+haplotypes.size());
|
||||
long startTime = System.nanoTime();
|
||||
int readListSize = reads.size();
|
||||
int numHaplotypes = haplotypes.size();
|
||||
int numTestcases = readListSize*numHaplotypes;
|
||||
JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
|
||||
int idx = 0;
|
||||
for(JNIReadDataHolderClass read : reads)
|
||||
{
|
||||
readDataArray[idx] = new JNIReadDataHolderClass();
|
||||
readDataArray[idx].readBases = read.readBases;
|
||||
readDataArray[idx].readQuals = read.readQuals;
|
||||
readDataArray[idx].insertionGOP = read.insertionGOP;
|
||||
readDataArray[idx].deletionGOP = read.deletionGOP;
|
||||
readDataArray[idx].overallGCP = read.overallGCP;
|
||||
++idx;
|
||||
}
|
||||
|
||||
double[] mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results
|
||||
setupTime += (System.nanoTime() - startTime);
|
||||
//for(reads)
|
||||
// for(haplotypes)
|
||||
// compute_full_prob()
|
||||
jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12);
|
||||
|
||||
computeTime += (System.nanoTime() - startTime);
|
||||
}
|
||||
|
||||
/**
|
||||
* Print final profiling information from native code
|
||||
*/
|
||||
public native void jniClose();
|
||||
public void close()
|
||||
{
|
||||
System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9)+" compute time : "+(computeTime*1e-9));
|
||||
jniClose();
|
||||
}
|
||||
|
||||
public void parseSandboxFile(String filename)
|
||||
{
|
||||
File file = new File(filename);
|
||||
Scanner input = null;
|
||||
try
|
||||
{
|
||||
input = new Scanner(file);
|
||||
}
|
||||
catch(FileNotFoundException e)
|
||||
{
|
||||
System.err.println("File "+filename+" cannot be found/read");
|
||||
return;
|
||||
}
|
||||
int idx = 0;
|
||||
int numReads = 0;
|
||||
int numHaplotypes = 0;
|
||||
int readIdx = 0, testCaseIdx = 0, haplotypeIdx = 0;
|
||||
LinkedList<JNIHaplotypeDataHolderClass> haplotypeList = new LinkedList<JNIHaplotypeDataHolderClass>();
|
||||
LinkedList<JNIReadDataHolderClass> readList = new LinkedList<JNIReadDataHolderClass>();
|
||||
|
||||
byte[][] byteArray = new byte[6][];
|
||||
boolean firstLine = true;
|
||||
String[] currTokens = new String[8];
|
||||
while(input.hasNextLine())
|
||||
{
|
||||
String line = input.nextLine();
|
||||
Scanner lineScanner = new Scanner(line);
|
||||
idx = 0;
|
||||
while(lineScanner.hasNext())
|
||||
currTokens[idx++] = lineScanner.next();
|
||||
if(idx == 0)
|
||||
break;
|
||||
assert(idx >= 6);
|
||||
//start of new region
|
||||
if(idx == 8)
|
||||
{
|
||||
if(!firstLine)
|
||||
{
|
||||
initialize(haplotypeList);
|
||||
computeLikelihoods(readList, haplotypeList);
|
||||
finalizeRegion();
|
||||
}
|
||||
try
|
||||
{
|
||||
numReads = Integer.parseInt(currTokens[6]);
|
||||
}
|
||||
catch(NumberFormatException e)
|
||||
{
|
||||
numReads = 1;
|
||||
}
|
||||
try
|
||||
{
|
||||
numHaplotypes = Integer.parseInt(currTokens[7]);
|
||||
}
|
||||
catch(NumberFormatException e)
|
||||
{
|
||||
numHaplotypes = 1;
|
||||
}
|
||||
haplotypeIdx = readIdx = testCaseIdx = 0;
|
||||
readList.clear();
|
||||
haplotypeList.clear();
|
||||
}
|
||||
if(haplotypeIdx < numHaplotypes)
|
||||
{
|
||||
JNIHaplotypeDataHolderClass X = new JNIHaplotypeDataHolderClass();
|
||||
X.haplotypeBases = currTokens[0].getBytes();
|
||||
haplotypeList.add(X);
|
||||
}
|
||||
if(testCaseIdx%numHaplotypes == 0)
|
||||
{
|
||||
JNIReadDataHolderClass X = new JNIReadDataHolderClass();
|
||||
X.readBases = currTokens[1].getBytes();
|
||||
for(int i=2;i<6;++i)
|
||||
{
|
||||
byteArray[i] = currTokens[i].getBytes();
|
||||
for(int j=0;j<byteArray[i].length;++j)
|
||||
byteArray[i][j] -= 33; //normalize
|
||||
}
|
||||
X.readQuals = byteArray[2];
|
||||
X.insertionGOP = byteArray[3];
|
||||
X.deletionGOP = byteArray[4];
|
||||
X.overallGCP = byteArray[5];
|
||||
readList.add(X);
|
||||
}
|
||||
++testCaseIdx;
|
||||
++haplotypeIdx;
|
||||
|
||||
lineScanner.close();
|
||||
firstLine = false;
|
||||
}
|
||||
if(haplotypeList.size() > 0 && readList.size() > 0)
|
||||
{
|
||||
initialize(haplotypeList);
|
||||
computeLikelihoods(readList, haplotypeList);
|
||||
finalizeRegion();
|
||||
}
|
||||
|
||||
close();
|
||||
input.close();
|
||||
}
|
||||
|
||||
private native void doEverythingNative(String filename);
|
||||
|
||||
public static void main(String[] args)
|
||||
{
|
||||
if(args.length <= 0)
|
||||
{
|
||||
System.err.println("Needs 1 argument - <filename>");
|
||||
System.exit(-1);
|
||||
}
|
||||
//// Get runtime
|
||||
//java.lang.Runtime rt = java.lang.Runtime.getRuntime();
|
||||
//// Start a new process: UNIX command ls
|
||||
//String cmd = "/home/karthikg/broad/gsa-unstable/public/c++/VectorPairHMM/checker "+args[0];
|
||||
//try
|
||||
//{
|
||||
//System.out.println(cmd);
|
||||
//java.lang.Process p = rt.exec(cmd);
|
||||
//try
|
||||
//{
|
||||
//p.waitFor();
|
||||
//java.io.InputStream is = p.getInputStream();
|
||||
//java.io.BufferedReader reader = new java.io.BufferedReader(new InputStreamReader(is));
|
||||
//// And print each line
|
||||
//String s = null;
|
||||
//while ((s = reader.readLine()) != null) {
|
||||
//System.out.println(s);
|
||||
//}
|
||||
//is.close();
|
||||
//}
|
||||
//catch(InterruptedException e)
|
||||
//{
|
||||
//System.err.println(e);
|
||||
//}
|
||||
//}
|
||||
//catch(IOException e)
|
||||
//{
|
||||
//System.err.println(e);
|
||||
//}
|
||||
Sandbox t = new Sandbox();
|
||||
//t.doEverythingNative(args[0]);
|
||||
t.parseSandboxFile(args[0]);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
/* DO NOT EDIT THIS FILE - it is machine generated */
|
||||
#include <jni.h>
|
||||
/* Header for class Sandbox_JNIHaplotypeDataHolderClass */
|
||||
|
||||
#ifndef _Included_Sandbox_JNIHaplotypeDataHolderClass
|
||||
#define _Included_Sandbox_JNIHaplotypeDataHolderClass
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
/* DO NOT EDIT THIS FILE - it is machine generated */
|
||||
#include <jni.h>
|
||||
/* Header for class Sandbox_JNIReadDataHolderClass */
|
||||
|
||||
#ifndef _Included_Sandbox_JNIReadDataHolderClass
|
||||
#define _Included_Sandbox_JNIReadDataHolderClass
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "template.h"
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_SSE
|
||||
|
||||
#define SIMD_ENGINE avx
|
||||
#define SIMD_ENGINE_AVX
|
||||
|
||||
#include "define-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
template double compute_full_prob_avxd<double>(testcase* tc, double* nextlog);
|
||||
template float compute_full_prob_avxs<float>(testcase* tc, float* nextlog);
|
||||
|
||||
|
|
@ -0,0 +1,167 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
using namespace std;
|
||||
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||
{
|
||||
int r, c;
|
||||
int ROWS = tc->rslen + 1;
|
||||
int COLS = tc->haplen + 1;
|
||||
|
||||
Context<NUMBER> ctx;
|
||||
//#define USE_STACK_ALLOCATION 1
|
||||
#ifdef USE_STACK_ALLOCATION
|
||||
NUMBER M[ROWS][COLS];
|
||||
NUMBER X[ROWS][COLS];
|
||||
NUMBER Y[ROWS][COLS];
|
||||
NUMBER p[ROWS][6];
|
||||
#else
|
||||
//allocate on heap in way that simulates a 2D array. Having a 2D array instead of
|
||||
//a straightforward array of pointers ensures that all data lies 'close' in memory, increasing
|
||||
//the chance of being stored together in the cache. Also, prefetchers can learn memory access
|
||||
//patterns for 2D arrays, not possible for array of pointers
|
||||
//bool locally_allocated = false;
|
||||
//NUMBER* common_buffer = 0;
|
||||
NUMBER* common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
||||
//unsigned curr_size = sizeof(NUMBER)*(3*ROWS*COLS + ROWS*6);
|
||||
//if(true)
|
||||
//{
|
||||
//common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
||||
//locally_allocated = true;
|
||||
//}
|
||||
//else
|
||||
//common_buffer = (NUMBER*)(g_load_time_initializer.get_buffer());
|
||||
//pointers to within the allocated buffer
|
||||
NUMBER** common_pointer_buffer = new NUMBER*[4*ROWS];
|
||||
NUMBER* ptr = common_buffer;
|
||||
unsigned i = 0;
|
||||
for(i=0;i<3*ROWS;++i, ptr+=COLS)
|
||||
common_pointer_buffer[i] = ptr;
|
||||
for(;i<4*ROWS;++i, ptr+=6)
|
||||
common_pointer_buffer[i] = ptr;
|
||||
|
||||
NUMBER** M = common_pointer_buffer;
|
||||
NUMBER** X = M + ROWS;
|
||||
NUMBER** Y = X + ROWS;
|
||||
NUMBER** p = Y + ROWS;
|
||||
#endif
|
||||
|
||||
|
||||
p[0][MM] = ctx._(0.0);
|
||||
p[0][GapM] = ctx._(0.0);
|
||||
p[0][MX] = ctx._(0.0);
|
||||
p[0][XX] = ctx._(0.0);
|
||||
p[0][MY] = ctx._(0.0);
|
||||
p[0][YY] = ctx._(0.0);
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
{
|
||||
int _i = tc->i[r-1] & 127;
|
||||
int _d = tc->d[r-1] & 127;
|
||||
int _c = tc->c[r-1] & 127;
|
||||
//p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
||||
SET_MATCH_TO_MATCH_PROB(p[r][MM], _i, _d);
|
||||
p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
|
||||
p[r][MX] = ctx.ph2pr[_i];
|
||||
p[r][XX] = ctx.ph2pr[_c];
|
||||
p[r][MY] = ctx.ph2pr[_d];
|
||||
p[r][YY] = ctx.ph2pr[_c];
|
||||
//p[r][MY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
|
||||
//p[r][YY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
|
||||
}
|
||||
for (c = 0; c < COLS; c++)
|
||||
{
|
||||
M[0][c] = ctx._(0.0);
|
||||
X[0][c] = ctx._(0.0);
|
||||
Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
}
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
{
|
||||
M[r][0] = ctx._(0.0);
|
||||
X[r][0] = X[r-1][0] * p[r][XX];
|
||||
Y[r][0] = ctx._(0.0);
|
||||
}
|
||||
|
||||
NUMBER result = ctx._(0.0);
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
for (c = 1; c < COLS; c++)
|
||||
{
|
||||
fexcept_t flagp;
|
||||
char _rs = tc->rs[r-1];
|
||||
char _hap = tc->hap[c-1];
|
||||
int _q = tc->q[r-1] & 127;
|
||||
NUMBER distm = ctx.ph2pr[_q];
|
||||
if (_rs == _hap || _rs == 'N' || _hap == 'N')
|
||||
distm = ctx._(1.0) - distm;
|
||||
else
|
||||
distm = distm/3;
|
||||
|
||||
|
||||
//feclearexcept(FE_ALL_EXCEPT);
|
||||
M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]);
|
||||
//STORE_FP_EXCEPTIONS(flagp, exceptions_array);
|
||||
|
||||
//feclearexcept(FE_ALL_EXCEPT);
|
||||
X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX];
|
||||
//STORE_FP_EXCEPTIONS(flagp, exceptions_array);
|
||||
|
||||
//feclearexcept(FE_ALL_EXCEPT);
|
||||
Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY];
|
||||
//STORE_FP_EXCEPTIONS(flagp, exceptions_array);
|
||||
|
||||
//CONVERT_AND_PRINT(M[r][c]);
|
||||
//CONVERT_AND_PRINT(X[r][c]);
|
||||
//CONVERT_AND_PRINT(Y[r][c]);
|
||||
|
||||
}
|
||||
for (c = 0; c < COLS; c++)
|
||||
{
|
||||
result += M[ROWS-1][c] + X[ROWS-1][c];
|
||||
}
|
||||
|
||||
if (before_last_log != NULL)
|
||||
*before_last_log = result;
|
||||
|
||||
#ifndef USE_STACK_ALLOCATION
|
||||
delete[] common_pointer_buffer;
|
||||
//if(locally_allocated)
|
||||
delete[] common_buffer;
|
||||
#endif
|
||||
|
||||
return result;
|
||||
//return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
|
||||
}
|
||||
|
||||
template double compute_full_prob<double>(testcase* tc, double* nextbuf);
|
||||
template float compute_full_prob<float>(testcase* tc, float* nextbuf);
|
||||
|
||||
|
|
@ -0,0 +1,205 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#ifdef PRECISION
|
||||
#undef PRECISION
|
||||
#undef MAIN_TYPE
|
||||
#undef MAIN_TYPE_SIZE
|
||||
#undef UNION_TYPE
|
||||
#undef IF_128
|
||||
#undef IF_MAIN_TYPE
|
||||
#undef SHIFT_CONST1
|
||||
#undef SHIFT_CONST2
|
||||
#undef SHIFT_CONST3
|
||||
#undef _128_TYPE
|
||||
#undef SIMD_TYPE
|
||||
#undef AVX_LENGTH
|
||||
#undef HAP_TYPE
|
||||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef _256_INT_TYPE
|
||||
#undef BITMASK_VEC
|
||||
#endif
|
||||
|
||||
#define PRECISION d
|
||||
#define MAIN_TYPE double
|
||||
#define MAIN_TYPE_SIZE 64
|
||||
#define UNION_TYPE mix_D
|
||||
#define IF_128 IF_128d
|
||||
#define IF_MAIN_TYPE IF_64
|
||||
#define SHIFT_CONST1 8
|
||||
#define SHIFT_CONST2 1
|
||||
#define SHIFT_CONST3 8
|
||||
#define _128_TYPE __m128d
|
||||
#define SIMD_TYPE __m256d
|
||||
#define _256_INT_TYPE __m256i
|
||||
#define AVX_LENGTH 4
|
||||
#define HAP_TYPE __m128i
|
||||
#define MASK_TYPE uint64_t
|
||||
#define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFF
|
||||
#define MASK_VEC MaskVec_D
|
||||
|
||||
#define SET_VEC_ZERO(__vec) \
|
||||
__vec= _mm256_setzero_pd()
|
||||
|
||||
#define VEC_OR(__v1, __v2) \
|
||||
_mm256_or_pd(__v1, __v2)
|
||||
|
||||
#define VEC_ADD(__v1, __v2) \
|
||||
_mm256_add_pd(__v1, __v2)
|
||||
|
||||
#define VEC_SUB(__v1, __v2) \
|
||||
_mm256_sub_pd(__v1, __v2)
|
||||
|
||||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm256_mul_pd(__v1, __v2)
|
||||
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
_mm256_div_pd(__v1, __v2)
|
||||
|
||||
#define VEC_BLEND(__v1, __v2, __mask) \
|
||||
_mm256_blend_pd(__v1, __v2, __mask)
|
||||
|
||||
#define VEC_BLENDV(__v1, __v2, __maskV) \
|
||||
_mm256_blendv_pd(__v1, __v2, __maskV)
|
||||
|
||||
#define VEC_CAST_256_128(__v1) \
|
||||
_mm256_castpd256_pd128 (__v1)
|
||||
|
||||
#define VEC_EXTRACT_128(__v1, __im) \
|
||||
_mm256_extractf128_pd (__v1, __im)
|
||||
|
||||
#define VEC_EXTRACT_UNIT(__v1, __im) \
|
||||
_mm_extract_epi64(__v1, __im)
|
||||
|
||||
#define VEC_SET1_VAL128(__val) \
|
||||
_mm_set1_pd(__val)
|
||||
|
||||
#define VEC_MOVE(__v1, __val) \
|
||||
_mm_move_sd(__v1, __val)
|
||||
|
||||
#define VEC_CAST_128_256(__v1) \
|
||||
_mm256_castpd128_pd256(__v1)
|
||||
|
||||
#define VEC_INSERT_VAL(__v1, __val, __pos) \
|
||||
_mm256_insertf128_pd(__v1, __val, __pos)
|
||||
|
||||
#define VEC_CVT_128_256(__v1) \
|
||||
_mm256_cvtepi32_pd(__v1)
|
||||
|
||||
#define VEC_SET1_VAL(__val) \
|
||||
_mm256_set1_pd(__val)
|
||||
|
||||
#define VEC_POPCVT_CHAR(__ch) \
|
||||
_mm256_cvtepi32_pd(_mm_set1_epi32(__ch))
|
||||
|
||||
#define VEC_LDPOPCVT_CHAR(__addr) \
|
||||
_mm256_cvtepi32_pd(_mm_load_si128((__m128i const *)__addr))
|
||||
|
||||
#define VEC_CMP_EQ(__v1, __v2) \
|
||||
_mm256_cmp_pd(__v1, __v2, _CMP_EQ_OQ)
|
||||
|
||||
#define VEC_SET_LSE(__val) \
|
||||
_mm256_set_pd(zero, zero, zero, __val);
|
||||
|
||||
#define SHIFT_HAP(__v1, __val) \
|
||||
__v1 = _mm_insert_epi32(_mm_slli_si128(__v1, 4), __val.i, 0)
|
||||
|
||||
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
|
||||
__vdst = _mm256_castpd128_pd256(__vsLow) ; \
|
||||
__vdst = _mm256_insertf128_pd(__vdst, __vsHigh, 1) ;
|
||||
|
||||
#define VEC_SHIFT_LEFT_1BIT(__vs) \
|
||||
__vs = _mm_slli_epi64(__vs, 1)
|
||||
|
||||
|
||||
#define COMPARE_VECS(__v1, __v2, __first, __last) { \
|
||||
double* ptr1 = (double*) (&__v1) ; \
|
||||
double* ptr2 = (double*) (&__v2) ; \
|
||||
for (int ei=__first; ei <= __last; ++ei) { \
|
||||
if (ptr1[ei] != ptr2[ei]) { \
|
||||
std::cout << "Double Mismatch at " << ei << ": " \
|
||||
<< ptr1[ei] << " vs. " << ptr2[ei] << std::endl ; \
|
||||
exit(0) ; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
class BitMaskVec_double {
|
||||
|
||||
MASK_VEC low_, high_ ;
|
||||
SIMD_TYPE combined_ ;
|
||||
|
||||
public:
|
||||
inline MASK_TYPE& getLowEntry(int index) {
|
||||
return low_.masks[index] ;
|
||||
}
|
||||
inline MASK_TYPE& getHighEntry(int index) {
|
||||
return high_.masks[index] ;
|
||||
}
|
||||
|
||||
inline const SIMD_TYPE& getCombinedMask() {
|
||||
VEC_SSE_TO_AVX(low_.vecf, high_.vecf, combined_) ;
|
||||
return combined_ ;
|
||||
}
|
||||
|
||||
inline void shift_left_1bit() {
|
||||
VEC_SHIFT_LEFT_1BIT(low_.vec) ;
|
||||
VEC_SHIFT_LEFT_1BIT(high_.vec) ;
|
||||
}
|
||||
|
||||
} ;
|
||||
|
||||
#define BITMASK_VEC BitMaskVec_double
|
||||
|
|
@ -0,0 +1,206 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include <iostream>
|
||||
|
||||
// Tear down the macro vocabulary possibly left over from a previously
// included precision header, so this header can redefine it cleanly.
// FIX: #undef takes a bare identifier only; the parameter lists previously
// written after function-like macro names are invalid #undef syntax
// ("extra tokens" diagnostics) and have been removed. The duplicate
// #undef MASK_ALL_ONES has also been dropped.
#ifdef PRECISION
#undef PRECISION
#undef MAIN_TYPE
#undef MAIN_TYPE_SIZE
#undef UNION_TYPE
#undef IF_128
#undef IF_MAIN_TYPE
#undef SHIFT_CONST1
#undef SHIFT_CONST2
#undef SHIFT_CONST3
#undef _128_TYPE
#undef SIMD_TYPE
#undef AVX_LENGTH
#undef HAP_TYPE
#undef MASK_TYPE
#undef MASK_ALL_ONES

#undef SET_VEC_ZERO
#undef VEC_OR
#undef VEC_ADD
#undef VEC_SUB
#undef VEC_MUL
#undef VEC_DIV
#undef VEC_BLEND
#undef VEC_BLENDV
#undef VEC_CAST_256_128
#undef VEC_EXTRACT_128
#undef VEC_EXTRACT_UNIT
#undef VEC_SET1_VAL128
#undef VEC_MOVE
#undef VEC_CAST_128_256
#undef VEC_INSERT_VAL
#undef VEC_CVT_128_256
#undef VEC_SET1_VAL
#undef VEC_POPCVT_CHAR
#undef VEC_LDPOPCVT_CHAR
#undef VEC_CMP_EQ
#undef VEC_SET_LSE
#undef SHIFT_HAP
#undef MASK_VEC
#undef VEC_SSE_TO_AVX
#undef VEC_SHIFT_LEFT_1BIT
#undef COMPARE_VECS
#undef _256_INT_TYPE
#undef BITMASK_VEC
#endif
|
||||
|
||||
// ---------------------------------------------------------------------------
// Single-precision AVX flavor of the PairHMM vector macro layer.
// Maps the generic VEC_* vocabulary onto 256-bit __m256 intrinsics
// (8 floats per vector).
// ---------------------------------------------------------------------------
#define PRECISION s

// Scalar element type and its width in bits.
#define MAIN_TYPE float
#define MAIN_TYPE_SIZE 32
// Union type giving both vector and scalar-element views of a register.
#define UNION_TYPE mix_F
#define IF_128 IF_128f
#define IF_MAIN_TYPE IF_32
// Byte/element shift constants used by the shift helpers for this width.
#define SHIFT_CONST1 12
#define SHIFT_CONST2 3
#define SHIFT_CONST3 4
#define _128_TYPE __m128
#define SIMD_TYPE __m256
#define _256_INT_TYPE __m256i
// Number of float lanes in one AVX register.
#define AVX_LENGTH 8
#define HAP_TYPE UNION_TYPE
// One bitmask word per lane-group; all-ones constant for a full mask.
#define MASK_TYPE uint32_t
#define MASK_ALL_ONES 0xFFFFFFFF
#define MASK_VEC MaskVec_F

// --- elementwise arithmetic / logic wrappers ---
#define SET_VEC_ZERO(__vec) \
    __vec= _mm256_setzero_ps()

#define VEC_OR(__v1, __v2) \
    _mm256_or_ps(__v1, __v2)

#define VEC_ADD(__v1, __v2) \
    _mm256_add_ps(__v1, __v2)

#define VEC_SUB(__v1, __v2) \
    _mm256_sub_ps(__v1, __v2)

#define VEC_MUL(__v1, __v2) \
    _mm256_mul_ps(__v1, __v2)

#define VEC_DIV(__v1, __v2) \
    _mm256_div_ps(__v1, __v2)

// Blend with an immediate mask vs. a variable mask vector.
#define VEC_BLEND(__v1, __v2, __mask) \
    _mm256_blend_ps(__v1, __v2, __mask)

#define VEC_BLENDV(__v1, __v2, __maskV) \
    _mm256_blendv_ps(__v1, __v2, __maskV)

// --- lane extraction / insertion / casts between 128- and 256-bit ---
#define VEC_CAST_256_128(__v1) \
    _mm256_castps256_ps128 (__v1)

#define VEC_EXTRACT_128(__v1, __im) \
    _mm256_extractf128_ps (__v1, __im)

#define VEC_EXTRACT_UNIT(__v1, __im) \
    _mm_extract_epi32(__v1, __im)

#define VEC_SET1_VAL128(__val) \
    _mm_set1_ps(__val)

#define VEC_MOVE(__v1, __val) \
    _mm_move_ss(__v1, __val)

#define VEC_CAST_128_256(__v1) \
    _mm256_castps128_ps256(__v1)

#define VEC_INSERT_VAL(__v1, __val, __pos) \
    _mm256_insertf128_ps(__v1, __val, __pos)

// Converts packed 32-bit ints (the .i view of a union arg) to floats.
#define VEC_CVT_128_256(__v1) \
    _mm256_cvtepi32_ps(__v1.i)

#define VEC_SET1_VAL(__val) \
    _mm256_set1_ps(__val)

// Broadcast an int (e.g. a base/quality char) and convert to float lanes.
#define VEC_POPCVT_CHAR(__ch) \
    _mm256_cvtepi32_ps(_mm256_set1_epi32(__ch))

// Load 8 ints from memory (unaligned) and convert to float lanes.
#define VEC_LDPOPCVT_CHAR(__addr) \
    _mm256_cvtepi32_ps(_mm256_loadu_si256((__m256i const *)__addr))

// Ordered, non-signaling equality compare.
#define VEC_CMP_EQ(__v1, __v2) \
    _mm256_cmp_ps(__v1, __v2, _CMP_EQ_OQ)

// Vector with __val in the lowest lane and `zero` (a caller-scope value)
// elsewhere. NOTE: _mm256_set_ps lists lanes high-to-low.
#define VEC_SET_LSE(__val) \
    _mm256_set_ps(zero, zero, zero, zero, zero, zero, zero, __val);

// Haplotype shift helper (project-local _vector_shift_lastavxs).
#define SHIFT_HAP(__v1, __val) \
    _vector_shift_lastavxs(__v1, __val.f);

// Concatenate two 128-bit halves into one 256-bit vector.
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
    __vdst = _mm256_castps128_ps256(__vsLow) ; \
    __vdst = _mm256_insertf128_ps(__vdst, __vsHigh, 1) ;

// Per-32-bit-lane left shift by 1 (operates on the integer view).
#define VEC_SHIFT_LEFT_1BIT(__vs) \
    __vs = _mm_slli_epi32(__vs, 1)

// Debug helper: compare lanes [__first, __last] of two vectors elementwise.
// NOTE(review): exits with status 0 on mismatch, so a failure looks like
// success to the caller's shell — presumably intentional for debug dumps,
// but worth confirming.
#define COMPARE_VECS(__v1, __v2, __first, __last) { \
    float* ptr1 = (float*) (&__v1) ; \
    float* ptr2 = (float*) (&__v2) ; \
    for (int ei=__first; ei <= __last; ++ei) { \
        if (ptr1[ei] != ptr2[ei]) { \
            std::cout << "Float Mismatch at " << ei << ": " \
                << ptr1[ei] << " vs. " << ptr2[ei] << std::endl ; \
            exit(0) ; \
        } \
    } \
}
|
||||
|
||||
// Bit-mask pair for the AVX single-precision kernel: keeps the per-row
// masks as two 128-bit halves (low_/high_) and materializes a combined
// 256-bit view on demand. MASK_VEC/SIMD_TYPE/MASK_TYPE come from the
// macro layer defined above.
class BitMaskVec_float {

    MASK_VEC low_, high_ ;      // the two 128-bit mask halves
    SIMD_TYPE combined_ ;       // cache for the packed 256-bit view

public:

    // Mutable reference to the index-th mask word of the low half.
    // No bounds checking.
    inline MASK_TYPE& getLowEntry(int index) {
        return low_.masks[index] ;
    }
    // Mutable reference to the index-th mask word of the high half.
    inline MASK_TYPE& getHighEntry(int index) {
        return high_.masks[index] ;
    }

    // Packs low_/high_ into combined_ and returns a reference to it;
    // the reference is overwritten by the next call.
    inline const SIMD_TYPE& getCombinedMask() {
        VEC_SSE_TO_AVX(low_.vecf, high_.vecf, combined_) ;
        return combined_ ;
    }

    // Shifts every mask word left by one bit, each half independently
    // (no carry from the low half into the high half).
    inline void shift_left_1bit() {
        VEC_SHIFT_LEFT_1BIT(low_.vec) ;
        VEC_SHIFT_LEFT_1BIT(high_.vec) ;
    }

} ;

// Generic name used by the precision-agnostic kernel code.
#define BITMASK_VEC BitMaskVec_float
|
||||
|
|
@ -0,0 +1,173 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
// Tear down the macro vocabulary possibly left over from a previously
// included precision header, so this header can redefine it cleanly.
// FIX: #undef takes a bare identifier only; the parameter lists previously
// written after function-like macro names are invalid #undef syntax
// ("extra tokens" diagnostics) and have been removed. Duplicate
// #undef lines (VEC_EXTRACT_UNIT, MASK_ALL_ONES) have been dropped.
#ifdef PRECISION
#undef PRECISION
#undef MAIN_TYPE
#undef MAIN_TYPE_SIZE
#undef UNION_TYPE
#undef IF_128
#undef IF_MAIN_TYPE
#undef SHIFT_CONST1
#undef SHIFT_CONST2
#undef SHIFT_CONST3
#undef _128_TYPE
#undef SIMD_TYPE
#undef AVX_LENGTH
#undef HAP_TYPE
#undef MASK_TYPE
#undef MASK_ALL_ONES

#undef VEC_EXTRACT_UNIT
#undef VEC_INSERT_UNIT
#undef SET_VEC_ZERO
#undef VEC_OR
#undef VEC_ADD
#undef VEC_SUB
#undef VEC_MUL
#undef VEC_DIV
#undef VEC_BLEND
#undef VEC_BLENDV
#undef VEC_CAST_256_128
#undef VEC_EXTRACT_128
#undef VEC_SET1_VAL128
#undef VEC_MOVE
#undef VEC_CAST_128_256
#undef VEC_INSERT_VAL
#undef VEC_CVT_128_256
#undef VEC_SET1_VAL
#undef VEC_POPCVT_CHAR
#undef VEC_LDPOPCVT_CHAR
#undef VEC_CMP_EQ
#undef VEC_SET_LSE
#undef SHIFT_HAP
#undef MASK_VEC
#undef VEC_SSE_TO_AVX
#undef VEC_SHIFT_LEFT_1BIT
#undef COMPARE_VECS
#undef _256_INT_TYPE
#undef BITMASK_VEC
#endif
|
||||
|
||||
// ---------------------------------------------------------------------------
// Double-precision SSE flavor of the PairHMM vector macro layer.
// Maps the generic VEC_* vocabulary onto 128-bit __m128d intrinsics
// (2 doubles per vector).
// ---------------------------------------------------------------------------
#define SSE
#define PRECISION d

// Scalar element type and its width in bits.
#define MAIN_TYPE double
#define MAIN_TYPE_SIZE 64
#define UNION_TYPE mix_D128
#define IF_128 IF_128d
#define IF_MAIN_TYPE IF_64
// Shift constants for this element width.
#define SHIFT_CONST1 1
#define SHIFT_CONST2 8
#define SHIFT_CONST3 0
#define _128_TYPE __m128d
// In the SSE build the "SIMD" type is just the 128-bit register.
#define SIMD_TYPE __m128d
// Name kept for source compatibility with the AVX flavor; here 128-bit.
#define _256_INT_TYPE __m128i
// Number of double lanes per register.
#define AVX_LENGTH 2
#define HAP_TYPE __m128i
#define MASK_TYPE uint64_t
#define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFFL
#define MASK_VEC MaskVec_D

// --- 64-bit lane extraction / insertion ---
#define VEC_EXTRACT_UNIT(__v1, __im) \
    _mm_extract_epi64(__v1, __im)

#define VEC_INSERT_UNIT(__v1,__ins,__im) \
    _mm_insert_epi64(__v1,__ins,__im)

// --- elementwise arithmetic / logic wrappers ---
#define VEC_OR(__v1, __v2) \
    _mm_or_pd(__v1, __v2)

#define VEC_ADD(__v1, __v2) \
    _mm_add_pd(__v1, __v2)

#define VEC_SUB(__v1, __v2) \
    _mm_sub_pd(__v1, __v2)

#define VEC_MUL(__v1, __v2) \
    _mm_mul_pd(__v1, __v2)

#define VEC_DIV(__v1, __v2) \
    _mm_div_pd(__v1, __v2)

#define VEC_CMP_EQ(__v1, __v2) \
    _mm_cmpeq_pd(__v1, __v2)

#define VEC_BLEND(__v1, __v2, __mask) \
    _mm_blend_pd(__v1, __v2, __mask)

#define VEC_BLENDV(__v1, __v2, __maskV) \
    _mm_blendv_pd(__v1, __v2, __maskV)

// Haplotype shift: byte-shift the register left by 4 and insert the new
// 32-bit value (__val.i is the integer view of the union) in lane 0.
#define SHIFT_HAP(__v1, __val) \
    __v1 = _mm_insert_epi32(_mm_slli_si128(__v1, 4), __val.i, 0)

// Convert the two low packed 32-bit ints to doubles.
#define VEC_CVT_128_256(__v1) \
    _mm_cvtepi32_pd(__v1)

#define VEC_SET1_VAL(__val) \
    _mm_set1_pd(__val)

// Broadcast an int (e.g. a base/quality char) and convert to double lanes.
#define VEC_POPCVT_CHAR(__ch) \
    _mm_cvtepi32_pd(_mm_set1_epi32(__ch))

// Vector with __val in the low lane and `zero` (caller-scope) in the high.
#define VEC_SET_LSE(__val) \
    _mm_set_pd(zero, __val);

// Load ints from memory (unaligned) and convert the low two to doubles.
#define VEC_LDPOPCVT_CHAR(__addr) \
    _mm_cvtepi32_pd(_mm_loadu_si128((__m128i const *)__addr))

// Combine two 64-bit mask halves into one 128-bit vector.
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
    __vdst = _mm_castsi128_pd(_mm_set_epi64(__vsHigh, __vsLow))

// Per-64-bit-lane left shift by 1 (integer view).
#define VEC_SHIFT_LEFT_1BIT(__vs) \
    __vs = _mm_slli_epi64(__vs, 1)
|
||||
|
||||
|
||||
// Bit-mask holder for the SSE double-precision kernel. Unlike the AVX
// variant, both "low" and "high" entries live in the single 128-bit
// combined_ register; getLow/getHigh index into its scalar-element view.
class BitMaskVec_sse_double {

    MASK_VEC combined_ ;   // one 128-bit mask register (AVX_LENGTH lanes)
public:
    // Mutable reference to a mask word in the low half. No bounds checking.
    inline MASK_TYPE& getLowEntry(int index) {
        return combined_.masks[index] ;
    }
    // Mutable reference to a mask word in the high half (offset by half the
    // lane count).
    inline MASK_TYPE& getHighEntry(int index) {
        return combined_.masks[AVX_LENGTH/2+index] ;
    }

    // The mask is already a single register, so just expose its vector view.
    inline const SIMD_TYPE& getCombinedMask() {
        return combined_.vecf ;
    }

    // Shift every mask word left by one bit, in place.
    inline void shift_left_1bit() {
        VEC_SHIFT_LEFT_1BIT(combined_.vec) ;
    }

} ;

// Generic name used by the precision-agnostic kernel code.
#define BITMASK_VEC BitMaskVec_sse_double
|
||||
|
||||
|
|
@ -0,0 +1,173 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
// Tear down the macro vocabulary possibly left over from a previously
// included precision header, so this header can redefine it cleanly.
// FIX: #undef takes a bare identifier only; the parameter lists previously
// written after function-like macro names are invalid #undef syntax
// ("extra tokens" diagnostics) and have been removed. Duplicate
// #undef lines (VEC_EXTRACT_UNIT, MASK_ALL_ONES) have been dropped.
#ifdef PRECISION
#undef PRECISION
#undef MAIN_TYPE
#undef MAIN_TYPE_SIZE
#undef UNION_TYPE
#undef IF_128
#undef IF_MAIN_TYPE
#undef SHIFT_CONST1
#undef SHIFT_CONST2
#undef SHIFT_CONST3
#undef _128_TYPE
#undef SIMD_TYPE
#undef AVX_LENGTH
#undef HAP_TYPE
#undef MASK_TYPE
#undef MASK_ALL_ONES

#undef VEC_EXTRACT_UNIT
#undef VEC_INSERT_UNIT
#undef SET_VEC_ZERO
#undef VEC_OR
#undef VEC_ADD
#undef VEC_SUB
#undef VEC_MUL
#undef VEC_DIV
#undef VEC_BLEND
#undef VEC_BLENDV
#undef VEC_CAST_256_128
#undef VEC_EXTRACT_128
#undef VEC_SET1_VAL128
#undef VEC_MOVE
#undef VEC_CAST_128_256
#undef VEC_INSERT_VAL
#undef VEC_CVT_128_256
#undef VEC_SET1_VAL
#undef VEC_POPCVT_CHAR
#undef VEC_LDPOPCVT_CHAR
#undef VEC_CMP_EQ
#undef VEC_SET_LSE
#undef SHIFT_HAP
#undef MASK_VEC
#undef VEC_SSE_TO_AVX
#undef VEC_SHIFT_LEFT_1BIT
#undef COMPARE_VECS
#undef _256_INT_TYPE
#undef BITMASK_VEC
#endif
|
||||
|
||||
// ---------------------------------------------------------------------------
// Single-precision SSE flavor of the PairHMM vector macro layer.
// Maps the generic VEC_* vocabulary onto 128-bit __m128 intrinsics
// (4 floats per vector).
// ---------------------------------------------------------------------------
#define SSE
#define PRECISION s

// Scalar element type and its width in bits.
#define MAIN_TYPE float
#define MAIN_TYPE_SIZE 32
#define UNION_TYPE mix_F128
#define IF_128 IF_128f
#define IF_MAIN_TYPE IF_32
// Shift constants for this element width.
#define SHIFT_CONST1 3
#define SHIFT_CONST2 4
#define SHIFT_CONST3 0
#define _128_TYPE __m128
// In the SSE build the "SIMD" type is the 128-bit register.
#define SIMD_TYPE __m128
// Name kept for source compatibility with the AVX flavor; here 128-bit.
#define _256_INT_TYPE __m128i
// Number of float lanes per register.
#define AVX_LENGTH 4
//#define MAVX_COUNT (MROWS+3)/AVX_LENGTH
#define HAP_TYPE UNION_TYPE
#define MASK_TYPE uint32_t
#define MASK_ALL_ONES 0xFFFFFFFF
#define MASK_VEC MaskVec_F

// --- 32-bit lane extraction / insertion ---
#define VEC_EXTRACT_UNIT(__v1, __im) \
    _mm_extract_epi32(__v1, __im)

#define VEC_INSERT_UNIT(__v1,__ins,__im) \
    _mm_insert_epi32(__v1,__ins,__im)

// --- elementwise arithmetic / logic wrappers ---
#define VEC_OR(__v1, __v2) \
    _mm_or_ps(__v1, __v2)

#define VEC_ADD(__v1, __v2) \
    _mm_add_ps(__v1, __v2)

#define VEC_SUB(__v1, __v2) \
    _mm_sub_ps(__v1, __v2)

#define VEC_MUL(__v1, __v2) \
    _mm_mul_ps(__v1, __v2)

#define VEC_DIV(__v1, __v2) \
    _mm_div_ps(__v1, __v2)

#define VEC_CMP_EQ(__v1, __v2) \
    _mm_cmpeq_ps(__v1, __v2)

#define VEC_BLEND(__v1, __v2, __mask) \
    _mm_blend_ps(__v1, __v2, __mask)

#define VEC_BLENDV(__v1, __v2, __maskV) \
    _mm_blendv_ps(__v1, __v2, __maskV)

// Haplotype shift helper (project-local _vector_shift_lastsses).
#define SHIFT_HAP(__v1, __val) \
    _vector_shift_lastsses(__v1, __val.f)

// Convert packed 32-bit ints (the .i view of a union arg) to floats.
#define VEC_CVT_128_256(__v1) \
    _mm_cvtepi32_ps(__v1.i)

#define VEC_SET1_VAL(__val) \
    _mm_set1_ps(__val)

// Broadcast an int (e.g. a base/quality char) and convert to float lanes.
#define VEC_POPCVT_CHAR(__ch) \
    _mm_cvtepi32_ps(_mm_set1_epi32(__ch))

// Vector with __val in the lowest lane and `zero` (caller-scope) elsewhere.
#define VEC_SET_LSE(__val) \
    _mm_set_ps(zero, zero, zero, __val);

// Load 4 ints from memory (unaligned) and convert to float lanes.
#define VEC_LDPOPCVT_CHAR(__addr) \
    _mm_cvtepi32_ps(_mm_loadu_si128((__m128i const *)__addr))

// NOTE(review): _mm_cvtpi32x2_ps numerically CONVERTS two int pairs to
// floats rather than concatenating raw mask bits like the AVX flavor's
// VEC_SSE_TO_AVX does — confirm this is the intended semantics here.
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
    __vdst = _mm_cvtpi32x2_ps(__vsLow, __vsHigh)

// Per-32-bit-lane left shift by 1 (integer view).
#define VEC_SHIFT_LEFT_1BIT(__vs) \
    __vs = _mm_slli_epi32(__vs, 1)
|
||||
|
||||
// Bit-mask holder for the SSE single-precision kernel. Both "low" and
// "high" entries live in the single 128-bit combined_ register;
// getLow/getHigh index into its scalar-element view.
class BitMaskVec_sse_float {

    MASK_VEC combined_ ;   // one 128-bit mask register (AVX_LENGTH lanes)

public:
    // Mutable reference to a mask word in the low half. No bounds checking.
    inline MASK_TYPE& getLowEntry(int index) {
        return combined_.masks[index] ;
    }
    // Mutable reference to a mask word in the high half (offset by half the
    // lane count).
    inline MASK_TYPE& getHighEntry(int index) {
        return combined_.masks[AVX_LENGTH/2+index] ;
    }

    // The mask is already a single register, so just expose its vector view.
    inline const SIMD_TYPE& getCombinedMask() {
        return combined_.vecf ;
    }

    // Shift every mask word left by one bit, in place.
    inline void shift_left_1bit() {
        VEC_SHIFT_LEFT_1BIT(combined_.vec) ;
    }

} ;

// Generic name used by the precision-agnostic kernel code.
#define BITMASK_VEC BitMaskVec_sse_float
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
// Shared system/compiler headers and FP-exception bookkeeping for the
// PairHMM JNI native code.
#ifndef COMMON_HEADERS_H
#define COMMON_HEADERS_H

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <stdint.h>
#include <assert.h>
#include <ctype.h>

#include <sys/time.h>

#include <immintrin.h>
#include <emmintrin.h>
#include <omp.h>

#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <map>
#include <set>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <fenv.h>

// Per-flag counters of floating-point exceptions seen so far (defined in a
// .cpp file); indexed directly by the FE_* flag values, hence size 128.
extern uint64_t exceptions_array[128];
// Debug output stream used by CONVERT_AND_PRINT (defined in a .cpp file).
extern FILE* g_debug_fptr;
// Accumulate the currently raised FP-exception flags into exceptions_array,
// then clear them. __FE_DENORM is a glibc/x86-specific flag, so this macro
// is not portable beyond that environment.
// NOTE: this expands to multiple statements and is NOT do{}while(0)-wrapped;
// do not use it as the body of an unbraced if/else.
#define STORE_FP_EXCEPTIONS(flagp, exceptions_array) \
fegetexceptflag(&flagp, FE_ALL_EXCEPT | __FE_DENORM); \
exceptions_array[FE_INVALID] += ((flagp & FE_INVALID)); \
exceptions_array[__FE_DENORM] += ((flagp & __FE_DENORM) >> 1); \
exceptions_array[FE_DIVBYZERO] += ((flagp & FE_DIVBYZERO) >> 2); \
exceptions_array[FE_OVERFLOW] += ((flagp & FE_OVERFLOW) >> 3); \
exceptions_array[FE_UNDERFLOW] += ((flagp & FE_UNDERFLOW) >> 4); \
feclearexcept(FE_ALL_EXCEPT | __FE_DENORM);

// Stash X in the global converter union and write its 4-byte integer image
// to the debug stream.
// FIX: removed the stray trailing backslash after the fwrite line — it
// spliced the next source line after the macro definition into the macro
// body (harmless only while that line happened to be blank).
#define CONVERT_AND_PRINT(X) \
g_converter.f = (X); \
fwrite(&(g_converter.i),4,1,g_debug_fptr);

#endif
|
||||
|
|
@ -0,0 +1,60 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
// Build-time configuration switches and JNI array-access macro aliases for
// the PairHMM native library.
#ifndef JNI_COMMON_H
#define JNI_COMMON_H

// Commented-out switches kept as documentation of the available knobs.
/*#define SINGLE_THREADED_ONLY 1*/
#include <jni.h>
/*#define ENABLE_ASSERTIONS 1*/
// Profiling is only meaningful (and only enabled) in single-threaded runs.
#ifdef SINGLE_THREADED_ONLY
#define DO_PROFILING 1
#endif
/*#define DEBUG0_1 1*/
/*#define DEBUG3 1*/
/*#define DUMP_TO_SANDBOX 1*/


// Select how Java arrays are accessed from native code (see branches below).
#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1

#ifdef DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY
//Gets direct access to Java arrays
// Critical-section access: pins the Java array (no copy), but restricts
// what JNI calls may run before the matching Release.
#define GET_BYTE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical
#define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical
// JNI_ABORT: discard native-side changes on release (read-only usage).
#define JNI_RO_RELEASE_MODE JNI_ABORT
#define GET_DOUBLE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical
#define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical

#else
//Likely makes copy of Java arrays to JNI C++ space
#define GET_BYTE_ARRAY_ELEMENTS env->GetByteArrayElements
#define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleaseByteArrayElements
#define JNI_RO_RELEASE_MODE JNI_ABORT
#define GET_DOUBLE_ARRAY_ELEMENTS env->GetDoubleArrayElements
#define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleaseDoubleArrayElements

#endif //ifdef DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY

#endif //ifndef JNI_COMMON_H
|
||||
|
|
@ -0,0 +1,191 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef JNI_DEBUG_H
|
||||
#define JNI_DEBUG_H
|
||||
|
||||
// Debug-side storage for one PairHMM computation: the four DP matrices
// (match / insertion / deletion / prior), the per-row transition table, and
// their padded dimensions. All members are public; this is scaffolding for
// the DebugJNILoglessPairHMM path, not a general-purpose container.
template<class NUMBER>
class DataHolder
{
// Allocate X as a (paddedReadLength x paddedHaplotypeLength) row-pointer
// matrix and zero it. Multi-statement macro — only use inside braces.
#define INIT_MATRIX(X) \
X = new NUMBER*[m_paddedMaxReadLength]; \
for(int i=0;i<m_paddedMaxReadLength;++i) \
{ \
X[i] = new NUMBER[m_paddedMaxHaplotypeLength]; \
for(int j=0;j<m_paddedMaxHaplotypeLength;++j) \
X[i][j] = (NUMBER)0; \
}

// Free a matrix allocated by INIT_MATRIX. Relies on m_paddedMaxReadLength
// still holding the value used at allocation time (true: initialize()
// frees before overwriting the dimensions). Multi-statement macro.
#define FREE_MATRIX(X) \
for(int i=0;i<m_paddedMaxReadLength;++i) \
delete[] X[i]; \
delete[] X;

public:
// Matrices are left unallocated until initialize() is called.
DataHolder() { m_is_initialized = false; }
// (Re)allocate all matrices for the given maximum read/haplotype lengths.
// Dimensions are padded by +1 for the DP border row/column. Any previous
// allocation is freed first.
void initialize(int readMaxLength, int haplotypeMaxLength)
{
if(m_is_initialized)
{
FREE_MATRIX(m_matchMatrix);
FREE_MATRIX(m_insertionMatrix);
FREE_MATRIX(m_deletionMatrix);
FREE_MATRIX(m_prior);
delete[] m_transition;
}

m_readMaxLength = readMaxLength;
m_haplotypeMaxLength = haplotypeMaxLength;
m_paddedMaxReadLength = readMaxLength + 1;
m_paddedMaxHaplotypeLength = haplotypeMaxLength + 1;

INIT_MATRIX(m_matchMatrix);
INIT_MATRIX(m_insertionMatrix);
INIT_MATRIX(m_deletionMatrix);
INIT_MATRIX(m_prior);
// m_transition is a contiguous [paddedReadLength][6] array.
m_transition = new NUMBER[m_paddedMaxReadLength][6];
for(int i=0;i<m_paddedMaxReadLength;++i)
for(int j=0;j<6;++j)
m_transition[i][j] = (NUMBER)0;
m_is_initialized = true;
}

//Corresponds to initializeProbabilities
// Fill m_transition rows 1..length from the per-base GOP/GCP quality
// arrays, converting Phred values via ctx.ph2pr. Rows are 1-based to
// mirror the padded DP matrices; MM/GapM/MX/XX/MY/YY are the transition
// indices (declared elsewhere in the project).
void initializeProbabilities(jint length, jbyte* insertionGOP, jbyte* deletionGOP, jbyte* overallGCP)
{
// Call counter kept only for debugging; intentionally unused otherwise.
static unsigned g_num_prob_init = 0;
Context<NUMBER> ctx;
for (int r = 1; r <= length;r++) //in original code, r < ROWS (where ROWS = paddedReadLength)
{
int _i = insertionGOP[r-1]; //insertionGOP
int _d = deletionGOP[r-1]; //deletionGOP
int _c = overallGCP[r-1]; //overallGCP
m_transition[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127]; //lines 161-162
m_transition[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c]; //line 163
m_transition[r][MX] = ctx.ph2pr[_i]; //164
m_transition[r][XX] = ctx.ph2pr[_c]; //165
m_transition[r][MY] = ctx.ph2pr[_d];//last row seems different, compared to line 166
m_transition[r][YY] = ctx.ph2pr[_c];//same as above for line 167
//m_transition[r][MY] = (r == length) ? ctx._(1.0) : ctx.ph2pr[_d];//last row seems different, compared to line 166
//m_transition[r][YY] = (r == length) ? ctx._(1.0) : ctx.ph2pr[_c];//same as above for line 167
#ifdef DEBUG3
for(int j=0;j<6;++j)
debug_dump("transitions_jni.txt", to_string(m_transition[r][j]),true);
#endif
}
++g_num_prob_init;
}
// All state is public by design (debug scaffolding).
bool m_is_initialized;              // true once initialize() has run
int m_readMaxLength;                // unpadded maximum read length
int m_haplotypeMaxLength;           // unpadded maximum haplotype length
int m_paddedMaxReadLength;          // readMaxLength + 1 (DP border row)
int m_paddedMaxHaplotypeLength;     // haplotypeMaxLength + 1 (DP border col)
NUMBER** m_matchMatrix;
NUMBER** m_insertionMatrix;
NUMBER** m_deletionMatrix;
NUMBER** m_prior;
NUMBER (*m_transition)[6];          // per-row transition probabilities
};
|
||||
extern DataHolder<double> g_double_dataholder;
|
||||
|
||||
// Full (non-vectorized) PairHMM forward computation, mirroring the Java
// LoglessPairHMM implementation. Fills the M (match), X (insertion) and
// Y (deletion) DP matrices for read tc->rs vs. haplotype tc->hap using the
// per-row transition table p, and returns the log10 likelihood.
// M/X/Y are (tc->rslen+1) x (tc->haplen+1), row 0 / column 0 are borders.
// When do_initialization is false and hapStartIndex > 0, border and
// earlier-column values are assumed to be reusable from a previous call
// (caching across haplotypes that share a prefix).
// If before_last_log is non-NULL, the raw (pre-log) summed probability is
// stored through it.
template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER** M, NUMBER** X, NUMBER** Y, NUMBER (*p)[6],
bool do_initialization, jint hapStartIndex, NUMBER *before_last_log = NULL)
{
int r, c;
int ROWS = tc->rslen + 1; //ROWS = paddedReadLength
int COLS = tc->haplen + 1; //COLS = paddedHaplotypeLength

Context<NUMBER> ctx;
//////NOTES
////ctx.ph2pr[quality]; //This quantity is QualityUtils.qualToErrorProb(quality)
////1-ctx.ph2pr[quality]; //This corresponds to QualityUtils.qualToProb(quality);

//Initialization
// Border row/column: only the deletion matrix's top row carries mass
// (INITIAL_CONSTANT spread over the haplotype length).
if(do_initialization)
{
for (c = 0; c < COLS; c++)
{
M[0][c] = ctx._(0.0);
X[0][c] = ctx._(0.0);
Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen); //code from 87-90 in LoglessPairHMM
}

for (r = 1; r < ROWS; r++)
{
M[r][0] = ctx._(0.0);
//deletionMatrix row 0 in above nest is initialized in the Java code
//However, insertionMatrix column 0 is not initialized in Java code, could it be that
//values are re-used from a previous iteration?
//Why even do this, X[0][0] = 0 from above loop nest, X[idx][0] = 0 from this computation
X[r][0] = X[r-1][0] * p[r][XX];
Y[r][0] = ctx._(0.0);
}
}

// Main DP sweep. Columns before hapStartIndex+1 are skipped — their
// values are reused from the previous haplotype (shared-prefix caching).
for (r = 1; r < ROWS; r++)
for (c = hapStartIndex+1; c < COLS; c++)
{
//The following lines correspond to initializePriors()
char _rs = tc->rs[r-1]; //line 137
char _hap = tc->hap[c-1]; //line 140
//int _q = tc->q[r-1] & 127; //line 138 - q is the quality (qual), should be byte hence int ANDed with 0xFF
int _q = tc->q[r-1]; //line 138 - q is the quality (qual), should be byte hence int ANDed with 0xFF
NUMBER distm = ctx.ph2pr[_q]; //This quantity is QualityUtils.qualToErrorProb(_q)
//The assumption here is that doNotUseTristateCorrection is true
//TOASK
// Match (or 'N' wildcard): probability the base is correct;
// mismatch: error probability split evenly over the 3 other bases.
if (_rs == _hap || _rs == 'N' || _hap == 'N')
distm = ctx._(1.0) - distm; //This is the quantity QualityUtils.qualToProb(qual)
else
distm = distm/3;
#ifdef DEBUG3
debug_dump("priors_jni.txt",to_string(distm),true);
#endif

//Computation inside updateCell
M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]);
X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX];
Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY];
#ifdef DEBUG3
debug_dump("matrices_jni.txt",to_string(M[r][c]),true);
debug_dump("matrices_jni.txt",to_string(X[r][c]),true);
debug_dump("matrices_jni.txt",to_string(Y[r][c]),true);
#endif
}

// Final likelihood: sum of match + insertion mass along the last row.
NUMBER result = ctx._(0.0);
for (c = 0; c < COLS; c++)
result += M[ROWS-1][c] + X[ROWS-1][c];

if (before_last_log != NULL)
*before_last_log = result;

#ifdef DEBUG
debug_dump("return_values_jni.txt",to_string(ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT),true);
#endif
// Undo the INITIAL_CONSTANT scaling in log space.
return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
}
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,176 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "headers.h"
|
||||
#include "jni_common.h"
|
||||
#include "org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
#include "jnidebug.h"
|
||||
DataHolder<double> g_double_dataholder;
|
||||
|
||||
using namespace std;
|
||||
|
||||
// JNI entry point: (re)allocate the global double-precision DataHolder's
// DP matrices for the given maximum read/haplotype lengths. Called from
// DebugJNILoglessPairHMM.jniInitialize on the Java side.
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitialize
(JNIEnv* env, jobject thisObject,
jint readMaxLength, jint haplotypeMaxLength)
{
// Call counter kept only for debugging; intentionally otherwise unused.
static int g_num_init_calls = 0;
#ifdef DEBUG3
cout << "Entered alloc initialized .. readMaxLength "<<readMaxLength<<" haplotypeMaxLength "<<haplotypeMaxLength<<"\n";
#endif
g_double_dataholder.initialize(readMaxLength, haplotypeMaxLength);
#ifdef DEBUG3
debug_dump("lengths_jni.txt", to_string(readMaxLength)+" "+to_string(haplotypeMaxLength),true);
#endif
++g_num_init_calls;
}
|
||||
|
||||
// JNI entry point: copy the per-base gap-open/gap-continuation quality
// arrays out of the Java heap and build the global transition table.
// The Java `transition` matrix argument is currently unused — the result
// is stored natively in g_double_dataholder.m_transition instead.
// All arrays are released with JNI_ABORT (read-only: native-side changes,
// if any, are discarded).
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitializeProbabilities
(JNIEnv* env, jclass thisObject,
jobjectArray transition, jbyteArray insertionGOP, jbyteArray deletionGOP, jbyteArray overallGCP
)
{
jboolean is_copy = JNI_FALSE;
// All three byte arrays are assumed to have the same length as
// insertionGOP (the read length) — TODO confirm against the Java caller.
jsize length = (env)->GetArrayLength(insertionGOP);
#ifdef DEBUG3
cout << "Entered initializeProbabilities .. length "<<length<<"\n";
#endif
jbyte* insertionGOPArray = (env)->GetByteArrayElements(insertionGOP, &is_copy);
jbyte* deletionGOPArray = (env)->GetByteArrayElements(deletionGOP, &is_copy);
jbyte* overallGCPArray = (env)->GetByteArrayElements(overallGCP, &is_copy);
#ifdef DEBUG
if(insertionGOPArray == 0)
cerr << "insertionGOP array not initialized in JNI\n";
////assert(insertionGOPArray && "insertionGOP array not initialized in JNI");
if(deletionGOPArray == 0)
cerr << "deletionGOP array not initialized in JNI\n";
////assert(deletionGOPArray && "deletionGOP array not initialized in JNI");
assert(overallGCPArray && "OverallGCP array not initialized in JNI");
#endif

g_double_dataholder.initializeProbabilities(length, insertionGOPArray, deletionGOPArray, overallGCPArray);

// Release in reverse order of acquisition; JNI_ABORT discards any
// native-side modifications (none are made).
env->ReleaseByteArrayElements(overallGCP, overallGCPArray, JNI_ABORT);
env->ReleaseByteArrayElements(deletionGOP, deletionGOPArray, JNI_ABORT);
env->ReleaseByteArrayElements(insertionGOP, insertionGOPArray, JNI_ABORT);
}
|
||||
|
||||
//Runs one full-matrix PairHMM computation (double precision) against the
//global DataHolder matrices. The jdouble return is always 0.0 - presumably
//this debug class reads results out of the shared matrices on the Java side;
//TODO confirm against DebugJNILoglessPairHMM.java.
JNIEXPORT jdouble JNICALL
Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitializePriorsAndUpdateCells(
    JNIEnv* env, jobject thisObject,
    jboolean doInitialization, jint paddedReadLength, jint paddedHaplotypeLength,
    jbyteArray readBases, jbyteArray haplotypeBases, jbyteArray readQuals,
    jint hapStartIndex
    )
{
#ifdef DEBUG3
  cout << "Entered mainCompute .. doInitialization "<<(doInitialization == JNI_TRUE)<<" hapStartIndex "<<hapStartIndex<<"\n";
  cout << "mainCompute padded lengths "<< paddedReadLength << " " << paddedHaplotypeLength <<"\n";
#endif
  jboolean is_copy = JNI_FALSE;
  //Pin (or copy) the Java byte arrays for native read access.
  jbyte* readBasesArray = (env)->GetByteArrayElements(readBases, &is_copy);
  jbyte* haplotypeBasesArray = (env)->GetByteArrayElements(haplotypeBases, &is_copy);
  jbyte* readQualsArray = (env)->GetByteArrayElements(readQuals, &is_copy);
#ifdef DEBUG
  assert(readBasesArray && "readBasesArray not initialized in JNI");
  assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI");
  assert(readQualsArray && "readQualsArray not initialized in JNI");
#endif
  testcase tc;

  //Lengths arrive padded; subtract 1 to recover the unpadded lengths the
  //compute kernel expects - presumably matching the Java-side padding scheme, confirm.
  tc.rslen = paddedReadLength-1;
  tc.haplen = paddedHaplotypeLength-1;

  //Pointer aliasing only - no per-element copies here.
  tc.rs = (char*)readBasesArray;
  tc.hap = (char*)haplotypeBasesArray;
  tc.q = (char*)readQualsArray; //TOASK - q is now char*

  compute_full_prob<double>(&tc, g_double_dataholder.m_matchMatrix, g_double_dataholder.m_insertionMatrix,
      g_double_dataholder.m_deletionMatrix, g_double_dataholder.m_transition,
      doInitialization == JNI_TRUE, hapStartIndex, NULL);

  //JNI_ABORT: nothing written back to the Java arrays. NOTE(review): release
  //order here matches GET order rather than the reverse order used elsewhere.
  env->ReleaseByteArrayElements(readBases, readBasesArray, JNI_ABORT);
  env->ReleaseByteArrayElements(haplotypeBases, haplotypeBasesArray, JNI_ABORT);
  env->ReleaseByteArrayElements(readQuals, readQualsArray, JNI_ABORT);
  return 0.0;
}
|
||||
|
||||
//Computes one read-vs-haplotype likelihood with the vectorized double-precision
//kernel. The computed log10 result is only written to a debug dump file; the
//function always returns 0.0 - presumably intentional for this debug wrapper,
//TODO confirm against the Java caller.
JNIEXPORT jdouble JNICALL
Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniSubComputeReadLikelihoodGivenHaplotypeLog10(
    JNIEnv* env, jobject thisObject,
    jint readLength, jint haplotypeLength,
    jbyteArray readBases, jbyteArray haplotypeBases, jbyteArray readQuals,
    jbyteArray insertionGOP, jbyteArray deletionGOP, jbyteArray overallGCP,
    jint hapStartIndex
    )
{
  jboolean is_copy = JNI_FALSE;
  //Pin all six Java arrays; the reverse release order below mirrors this order.
  jbyte* readBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readBases, &is_copy);
  jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBases, &is_copy);
  jbyte* readQualsArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readQuals, &is_copy);
  jbyte* insertionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(insertionGOP, &is_copy);
  jbyte* deletionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(deletionGOP, &is_copy);
  jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy);
#ifdef DEBUG
  assert(readBasesArray && "readBasesArray not initialized in JNI");
  assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI");
  assert(readQualsArray && "readQualsArray not initialized in JNI");
  assert(insertionGOPArray && "insertionGOP array not initialized in JNI");
  assert(deletionGOPArray && "deletionGOP array not initialized in JNI");
  assert(overallGCPArray && "OverallGCP array not initialized in JNI");
  //assert(readLength < MROWS);
#endif
  testcase tc;
  tc.rslen = readLength;
  tc.haplen = haplotypeLength;
  tc.rs = (char*)readBasesArray;
  tc.hap = (char*)haplotypeBasesArray;
  //Quality/penalty values are copied element-wise here (unlike rs/hap which
  //alias the pinned arrays) - presumably q/i/d/c are fixed-size members of
  //testcase in this build; TODO confirm against the testcase definition.
  for(unsigned i=0;i<readLength;++i)
  {
    tc.q[i] = (int)readQualsArray[i];
    tc.i[i] = (int)insertionGOPArray[i];
    tc.d[i] = (int)deletionGOPArray[i];
    tc.c[i] = (int)overallGCPArray[i];
  }

  double result_avxd = g_compute_full_prob_double(&tc, 0);
  //Convert to log10 space and remove the kernel's 2^1020 scaling constant.
  double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
#ifdef DEBUG
  g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(result),true);
#endif


  //Release in exact reverse order of acquisition; read-only, so no copy-back.
  RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RO_RELEASE_MODE);
  RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RO_RELEASE_MODE);
  RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RO_RELEASE_MODE);
  RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RO_RELEASE_MODE);
  RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBases, haplotypeBasesArray, JNI_RO_RELEASE_MODE);
  RELEASE_BYTE_ARRAY_ELEMENTS(readBases, readBasesArray, JNI_RO_RELEASE_MODE);

  return 0.0;
}
|
||||
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/* DO NOT EDIT THIS FILE - it is machine generated */
|
||||
#include <jni.h>
|
||||
/* Header for class org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM */
|
||||
|
||||
#ifndef _Included_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM
|
||||
#define _Included_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_TRISTATE_CORRECTION
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_TRISTATE_CORRECTION 3.0
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToMatch
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToMatch 0L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_indelToMatch
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_indelToMatch 1L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToInsertion
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToInsertion 2L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_insertionToInsertion
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_insertionToInsertion 3L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToDeletion
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToDeletion 4L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_deletionToDeletion
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_deletionToDeletion 5L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug 0L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_verify
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_verify 0L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug0_1
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug0_1 0L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug1
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug1 0L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug2
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug2 0L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug3
|
||||
#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug3 0L
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM
|
||||
* Method: jniInitialize
|
||||
* Signature: (II)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitialize
|
||||
(JNIEnv *, jobject, jint, jint);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM
|
||||
* Method: jniInitializeProbabilities
|
||||
* Signature: ([[D[B[B[B)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitializeProbabilities
|
||||
(JNIEnv *, jclass, jobjectArray, jbyteArray, jbyteArray, jbyteArray);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM
|
||||
* Method: jniInitializePriorsAndUpdateCells
|
||||
* Signature: (ZII[B[B[BI)D
|
||||
*/
|
||||
JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitializePriorsAndUpdateCells
|
||||
(JNIEnv *, jobject, jboolean, jint, jint, jbyteArray, jbyteArray, jbyteArray, jint);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM
|
||||
* Method: jniSubComputeReadLikelihoodGivenHaplotypeLog10
|
||||
* Signature: (II[B[B[B[B[B[BI)D
|
||||
*/
|
||||
JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniSubComputeReadLikelihoodGivenHaplotypeLog10
|
||||
(JNIEnv *, jobject, jint, jint, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jint);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,416 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "headers.h"
|
||||
#include "jni_common.h"
|
||||
#include "org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//Expose the detected hardware capability bits of this machine to the Java layer.
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType
  (JNIEnv* env, jobject thisObject)
{
  jlong machineCapabilityBits = (jlong)get_machine_capabilities();
  return machineCapabilityBits;
}
|
||||
|
||||
//Should be called only once for the whole Java process - initializes field ids for the classes JNIReadDataHolderClass
|
||||
//and JNIHaplotypeDataHolderClass
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask
|
||||
(JNIEnv* env, jobject thisObject, jclass readDataHolderClass, jclass haplotypeDataHolderClass, jlong mask)
|
||||
{
|
||||
assert(readDataHolderClass);
|
||||
assert(haplotypeDataHolderClass);
|
||||
jfieldID fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "readBases", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for readBases");
|
||||
g_load_time_initializer.m_readBasesFID = fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "readQuals", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for readQuals");
|
||||
g_load_time_initializer.m_readQualsFID = fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "insertionGOP", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for insertionGOP");
|
||||
g_load_time_initializer.m_insertionGOPFID = fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "deletionGOP", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for deletionGOP");
|
||||
g_load_time_initializer.m_deletionGOPFID = fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "overallGCP", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for overallGCP");
|
||||
g_load_time_initializer.m_overallGCPFID = fid;
|
||||
|
||||
fid = env->GetFieldID(haplotypeDataHolderClass, "haplotypeBases", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for haplotypeBases");
|
||||
g_load_time_initializer.m_haplotypeBasesFID = fid;
|
||||
if(mask != ENABLE_ALL_HARDWARE_FEATURES)
|
||||
{
|
||||
cout << "Using user supplied hardware mask to re-initialize function pointers for PairHMM\n";
|
||||
initialize_function_pointers((uint64_t)mask);
|
||||
cout.flush();
|
||||
}
|
||||
}
|
||||
|
||||
//Pins the haplotype byte arrays of one region into native memory and records
//them in the output vectors. For each haplotype: fetch the byte array field,
//promote it to a JNI global reference (it must survive across multiple JNI
//calls within the region), drop the local reference, pin the elements, and
//record (globalRef, pinnedPtr) plus the array length.
//releaseHaplotypes() must later unpin and delete these global references.
JNIEXPORT void JNICALL initializeHaplotypes
  (JNIEnv * env, jobject& thisObject, jint numHaplotypes, jobjectArray& haplotypeDataArray,
   vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector, vector<unsigned>& haplotypeBasesLengths)
{
  jboolean is_copy = JNI_FALSE;
  //Reset outputs so stale entries from a previous region cannot leak through.
  haplotypeBasesArrayVector.clear();
  haplotypeBasesLengths.clear();
  haplotypeBasesArrayVector.resize(numHaplotypes);
  haplotypeBasesLengths.resize(numHaplotypes);
  jsize haplotypeBasesLength = 0;
  for(unsigned j=0;j<numHaplotypes;++j)
  {
    jobject haplotypeObject = env->GetObjectArrayElement(haplotypeDataArray, j);
    jbyteArray haplotypeBases = (jbyteArray)env->GetObjectField(haplotypeObject, g_load_time_initializer.m_haplotypeBasesFID);
#ifdef ENABLE_ASSERTIONS
    assert(haplotypeBases && ("haplotypeBases is NULL at index : "+to_string(j)+"\n").c_str());
#endif
    //Need a global reference as this will be accessed across multiple JNI calls to JNIComputeLikelihoods()
    jbyteArray haplotypeBasesGlobalRef = (jbyteArray)env->NewGlobalRef(haplotypeBases);
#ifdef ENABLE_ASSERTIONS
    assert(haplotypeBasesGlobalRef && ("Could not get global ref to haplotypeBases at index : "+to_string(j)+"\n").c_str());
#endif
    env->DeleteLocalRef(haplotypeBases); //free the local reference
    jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBasesGlobalRef, &is_copy);
    haplotypeBasesLength = env->GetArrayLength(haplotypeBasesGlobalRef);
#ifdef ENABLE_ASSERTIONS
    assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI");
    //assert(haplotypeBasesLength < MCOLS);
#endif
#ifdef DEBUG0_1
    cout << "JNI haplotype length "<<haplotypeBasesLength<<"\n";
#endif
    haplotypeBasesArrayVector[j] = make_pair(haplotypeBasesGlobalRef, haplotypeBasesArray);
    haplotypeBasesLengths[j] = haplotypeBasesLength;
#ifdef DEBUG3
    for(unsigned k=0;k<haplotypeBasesLength;++k)
      g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true);
#endif
#ifdef DO_PROFILING
    g_load_time_initializer.update_stat(HAPLOTYPE_LENGTH_IDX, haplotypeBasesLength);
    //Count bytes only when the JVM actually copied (vs pinned) the array.
    g_load_time_initializer.m_bytes_copied += (is_copy ? haplotypeBasesLength : 0);
#endif
  }
}
|
||||
|
||||
//Unpin every haplotype byte array and drop its global reference.
//Entries are handed back in the exact reverse of the order in which
//initializeHaplotypes acquired them; both bookkeeping vectors end up empty.
JNIEXPORT void JNICALL releaseHaplotypes(JNIEnv * env, jobject thisObject,
    vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector, vector<unsigned>& haplotypeBasesLengths
    )
{
  while(!haplotypeBasesArrayVector.empty())
  {
    pair<jbyteArray, jbyte*>& lastEntry = haplotypeBasesArrayVector.back();
    RELEASE_BYTE_ARRAY_ELEMENTS(lastEntry.first, lastEntry.second, JNI_RO_RELEASE_MODE);
    env->DeleteGlobalRef(lastEntry.first); //free the global reference
    haplotypeBasesArrayVector.pop_back();
  }
  haplotypeBasesLengths.clear();
}
|
||||
|
||||
|
||||
//Global (single-threaded-only) storage for the current region's haplotypes:
//(global ref, pinned element pointer) pairs plus the matching array lengths.
vector<pair<jbyteArray, jbyte*> > g_haplotypeBasesArrayVector;
vector<unsigned> g_haplotypeBasesLengths;
|
||||
//Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region,
//transfer the list only once
//Works only for ST case as the haplotype data is stored in global variables
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeHaplotypes
  (JNIEnv * env, jobject thisObject, jint numHaplotypes, jobjectArray haplotypeDataArray)
{
#ifdef SINGLE_THREADED_ONLY
  //To ensure, GET_BYTE_ARRAY_ELEMENTS is invoked only once for each haplotype, store bytearrays in a vector
  initializeHaplotypes(env, thisObject, numHaplotypes, haplotypeDataArray, g_haplotypeBasesArrayVector, g_haplotypeBasesLengths);
#endif
  //In non-SINGLE_THREADED_ONLY builds this is a no-op: jniComputeLikelihoods
  //pins the haplotypes into function-local vectors instead of the globals.
}
|
||||
|
||||
|
||||
//Create a vector of testcases for computation - copy the references to bytearrays read/readQuals etc into the appropriate
//testcase struct
//For each of the numReads reads, pins its five byte arrays once and builds one
//testcase per (read, haplotype) pair, so tc_array must already be sized to
//numReads*numHaplotypes. The pinned (array, pointer) pairs are recorded in
//readBasesArrayVector so jniReleaseReadArrays can unpin them after compute.
inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector
  (JNIEnv* env, jint numReads, jint numHaplotypes, jobjectArray& readDataArray,
   vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector,
   vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector, vector<unsigned>& haplotypeBasesLengths,
   vector<testcase>& tc_array)
{
  jboolean is_copy = JNI_FALSE;
  unsigned tc_idx = 0;
  for(unsigned i=0;i<numReads;++i)
  {
    //Get bytearray fields from read
    jobject readObject = env->GetObjectArrayElement(readDataArray, i);
    jbyteArray readBases = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_readBasesFID);
    jbyteArray insertionGOP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_insertionGOPFID);
    jbyteArray deletionGOP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_deletionGOPFID);
    jbyteArray overallGCP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_overallGCPFID);
    jbyteArray readQuals = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_readQualsFID);

#ifdef ENABLE_ASSERTIONS
    assert(readBases && ("readBases is NULL at index : "+to_string(i)+"\n").c_str());
    assert(insertionGOP && ("insertionGOP is NULL at index : "+to_string(i)+"\n").c_str());
    assert(deletionGOP && ("deletionGOP is NULL at index : "+to_string(i)+"\n").c_str());
    assert(overallGCP && ("overallGCP is NULL at index : "+to_string(i)+"\n").c_str());
    assert(readQuals && ("readQuals is NULL at index : "+to_string(i)+"\n").c_str());
#endif
    jsize readLength = env->GetArrayLength(readBases);

    //Pin all five arrays once per read; released later in exact reverse order.
    jbyte* readBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readBases, &is_copy); //order of GET-RELEASE is important
    jbyte* readQualsArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readQuals, &is_copy);
    jbyte* insertionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(insertionGOP, &is_copy);
    jbyte* deletionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(deletionGOP, &is_copy);
    jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy);
#ifdef DO_PROFILING
    //Five equal-length arrays per read; counted only if the JVM copied them.
    g_load_time_initializer.m_bytes_copied += (is_copy ? readLength*5 : 0);
    g_load_time_initializer.update_stat(READ_LENGTH_IDX, readLength);
#endif
#ifdef ENABLE_ASSERTIONS
    assert(readBasesArray && "readBasesArray not initialized in JNI");
    assert(readQualsArray && "readQualsArray not initialized in JNI");
    assert(insertionGOPArray && "insertionGOP array not initialized in JNI");
    assert(deletionGOPArray && "deletionGOP array not initialized in JNI");
    assert(overallGCPArray && "overallGCP array not initialized in JNI");
    //assert(readLength < MROWS);
    assert(readLength == env->GetArrayLength(readQuals));
    assert(readLength == env->GetArrayLength(insertionGOP));
    assert(readLength == env->GetArrayLength(deletionGOP));
    assert(readLength == env->GetArrayLength(overallGCP));
#endif
#ifdef DEBUG0_1
    cout << "JNI read length "<<readLength<<"\n";
#endif
#ifdef DEBUG3
    for(unsigned j=0;j<readLength;++j)
    {
      g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)readBasesArray[j]),true);
      g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)readQualsArray[j]),true);
      g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)insertionGOPArray[j]),true);
      g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)deletionGOPArray[j]),true);
      g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)overallGCPArray[j]),true);
    }
#endif
    //One testcase per (read, haplotype) pair - pointers alias the pinned arrays,
    //so nothing may be released until compute_testcases has finished.
    for(unsigned j=0;j<numHaplotypes;++j)
    {
      jsize haplotypeLength = (jsize)haplotypeBasesLengths[j];
      jbyte* haplotypeBasesArray = haplotypeBasesArrayVector[j].second;
      tc_array[tc_idx].rslen = (int)readLength;
      tc_array[tc_idx].haplen = (int)haplotypeLength;
      tc_array[tc_idx].hap = (char*)haplotypeBasesArray;
      tc_array[tc_idx].rs = (char*)readBasesArray;
      tc_array[tc_idx].q = (char*)readQualsArray;
      tc_array[tc_idx].i = (char*)insertionGOPArray;
      tc_array[tc_idx].d = (char*)deletionGOPArray;
      tc_array[tc_idx].c = (char*)overallGCPArray;
#ifdef DO_PROFILING
      g_load_time_initializer.update_stat(PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX, ((uint64_t)readLength)*((uint64_t)haplotypeLength));
#endif
#ifdef DUMP_TO_SANDBOX
      g_load_time_initializer.dump_sandbox(tc_array[tc_idx], tc_idx, numReads, numHaplotypes);
#endif
      ++tc_idx;
    }
    //Store the read array references and release them at the end because they are used by compute_full_prob
    //Maintain order in which GET_BYTE_ARRAY_ELEMENTS called
    readBasesArrayVector[i].clear();
    readBasesArrayVector[i].resize(5);
    readBasesArrayVector[i][0] = make_pair(readBases, readBasesArray);
    readBasesArrayVector[i][1] = make_pair(readQuals, readQualsArray);
    readBasesArrayVector[i][2] = make_pair(insertionGOP, insertionGOPArray);
    readBasesArrayVector[i][3] = make_pair(deletionGOP, deletionGOPArray);
    readBasesArrayVector[i][4] = make_pair(overallGCP, overallGCPArray);
  }
}
|
||||
|
||||
//Do compute over vector of testcase structs
|
||||
inline void compute_testcases(vector<testcase>& tc_array, unsigned numTestCases, double* likelihoodDoubleArray,
|
||||
unsigned maxNumThreadsToUse)
|
||||
{
|
||||
#ifdef DO_REPEAT_PROFILING
|
||||
for(unsigned i=0;i<10;++i)
|
||||
#endif
|
||||
{
|
||||
#pragma omp parallel for schedule (dynamic,10000) num_threads(maxNumThreadsToUse)
|
||||
for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.update_stat(NUM_DOUBLE_INVOCATIONS_IDX, 1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
likelihoodDoubleArray[tc_idx] = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//Inform the Java VM that we no longer need access to the read arrays (and free memory)
|
||||
inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniReleaseReadArrays
|
||||
(JNIEnv* env, vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector)
|
||||
{
|
||||
//Release read arrays first
|
||||
for(int i=readBasesArrayVector.size()-1;i>=0;--i)//note the order - reverse of GET
|
||||
{
|
||||
for(int j=readBasesArrayVector[i].size()-1;j>=0;--j)
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RO_RELEASE_MODE);
|
||||
readBasesArrayVector[i].clear();
|
||||
}
|
||||
readBasesArrayVector.clear();
|
||||
}
|
||||
|
||||
|
||||
#ifdef DO_WARMUP
//Accumulator that forces every pinned byte to be touched before timing starts.
uint64_t g_sum = 0;
#endif
//JNI function to invoke compute_full_prob_avx
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases
//likelihoodArray - array of doubles to return results back to Java. Memory allocated by Java prior to JNI call
//maxNumThreadsToUse - Max number of threads that OpenMP can use for the HMM computation
//Overall flow: pin haplotypes (locals here, or globals pre-pinned by
//jniInitializeHaplotypes in SINGLE_THREADED_ONLY builds), build the testcase
//vector, run compute_testcases, copy results back, then release everything.
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods
  (JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes,
   jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse)
{
#ifdef DEBUG0_1
  cout << "JNI numReads "<<numReads<<" numHaplotypes "<<numHaplotypes<<"\n";
#endif
  jboolean is_copy = JNI_FALSE;
  struct timespec start_time; //only read under DO_PROFILING
  unsigned numTestCases = numReads*numHaplotypes;
  //vector to store testcases
  vector<testcase> tc_array;
  tc_array.clear();
  tc_array.resize(numTestCases);
  //Store read arrays for release later
  vector<vector<pair<jbyteArray,jbyte*> > > readBasesArrayVector;
  readBasesArrayVector.clear();
  readBasesArrayVector.resize(numReads);
#ifdef DUMP_TO_SANDBOX
  g_load_time_initializer.open_sandbox();
#endif
#ifdef DO_PROFILING
  get_time(&start_time);
#endif

#ifdef SINGLE_THREADED_ONLY
  //Haplotypes were already pinned into the globals by jniInitializeHaplotypes.
  vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
  vector<unsigned>& haplotypeBasesLengths = g_haplotypeBasesLengths;
#else
  //Multi-threaded: pin haplotypes into function-local vectors for this call only.
  vector<pair<jbyteArray, jbyte*> > l_haplotypeBasesArrayVector;
  vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = l_haplotypeBasesArrayVector;
  vector<unsigned> l_haplotypeBasesLengths;
  vector<unsigned>& haplotypeBasesLengths = l_haplotypeBasesLengths;
  initializeHaplotypes(env, thisObject, numHaplotypes, haplotypeDataArray, haplotypeBasesArrayVector, haplotypeBasesLengths);
#endif
  //Copy byte array references from Java memory into vector of testcase structs
  Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector(env,
      numReads, numHaplotypes, readDataArray, readBasesArrayVector, haplotypeBasesArrayVector, haplotypeBasesLengths, tc_array);

#ifdef DO_PROFILING
  g_load_time_initializer.m_data_transfer_time += diff_time(start_time);
#endif

  //Get double array where results are stored (to pass back to java)
  jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
#ifdef ENABLE_ASSERTIONS
  assert(likelihoodDoubleArray && "likelihoodArray is NULL");
  assert(env->GetArrayLength(likelihoodArray) == numTestCases);
#endif
#ifdef DO_WARMUP //ignore - only for crazy profiling
  //Touch every pinned byte so page faults don't land inside the timed region.
  for(unsigned i=0;i<haplotypeBasesArrayVector.size();++i)
  {
    unsigned curr_size = env->GetArrayLength(haplotypeBasesArrayVector[i].first);
    for(unsigned j=0;j<curr_size;++j)
      g_sum += ((uint64_t)((haplotypeBasesArrayVector[i].second)[j]));
  }
  for(unsigned i=0;i<readBasesArrayVector.size();++i)
  {
    for(unsigned j=0;j<readBasesArrayVector[i].size();++j)
    {
      unsigned curr_size = env->GetArrayLength(readBasesArrayVector[i][j].first);
      for(unsigned k=0;k<curr_size;++k)
        g_sum += ((uint64_t)((readBasesArrayVector[i][j].second)[k]));
    }
  }
#endif
#ifdef DO_PROFILING
  g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
  get_time(&start_time);
#endif
  compute_testcases(tc_array, numTestCases, likelihoodDoubleArray, maxNumThreadsToUse); //actual computation
#ifdef DO_PROFILING
  g_load_time_initializer.m_compute_time += diff_time(start_time);
#endif
#ifdef DUMP_COMPUTE_VALUES
  for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
    g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(likelihoodDoubleArray[tc_idx]),true);
#endif
#ifdef DO_PROFILING
  get_time(&start_time);
#endif
  RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory (if copy made)
  Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniReleaseReadArrays(env, readBasesArrayVector);
#ifndef SINGLE_THREADED_ONLY
  //Local haplotype pins die with this call; in ST builds jniFinalizeRegion releases the globals instead.
  releaseHaplotypes(env, thisObject, haplotypeBasesArrayVector, haplotypeBasesLengths);
#endif

#ifdef DO_PROFILING
  g_load_time_initializer.m_data_transfer_time += diff_time(start_time);
  g_load_time_initializer.update_stat(NUM_REGIONS_IDX, 1);
  g_load_time_initializer.update_stat(NUM_READS_IDX, numReads);
  g_load_time_initializer.update_stat(NUM_HAPLOTYPES_IDX, numHaplotypes);
  g_load_time_initializer.update_stat(NUM_TESTCASES_IDX, numTestCases);
#endif
  tc_array.clear();
#ifdef DUMP_TO_SANDBOX
  g_load_time_initializer.close_sandbox();
#endif
}
|
||||
|
||||
//If single threaded, release haplotypes at the end of a region
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniFinalizeRegion
|
||||
(JNIEnv * env, jobject thisObject)
|
||||
{
|
||||
#ifdef SINGLE_THREADED_ONLY
|
||||
releaseHaplotypes(env, thisObject, g_haplotypeBasesArrayVector, g_haplotypeBasesLengths);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// JNI hook invoked when the Java-side PairHMM object is closed: flush the
// profiling / debug-dump state guarded by the corresponding compile flags.
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniClose
  (JNIEnv* env, jobject thisObject)
{
#ifdef DO_PROFILING
  g_load_time_initializer.print_profiling();  // dump accumulated timing stats
#endif
#ifdef DUMP_COMPUTE_VALUES
  g_load_time_initializer.debug_close();      // close debug dump files
#endif
}
|
||||
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
/* DO NOT EDIT THIS FILE - it is machine generated */
/* NOTE(review): regenerate with javah/javac -h from the Java class rather than
 * editing by hand; the constants below mirror fields of the Java class. */
#include <jni.h>
/* Header for class org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM */

#ifndef _Included_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
#define _Included_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
#ifdef __cplusplus
extern "C" {
#endif
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_TRISTATE_CORRECTION
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_TRISTATE_CORRECTION 3.0
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToMatch
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToMatch 0L
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_indelToMatch
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_indelToMatch 1L
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToInsertion
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToInsertion 2L
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_insertionToInsertion
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_insertionToInsertion 3L
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToDeletion
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToDeletion 4L
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_deletionToDeletion
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_deletionToDeletion 5L
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_sse42Mask
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_sse42Mask 1LL
#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_avxMask
#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_avxMask 2LL
/*
 * Class:     org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
 * Method:    jniGetMachineType
 * Signature: ()J
 */
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType
  (JNIEnv *, jobject);

/*
 * Class:     org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
 * Method:    jniClose
 * Signature: ()V
 */
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniClose
  (JNIEnv *, jobject);

/*
 * Class:     org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
 * Method:    jniInitializeClassFieldsAndMachineMask
 * Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V
 */
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask
  (JNIEnv *, jobject, jclass, jclass, jlong);

/*
 * Class:     org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
 * Method:    jniInitializeHaplotypes
 * Signature: (I[Lorg/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM/JNIHaplotypeDataHolderClass;)V
 */
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeHaplotypes
  (JNIEnv *, jobject, jint, jobjectArray);

/*
 * Class:     org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
 * Method:    jniFinalizeRegion
 * Signature: ()V
 */
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniFinalizeRegion
  (JNIEnv *, jobject);

/*
 * Class:     org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM
 * Method:    jniComputeLikelihoods
 * Signature: (II[Lorg/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM/JNIReadDataHolderClass;[Lorg/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM/JNIHaplotypeDataHolderClass;[DI)V
 */
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods
  (JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint);

#ifdef __cplusplus
}
#endif
#endif
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "headers.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
#define BATCH_SIZE 10000
|
||||
if(argc < 2)
|
||||
{
|
||||
cerr << "Needs path to input file as argument\n";
|
||||
exit(0);
|
||||
}
|
||||
bool use_old_read_testcase = false;
|
||||
if(argc >= 3 && string(argv[2]) == "1")
|
||||
use_old_read_testcase = true;
|
||||
unsigned chunk_size = 10000;
|
||||
bool do_check = true;
|
||||
uint64_t mask = ~(0ull);
|
||||
for(int i=3;i<argc;++i)
|
||||
{
|
||||
if(strncmp(argv[i], "-chunk_size", 15) == 0)
|
||||
{
|
||||
++i;
|
||||
chunk_size = strtol(argv[i],0,10);
|
||||
}
|
||||
else
|
||||
if(strncmp(argv[i], "-mask", 15) == 0)
|
||||
{
|
||||
++i;
|
||||
mask = strtoll(argv[i],0,16);
|
||||
}
|
||||
else
|
||||
if(strncmp(argv[i], "-no-check", 15) == 0)
|
||||
do_check = false;
|
||||
}
|
||||
if(mask != (~0ull))
|
||||
initialize_function_pointers(mask);
|
||||
do_compute(argv[1], use_old_read_testcase, chunk_size, do_check);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,380 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef PRECISION
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
// Precompute, for every possible read-base character, a bit mask over the
// haplotype columns: the bit for column `col` in maskArr[word][c] is set when
// haplotype column `col` matches character c (or either side is ambiguous).
// Packing is MSB-first: col 1 -> MSB of word 0, col maskBitCnt -> LSB of
// word 0, col maskBitCnt+1 -> MSB of word 1, and so on.
void CONCAT(CONCAT(precompute_masks_,SIMD_ENGINE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]) {

  const int bitsPerWord = MAIN_TYPE_SIZE;

  // Start from all-zero masks; the ambiguous read character matches every
  // haplotype column, so its mask is all ones.
  for (int wordIdx = 0; wordIdx < numMaskVecs; ++wordIdx) {
    for (int ch = 0; ch < NUM_DISTINCT_CHARS; ++ch)
      maskArr[wordIdx][ch] = 0;
    maskArr[wordIdx][AMBIG_CHAR] = MASK_ALL_ONES;
  }

  for (int col = 1; col < COLS; ++col) {
    const int wordIdx   = (col - 1) / bitsPerWord;
    const int bitOffset = (col - 1) % bitsPerWord;
    const MASK_TYPE bit = ((MASK_TYPE)0x1) << (bitsPerWord - 1 - bitOffset);

    const char hapChar = ConvertChar::get(tc.hap[col - 1]);

    // An ambiguous haplotype base matches every read character.
    if (hapChar == AMBIG_CHAR) {
      for (int ch = 0; ch < NUM_DISTINCT_CHARS; ++ch)
        maskArr[wordIdx][ch] |= bit;
    }

    maskArr[wordIdx][hapChar] |= bit;
  }
}
|
||||
|
||||
// Load the converted read bases for the rows of one stripe into rsArr and
// clear the per-lane shift-out carry used while sliding masks across columns.
void CONCAT(CONCAT(init_masks_for_row_,SIMD_ENGINE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess) {

  for (int row = 0; row < numRowsToProcess; ++row)
    rsArr[row] = ConvertChar::get(tc.rs[row + beginRowIndex - 1]);

  for (int lane = 0; lane < AVX_LENGTH; ++lane)
    lastMaskShiftOut[lane] = 0;
}
|
||||
|
||||
// Shift __srcMask right by __shiftBy bits, OR-ing in the bits carried over
// from the previous word (__lastShiftOut) at the top, and save the bits
// shifted out of this word into __lastShiftOut for the next word.
#define SET_MASK_WORD(__dstMask, __srcMask, __lastShiftOut, __shiftBy, __maskBitCnt){ \
    MASK_TYPE __bitMask = (((MASK_TYPE)0x1) << __shiftBy) - 1 ; \
    /* Guard __shiftBy == 0: shifting by the full word width (__maskBitCnt)  */ \
    /* is undefined behavior; the carried-out value is 0 in that case anyway. */ \
    MASK_TYPE __nextShiftOut = (__shiftBy == 0) ? (MASK_TYPE)0 : ((__srcMask & __bitMask) << (__maskBitCnt - __shiftBy)) ; \
    __dstMask = (__srcMask >> __shiftBy) | __lastShiftOut ; \
    __lastShiftOut = __nextShiftOut ; \
}
|
||||
|
||||
|
||||
// Advance the per-lane match masks for the next group of columns.  Each SIMD
// lane tracks a different read row, so lane `ei` needs the mask shifted by
// `ei` bits for the anti-diagonal traversal to line up.  The masks are kept
// in two half-width (low/high) vectors because AVX lacks the required
// integer-shift intrinsics.
void CONCAT(CONCAT(update_masks_for_cols_,SIMD_ENGINE), PRECISION)(int maskIndex, BITMASK_VEC& bitMaskVec, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, int maskBitCnt) {

  for (int lane = 0; lane < AVX_LENGTH/2; ++lane) {
    SET_MASK_WORD(bitMaskVec.getLowEntry(lane), maskArr[maskIndex][rsArr[lane]],
                  lastMaskShiftOut[lane], lane, maskBitCnt) ;

    const int hiLane = lane + AVX_LENGTH/2 ;  // matching entry in the high half
    SET_MASK_WORD(bitMaskVec.getHighEntry(lane), maskArr[maskIndex][rsArr[hiLane]],
                  lastMaskShiftOut[hiLane], hiLane, maskBitCnt) ;
  }
}
|
||||
|
||||
|
||||
// Per lane, pick distm (mismatch) or 1-distm (match) according to the current
// top bit of the mask vector, then advance the mask by one column.
inline void CONCAT(CONCAT(computeDistVec,SIMD_ENGINE), PRECISION) (BITMASK_VEC& bitMaskVec, SIMD_TYPE& distm, SIMD_TYPE& _1_distm, SIMD_TYPE& distmChosen) {
  // Blend selects _1_distm where the mask bit is set (base matches).
  distmChosen = VEC_BLENDV(distm, _1_distm, bitMaskVec.getCombinedMask()) ;
  // Consume this column's bit before the next anti-diagonal step.
  bitMaskVec.shift_left_1bit() ;
}
|
||||
|
||||
/*
|
||||
* This function:
|
||||
* 1- Intializes probability values p_MM, p_XX, P_YY, p_MX, p_GAPM and pack them into vectors (SSE or AVX)
|
||||
* 2- Precompute parts of "distm" which only depeneds on a row number and pack it into vector
|
||||
*/
|
||||
|
||||
template<class NUMBER> void CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, SIMD_TYPE *p_MM, SIMD_TYPE *p_GAPM, SIMD_TYPE *p_MX, SIMD_TYPE *p_XX, SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, SIMD_TYPE *distm1D)
|
||||
{
|
||||
NUMBER zero = ctx._(0.0);
|
||||
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
for (int s=0;s<ROWS+COLS+AVX_LENGTH;s++)
|
||||
{
|
||||
shiftOutM[s] = zero;
|
||||
shiftOutX[s] = zero;
|
||||
shiftOutY[s] = init_Y;
|
||||
}
|
||||
|
||||
NUMBER *ptr_p_MM = (NUMBER *)p_MM;
|
||||
NUMBER *ptr_p_XX = (NUMBER *)p_XX;
|
||||
NUMBER *ptr_p_YY = (NUMBER *)p_YY;
|
||||
NUMBER *ptr_p_MX = (NUMBER *)p_MX;
|
||||
NUMBER *ptr_p_MY = (NUMBER *)p_MY;
|
||||
NUMBER *ptr_p_GAPM = (NUMBER *)p_GAPM;
|
||||
|
||||
*ptr_p_MM = ctx._(0.0);
|
||||
*ptr_p_XX = ctx._(0.0);
|
||||
*ptr_p_YY = ctx._(0.0);
|
||||
*ptr_p_MX = ctx._(0.0);
|
||||
*ptr_p_MY = ctx._(0.0);
|
||||
*ptr_p_GAPM = ctx._(0.0);
|
||||
|
||||
for (int r = 1; r < ROWS; r++)
|
||||
{
|
||||
int _i = tc->i[r-1] & 127;
|
||||
int _d = tc->d[r-1] & 127;
|
||||
int _c = tc->c[r-1] & 127;
|
||||
|
||||
//*(ptr_p_MM+r-1) = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
||||
SET_MATCH_TO_MATCH_PROB(*(ptr_p_MM+r-1), _i, _d);
|
||||
*(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c];
|
||||
*(ptr_p_MX+r-1) = ctx.ph2pr[_i];
|
||||
*(ptr_p_XX+r-1) = ctx.ph2pr[_c];
|
||||
*(ptr_p_MY+r-1) = ctx.ph2pr[_d];
|
||||
*(ptr_p_YY+r-1) = ctx.ph2pr[_c];
|
||||
}
|
||||
|
||||
NUMBER *ptr_distm1D = (NUMBER *)distm1D;
|
||||
for (int r = 1; r < ROWS; r++)
|
||||
{
|
||||
int _q = tc->q[r-1] & 127;
|
||||
ptr_distm1D[r-1] = ctx.ph2pr[_q];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This function handles pre-stripe computation:
|
||||
* 1- Retrieve probaility vectors from memory
|
||||
* 2- Initialize M, X, Y vectors with all 0's (for the first stripe) and shifting the last row from previous stripe for the rest
|
||||
*/
|
||||
|
||||
template<class NUMBER> inline void CONCAT(CONCAT(stripeINITIALIZATION,SIMD_ENGINE), PRECISION)(
|
||||
int stripeIdx, Context<NUMBER> ctx, testcase *tc, SIMD_TYPE &pGAPM, SIMD_TYPE &pMM, SIMD_TYPE &pMX, SIMD_TYPE &pXX, SIMD_TYPE &pMY, SIMD_TYPE &pYY,
|
||||
SIMD_TYPE &rs, UNION_TYPE &rsN, SIMD_TYPE &distm, SIMD_TYPE &_1_distm, SIMD_TYPE *distm1D, SIMD_TYPE N_packed256, SIMD_TYPE *p_MM , SIMD_TYPE *p_GAPM ,
|
||||
SIMD_TYPE *p_MX, SIMD_TYPE *p_XX , SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM)
|
||||
{
|
||||
int i = stripeIdx;
|
||||
pGAPM = p_GAPM[i];
|
||||
pMM = p_MM[i];
|
||||
pMX = p_MX[i];
|
||||
pXX = p_XX[i];
|
||||
pMY = p_MY[i];
|
||||
pYY = p_YY[i];
|
||||
|
||||
NUMBER zero = ctx._(0.0);
|
||||
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0);
|
||||
UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(3.0);
|
||||
|
||||
distm = distm1D[i];
|
||||
_1_distm = VEC_SUB(packed1.d, distm);
|
||||
|
||||
distm = VEC_DIV(distm, packed3.d);
|
||||
|
||||
/* initialize M_t_2, M_t_1, X_t_2, X_t_1, Y_t_2, Y_t_1 */
|
||||
M_t_2.d = VEC_SET1_VAL(zero);
|
||||
X_t_2.d = VEC_SET1_VAL(zero);
|
||||
|
||||
if (i==0) {
|
||||
M_t_1.d = VEC_SET1_VAL(zero);
|
||||
X_t_1.d = VEC_SET1_VAL(zero);
|
||||
Y_t_2.d = VEC_SET_LSE(init_Y);
|
||||
Y_t_1.d = VEC_SET1_VAL(zero);
|
||||
}
|
||||
else {
|
||||
X_t_1.d = VEC_SET_LSE(shiftOutX[AVX_LENGTH]);
|
||||
M_t_1.d = VEC_SET_LSE(shiftOutM[AVX_LENGTH]);
|
||||
Y_t_2.d = VEC_SET1_VAL(zero);
|
||||
Y_t_1.d = VEC_SET1_VAL(zero);
|
||||
}
|
||||
M_t_1_y = M_t_1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is the main compute kernel to compute M, X and Y
|
||||
*/
|
||||
|
||||
inline void CONCAT(CONCAT(computeMXY,SIMD_ENGINE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
SIMD_TYPE pMM, SIMD_TYPE pGAPM, SIMD_TYPE pMX, SIMD_TYPE pXX, SIMD_TYPE pMY, SIMD_TYPE pYY, SIMD_TYPE distmSel)
|
||||
{
|
||||
/* Compute M_t <= distm * (p_MM*M_t_2 + p_GAPM*X_t_2 + p_GAPM*Y_t_2) */
|
||||
M_t.d = VEC_MUL(VEC_ADD(VEC_ADD(VEC_MUL(M_t_2.d, pMM), VEC_MUL(X_t_2.d, pGAPM)), VEC_MUL(Y_t_2.d, pGAPM)), distmSel);
|
||||
//M_t.d = VEC_MUL( VEC_ADD(VEC_MUL(M_t_2.d, pMM), VEC_MUL(VEC_ADD(X_t_2.d, Y_t_2.d), pGAPM)), distmSel);
|
||||
|
||||
M_t_y = M_t;
|
||||
|
||||
/* Compute X_t */
|
||||
X_t.d = VEC_ADD(VEC_MUL(M_t_1.d, pMX) , VEC_MUL(X_t_1.d, pXX));
|
||||
|
||||
/* Compute Y_t */
|
||||
Y_t.d = VEC_ADD(VEC_MUL(M_t_1_y.d, pMY) , VEC_MUL(Y_t_1.d, pYY));
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the main compute function. It operates on the matrix in s stripe manner.
|
||||
* The stripe height is determined by the SIMD engine type.
|
||||
* Stripe height: "AVX float": 8, "AVX double": 4, "SSE float": 4, "SSE double": 2
|
||||
* For each stripe the operations are anti-diagonal based.
|
||||
* Each anti-diagonal (M_t, Y_t, X_t) depends on the two previous anti-diagonals (M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, Y_t_1).
|
||||
* Each stripe (except the fist one) depends on the last row of the previous stripe.
|
||||
* The last stripe computation handles the addition of the last row of M and X, that's the reason for loop spliting.
|
||||
*/
|
||||
|
||||
template<class NUMBER> NUMBER CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL)
|
||||
{
|
||||
int ROWS = tc->rslen + 1;
|
||||
int COLS = tc->haplen + 1;
|
||||
int MAVX_COUNT = (ROWS+AVX_LENGTH-1)/AVX_LENGTH;
|
||||
|
||||
/* Probaility arrays */
|
||||
SIMD_TYPE p_MM [MAVX_COUNT], p_GAPM [MAVX_COUNT], p_MX [MAVX_COUNT];
|
||||
SIMD_TYPE p_XX [MAVX_COUNT], p_MY [MAVX_COUNT], p_YY [MAVX_COUNT];
|
||||
|
||||
/* For distm precomputation */
|
||||
SIMD_TYPE distm1D[MAVX_COUNT];
|
||||
|
||||
/* Carries the values from each stripe to the next stripe */
|
||||
NUMBER shiftOutM[ROWS+COLS+AVX_LENGTH], shiftOutX[ROWS+COLS+AVX_LENGTH], shiftOutY[ROWS+COLS+AVX_LENGTH];
|
||||
|
||||
/* The vector to keep the anti-diagonals of M, X, Y*/
|
||||
/* Current: M_t, X_t, Y_t */
|
||||
/* Previous: M_t_1, X_t_1, Y_t_1 */
|
||||
/* Previous to previous: M_t_2, X_t_2, Y_t_2 */
|
||||
UNION_TYPE M_t, M_t_1, M_t_2, X_t, X_t_1, X_t_2, Y_t, Y_t_1, Y_t_2, M_t_y, M_t_1_y;
|
||||
|
||||
/* Probality vectors */
|
||||
SIMD_TYPE pGAPM, pMM, pMX, pXX, pMY, pYY;
|
||||
|
||||
struct timeval start, end;
|
||||
NUMBER result_avx2;
|
||||
Context<NUMBER> ctx;
|
||||
UNION_TYPE rs , rsN;
|
||||
HAP_TYPE hap;
|
||||
SIMD_TYPE distmSel, distmChosen ;
|
||||
SIMD_TYPE distm, _1_distm;
|
||||
|
||||
int r, c;
|
||||
NUMBER zero = ctx._(0.0);
|
||||
UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0);
|
||||
SIMD_TYPE N_packed256 = VEC_POPCVT_CHAR('N');
|
||||
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
int remainingRows = (ROWS-1) % AVX_LENGTH;
|
||||
int stripe_cnt = ((ROWS-1) / AVX_LENGTH) + (remainingRows!=0);
|
||||
|
||||
const int maskBitCnt = MAIN_TYPE_SIZE ;
|
||||
const int numMaskVecs = (COLS+ROWS+maskBitCnt-1)/maskBitCnt ; // ceil function
|
||||
|
||||
/* Mask precomputation for distm*/
|
||||
MASK_TYPE maskArr[numMaskVecs][NUM_DISTINCT_CHARS] ;
|
||||
CONCAT(CONCAT(precompute_masks_,SIMD_ENGINE), PRECISION)(*tc, COLS, numMaskVecs, maskArr) ;
|
||||
|
||||
char rsArr[AVX_LENGTH] ;
|
||||
MASK_TYPE lastMaskShiftOut[AVX_LENGTH] ;
|
||||
|
||||
/* Precompute initialization for probabilities and shift vector*/
|
||||
CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECISION)<NUMBER>(ROWS, COLS, shiftOutM, shiftOutX, shiftOutY,
|
||||
ctx, tc, p_MM, p_GAPM, p_MX, p_XX, p_MY, p_YY, distm1D);
|
||||
|
||||
for (int i=0;i<stripe_cnt-1;i++)
|
||||
{
|
||||
//STRIPE_INITIALIZATION
|
||||
CONCAT(CONCAT(stripeINITIALIZATION,SIMD_ENGINE), PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM ,
|
||||
p_MX, p_XX , p_MY, p_YY, M_t_2, X_t_2, M_t_1, X_t_1, Y_t_2, Y_t_1, M_t_1_y, shiftOutX, shiftOutM);
|
||||
CONCAT(CONCAT(init_masks_for_row_,SIMD_ENGINE), PRECISION)(*tc, rsArr, lastMaskShiftOut, i*AVX_LENGTH+1, AVX_LENGTH) ;
|
||||
// Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors
|
||||
|
||||
BITMASK_VEC bitMaskVec ;
|
||||
|
||||
for (int begin_d=1;begin_d<COLS+AVX_LENGTH;begin_d+=MAIN_TYPE_SIZE)
|
||||
{
|
||||
int numMaskBitsToProcess = std::min(MAIN_TYPE_SIZE, COLS+AVX_LENGTH-begin_d) ;
|
||||
CONCAT(CONCAT(update_masks_for_cols_,SIMD_ENGINE), PRECISION)((begin_d-1)/MAIN_TYPE_SIZE, bitMaskVec, maskArr, rsArr, lastMaskShiftOut, maskBitCnt) ;
|
||||
|
||||
for (int mbi=0; mbi < numMaskBitsToProcess; ++mbi) {
|
||||
CONCAT(CONCAT(computeDistVec,SIMD_ENGINE), PRECISION) (bitMaskVec, distm, _1_distm, distmChosen) ;
|
||||
int ShiftIdx = begin_d + mbi + AVX_LENGTH;
|
||||
|
||||
CONCAT(CONCAT(computeMXY,SIMD_ENGINE), PRECISION)(M_t, X_t, Y_t, M_t_y, M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, M_t_1_y, Y_t_1,
|
||||
pMM, pGAPM, pMX, pXX, pMY, pYY, distmChosen);
|
||||
|
||||
CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION)(M_t, shiftOutM[ShiftIdx], shiftOutM[begin_d+mbi]);
|
||||
|
||||
CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION)(X_t, shiftOutX[ShiftIdx], shiftOutX[begin_d+mbi]);
|
||||
|
||||
CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION)(Y_t_1, shiftOutY[ShiftIdx], shiftOutY[begin_d+mbi]);
|
||||
|
||||
M_t_2 = M_t_1; M_t_1 = M_t; X_t_2 = X_t_1; X_t_1 = X_t;
|
||||
Y_t_2 = Y_t_1; Y_t_1 = Y_t; M_t_1_y = M_t_y;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int i = stripe_cnt-1;
|
||||
{
|
||||
//STRIPE_INITIALIZATION
|
||||
CONCAT(CONCAT(stripeINITIALIZATION,SIMD_ENGINE), PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM ,
|
||||
p_MX, p_XX , p_MY, p_YY, M_t_2, X_t_2, M_t_1, X_t_1, Y_t_2, Y_t_1, M_t_1_y, shiftOutX, shiftOutM);
|
||||
|
||||
if (remainingRows==0) remainingRows=AVX_LENGTH;
|
||||
CONCAT(CONCAT(init_masks_for_row_,SIMD_ENGINE), PRECISION)(*tc, rsArr, lastMaskShiftOut, i*AVX_LENGTH+1, remainingRows) ;
|
||||
|
||||
SIMD_TYPE sumM, sumX;
|
||||
sumM = VEC_SET1_VAL(zero);
|
||||
sumX = VEC_SET1_VAL(zero);
|
||||
|
||||
// Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors
|
||||
BITMASK_VEC bitMaskVec ;
|
||||
|
||||
for (int begin_d=1;begin_d<COLS+remainingRows-1;begin_d+=MAIN_TYPE_SIZE)
|
||||
{
|
||||
int numMaskBitsToProcess = std::min(MAIN_TYPE_SIZE, COLS+remainingRows-1-begin_d) ;
|
||||
CONCAT(CONCAT(update_masks_for_cols_,SIMD_ENGINE),PRECISION)((begin_d-1)/MAIN_TYPE_SIZE, bitMaskVec, maskArr, rsArr, lastMaskShiftOut, maskBitCnt) ;
|
||||
|
||||
for (int mbi=0; mbi < numMaskBitsToProcess; ++mbi) {
|
||||
|
||||
CONCAT(CONCAT(computeDistVec,SIMD_ENGINE), PRECISION) (bitMaskVec, distm, _1_distm, distmChosen) ;
|
||||
int ShiftIdx = begin_d + mbi +AVX_LENGTH;
|
||||
|
||||
CONCAT(CONCAT(computeMXY,SIMD_ENGINE), PRECISION)(M_t, X_t, Y_t, M_t_y, M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, M_t_1_y, Y_t_1,
|
||||
pMM, pGAPM, pMX, pXX, pMY, pYY, distmChosen);
|
||||
|
||||
sumM = VEC_ADD(sumM, M_t.d);
|
||||
CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION)(M_t, shiftOutM[ShiftIdx]);
|
||||
|
||||
sumX = VEC_ADD(sumX, X_t.d);
|
||||
CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION)(X_t, shiftOutX[ShiftIdx]);
|
||||
|
||||
CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION)(Y_t_1, shiftOutY[ShiftIdx]);
|
||||
|
||||
M_t_2 = M_t_1; M_t_1 = M_t; X_t_2 = X_t_1; X_t_1 = X_t;
|
||||
Y_t_2 = Y_t_1; Y_t_1 = Y_t; M_t_1_y = M_t_y;
|
||||
|
||||
}
|
||||
}
|
||||
UNION_TYPE sumMX;
|
||||
sumMX.d = VEC_ADD(sumM, sumX);
|
||||
result_avx2 = sumMX.f[remainingRows-1];
|
||||
}
|
||||
return result_avx2;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "vector_defs.h"
|
||||
|
||||
#define SIMD_ENGINE avx
|
||||
#define SIMD_ENGINE_AVX
|
||||
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
|
||||
double getCurrClk();
|
||||
int thread_level_parallelism_enabled = false ;
|
||||
|
||||
|
||||
// Benchmark driver: reads test cases from stdin in batches, computes PairHMM
// likelihoods with the single-precision AVX kernel (falling back to double
// precision when the float result underflows), prints one result per line and
// reports aggregate read/compute/write times.
int main()
{
  // Batch of test cases read from stdin per outer-loop iteration.
  testcase* tc = new testcase[BATCH_SIZE];
  float result[BATCH_SIZE], result_avxf;
  double result_avxd;
  double lastClk = 0.0 ;
  double aggregateTimeRead = 0.0;
  double aggregateTimeCompute = 0.0;
  double aggregateTimeWrite = 0.0;

  // Need to call it once to initialize the static array
  ConvertChar::init() ;

  // char* ompEnvVar = getenv("OMP_NUM_THREADS") ;
  // if (ompEnvVar != NULL && ompEnvVar != "" && ompEnvVar != "1" ) {
  //   thread_level_parallelism_enabled = true ;
  // }

  bool noMoreData = false;
  int count =0;   // number of test cases that required the double-precision fallback
  while (!noMoreData)
  {
    int read_count = BATCH_SIZE;

    // --- read phase: fill the batch until EOF (read_testcase returns -1) ---
    lastClk = getCurrClk() ;
    for (int b=0;b<BATCH_SIZE;b++)
      if (read_testcase(&tc[b])==-1)
      {
        read_count = b;     // only the first b entries are valid this round
        noMoreData = true;
        break;
      }
    aggregateTimeRead += (getCurrClk() - lastClk) ;
    lastClk = getCurrClk() ;

    // --- compute phase ---
    //#pragma omp parallel for schedule(dynamic) if(thread_level_parallelism_enabled)
    for (int b=0;b<read_count;b++)
    {
      // CONCAT(...,s)/CONCAT(...,d) paste the 's'/'d' precision suffix onto
      // the kernel name: compute_full_prob_avxs / compute_full_prob_avxd.
      result_avxf = CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), s)<float>(&tc[b]);

#ifdef RUN_HYBRID
#define MIN_ACCEPTED 1e-28f
      // Float result too close to underflow: redo in double precision.
      // Note the differing scaling constants (2^1020 vs 2^120) match the
      // per-precision INITIAL_CONSTANT used inside the kernels.
      if (result_avxf < MIN_ACCEPTED) {
        count++;
        result_avxd = CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), d)<double>(&tc[b]);
        result[b] = log10(result_avxd) - log10(ldexp(1.0, 1020.f));
      }
      else
        result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f));
#endif

#ifndef RUN_HYBRID
      result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f));
#endif
    }
    aggregateTimeCompute += (getCurrClk() - lastClk) ;
    lastClk = getCurrClk() ;

    // --- write phase: one log10-likelihood per line ---
    for (int b=0;b<read_count;b++)
      printf("%E\n", result[b]);
    aggregateTimeWrite += (getCurrClk() - lastClk) ;
  }

  delete[] tc;
  printf("AVX Read Time: %.2f\n", aggregateTimeRead);
  printf("AVX Compute Time: %.2f\n", aggregateTimeCompute);
  printf("AVX Write Time: %.2f\n", aggregateTimeWrite);
  printf("AVX Total Time: %.2f\n", aggregateTimeRead + aggregateTimeCompute + aggregateTimeWrite);
  printf("# Double called: %d\n", count);

  return 0;
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
#!/bin/bash
# Run GATK HaplotypeCaller with a selectable pair-HMM implementation.
# Usage: $0 [PAIR_HMM_IMPLEMENTATION]   (default: VECTOR_LOGLESS_CACHING)
set -euo pipefail

# Clean outputs/logs from previous runs in the working directory.
rm -f -- *.txt *.log

readonly GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable

pair_hmm_implementation="VECTOR_LOGLESS_CACHING"
if [[ "$#" -ge 1 ]]; then
  pair_hmm_implementation=$1
fi

#-Djava.library.path is needed if you wish to override the default 'packed' library
#java -jar "$GSA_ROOT_DIR/target/GenomeAnalysisTK.jar" -T HaplotypeCaller \
java -Djava.library.path="${GSA_ROOT_DIR}/public/VectorPairHMM/src/main/c++" \
  -jar "${GSA_ROOT_DIR}/target/GenomeAnalysisTK.jar" -T HaplotypeCaller \
  --dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
  -R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
  -I /data/simulated/sim1M_pairs_final.bam \
  -stand_call_conf 50.0 \
  -stand_emit_conf 10.0 \
  --pair_hmm_implementation "$pair_hmm_implementation" \
  -o output.raw.snps.indels.vcf

# Alternate inputs/references kept for quick swapping:
#--pair_hmm_implementation JNI_LOGLESS_CACHING \
#-XL unmapped \
#-I /data/simulated/sim1M_pairs_final.bam \
#-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
#-I /data/broad/samples/joint_variant_calling/NA12878_high_coverage_alignment/NA12878.mapped.ILLUMINA.bwa.CEU.high_coverage_pcr_free.20130906.bam \
#-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly18.fasta \
#-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly19.fasta \
#-R /data/broad/samples/joint_variant_calling/broad_reference/ucsc.hg19.fasta \
#-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
#-R /data/broad/samples/joint_variant_calling/broad_reference/human_g1k_v37_decoy.fasta \
#--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
#--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/dbsnp_138.hg19.vcf \
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifdef PRECISION
|
||||
|
||||
#ifdef SIMD_ENGINE_AVX
|
||||
|
||||
// Shift the 256-bit vector x left by one MAIN_TYPE element: shiftIn fills the
// vacated low element, and the element pushed out of the top is returned via
// shiftOut. AVX(1) has no cross-lane byte shift, so the vector is split into
// two 128-bit lanes which are shifted independently and the element crossing
// the lane boundary is moved by hand. Name is macro-expanded per SIMD engine
// and precision (e.g. _vector_shiftavxs / _vector_shiftavxd).
inline void CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut)
{
    IF_128 xlow , xhigh;
    /* cast x to xlow (low 128-bit lane, no data movement) */
    xlow.f = VEC_CAST_256_128(x.d);
    /* extract x,1 to xhigh (high 128-bit lane) */
    xhigh.f = VEC_EXTRACT_128(x.d, 1);
    /* extract xlow[3] — the element that will cross into the high lane */
    IF_128 shiftOutL128;
    shiftOutL128.i = _mm_srli_si128(xlow.i, SHIFT_CONST1);
    /* extract xhigh[3] — the element leaving the vector entirely */
    IF_MAIN_TYPE shiftOutH;
    shiftOutH.i = VEC_EXTRACT_UNIT(xhigh.i, SHIFT_CONST2);
    shiftOut = shiftOutH.f;
    /* shift xlow left by one element */
    xlow.i = _mm_slli_si128 (xlow.i, SHIFT_CONST3);
    /* shift xhigh left by one element */
    xhigh.i = _mm_slli_si128 (xhigh.i, SHIFT_CONST3);
    /*movss shiftIn to xlow[0] */
    _128_TYPE shiftIn128 = VEC_SET1_VAL128(shiftIn);
    xlow.f = VEC_MOVE(xlow.f , shiftIn128);
    /*movss xlow[3] to xhigh[0] — complete the cross-lane carry */
    xhigh.f = VEC_MOVE(xhigh.f, shiftOutL128.f);
    /* cast xlow back to the low lane of x */
    x.d = VEC_CAST_128_256(xlow.f);
    /* insert xhigh into the high lane of x */
    x.d = VEC_INSERT_VAL(x.d, xhigh.f, 1);
}
|
||||
|
||||
|
||||
// Same left-by-one-element shift as _vector_shift above, but for the final
// iteration where the element shifted out of the top is no longer needed,
// so the high-element extraction is skipped.
inline void CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn)
{
    IF_128 xlow , xhigh;
    /* cast x to xlow (low 128-bit lane) */
    xlow.f = VEC_CAST_256_128(x.d);
    /* extract x,1 to xhigh (high 128-bit lane) */
    xhigh.f = VEC_EXTRACT_128(x.d, 1);
    /* extract xlow[3] — element crossing into the high lane */
    IF_128 shiftOutL128;
    shiftOutL128.i = _mm_srli_si128(xlow.i, SHIFT_CONST1);
    /* shift xlow left by one element */
    xlow.i = _mm_slli_si128 (xlow.i, SHIFT_CONST3);
    /* shift xhigh left by one element */
    xhigh.i = _mm_slli_si128 (xhigh.i, SHIFT_CONST3);
    /*movss shiftIn to xlow[0] */
    _128_TYPE shiftIn128 = VEC_SET1_VAL128(shiftIn);
    xlow.f = VEC_MOVE(xlow.f , shiftIn128);
    /*movss xlow[3] to xhigh[0] — complete the cross-lane carry */
    xhigh.f = VEC_MOVE(xhigh.f, shiftOutL128.f);
    /* cast xlow back to the low lane of x */
    x.d = VEC_CAST_128_256(xlow.f);
    /* insert xhigh into the high lane of x */
    x.d = VEC_INSERT_VAL(x.d, xhigh.f, 1);
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef SIMD_ENGINE_SSE
|
||||
|
||||
// SSE variant: shift the 128-bit vector x left by one MAIN_TYPE element.
// shiftIn fills the vacated low element; the element pushed out of the top
// is returned via shiftOut. Single lane, so no cross-lane fix-up is needed.
inline void CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut)
{
    IF_MAIN_TYPE tempIn, tempOut;
    tempIn.f = shiftIn;
    /* extract the high (outgoing) element */
    tempOut.i = VEC_EXTRACT_UNIT(x.i, SHIFT_CONST1);
    shiftOut = tempOut.f;
    /* shift left by one element */
    x.i = _mm_slli_si128(x.i, SHIFT_CONST2);
    /* insert shiftIn into the low element */
    x.i = VEC_INSERT_UNIT(x.i , tempIn.i, SHIFT_CONST3);
}
|
||||
|
||||
// SSE variant of the final-iteration shift: same as _vector_shift above but
// the outgoing high element is discarded rather than extracted.
inline void CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn)
{
    IF_MAIN_TYPE temp; temp.f = shiftIn;
    /* shift left by one element */
    x.i = _mm_slli_si128(x.i, SHIFT_CONST2);
    /* insert shiftIn into the low element */
    x.i = VEC_INSERT_UNIT(x.i , temp.i, SHIFT_CONST3);
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,43 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
// SSE compilation unit: selects the SSE engine, then textually includes the
// shift helpers and the PairHMM kernel twice — once per precision — so the
// CONCAT-based naming scheme stamps out _sses (float) and _ssed (double)
// versions of every function.
#include "template.h"

// Make sure any previously selected engine does not leak into this unit.
#undef SIMD_ENGINE
#undef SIMD_ENGINE_AVX

#define SIMD_ENGINE sse
#define SIMD_ENGINE_SSE

// Single-precision instantiation.
#include "define-sse-float.h"
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"

// Double-precision instantiation.
#include "define-sse-double.h"
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"

// Explicit template instantiations exported to the dispatcher in utils.cc.
template double compute_full_prob_ssed<double>(testcase* tc, double* nextlog);
template float compute_full_prob_sses<float>(testcase* tc, float* nextlog);
|
||||
|
|
@ -0,0 +1,320 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef TEMPLATES_H_
|
||||
#define TEMPLATES_H_
|
||||
|
||||
#include "headers.h"
|
||||
|
||||
// Indices into the PairHMM transition-probability array.
#define MM 0
#define GapM 1
#define MX 2
#define XX 3
#define MY 4
#define YY 5

//#define MROWS 500
//#define MCOLS 1000

// Token pasting: CAT pastes its arguments directly; CONCAT adds one level of
// macro expansion so arguments that are themselves macros (SIMD_ENGINE,
// PRECISION) are expanded before pasting.
// FIX: the original read "X####Y", which lexes as two consecutive ##
// operators and is ill-formed under a standard-conforming preprocessor
// (GCC/Clang reject it); "X##Y" is the correct single paste.
#define CAT(X,Y) X##Y
#define CONCAT(X,Y) CAT(X,Y)

// 32-byte alignment attribute required for aligned AVX loads/stores.
#define ALIGNED __attribute__((aligned(32)))
|
||||
|
||||
// Union views over SIMD registers so the same storage can be addressed as a
// packed vector, two half-width lanes, or scalar lanes. Float family first.

// 256-bit single-precision view: full vector, two 128-bit halves, 8 floats.
typedef union __attribute__((aligned(32))) {
    ALIGNED __m256 ALIGNED d;
    ALIGNED __m128i ALIGNED s[2];
    ALIGNED float ALIGNED f[8];
    ALIGNED __m256i ALIGNED i;
} ALIGNED mix_F ALIGNED;

// 128-bit single-precision view: full vector, two 64-bit halves, 4 floats.
typedef union __attribute__((aligned(32))) {
    ALIGNED __m128 ALIGNED d;
    ALIGNED __m64 ALIGNED s[2];
    ALIGNED float ALIGNED f[4];
    ALIGNED __m128i ALIGNED i;
} ALIGNED mix_F128 ALIGNED;

// Per-lane bit masks for the single-precision 128-bit kernels.
typedef union ALIGNED {
    __m128i vec ;
    __m128 vecf ;
    uint32_t masks[4] ;
} MaskVec_F ;

// Per-lane bit masks for the 64-bit (half-width) single-precision case.
typedef union ALIGNED {
    __m64 vec ;
    __m64 vecf ;
    uint32_t masks[2] ;
} MaskVec_F128 ;

// Integer/float reinterpretation of a whole 128-bit register (float flavor).
typedef union ALIGNED
{
    ALIGNED __m128i ALIGNED i;
    ALIGNED __m128 ALIGNED f;
} ALIGNED IF_128f ALIGNED;

// Integer/float reinterpretation of a single 32-bit scalar.
typedef union ALIGNED
{
    ALIGNED int ALIGNED i;
    ALIGNED float ALIGNED f;
} ALIGNED IF_32 ALIGNED;

// Double-precision family, mirroring the float unions above.

// 256-bit double-precision view: full vector, two 128-bit halves, 4 doubles.
typedef union __attribute__((aligned(32))) {
    ALIGNED __m256d ALIGNED d;
    ALIGNED __m128i ALIGNED s[2];
    ALIGNED double ALIGNED f[4];
    ALIGNED __m256i ALIGNED i;
} ALIGNED mix_D ALIGNED;

// 128-bit double-precision view: full vector, two 64-bit halves, 2 doubles.
typedef union __attribute__((aligned(32))) {
    ALIGNED __m128d ALIGNED d;
    ALIGNED __m64 ALIGNED s[2];
    ALIGNED double ALIGNED f[2];
    ALIGNED __m128i ALIGNED i;
} ALIGNED mix_D128 ALIGNED;

// Per-lane bit masks for the double-precision 128-bit kernels.
typedef union ALIGNED {
    __m128i vec ;
    __m128d vecf ;
    uint64_t masks[2] ;
} MaskVec_D ;

// Per-lane bit mask for the 64-bit (half-width) double-precision case.
typedef union ALIGNED {
    __m64 vec ;
    __m64 vecf ;
    uint64_t masks[1] ;
} MaskVec_D128 ;

// Integer/float reinterpretation of a whole 128-bit register (double flavor).
typedef union ALIGNED
{
    ALIGNED __m128i ALIGNED i;
    ALIGNED __m128d ALIGNED f;
} ALIGNED IF_128d ALIGNED;

// Integer/float reinterpretation of a single 64-bit scalar.
typedef union ALIGNED
{
    ALIGNED int64_t ALIGNED i;
    ALIGNED double ALIGNED f;
} ALIGNED IF_64 ALIGNED;
|
||||
|
||||
|
||||
// Numeric limits and table geometry for the Jacobian-logarithm approximation
// (approximateLog10SumLog10 in ContextBase below).
#define MAX_QUAL 254
#define MAX_JACOBIAN_TOLERANCE 8.0
#define JACOBIAN_LOG_TABLE_STEP 0.0001
#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP)
#define MAXN 70000
#define LOG10_CACHE_SIZE (4*MAXN) // we need to be able to go up to 2*(2N) when calculating some of the coefficients
#define JACOBIAN_LOG_TABLE_SIZE ((int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1)
|
||||
|
||||
// Precision-parameterized numeric context shared by the PairHMM kernels:
// holds the Phred->probability table, scaling constants, and the static
// lookup tables for the Jacobian-logarithm approximation of
// log10(10^x + 10^y).
template<class NUMBER>
struct ContextBase
{
public:
    NUMBER ph2pr[128];                 // Phred score -> probability (filled by Context<> subclasses)
    NUMBER INITIAL_CONSTANT;           // scaling constant to keep probabilities in range
    NUMBER LOG10_INITIAL_CONSTANT;     // log10 of the above, subtracted from final results
    NUMBER RESULT_THRESHOLD;           // below this, callers fall back to higher precision

    static bool staticMembersInitializedFlag;  // guards one-time table initialization (not thread-safe — NOTE(review): confirm single-threaded init)
    static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
    static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];  // triangular table indexed by (maxQual, minQual)

    // One-time initialization of the static lookup tables.
    static void initializeStaticMembers()
    {
        if(!staticMembersInitializedFlag)
        {
            //Order of calls important - Jacobian first, then MatchToMatch
            initializeJacobianLogTable();
            initializeMatchToMatchProb();
            staticMembersInitializedFlag = true;
        }
    }

    // Tables are static arrays, so "deletion" just clears the flag.
    static void deleteStaticMembers()
    {
        if(staticMembersInitializedFlag)
        {
            staticMembersInitializedFlag = false;
        }
    }

    //Called only once during library load - don't bother to optimize with single precision fp
    // jacobianLogTable[k] = log10(1 + 10^(-k*step)), the correction term of the
    // Jacobian logarithm identity.
    static void initializeJacobianLogTable()
    {
        for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) {
            jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP)));
        }
    }

    //Called only once per library load - don't bother optimizing with single fp
    // matchToMatchProb[tri(i)+j] = 1 - (P(insertion) + P(deletion)) for quality
    // pair (i, j), stored in a triangular layout with j <= i.
    static void initializeMatchToMatchProb()
    {
        double LN10 = log(10);
        double INV_LN10 = 1.0/LN10;
        for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i)
            for (int j = 0; j <= i; j++) {
                double log10Sum = approximateLog10SumLog10(-0.1*i, -0.1*j);
                double matchToMatchLog10 =
                    log1p(-std::min(1.0,pow(10,log10Sum))) * INV_LN10;
                matchToMatchProb[offset + j] = (NUMBER)(pow(10,matchToMatchLog10));
            }
    }

    //Called during computation - use single precision where possible
    // Round-half-away-from-zero without calling into libm.
    static int fastRound(NUMBER d) {
        return (d > ((NUMBER)0.0)) ? (int) (d + ((NUMBER)0.5)) : (int) (d - ((NUMBER)0.5));
    }

    //Called during computation - use single precision where possible
    // Approximates log10(10^small + 10^big) via the Jacobian logarithm
    // identity and the precomputed correction table.
    static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) {
        // make sure small is really the smaller value
        if (small > big) {
            NUMBER t = big;
            big = small;
            small = t;
        }

        // NOTE(review): isinf() == -1 relies on glibc returning the sign;
        // ISO C only guarantees non-zero for any infinity — confirm portability.
        if (isinf(small) == -1 || isinf(big) == -1)
            return big;

        NUMBER diff = big - small;
        if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE))
            return big;  // correction is negligible beyond the table range

        // OK, so |y-x| < tol: we use the following identity then:
        // we need to compute log10(10^x + 10^y)
        // By Jacobian logarithm identity, this is equal to
        // max(x,y) + log10(1+10^-abs(x-y))
        // we compute the second term as a table lookup with integer quantization
        // we have pre-stored correction for 0,0.1,0.2,... 10.0
        int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding
        return big + jacobianLogTable[ind];
    }
};
|
||||
|
||||
// Generic Context: only the double and float specializations below are used.
template<class NUMBER>
struct Context : public ContextBase<NUMBER>
{};

// Double-precision context: larger scaling constant, no fallback threshold.
template<>
struct Context<double> : public ContextBase<double>
{
    Context():ContextBase<double>()
    {
        // Phred -> probability: 10^(-q/10).
        for (int x = 0; x < 128; x++)
            ph2pr[x] = pow(10.0, -((double)x) / 10.0);

        // 2^1020 keeps intermediate products well inside double range.
        INITIAL_CONSTANT = ldexp(1.0, 1020.0);
        LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
        RESULT_THRESHOLD = 0.0;  // double is the last resort: never fall back
    }

    double LOG10(double v){ return log10(v); }
    inline double POW(double b, double e) { return pow(b,e); }

    // Precision-coercion helpers used by the shared kernel code.
    static double _(double n){ return n; }
    static double _(float n){ return ((double) n); }
};
|
||||
|
||||
// Single-precision context: smaller scaling constant and a non-zero
// RESULT_THRESHOLD so callers can detect underflow and retry in double.
template<>
struct Context<float> : public ContextBase<float>
{
    Context() : ContextBase<float>()
    {
        // Phred -> probability: 10^(-q/10).
        for (int x = 0; x < 128; x++)
        {
            ph2pr[x] = powf(10.f, -((float)x) / 10.f);
        }

        // 2^120 keeps intermediate products inside float range.
        INITIAL_CONSTANT = ldexpf(1.f, 120.f);
        LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
        RESULT_THRESHOLD = ldexpf(1.f, -110.f);  // below this, redo in double
    }

    float LOG10(float v){ return log10f(v); }
    inline float POW(float b, float e) { return powf(b,e); }

    // Precision-coercion helpers used by the shared kernel code.
    static float _(double n){ return ((float) n); }
    static float _(float n){ return n; }
};
|
||||
|
||||
// Sets `output` to the match->match transition probability for the given
// insertion/deletion qualities. Uses the precomputed triangular table when
// both qualities fit (<= MAX_QUAL); otherwise computes it directly. Expects a
// `ctx` Context and a NUMBER typedef in the expansion scope.
#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual) \
{ \
    int minQual = delQual; \
    int maxQual = insQual; \
    if (insQual <= delQual) \
    { \
        minQual = insQual; \
        maxQual = delQual; \
    } \
    (output) = (MAX_QUAL < maxQual) ? \
        ((NUMBER)1.0) - ctx.POW(((NUMBER)10), ctx.approximateLog10SumLog10(((NUMBER)-0.1)*minQual, ((NUMBER)-0.1)*maxQual)) \
        : ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; \
}
|
||||
|
||||
// One PairHMM test case: a haplotype, a read, and the read's per-base
// quality strings (base quality q, insertion i, deletion d, gap-continuation c).
typedef struct
{
    int rslen, haplen;   // read length and haplotype length
    /*int *q, *i, *d, *c;*/
    /*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/
    char *q, *i, *d, *c; // per-base qualities, each of length rslen
    char *hap, *rs;      // haplotype and read base strings
    int *ihap;           // integer-converted copies (currently unused — see commented code in read_testcase)
    int *irs;
} testcase;

// Converts a Phred+33 character to its integer quality score.
int normalize(char c);
// Reads one testcase from ifp (stdin when ifp == 0); returns 0 on success, -1 on EOF.
int read_testcase(testcase *tc, FILE* ifp=0);
|
||||
|
||||
|
||||
// Below MIN_ACCEPTED the float result is considered underflowed and the
// computation is redone in double precision (see do_compute in utils.cc).
#define MIN_ACCEPTED 1e-28f
#define NUM_DISTINCT_CHARS 5
#define AMBIG_CHAR 4

// Maps nucleotide characters to small integer codes used by the kernels.
// Note the code order here is A=0, C=1, T=2, G=3, N=4.
class ConvertChar {

    // NOTE(review): sized 255 but indexed by a uint8_t, so input byte 255
    // would read one past the end; the definition in utils.cc must change in
    // lockstep if this is resized to 256 — confirm inputs are ASCII bases.
    static uint8_t conversionTable[255] ;

public:

    // Populates the table; must be called once before get() is used.
    static void init() {
        assert (NUM_DISTINCT_CHARS == 5) ;
        assert (AMBIG_CHAR == 4) ;

        conversionTable['A'] = 0 ;
        conversionTable['C'] = 1 ;
        conversionTable['T'] = 2 ;
        conversionTable['G'] = 3 ;
        conversionTable['N'] = 4 ;
    }

    // Returns the code for a base character (undefined for non-base bytes).
    static inline uint8_t get(uint8_t input) {
        return conversionTable[input] ;
    }

};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,496 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "vector_defs.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
using namespace std;

//static members from ConvertChar
uint8_t ConvertChar::conversionTable[255];
//Global function pointers in utils.h — bound to the best SIMD variant by
//initialize_function_pointers() below.
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0;
//Static members in ContextBase (one set per precision).
bool ContextBase<double>::staticMembersInitializedFlag = false;
double ContextBase<double>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
double ContextBase<double>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
bool ContextBase<float>::staticMembersInitializedFlag = false;
float ContextBase<float>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE];
float ContextBase<float>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1];
|
||||
|
||||
|
||||
// CPU feature probes. These use the ICC-specific _may_i_use_cpu_feature
// intrinsic; the commented-out inline-asm CPUID versions are the portable
// fallbacks (bit positions per the CPUID leaf-1 ECX layout).

// True if the CPU supports AVX.
bool is_avx_supported()
{
    return (_may_i_use_cpu_feature(_FEATURE_AVX) > 0);
    //int ecx = 0, edx = 0, ebx = 0;
    //__asm__("cpuid"
    //: "=b" (ebx),
    //"=c" (ecx),
    //"=d" (edx)
    //: "a" (1)
    //);
    //return ((ecx >> 28)&1) == 1;
}

// True if the CPU supports SSE4.1.
bool is_sse41_supported()
{
    return (_may_i_use_cpu_feature(_FEATURE_SSE4_1) > 0);
    //int ecx = 0, edx = 0, ebx = 0;
    //__asm__("cpuid"
    //: "=b" (ebx),
    //"=c" (ecx),
    //"=d" (edx)
    //: "a" (1)
    //);
    //return ((ecx >> 19)&1) == 1;
}

// True if the CPU supports SSE4.2.
bool is_sse42_supported()
{
    return (_may_i_use_cpu_feature(_FEATURE_SSE4_2) > 0);
    //int ecx = 0, edx = 0, ebx = 0;
    //__asm__("cpuid"
    //: "=b" (ebx),
    //"=c" (ecx),
    //"=d" (edx)
    //: "a" (1)
    //);
    //return ((ecx >> 20)&1) == 1;
}
|
||||
|
||||
uint64_t get_machine_capabilities()
|
||||
{
|
||||
uint64_t machine_mask = 0ull;
|
||||
if(is_avx_supported())
|
||||
machine_mask |= (1 << AVX_CUSTOM_IDX);
|
||||
if(is_sse42_supported())
|
||||
machine_mask |= (1 << SSE42_CUSTOM_IDX);
|
||||
if(is_sse41_supported())
|
||||
machine_mask |= (1 << SSE41_CUSTOM_IDX);
|
||||
return machine_mask;
|
||||
}
|
||||
|
||||
// Binds the global PairHMM function pointers to the fastest implementation
// that is both supported by the CPU and enabled in `mask` (bit positions are
// the *_CUSTOM_IDX constants). Falls back to the scalar C++ version.
void initialize_function_pointers(uint64_t mask)
{
    //mask = 0ull;
    //mask = (1 << SSE41_CUSTOM_IDX);
    if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX)))
    {
        cout << "Using AVX accelerated implementation of PairHMM\n";
        g_compute_full_prob_float = compute_full_prob_avxs<float>;
        g_compute_full_prob_double = compute_full_prob_avxd<double>;
    }
    else
        // SSE4.2 implies SSE4.1, so either mask bit selects the SSE4.1 build.
        if(is_sse41_supported() && (mask & ((1<< SSE41_CUSTOM_IDX) | (1<<SSE42_CUSTOM_IDX))))
        {
            cout << "Using SSE4.1 accelerated implementation of PairHMM\n";
            g_compute_full_prob_float = compute_full_prob_sses<float>;
            g_compute_full_prob_double = compute_full_prob_ssed<double>;
        }
        else
        {
            cout << "Using un-vectorized C++ implementation of PairHMM\n";
            g_compute_full_prob_float = compute_full_prob<float>;
            g_compute_full_prob_double = compute_full_prob<double>;
        }
}
|
||||
|
||||
// Converts a printable Phred+33 quality character to its integer score.
int normalize(char c)
{
    const int phred_offset = 33;  // ASCII '!' maps to quality 0
    return static_cast<int>(c) - phred_offset;
}
|
||||
|
||||
// Reads one whitespace-separated testcase line (hap, read, q, i, d, c) from
// ifp (stdin when ifp == 0) into *tc, allocating all buffers with malloc.
// Base qualities are floored at 6. Returns 0 on success, -1 on EOF or parse
// failure.
// NOTE(review): do_compute() releases testcase buffers with delete[]; this
// reader allocates with malloc — confirm which reader feeds that path.
int read_testcase(testcase *tc, FILE* ifp)
{
    char *q, *i, *d, *c, *line = NULL;
    int _q, _i, _d, _c;
    int x;
    // FIX: getline() writes a size_t through its second argument; the
    // original declared `int size` and cast &size to size_t*, which is
    // undefined behavior (8-byte store into a 4-byte object) on LP64.
    size_t size = 0;
    ssize_t read;

    read = getline(&line, &size, ifp == 0 ? stdin : ifp);
    if (read == -1)
    {
        free(line);
        return -1;
    }

    // Each %s field is at most the whole line long, so `size` bytes per
    // field is always sufficient for the sscanf below.
    tc->hap = (char *) malloc(size);
    tc->rs = (char *) malloc(size);
    q = (char *) malloc(size);
    i = (char *) malloc(size);
    d = (char *) malloc(size);
    c = (char *) malloc(size);

    if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6)
    {
        // FIX: the original leaked every allocation on malformed input.
        free(tc->hap);
        free(tc->rs);
        tc->hap = NULL;
        tc->rs = NULL;
        free(q);
        free(i);
        free(d);
        free(c);
        free(line);
        return -1;
    }

    tc->haplen = strlen(tc->hap);
    tc->rslen = strlen(tc->rs);
    // All four quality strings must be exactly as long as the read.
    assert(strlen(q) == (size_t) tc->rslen);
    assert(strlen(i) == (size_t) tc->rslen);
    assert(strlen(d) == (size_t) tc->rslen);
    assert(strlen(c) == (size_t) tc->rslen);
    //assert(tc->rslen < MROWS);
    //tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
    //tc->irs = (int *) malloc(tc->rslen*sizeof(int));

    tc->q = (char *) malloc(sizeof(char) * tc->rslen);
    tc->i = (char *) malloc(sizeof(char) * tc->rslen);
    tc->d = (char *) malloc(sizeof(char) * tc->rslen);
    tc->c = (char *) malloc(sizeof(char) * tc->rslen);

    for (x = 0; x < tc->rslen; x++)
    {
        _q = normalize(q[x]);
        _i = normalize(i[x]);
        _d = normalize(d[x]);
        _c = normalize(c[x]);
        // Base quality is floored at 6 (matches GATK's minimum usable quality).
        tc->q[x] = (_q < 6) ? 6 : _q;
        //tc->q[x] = _q;
        tc->i[x] = _i;
        tc->d[x] = _d;
        tc->c[x] = _c;
        //tc->irs[x] = tc->rs[x];
    }
    //for (x = 0; x < tc->haplen; x++)
    //tc->ihap[x] = tc->hap[x];

    free(q);
    free(i);
    free(d);
    free(c);
    free(line);

    return 0;
}
|
||||
|
||||
unsigned MAX_LINE_LENGTH = 65536;
|
||||
// Parses the leading integer from a decimal string via stream extraction
// (returns 0 on C++11 streams when no integer can be read).
int convToInt(std::string s)
{
    std::istringstream parser(s);
    int value = 0;
    parser >> value;
    return value;
}
|
||||
|
||||
// Reads the next non-empty logical line from fptr — growing the buffer for
// lines longer than MAX_LINE_LENGTH — and splits it on whitespace into
// `tokens`. Leaves `tokens` empty at EOF.
void tokenize(std::ifstream& fptr, std::vector<std::string>& tokens)
{
    int i = 0;
    std::string tmp;
    std::vector<std::string> myVec;
    vector<char> line;
    line.clear();
    line.resize(MAX_LINE_LENGTH);
    vector<char> tmpline;
    tmpline.clear();
    tmpline.resize(MAX_LINE_LENGTH);
    myVec.clear();

    while(!fptr.eof())
    {
        i = 0;
        bool still_read_line = true;
        unsigned line_position = 0;
        // Re-assemble one logical line: getline() sets failbit when the line
        // did not fit in MAX_LINE_LENGTH, in which case we keep appending
        // chunks into `line` (doubling it as needed) until the line ends.
        while(still_read_line)
        {
            fptr.getline(&(tmpline[0]), MAX_LINE_LENGTH);
            if(line_position + MAX_LINE_LENGTH > line.size())
                line.resize(2*line.size());
            // NOTE: this inner `i` shadows the outer token counter `i`.
            for(unsigned i=0;i<MAX_LINE_LENGTH && tmpline[i] != '\0';++i,++line_position)
                line[line_position] = tmpline[i];
            // !fail() means the full line (or EOF) was reached — stop appending.
            if(fptr.eof() || !fptr.fail())
            {
                still_read_line = false;
                line[line_position++] = '\0';
            }
        }
        // Whitespace-split the assembled line.
        std::istringstream kap(&(line[0]));

        while(!kap.eof())
        {
            kap >> std::skipws >> tmp;
            if(tmp != "")
            {
                myVec.push_back(tmp);
                ++i;
                //std::cout <<tmp <<"#";
            }
            tmp = "";
        }
        //std::cout << "\n";
        // Stop at the first line that produced any tokens.
        if(myVec.size() > 0)
            break;
    }
    tokens.clear();
    //std::cout << "Why "<<myVec.size()<<"\n";
    tokens.resize(myVec.size());
    for(i=0;i<(int)myVec.size();++i)
        tokens[i] = myVec[i];
    line.clear();
    tmpline.clear();
}
|
||||
|
||||
// Reads one testcase in the "modified" format: tokens are
// [hap, read, q[0..rslen), i[0..rslen), d[0..rslen), c[0..rslen)] with each
// quality given as a decimal integer token. Buffers are allocated with
// new[] (matching the delete[] in do_compute). When `reformat` is true,
// also appends the testcase in the compact Phred+33 format to
// reformat/debug_dump.txt. Returns the token count, or -1 at EOF.
int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
{
    // Tracks whether the debug dump file should be truncated or appended to.
    static bool first_call = true;
    vector<string> tokens;
    tokens.clear();
    tokenize(fptr, tokens);
    if(tokens.size() == 0)
        return -1;
    // +2 slack bytes; strings copied without a NUL terminator — lengths are
    // carried separately in haplen/rslen.
    tc->hap = new char[tokens[0].size()+2];
    tc->haplen = tokens[0].size();
    memcpy(tc->hap, tokens[0].c_str(), tokens[0].size());
    tc->rs = new char[tokens[1].size()+2];
    tc->rslen = tokens[1].size();
    tc->q = new char[tc->rslen];
    tc->i = new char[tc->rslen];
    tc->d = new char[tc->rslen];
    tc->c = new char[tc->rslen];
    //cout << "Lengths "<<tc->haplen <<" "<<tc->rslen<<"\n";
    memcpy(tc->rs, tokens[1].c_str(),tokens[1].size());
    // 2 leading tokens + 4 quality arrays of rslen tokens each.
    assert(tokens.size() == 2 + 4*(tc->rslen));
    //assert(tc->rslen < MROWS);
    for(unsigned j=0;j<tc->rslen;++j)
        tc->q[j] = (char)convToInt(tokens[2+0*tc->rslen+j]);
    for(unsigned j=0;j<tc->rslen;++j)
        tc->i[j] = (char)convToInt(tokens[2+1*tc->rslen+j]);
    for(unsigned j=0;j<tc->rslen;++j)
        tc->d[j] = (char)convToInt(tokens[2+2*tc->rslen+j]);
    for(unsigned j=0;j<tc->rslen;++j)
        tc->c[j] = (char)convToInt(tokens[2+3*tc->rslen+j]);

    if(reformat)
    {
        // Re-emit the testcase with qualities re-encoded as Phred+33 chars.
        ofstream ofptr;
        ofptr.open("reformat/debug_dump.txt",first_call ? ios::out : ios::app);
        assert(ofptr.is_open());
        ofptr << tokens[0] << " ";
        ofptr << tokens[1] << " ";
        for(unsigned j=0;j<tc->rslen;++j)
            ofptr << ((char)(tc->q[j]+33));
        ofptr << " ";
        for(unsigned j=0;j<tc->rslen;++j)
            ofptr << ((char)(tc->i[j]+33));
        ofptr << " ";
        for(unsigned j=0;j<tc->rslen;++j)
            ofptr << ((char)(tc->d[j]+33));
        ofptr << " ";
        for(unsigned j=0;j<tc->rslen;++j)
            ofptr << ((char)(tc->c[j]+33));
        ofptr << " 0 false\n";

        ofptr.close();
        first_call = false;
    }

    return tokens.size();
}
|
||||
|
||||
// Returns the current wall-clock time in seconds (microsecond resolution).
double getCurrClk() {
    struct timeval now;
    gettimeofday(&now, NULL);
    double seconds = (double) now.tv_sec;
    double micros = (double) now.tv_usec;
    return seconds + micros / 1000000.0;
}
|
||||
|
||||
// Reads the x86 time-stamp counter (raw cycle count; x86-only inline asm).
inline unsigned long long rdtsc(void)
{
    unsigned hi, lo;
    __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
    // RDTSC returns the 64-bit counter split across EDX:EAX.
    return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}
|
||||
|
||||
// Captures the current CLOCK_REALTIME timestamp into *store_struct.
void get_time(struct timespec* store_struct)
{
    clock_gettime(CLOCK_REALTIME, store_struct);
}

// Nanoseconds elapsed since prev_time (per CLOCK_REALTIME).
// NOTE(review): the signed difference is cast to uint64_t; a clock step
// backwards (REALTIME is not monotonic) would wrap to a huge value — confirm
// CLOCK_MONOTONIC was not intended.
uint64_t diff_time(struct timespec& prev_time)
{
    struct timespec curr_time;
    clock_gettime(CLOCK_REALTIME, &curr_time);
    return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec));
}
|
||||
|
||||
|
||||
#ifdef USE_PAPI
|
||||
#include "papi.h"
|
||||
#define NUM_PAPI_COUNTERS 4
|
||||
#endif
|
||||
|
||||
// Benchmark/validation driver: streams testcases from `filename` (legacy
// format when use_old_read_testcase, else the "mod" format), processes them
// in batches of BATCH_SIZE with the dispatched SIMD kernels (float first,
// double fallback on underflow), optionally cross-checks against the scalar
// double baseline when do_check, and prints timing/PAPI statistics.
void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, bool do_check)
{
    FILE* fptr = 0;
    ifstream ifptr;
    if(use_old_read_testcase)
    {
        fptr = fopen(filename,"r");
        assert(fptr);
    }
    else
    {
        ifptr.open(filename);
        assert(ifptr.is_open());
    }
    vector<testcase> tc_vector;
    tc_vector.clear();
    testcase tc;
    uint64_t vector_compute_time = 0;    // ns spent in the vectorized path
    uint64_t baseline_compute_time = 0;  // ns spent in the scalar check path
    uint64_t num_double_calls = 0;       // float-underflow fallbacks
    unsigned num_testcases = 0;
    bool all_ok = do_check ? true : false;
#ifdef USE_PAPI
    uint32_t all_mask = (0);
    uint32_t no_usr_mask = (1 << 16); //bit 16 user mode, bit 17 kernel mode
    uint32_t no_kernel_mask = (1 << 17); //bit 16 user mode, bit 17 kernel mode
    PAPI_num_counters();
    int events[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
    char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "itlb_walk_cycles", "dtlb_load_walk_cycles", "dtlb_store_walk_cycles" };
    assert(PAPI_event_name_to_code("UNHALTED_REFERENCE_CYCLES:u=1:k=1",&(events[0])) == PAPI_OK);
    assert(PAPI_event_name_to_code("ITLB_MISSES:WALK_DURATION", &(events[1])) == PAPI_OK);
    assert(PAPI_event_name_to_code("DTLB_LOAD_MISSES:WALK_DURATION", &(events[2])) == PAPI_OK);
    assert(PAPI_event_name_to_code("DTLB_STORE_MISSES:WALK_DURATION", &(events[3])) == PAPI_OK);
    long long values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
    long long accum_values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
#endif
    while(1)
    {
        // break_value < 0 signals EOF; a pending partial batch is still flushed.
        int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
        if(break_value >= 0)
            tc_vector.push_back(tc);
        if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0))
        {
            vector<double> results_vec;
            vector<double> baseline_results_vec;
            results_vec.clear();
            baseline_results_vec.clear();
            results_vec.resize(tc_vector.size());
            baseline_results_vec.resize(tc_vector.size());
            struct timespec start_time;
#ifdef USE_PAPI
            assert(PAPI_start_counters(events, NUM_PAPI_COUNTERS) == PAPI_OK);
#endif
            get_time(&start_time);
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
#ifdef DO_REPEAT_PROFILING
            for(unsigned z=0;z<10;++z)
#endif
            {
                for(unsigned i=0;i<tc_vector.size();++i)
                {
                    testcase& tc = tc_vector[i];
                    // Try float first; redo in double when the float result
                    // underflowed below MIN_ACCEPTED.
                    float result_avxf = g_compute_full_prob_float(&tc, 0);
                    double result = 0;
                    if (result_avxf < MIN_ACCEPTED) {
                        double result_avxd = g_compute_full_prob_double(&tc, 0);
                        // Undo the 2^1020 scaling constant in log10 space.
                        result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
                        // NOTE(review): unsynchronized increment inside the
                        // OpenMP region — count may be inexact under threads.
                        ++num_double_calls;
                    }
                    else
                        // Undo the float path's 2^120 scaling constant.
                        result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
#ifdef DUMP_COMPUTE_VALUES
                    g_load_time_initializer.debug_dump("return_values_vector.txt",to_string(result),true);
#endif
                    results_vec[i] = result;
                }
            }
#ifdef USE_PAPI
            assert(PAPI_stop_counters(values, NUM_PAPI_COUNTERS) == PAPI_OK);
#endif
            vector_compute_time += diff_time(start_time);
#ifdef USE_PAPI
            for(unsigned k=0;k<NUM_PAPI_COUNTERS;++k)
                accum_values[k] += values[k];
#endif
            num_testcases += tc_vector.size();
            if(do_check)
            {
                // Scalar double-precision baseline over the same batch.
                get_time(&start_time);
#pragma omp parallel for schedule(dynamic,chunk_size)
                for(unsigned i=0;i<tc_vector.size();++i)
                {
                    testcase& tc = tc_vector[i];
                    double baseline_result = compute_full_prob<double>(&tc);
                    baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
                    baseline_results_vec[i] = baseline_result;
                }
                baseline_compute_time += diff_time(start_time);
                // Flag results outside both absolute and relative 1e-5 tolerance.
                for(unsigned i=0;i<tc_vector.size();++i)
                {
                    double baseline_result = baseline_results_vec[i];
                    double abs_error = fabs(baseline_result-results_vec[i]);
                    double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
                    if(abs_error > 1e-5 && rel_error > 1e-5)
                    {
                        cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
                        all_ok = false;
                    }
                }
            }
            // Release per-testcase buffers.
            // NOTE(review): delete[] matches read_mod_testcase's new[], but
            // read_testcase allocates with malloc — confirm the legacy path
            // is never used with this cleanup (malloc/delete[] mismatch).
            for(unsigned i=0;i<tc_vector.size();++i)
            {
                delete[] tc_vector[i].rs;
                delete[] tc_vector[i].hap;
                delete[] tc_vector[i].q;
                delete[] tc_vector[i].i;
                delete[] tc_vector[i].d;
                delete[] tc_vector[i].c;
            }
            results_vec.clear();
            tc_vector.clear();
        }
        if(break_value < 0)
            break;
    }
#ifdef DUMP_COMPUTE_VALUES
    g_load_time_initializer.debug_close();
#endif
    if(all_ok)
    {
        cout << "All output values within acceptable error\n";
        cout << "Baseline double precision compute time "<<baseline_compute_time*1e-9<<"\n";
    }
    cout << "Num testcase "<<num_testcases<< " num double invocations "<<num_double_calls<<"\n";
    cout << "Vector compute time "<< vector_compute_time*1e-9 << "\n";
#ifdef USE_PAPI
    for(unsigned i=0;i<NUM_PAPI_COUNTERS;++i)
        cout << eventnames[i] << " : "<<accum_values[i]<<"\n";
#endif

    if(use_old_read_testcase)
        fclose(fptr);
    else
        ifptr.close();
}
|
||||
|
|
@ -0,0 +1,75 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef PAIRHMM_UTIL_H
|
||||
#define PAIRHMM_UTIL_H
|
||||
|
||||
#include "template.h"
|
||||
|
||||
template<class T>
|
||||
std::string to_string(T obj)
|
||||
{
|
||||
std::stringstream ss;
|
||||
std::string ret_string;
|
||||
ss.clear();
|
||||
ss << std::scientific << obj;
|
||||
ss >> ret_string;
|
||||
ss.clear();
|
||||
return ret_string;
|
||||
}
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||
|
||||
int read_mod_testcase(std::ifstream& fptr, testcase* tc, bool reformat=false);
|
||||
|
||||
bool is_avx_supported();
|
||||
bool is_sse42_supported();
|
||||
extern float (*g_compute_full_prob_float)(testcase *tc, float *before_last_log);
|
||||
extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log);
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline);
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
|
||||
double getCurrClk();
|
||||
void get_time(struct timespec* x);
|
||||
uint64_t diff_time(struct timespec& prev_time);
|
||||
|
||||
//bit 0 is sse4.2, bit 1 is AVX
|
||||
enum ProcessorCapabilitiesEnum
|
||||
{
|
||||
SSE41_CUSTOM_IDX=0,
|
||||
SSE42_CUSTOM_IDX,
|
||||
AVX_CUSTOM_IDX
|
||||
};
|
||||
#define ENABLE_ALL_HARDWARE_FEATURES 0xFFFFFFFFFFFFFFFFull
|
||||
uint64_t get_machine_capabilities();
|
||||
void initialize_function_pointers(uint64_t mask=ENABLE_ALL_HARDWARE_FEATURES);
|
||||
void do_compute(char* filename, bool use_old_read_testcase=true, unsigned chunk_size=10000, bool do_check=true);
|
||||
|
||||
//#define DO_WARMUP
|
||||
//#define DO_REPEAT_PROFILING
|
||||
/*#define DUMP_COMPUTE_VALUES 1*/
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_AVX
|
||||
#undef SIMD_ENGINE_SSE
|
||||
|
||||
#define SIMD_ENGINE avx
|
||||
#define SIMD_ENGINE_AVX
|
||||
|
||||
#include "define-float.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_AVX
|
||||
|
||||
#define SIMD_ENGINE sse
|
||||
#define SIMD_ENGINE_SSE
|
||||
|
||||
|
||||
#include "define-sse-float.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#include "define-sse-double.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#undef SIMD_ENGINE
|
||||
#undef SIMD_ENGINE_AVX
|
||||
#undef SIMD_ENGINE_SSE
|
||||
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*Copyright (c) 2012 The Broad Institute
|
||||
|
||||
*Permission is hereby granted, free of charge, to any person
|
||||
*obtaining a copy of this software and associated documentation
|
||||
*files (the "Software"), to deal in the Software without
|
||||
*restriction, including without limitation the rights to use,
|
||||
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
*copies of the Software, and to permit persons to whom the
|
||||
*Software is furnished to do so, subject to the following
|
||||
*conditions:
|
||||
|
||||
*The above copyright notice and this permission notice shall be
|
||||
*included in all copies or substantial portions of the Software.
|
||||
|
||||
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
inline void CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
|
||||
inline void CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
|
||||
inline void CONCAT(CONCAT(precompute_masks_,SIMD_ENGINE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
|
||||
inline void CONCAT(CONCAT(init_masks_for_row_,SIMD_ENGINE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
|
||||
inline void CONCAT(CONCAT(update_masks_for_cols_,SIMD_ENGINE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
|
||||
inline void CONCAT(CONCAT(computeDistVec,SIMD_ENGINE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, SIMD_TYPE& distm, SIMD_TYPE& _1_distm, SIMD_TYPE& distmChosen);
|
||||
template<class NUMBER> inline void CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, SIMD_TYPE *p_MM, SIMD_TYPE *p_GAPM, SIMD_TYPE *p_MX, SIMD_TYPE *p_XX, SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, SIMD_TYPE *distm1D);
|
||||
template<class NUMBER> inline void CONCAT(CONCAT(stripINITIALIZATION,SIMD_ENGINE), PRECISION)(
|
||||
int stripIdx, Context<NUMBER> ctx, testcase *tc, SIMD_TYPE &pGAPM, SIMD_TYPE &pMM, SIMD_TYPE &pMX, SIMD_TYPE &pXX, SIMD_TYPE &pMY, SIMD_TYPE &pYY,
|
||||
SIMD_TYPE &rs, UNION_TYPE &rsN, SIMD_TYPE &distm, SIMD_TYPE &_1_distm, SIMD_TYPE *distm1D, SIMD_TYPE N_packed256, SIMD_TYPE *p_MM , SIMD_TYPE *p_GAPM ,
|
||||
SIMD_TYPE *p_MX, SIMD_TYPE *p_XX , SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM);
|
||||
inline SIMD_TYPE CONCAT(CONCAT(computeDISTM,SIMD_ENGINE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, SIMD_TYPE rs, UNION_TYPE rsN, SIMD_TYPE N_packed256,
|
||||
SIMD_TYPE distm, SIMD_TYPE _1_distm);
|
||||
inline void CONCAT(CONCAT(computeMXY,SIMD_ENGINE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
SIMD_TYPE pMM, SIMD_TYPE pGAPM, SIMD_TYPE pMX, SIMD_TYPE pXX, SIMD_TYPE pMY, SIMD_TYPE pYY, SIMD_TYPE distmSel);
|
||||
template<class NUMBER> NUMBER CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL);
|
||||
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
<name>GATK External Example</name>
|
||||
|
||||
<properties>
|
||||
<sting.version>3.0</sting.version>
|
||||
<sting.version>3.1</sting.version>
|
||||
<!--
|
||||
sting.basedir property must point to your checkout of Sting/GATK until we can get all the
|
||||
dependencies out of the committed sting repo and into central.
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
<parent>
|
||||
<groupId>org.broadinstitute.sting</groupId>
|
||||
<artifactId>sting-aggregator</artifactId>
|
||||
<version>3.0</version>
|
||||
<version>3.1</version>
|
||||
<relativePath>../..</relativePath>
|
||||
</parent>
|
||||
|
||||
|
|
@ -86,7 +86,7 @@
|
|||
<id>package-knowledgebasetests</id>
|
||||
</execution>
|
||||
<execution>
|
||||
<id>package-pipelinetests</id>
|
||||
<id>package-queuetests</id>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
|
|
|
|||
|
|
@ -337,15 +337,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "globalQScorePrior", shortName = "globalQScorePrior", doc = "Global Qscore Bayesian prior to use for BQSR", required = false)
|
||||
public double globalQScorePrior = -1.0;
|
||||
|
||||
/**
|
||||
* It is absolutely not recommended practice to run base quality score recalibration on BAM files that have been
|
||||
* processed with ReduceReads. By default, the GATK will error out if it detects that you are trying to recalibrate
|
||||
* a reduced BAM file. However, this flag allows you to disable the warning and proceed anyway. For the sake of your
|
||||
* data, please only use this option if you really know what you are doing.
|
||||
*/
|
||||
@Advanced
|
||||
@Argument(fullName = "allow_bqsr_on_reduced_bams_despite_repeated_warnings", shortName="allowBqsrOnReducedBams", doc="Ignore all warnings about how it's a really bad idea to run BQSR on a reduced BAM file (AT YOUR OWN RISK!)", required = false)
|
||||
public boolean ALLOW_BQSR_ON_REDUCED_BAMS = false;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.io.storage;
|
|||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import net.sf.samtools.util.ProgressLoggerInterface;
|
||||
import net.sf.samtools.util.RuntimeIOException;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterStub;
|
||||
|
|
@ -149,4 +150,8 @@ public class SAMFileWriterStorage implements SAMFileWriter, Storage<SAMFileWrite
|
|||
return writer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setProgressLogger(final ProgressLoggerInterface logger) {
|
||||
writer.setProgressLogger(logger);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.io.stubs;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.ProgressLoggerInterface;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
|
|
@ -313,4 +314,12 @@ public class SAMFileWriterStub implements Stub<SAMFileWriter>, StingSAMFileWrite
|
|||
public void close() {
|
||||
outputTracker.getStorage(this).close();
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws java.lang.UnsupportedOperationException No progress logging in this implementation.
|
||||
*/
|
||||
@Override
|
||||
public void setProgressLogger(final ProgressLoggerInterface logger) {
|
||||
throw new UnsupportedOperationException("Progress logging not supported");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -205,7 +205,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
|
|||
|
||||
if ( doNotWriteGenotypes ) options.add(Options.DO_NOT_WRITE_GENOTYPES);
|
||||
if ( engine.lenientVCFProcessing() ) options.add(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
|
||||
if ( indexOnTheFly && ! isCompressed() ) options.add(Options.INDEX_ON_THE_FLY);
|
||||
if ( indexOnTheFly) options.add(Options.INDEX_ON_THE_FLY);
|
||||
|
||||
if ( forceBCF || (getOutputFile() != null && VariantContextWriterFactory.isBCFOutput(getOutputFile())) )
|
||||
options.add(Options.FORCE_BCF);
|
||||
|
|
|
|||
|
|
@ -29,10 +29,11 @@ import net.sf.samtools.SAMSequenceDictionary;
|
|||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.MutableIndex;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
|
@ -70,23 +71,29 @@ public class IndexDictionaryUtils {
|
|||
* @param dict the sequence dictionary to add contigs to
|
||||
* @return the filled-in sequence dictionary
|
||||
*/
|
||||
static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
|
||||
LinkedHashSet<String> seqNames = index.getSequenceNames();
|
||||
static SAMSequenceDictionary createSequenceDictionaryFromContigList(final Index index, final SAMSequenceDictionary dict) {
|
||||
final List<String> seqNames = index.getSequenceNames();
|
||||
if (seqNames == null) {
|
||||
return dict;
|
||||
}
|
||||
for (String name : seqNames) {
|
||||
for (final String name : seqNames) {
|
||||
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
|
||||
dict.addSequence(seq);
|
||||
}
|
||||
return dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the sequence dictionary of the given index. THE INDEX MUST BE MUTABLE (i.e. not Tabix).
|
||||
*
|
||||
* @param index the (mutable) index file to use
|
||||
* @param dict the dictionary to use
|
||||
*/
|
||||
public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
|
||||
for ( SAMSequenceRecord seq : dict.getSequences() ) {
|
||||
final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
|
||||
final String length = String.valueOf(seq.getSequenceLength());
|
||||
index.addProperty(contig,length);
|
||||
((MutableIndex)index).addProperty(contig, length);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,21 +25,29 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.fasta;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.HelpConstants;
|
||||
import org.broadinstitute.variant.variantcontext.Genotype;
|
||||
import org.broadinstitute.variant.variantcontext.VariantContext;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -88,53 +96,94 @@ import java.util.List;
|
|||
public class FastaAlternateReferenceMaker extends FastaReferenceMaker {
|
||||
|
||||
/**
|
||||
* Variants from these input files are used by this tool to construct an alternate reference.
|
||||
* Variants from this input file are used by this tool to construct an alternate reference.
|
||||
*/
|
||||
@Input(fullName = "variant", shortName = "V", doc="variants to model", required=false)
|
||||
public List<RodBinding<VariantContext>> variants = Collections.emptyList();
|
||||
@ArgumentCollection
|
||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
/**
|
||||
* Snps from this file are used as a mask when constructing the alternate reference.
|
||||
* Snps from this file are used as a mask (inserting N's in the sequence) when constructing the alternate reference
|
||||
* (regardless of whether they overlap a variant site).
|
||||
*/
|
||||
@Input(fullName="snpmask", shortName = "snpmask", doc="SNP mask VCF file", required=false)
|
||||
public RodBinding<VariantContext> snpmask;
|
||||
protected RodBinding<VariantContext> snpmask;
|
||||
|
||||
/**
|
||||
* This option works only for VCFs with genotypes for exactly one sample; anything else will generate an error.
|
||||
* Non-diploid (or non-called) genotypes are ignored.
|
||||
*/
|
||||
@Argument(fullName="useIUPAC", shortName="useIUPAC", doc = "If specified, heterozygous SNP sites will be output using IUPAC codes", required=false)
|
||||
protected boolean useIUPACcodes = false;
|
||||
private String iupacSample = null;
|
||||
|
||||
private int deletionBasesRemaining = 0;
|
||||
|
||||
public Pair<GenomeLoc, String> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
@Override
|
||||
public void initialize() {
|
||||
super.initialize();
|
||||
if ( useIUPACcodes ) {
|
||||
final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
|
||||
final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
|
||||
if ( samples.size() != 1 )
|
||||
throw new UserException.BadInput("the --useIUPAC option works only on VCF files with genotypes for exactly one sample, but the input file has " + samples.size() + " samples");
|
||||
iupacSample = samples.iterator().next();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Pair<GenomeLoc, String> map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
|
||||
if (deletionBasesRemaining > 0) {
|
||||
deletionBasesRemaining--;
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), "");
|
||||
return new Pair<>(context.getLocation(), "");
|
||||
}
|
||||
|
||||
String refBase = String.valueOf((char)ref.getBase());
|
||||
final String refBase = String.valueOf((char)ref.getBase());
|
||||
|
||||
// Check to see if we have a called snp
|
||||
for ( VariantContext vc : tracker.getValues(variants, ref.getLocus()) ) {
|
||||
for ( final VariantContext vc : tracker.getValues(variantCollection.variants, ref.getLocus()) ) {
|
||||
if ( vc.isFiltered() )
|
||||
continue;
|
||||
|
||||
if ( vc.isSimpleDeletion()) {
|
||||
deletionBasesRemaining = vc.getReference().length() - 1;
|
||||
// delete the next n bases, not this one
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||
return new Pair<>(context.getLocation(), refBase);
|
||||
} else if ( vc.isSimpleInsertion()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||
return new Pair<>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||
} else if (vc.isSNP()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), vc.getAlternateAllele(0).toString());
|
||||
final String base = useIUPACcodes ? getIUPACbase(vc.getGenotype(iupacSample), refBase) : vc.getAlternateAllele(0).toString();
|
||||
return new Pair<>(context.getLocation(), base);
|
||||
}
|
||||
}
|
||||
|
||||
// if we don't have a called site, and we have a mask at this site, mask it
|
||||
for ( VariantContext vc : tracker.getValues(snpmask) ) {
|
||||
for ( final VariantContext vc : tracker.getValues(snpmask) ) {
|
||||
if ( vc.isSNP()) {
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), "N");
|
||||
return new Pair<>(context.getLocation(), "N");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// if we got here then we're just ref
|
||||
return new Pair<GenomeLoc, String>(context.getLocation(), refBase);
|
||||
return new Pair<>(context.getLocation(), refBase);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the IUPAC encoding for the given genotype or the reference base if not possible
|
||||
*
|
||||
* @param genotype the genotype to encode
|
||||
* @param ref the reference base
|
||||
* @return non-null, non-empty String
|
||||
*/
|
||||
private String getIUPACbase(final Genotype genotype, final String ref) {
|
||||
if ( genotype == null )
|
||||
throw new IllegalStateException("The genotype is null for sample " + iupacSample);
|
||||
|
||||
if ( !genotype.isHet() )
|
||||
return genotype.isHom() ? genotype.getAllele(0).getBaseString() : ref;
|
||||
|
||||
final byte allele1 = genotype.getAllele(0).getBases()[0];
|
||||
final byte allele2 = genotype.getAllele(1).getBases()[0];
|
||||
return new String(new byte[] {BaseUtils.basesToIUPAC(allele1, allele2)});
|
||||
}
|
||||
}
|
||||
|
|
@ -58,10 +58,10 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
@ArgumentCollection
|
||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||
|
||||
@Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=true)
|
||||
@Output(fullName="out1", shortName="o1", doc="File #1 to which variants should be written", required=false, exclusiveOf = "splitToManyFiles")
|
||||
protected VariantContextWriter vcfWriter1 = null;
|
||||
|
||||
@Output(fullName="out2", shortName="o2", doc="File #2 to which variants should be written", required=true)
|
||||
@Output(fullName="out2", shortName="o2", doc="File #2 to which variants should be written", required=false, exclusiveOf = "splitToManyFiles")
|
||||
// there's a reported bug in the GATK where we can't have 2 @Output writers
|
||||
protected File file2 = null;
|
||||
protected VariantContextWriter vcfWriter2 = null;
|
||||
|
|
@ -69,6 +69,17 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
@Argument(fullName="fractionToOut1", shortName="fraction", doc="Fraction of records to be placed in out1 (must be 0 >= fraction <= 1); all other records are placed in out2", required=false)
|
||||
protected double fraction = 0.5;
|
||||
|
||||
@Argument(fullName="splitToManyFiles", shortName = "splitToMany", doc="split (with uniform distribution) to more than 2 files. numOfFiles and baseOutputName parameters are required", required = false)
|
||||
protected boolean splitToMany = false;
|
||||
|
||||
@Argument(fullName = "numOfOutputVCFFiles", shortName = "N", doc = "number of output VCF files. Only works with SplitToMany = true", required = false, maxRecommendedValue = 20, minValue = 2)
|
||||
protected int numOfFiles = -1;
|
||||
|
||||
@Argument(fullName = "prefixForAllOutputFileNames", shortName = "baseOutputName", doc = "the name of the output VCF file will be: <baseOutputName>.split.<number>.vcf. Required with SplitToMany option", required = false)
|
||||
protected String baseFileName = null;
|
||||
|
||||
private VariantContextWriter[] writers = null;
|
||||
|
||||
/**
|
||||
* Set up the VCF writer, the sample expressions and regexs, and the JEXL matcher
|
||||
*/
|
||||
|
|
@ -76,15 +87,37 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
if ( fraction < 0.0 || fraction > 1.0 )
|
||||
throw new UserException.BadArgumentValue("fractionToOut1", "this value needs to be a number between 0 and 1");
|
||||
|
||||
if (splitToMany){
|
||||
if (numOfFiles < 2)
|
||||
throw new UserException.BadArgumentValue("numOfFiles", "this value must be greater than 2 when using the splitToMany option");
|
||||
if (baseFileName == null)
|
||||
throw new UserException.BadArgumentValue("baseFileName", "this value cannot be null (unprovided) when using the splitToMany option");
|
||||
}
|
||||
else{
|
||||
if(vcfWriter1 == null || vcfWriter2 == null)
|
||||
throw new UserException.BadArgumentValue("out1 or out2", "this value cannot be null (unprovided) unless you are using the splitToMany option");
|
||||
}
|
||||
|
||||
// setup the header info
|
||||
final List<String> inputNames = Arrays.asList(variantCollection.variants.getName());
|
||||
Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames);
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames);
|
||||
final Set<VCFHeaderLine> hInfo = new HashSet<>();
|
||||
hInfo.addAll(GATKVCFUtils.getHeaderFields(getToolkit(), inputNames));
|
||||
|
||||
vcfWriter1.writeHeader(new VCFHeader(hInfo, samples));
|
||||
vcfWriter2 = VariantContextWriterFactory.create(file2, getMasterSequenceDictionary());
|
||||
vcfWriter2.writeHeader(new VCFHeader(hInfo, samples));
|
||||
|
||||
if(splitToMany){
|
||||
writers = new VariantContextWriter[numOfFiles];
|
||||
for(int i = 0; i<writers.length; i++){
|
||||
writers[i] = VariantContextWriterFactory.create(new File(baseFileName+".split."+i+".vcf"), getMasterSequenceDictionary());
|
||||
writers[i].writeHeader(new VCFHeader(hInfo,samples));
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
vcfWriter1.writeHeader(new VCFHeader(hInfo, samples));
|
||||
vcfWriter2 = VariantContextWriterFactory.create(file2, getMasterSequenceDictionary());
|
||||
vcfWriter2.writeHeader(new VCFHeader(hInfo, samples));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -95,17 +128,23 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
* @param context alignment info
|
||||
* @return 1 if the record was printed to the output file, 0 if otherwise
|
||||
*/
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
public Integer map(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context) {
|
||||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
|
||||
for ( VariantContext vc : vcs ) {
|
||||
double random = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
||||
if ( random < fraction )
|
||||
vcfWriter1.add(vc);
|
||||
else
|
||||
vcfWriter2.add(vc);
|
||||
final Collection<VariantContext> vcs = tracker.getValues(variantCollection.variants, context.getLocation());
|
||||
for ( final VariantContext vc : vcs ) {
|
||||
final double random = GenomeAnalysisEngine.getRandomGenerator().nextDouble();
|
||||
if(splitToMany){
|
||||
final int index = (int)(numOfFiles * random);
|
||||
writers[index].add(vc);
|
||||
}
|
||||
else{
|
||||
if ( random < fraction )
|
||||
vcfWriter1.add(vc);
|
||||
else
|
||||
vcfWriter2.add(vc);
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
@ -113,10 +152,14 @@ public class RandomlySplitVariants extends RodWalker<Integer, Integer> {
|
|||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
public Integer reduce(final Integer value, final Integer sum) { return value + sum; }
|
||||
|
||||
public void onTraversalDone(Integer result) {
|
||||
public void onTraversalDone(final Integer result) {
|
||||
logger.info(result + " records processed.");
|
||||
vcfWriter2.close();
|
||||
if(splitToMany)
|
||||
for(final VariantContextWriter writer: writers)
|
||||
writer.close();
|
||||
else
|
||||
vcfWriter2.close();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
|
||||
/**
|
||||
* BaseUtils contains some basic utilities for manipulating nucleotides.
|
||||
|
|
@ -284,6 +285,36 @@ public class BaseUtils {
|
|||
return bases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a pair of bases to their IUPAC ambiguity code
|
||||
*
|
||||
* @param base1 1st base
|
||||
* @param base2 2nd base
|
||||
* @return byte
|
||||
*/
|
||||
static public byte basesToIUPAC(final byte base1, final byte base2) {
|
||||
// ensure that the bases come in order
|
||||
if ( base2 < base1 )
|
||||
return basesToIUPAC(base2, base1);
|
||||
|
||||
// ensure that the bases are regular ones
|
||||
if ( !isRegularBase(base1) || !isRegularBase(base2) )
|
||||
return Base.N.base;
|
||||
|
||||
// IUPAC codes are not needed if the bases are identical
|
||||
if ( basesAreEqual(base1, base2) )
|
||||
return base1;
|
||||
|
||||
if ( base1 == Base.A.base )
|
||||
return (byte)(base2 == Base.C.base ? 'M' : (base2 == Base.G.base ? 'R' : 'W'));
|
||||
|
||||
if ( base1 == Base.C.base )
|
||||
return (byte)(base2 == Base.G.base ? 'S' : 'Y');
|
||||
|
||||
// the only possibility left is G/T
|
||||
return 'K';
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a simple base to a base index
|
||||
*
|
||||
|
|
@ -559,4 +590,26 @@ public class BaseUtils {
|
|||
throw new ReviewedStingException("base must be A, C, G or T. " + (char) base + " is not a valid base.");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Lexicographical sorting of base arrays {@link Comparator}.
|
||||
*/
|
||||
public static final Comparator<byte[]> BASES_COMPARATOR = new Comparator<byte[]> (){
|
||||
|
||||
@Override
|
||||
public int compare(final byte[] o1,final byte[] o2) {
|
||||
final int minLength = Math.min(o1.length,o2.length);
|
||||
for (int i = 0; i < minLength; i++) {
|
||||
final int cmp = Byte.compare(o1[i],o2[i]);
|
||||
if (cmp != 0) return cmp;
|
||||
}
|
||||
if (o1.length == o2.length)
|
||||
return 0;
|
||||
else if (o1.length == minLength)
|
||||
return -1;
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -852,4 +852,34 @@ public class Utils {
|
|||
|
||||
return lst;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares sections from to byte arrays to verify whether they contain the same values.
|
||||
*
|
||||
* @param left first array to compare.
|
||||
* @param leftOffset first position of the first array to compare.
|
||||
* @param right second array to compare.
|
||||
* @param rightOffset first position of the second array to compare.
|
||||
* @param length number of positions to compare.
|
||||
*
|
||||
* @throws IllegalArgumentException if <ul>
|
||||
* <li>either {@code left} or {@code right} is {@code null} or</li>
|
||||
* <li>any off the offset or length combine point outside any of the two arrays</li>
|
||||
* </ul>
|
||||
* @return {@code true} iff {@code length} is 0 or all the bytes in both ranges are the same two-by-two.
|
||||
*/
|
||||
public static boolean equalRange(final byte[] left, final int leftOffset, byte[] right, final int rightOffset, final int length) {
|
||||
if (left == null) throw new IllegalArgumentException("left cannot be null");
|
||||
if (right == null) throw new IllegalArgumentException("right cannot be null");
|
||||
if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
|
||||
if (leftOffset < 0) throw new IllegalArgumentException("left offset cannot be negative");
|
||||
if (leftOffset + length > left.length) throw new IllegalArgumentException("length goes beyond end of left array");
|
||||
if (rightOffset < 0) throw new IllegalArgumentException("right offset cannot be negative");
|
||||
if (rightOffset + length > right.length) throw new IllegalArgumentException("length goes beyond end of right array");
|
||||
|
||||
for (int i = 0; i < length; i++)
|
||||
if (left[leftOffset + i] != right[rightOffset + i])
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ public class Haplotype extends Allele {
|
|||
private EventMap eventMap = null;
|
||||
private Cigar cigar;
|
||||
private int alignmentStartHapwrtRef;
|
||||
private double score = 0;
|
||||
private double score = Double.NaN;
|
||||
|
||||
/**
|
||||
* Main constructor
|
||||
|
|
@ -301,7 +301,7 @@ public class Haplotype extends Allele {
|
|||
* @return a double, where higher values are better
|
||||
*/
|
||||
public double getScore() {
|
||||
return this.isReference() ? Double.MAX_VALUE : score;
|
||||
return score;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -312,7 +312,7 @@ public class Haplotype extends Allele {
|
|||
* @param score a double, where higher values are better
|
||||
*/
|
||||
public void setScore(double score) {
|
||||
this.score = this.isReference() ? Double.MAX_VALUE : score;
|
||||
this.score = score;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -57,6 +57,10 @@ public abstract class PairHMM {
|
|||
ORIGINAL,
|
||||
/* Optimized version of the PairHMM which caches per-read computations and operations in real space to avoid costly sums of log10'ed likelihoods */
|
||||
LOGLESS_CACHING,
|
||||
/* Optimized AVX implementation of LOGLESS_CACHING called through JNI */
|
||||
VECTOR_LOGLESS_CACHING,
|
||||
/* Debugging for vector implementation of LOGLESS_CACHING */
|
||||
DEBUG_VECTOR_LOGLESS_CACHING,
|
||||
/* Logless caching PairHMM that stores computations in 1D arrays instead of matrices, and which proceeds diagonally over the (read x haplotype) intersection matrix */
|
||||
ARRAY_LOGLESS
|
||||
}
|
||||
|
|
@ -70,6 +74,14 @@ public abstract class PairHMM {
|
|||
protected boolean doNotUseTristateCorrection = false;
|
||||
protected void doNotUseTristateCorrection() { doNotUseTristateCorrection = true; }
|
||||
|
||||
//debug array
|
||||
protected double[] mLikelihoodArray;
|
||||
|
||||
//profiling information
|
||||
protected static final boolean doProfiling = true;
|
||||
protected long computeTime = 0;
|
||||
protected long startTime = 0;
|
||||
|
||||
/**
|
||||
* Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths
|
||||
*
|
||||
|
|
@ -95,6 +107,26 @@ public abstract class PairHMM {
|
|||
initialized = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Called at the end of PairHMM for a region - mostly used by the JNI implementations
|
||||
*/
|
||||
public void finalizeRegion()
|
||||
{
|
||||
;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths
|
||||
* This function is used by the JNI implementations to transfer all data once to the native code
|
||||
* @param haplotypes the list of haplotypes
|
||||
* @param perSampleReadList map from sample name to list of reads
|
||||
* @param haplotypeMaxLength the max length of haplotypes we want to use with this PairHMM
|
||||
* @param readMaxLength the max length of reads we want to use with this PairHMM
|
||||
*/
|
||||
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList, final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
initialize(readMaxLength, haplotypeMaxLength);
|
||||
}
|
||||
|
||||
protected int findMaxReadLength(final List<GATKSAMRecord> reads) {
|
||||
int listMaxReadLength = 0;
|
||||
for(GATKSAMRecord read : reads){
|
||||
|
|
@ -125,6 +157,8 @@ public abstract class PairHMM {
|
|||
* said read coming from the said haplotype under the provided error model
|
||||
*/
|
||||
public PerReadAlleleLikelihoodMap computeLikelihoods(final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap) {
|
||||
if(doProfiling)
|
||||
startTime = System.nanoTime();
|
||||
|
||||
// (re)initialize the pairHMM only if necessary
|
||||
final int readMaxLength = findMaxReadLength(reads);
|
||||
|
|
@ -132,6 +166,8 @@ public abstract class PairHMM {
|
|||
if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) { initialize(readMaxLength, haplotypeMaxLength); }
|
||||
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
mLikelihoodArray = new double[reads.size()*alleleHaplotypeMap.size()];
|
||||
int idx = 0;
|
||||
for(GATKSAMRecord read : reads){
|
||||
final byte[] readBases = read.getReadBases();
|
||||
final byte[] readQuals = read.getBaseQualities();
|
||||
|
|
@ -144,12 +180,16 @@ public abstract class PairHMM {
|
|||
boolean isFirstHaplotype = true;
|
||||
Allele currentAllele = null;
|
||||
double log10l;
|
||||
for (final Allele allele : alleleHaplotypeMap.keySet()){
|
||||
final Haplotype haplotype = alleleHaplotypeMap.get(allele);
|
||||
//for (final Allele allele : alleleHaplotypeMap.keySet()){
|
||||
for (Map.Entry<Allele,Haplotype> currEntry : alleleHaplotypeMap.entrySet()){
|
||||
//final Haplotype haplotype = alleleHaplotypeMap.get(allele);
|
||||
final Allele allele = currEntry.getKey();
|
||||
final Haplotype haplotype = currEntry.getValue();
|
||||
final byte[] nextHaplotypeBases = haplotype.getBases();
|
||||
if (currentHaplotypeBases != null) {
|
||||
log10l = computeReadLikelihoodGivenHaplotypeLog10(currentHaplotypeBases,
|
||||
readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, nextHaplotypeBases);
|
||||
mLikelihoodArray[idx++] = log10l;
|
||||
likelihoodMap.add(read, currentAllele, log10l);
|
||||
}
|
||||
// update the current haplotype
|
||||
|
|
@ -163,8 +203,11 @@ public abstract class PairHMM {
|
|||
log10l = computeReadLikelihoodGivenHaplotypeLog10(currentHaplotypeBases,
|
||||
readBases, readQuals, readInsQuals, readDelQuals, overallGCP, isFirstHaplotype, null);
|
||||
likelihoodMap.add(read, currentAllele, log10l);
|
||||
mLikelihoodArray[idx++] = log10l;
|
||||
}
|
||||
}
|
||||
if(doProfiling)
|
||||
computeTime += (System.nanoTime() - startTime);
|
||||
return likelihoodMap;
|
||||
}
|
||||
|
||||
|
|
@ -270,4 +313,17 @@ public abstract class PairHMM {
|
|||
|
||||
return Math.min(haplotype1.length, haplotype2.length);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the results of the computeLikelihoods function
|
||||
*/
|
||||
public double[] getLikelihoodArray() { return mLikelihoodArray; }
|
||||
/**
|
||||
* Called at the end of the program to close files, print profiling information etc
|
||||
*/
|
||||
public void close()
|
||||
{
|
||||
if(doProfiling)
|
||||
System.out.println("Total compute time in PairHMM computeLikelihoods() : "+(computeTime*1e-9));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.ProgressLoggerInterface;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -119,4 +119,12 @@ public class ArtificialStingSAMFileWriter implements StingSAMFileWriter {
|
|||
@Override
|
||||
public void setMaxRecordsInRam(int maxRecordsInRam) {
|
||||
}
|
||||
|
||||
/**
|
||||
* @throws java.lang.UnsupportedOperationException No progress logging in this implementation.
|
||||
*/
|
||||
@Override
|
||||
public void setProgressLogger(final ProgressLoggerInterface logger) {
|
||||
throw new UnsupportedOperationException("Progress logging not supported");
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.utils.sam;
|
||||
|
||||
import net.sf.samtools.*;
|
||||
import net.sf.samtools.util.ProgressLoggerInterface;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
|
@ -174,4 +175,11 @@ public class NWaySAMFileWriter implements SAMFileWriter {
|
|||
public void close() {
|
||||
for ( SAMFileWriter w : writerMap.values() ) w.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setProgressLogger(final ProgressLoggerInterface logger) {
|
||||
for (final SAMFileWriter writer: writerMap.values()) {
|
||||
writer.setProgressLogger(logger);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.utils.sam;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMFileWriter;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.ProgressLoggerInterface;
|
||||
|
||||
/**
|
||||
* XXX
|
||||
|
|
@ -77,4 +78,9 @@ public class SimplifyingSAMFileWriter implements SAMFileWriter {
|
|||
read.setAttribute("RG", rg);
|
||||
return read;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setProgressLogger(final ProgressLoggerInterface logger) {
|
||||
dest.setProgressLogger(logger);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -194,14 +194,13 @@ public class GATKVCFUtils {
|
|||
public static IndexCreator getIndexCreator(GATKVCFIndexType type, int parameter, File outFile) {
|
||||
IndexCreator idxCreator;
|
||||
switch (type) {
|
||||
case DYNAMIC_SEEK: idxCreator = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); break;
|
||||
case DYNAMIC_SIZE: idxCreator = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SIZE); break;
|
||||
case LINEAR: idxCreator = new LinearIndexCreator(); break;
|
||||
case INTERVAL: idxCreator = new IntervalIndexCreator(); break;
|
||||
case DYNAMIC_SEEK: idxCreator = new DynamicIndexCreator(outFile, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); break;
|
||||
case DYNAMIC_SIZE: idxCreator = new DynamicIndexCreator(outFile, IndexFactory.IndexBalanceApproach.FOR_SIZE); break;
|
||||
case LINEAR: idxCreator = new LinearIndexCreator(outFile, parameter); break;
|
||||
case INTERVAL: idxCreator = new IntervalIndexCreator(outFile, parameter); break;
|
||||
default: throw new IllegalArgumentException("Unknown IndexCreator type: " + type);
|
||||
}
|
||||
|
||||
idxCreator.initialize(outFile, parameter);
|
||||
return idxCreator;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -91,6 +91,7 @@ public abstract class BaseTest {
|
|||
//public static final String b37KGReference = "/Users/depristo/Desktop/broadLocal/localData/human_g1k_v37.fasta";
|
||||
public static final String b37KGReference = "/humgen/1kg/reference/human_g1k_v37.fasta";
|
||||
public static final String b37KGReferenceWithDecoy = "/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37_decoy.fasta";
|
||||
public static final String hg19RefereneWithChrPrefixInChromosomeNames = "/humgen/gsa-hpprojects/GATK/bundle/current/hg19/ucsc.hg19.fasta";
|
||||
public static final String GATKDataLocation = "/humgen/gsa-hpprojects/GATK/data/";
|
||||
public static final String validationDataLocation = GATKDataLocation + "Validation_Data/";
|
||||
public static final String evaluationDataLocation = GATKDataLocation + "Evaluation_Data/";
|
||||
|
|
@ -135,7 +136,7 @@ public abstract class BaseTest {
|
|||
public final static String NA12878_PCRFREE = privateTestDir + "PCRFree.2x250.Illumina.20_10_11.bam";
|
||||
public final static String NA12878_WEx = privateTestDir + "CEUTrio.HiSeq.WEx.b37_decoy.NA12878.20_10_11mb.bam";
|
||||
|
||||
public static final boolean pipelineTestRunModeIsSet = System.getProperty("pipeline.run", "").equals("run");
|
||||
public static final boolean queueTestRunModeIsSet = System.getProperty("queuetest.run", "").equals("true");
|
||||
|
||||
/** before the class starts up */
|
||||
static {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,310 @@
|
|||
/*
|
||||
* Copyright (c) 2012 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.phonehome;
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountLoci;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountRODs;
|
||||
import org.broadinstitute.sting.gatk.walkers.qc.CountReads;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
|
||||
import org.broadinstitute.sting.utils.activeregion.ActivityProfileState;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.jets3t.service.S3Service;
|
||||
import org.jets3t.service.S3ServiceException;
|
||||
import org.jets3t.service.ServiceException;
|
||||
import org.jets3t.service.model.S3Object;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Properties;
|
||||
|
||||
public class GATKRunReportUnitTest extends BaseTest {
|
||||
private final static boolean DEBUG = false;
|
||||
private static final long S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING = 30 * 1000;
|
||||
private static final String AWS_DOWNLOADER_CREDENTIALS_PROPERTIES_FILE = privateTestDir + "phonehome/awsDownloaderCredentials.properties";
|
||||
|
||||
private Walker walker;
|
||||
private Exception exception;
|
||||
private GenomeAnalysisEngine engine;
|
||||
private String downloaderAccessKey;
|
||||
private String downloaderSecretKey;
|
||||
|
||||
@BeforeClass
|
||||
public void setup() throws Exception {
|
||||
walker = new CountReads();
|
||||
exception = new IllegalArgumentException("javaException");
|
||||
engine = new GenomeAnalysisEngine();
|
||||
engine.setArguments(new GATKArgumentCollection());
|
||||
|
||||
Properties awsProperties = new Properties();
|
||||
awsProperties.load(new FileInputStream(AWS_DOWNLOADER_CREDENTIALS_PROPERTIES_FILE));
|
||||
downloaderAccessKey = awsProperties.getProperty("accessKey");
|
||||
downloaderSecretKey = awsProperties.getProperty("secretKey");
|
||||
}
|
||||
|
||||
@Test(enabled = ! DEBUG)
|
||||
public void testAWSKeysAreValid() {
|
||||
// throws an exception if they aren't
|
||||
GATKRunReport.checkAWSAreValid();
|
||||
}
|
||||
|
||||
@Test(enabled = ! DEBUG)
|
||||
public void testAccessKey() throws Exception {
|
||||
testAWSKey(GATKRunReport.getAWSUploadAccessKey(), GATKRunReport.AWS_ACCESS_KEY_MD5);
|
||||
}
|
||||
|
||||
@Test(enabled = ! DEBUG)
|
||||
public void testSecretKey() throws Exception {
|
||||
testAWSKey(GATKRunReport.getAWSUploadSecretKey(), GATKRunReport.AWS_SECRET_KEY_MD5);
|
||||
}
|
||||
|
||||
private void testAWSKey(final String accessKey, final String expectedMD5) throws Exception {
|
||||
Assert.assertNotNull(accessKey, "AccessKey should not be null");
|
||||
final String actualmd5 = Utils.calcMD5(accessKey);
|
||||
Assert.assertEquals(actualmd5, expectedMD5);
|
||||
}
|
||||
|
||||
@DataProvider(name = "GATKReportCreationTest")
|
||||
public Object[][] makeGATKReportCreationTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
final Walker readWalker = new CountReads();
|
||||
final Walker lociWalker = new CountLoci();
|
||||
final Walker rodWalker = new CountRODs();
|
||||
final Walker artWalker = new RunReportDummyActiveRegionWalker();
|
||||
|
||||
final Exception noException = null;
|
||||
final Exception javaException = new IllegalArgumentException("javaException");
|
||||
final Exception stingException = new ReviewedStingException("StingException");
|
||||
final Exception userException = new UserException("userException");
|
||||
|
||||
final GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
|
||||
engine.setArguments(new GATKArgumentCollection());
|
||||
|
||||
for ( final Walker walker : Arrays.asList(readWalker, lociWalker, rodWalker, artWalker) ) {
|
||||
for ( final Exception exception : Arrays.asList(noException, javaException, stingException, userException) ) {
|
||||
tests.add(new Object[]{walker, exception, engine});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = !DEBUG, dataProvider = "GATKReportCreationTest")
|
||||
public void testGATKReportCreationReadingAndWriting(final Walker walker, final Exception exception, final GenomeAnalysisEngine engine) throws Exception {
|
||||
final GATKRunReport report = new GATKRunReport(walker, exception, engine, GATKRunReport.PhoneHomeOption.STDOUT);
|
||||
final ByteArrayOutputStream captureStream = new ByteArrayOutputStream();
|
||||
final boolean succeeded = report.postReportToStream(captureStream);
|
||||
Assert.assertTrue(succeeded, "Failed to write report to stream");
|
||||
Assert.assertFalse(report.exceptionOccurredDuringPost(), "Post succeeded but report says it failed");
|
||||
Assert.assertNull(report.getErrorMessage(), "Post succeeded but there was an error message");
|
||||
Assert.assertNull(report.getErrorThrown(), "Post succeeded but there was an error message");
|
||||
final InputStream readStream = new ByteArrayInputStream(captureStream.toByteArray());
|
||||
|
||||
GATKRunReport deserialized = null;
|
||||
try {
|
||||
deserialized = GATKRunReport.deserializeReport(readStream);
|
||||
} catch ( Exception e ) {
|
||||
final String reportString = new String(captureStream.toByteArray());
|
||||
Assert.fail("Failed to deserialize GATK report " + reportString + " with exception " + e);
|
||||
}
|
||||
|
||||
if ( deserialized != null )
|
||||
Assert.assertEquals(report, deserialized);
|
||||
}
|
||||
|
||||
@DataProvider(name = "GATKAWSReportMode")
|
||||
public Object[][] makeGATKAWSReportMode() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final GATKRunReport.AWSMode mode : GATKRunReport.AWSMode.values() ) {
|
||||
tests.add(new Object[]{mode});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
// Will fail with timeout if AWS time out isn't working
|
||||
// Will fail with exception if AWS doesn't protect itself from errors
|
||||
@Test(enabled = ! DEBUG, dataProvider = "GATKAWSReportMode", timeOut = S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING * 2)
|
||||
public void testAWS(final GATKRunReport.AWSMode awsMode) {
|
||||
logger.warn("Starting testAWS mode=" + awsMode);
|
||||
|
||||
// Use a shorter timeout than usual when we're testing GATKRunReport.AWSMode.TIMEOUT
|
||||
final long thisTestS3Timeout = awsMode == GATKRunReport.AWSMode.TIMEOUT ? 30 * 1000 : S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING;
|
||||
final GATKRunReport report = new GATKRunReport(walker, exception, engine, GATKRunReport.PhoneHomeOption.AWS, thisTestS3Timeout);
|
||||
report.sendAWSToTestBucket();
|
||||
report.setAwsMode(awsMode);
|
||||
final S3Object s3Object = report.postReportToAWSS3();
|
||||
|
||||
if ( awsMode == GATKRunReport.AWSMode.NORMAL ) {
|
||||
Assert.assertNotNull(s3Object, "Upload to AWS failed, s3Object was null. error was " + report.formatError());
|
||||
Assert.assertFalse(report.exceptionOccurredDuringPost(), "The upload should have succeeded but the report says it didn't. Error was " + report.formatError());
|
||||
Assert.assertNull(report.getErrorMessage(), "Report succeeded but an error message was found");
|
||||
Assert.assertNull(report.getErrorThrown(), "Report succeeded but an thrown error was found");
|
||||
try {
|
||||
final GATKRunReport deserialized = GATKRunReport.deserializeReport(downloaderAccessKey, downloaderSecretKey, report.getS3ReportBucket(), s3Object);
|
||||
Assert.assertEquals(report, deserialized);
|
||||
deleteFromS3(report);
|
||||
} catch ( Exception e ) {
|
||||
Assert.fail("Failed to read, deserialize, or delete GATK report " + s3Object.getName() + " with exception " + e);
|
||||
}
|
||||
} else {
|
||||
Assert.assertNull(s3Object, "AWS upload should have failed for mode " + awsMode + " but got non-null s3 object back " + s3Object + " error was " + report.formatError());
|
||||
Assert.assertTrue(report.exceptionOccurredDuringPost(), "S3 object was null but the report says that the upload succeeded");
|
||||
Assert.assertNotNull(report.getErrorMessage(), "Report succeeded but an error message wasn't found");
|
||||
if ( awsMode == GATKRunReport.AWSMode.FAIL_WITH_EXCEPTION )
|
||||
Assert.assertNotNull(report.getErrorThrown());
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteFromS3(final GATKRunReport report) throws Exception {
|
||||
final S3Service s3Service = GATKRunReport.initializeAWSService(downloaderAccessKey, downloaderSecretKey);
|
||||
// Retrieve the whole data object we created previously
|
||||
s3Service.deleteObject(report.getS3ReportBucket(), report.getReportFileName());
|
||||
}
|
||||
|
||||
@DataProvider(name = "PostReportByType")
|
||||
public Object[][] makePostReportByType() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( final GATKRunReport.PhoneHomeOption et : GATKRunReport.PhoneHomeOption.values() ) {
|
||||
tests.add(new Object[]{et});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(enabled = ! DEBUG, dataProvider = "PostReportByType", timeOut = S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING * 2)
|
||||
public void testPostReportByType(final GATKRunReport.PhoneHomeOption type) {
|
||||
final GATKRunReport report = new GATKRunReport(walker, exception, engine, GATKRunReport.PhoneHomeOption.AWS, S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING);
|
||||
Assert.assertFalse(report.exceptionOccurredDuringPost(), "An exception occurred during posting the report");
|
||||
final boolean succeeded = report.postReport(type);
|
||||
|
||||
if ( type == GATKRunReport.PhoneHomeOption.NO_ET )
|
||||
Assert.assertFalse(succeeded, "NO_ET option shouldn't write a report");
|
||||
else {
|
||||
Assert.assertTrue(succeeded, "Any non NO_ET option should succeed in writing a report");
|
||||
|
||||
if ( type == GATKRunReport.PhoneHomeOption.STDOUT ) {
|
||||
// nothing to do
|
||||
} else {
|
||||
// must have gone to AWS
|
||||
try {
|
||||
Assert.assertTrue(report.wentToAWS(), "The report should have gone to AWS but the report says it wasn't");
|
||||
deleteFromS3(report);
|
||||
} catch ( Exception e ) {
|
||||
Assert.fail("Failed delete GATK report " + report.getReportFileName() + " with exception " + e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public interface S3Op {
|
||||
public void apply() throws ServiceException;
|
||||
}
|
||||
|
||||
// Will fail with timeout if AWS time out isn't working
|
||||
// Will fail with exception if AWS doesn't protect itself from errors
|
||||
@Test(timeOut = S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING * 2)
|
||||
public void testAWSPublicKeyHasAccessControls() throws Exception {
|
||||
final GATKRunReport report = new GATKRunReport(walker, exception, engine, GATKRunReport.PhoneHomeOption.AWS, S3_PUT_TIMEOUT_IN_MILLISECONDS_FOR_TESTING);
|
||||
report.sendAWSToTestBucket();
|
||||
final S3Object s3Object = report.postReportToAWSS3();
|
||||
Assert.assertNotNull(s3Object, "Upload to AWS failed, s3Object was null. error was " + report.formatError());
|
||||
|
||||
// create a service with the public key, and make sure it cannot list or delete
|
||||
final S3Service s3Service = GATKRunReport.initializeAWSService(GATKRunReport.getAWSUploadAccessKey(), GATKRunReport.getAWSUploadSecretKey());
|
||||
assertOperationNotAllowed("listAllBuckets", new S3Op() {
|
||||
@Override
|
||||
public void apply() throws S3ServiceException {
|
||||
s3Service.listAllBuckets();
|
||||
}
|
||||
});
|
||||
assertOperationNotAllowed("listBucket", new S3Op() {
|
||||
@Override
|
||||
public void apply() throws S3ServiceException { s3Service.listObjects(report.getS3ReportBucket()); }
|
||||
});
|
||||
assertOperationNotAllowed("createBucket", new S3Op() {
|
||||
@Override
|
||||
public void apply() throws S3ServiceException { s3Service.createBucket("ShouldNotCreate"); }
|
||||
});
|
||||
assertOperationNotAllowed("deleteObject", new S3Op() {
|
||||
@Override
|
||||
public void apply() throws ServiceException { s3Service.deleteObject(report.getS3ReportBucket(), report.getReportFileName()); }
|
||||
});
|
||||
}
|
||||
|
||||
private void assertOperationNotAllowed(final String name, final S3Op op) {
|
||||
try {
|
||||
op.apply();
|
||||
// only gets here if the operation was successful
|
||||
Assert.fail("Operation " + name + " ran successfully but we expected to it fail");
|
||||
} catch ( ServiceException e ) {
|
||||
Assert.assertEquals(e.getErrorCode(), "AccessDenied");
|
||||
}
|
||||
}
|
||||
|
||||
class RunReportDummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
|
||||
@Override
|
||||
public ActivityProfileState isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
return new ActivityProfileState(ref.getLocus(), 0.0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduceInit() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Integer reduce(Integer value, Integer sum) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -35,7 +35,7 @@ import org.testng.annotations.Test;
|
|||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
public class JnaSessionPipelineTest extends BaseTest {
|
||||
public class JnaSessionQueueTest extends BaseTest {
|
||||
private String implementation = null;
|
||||
private static final SessionFactory factory = new JnaSessionFactory();
|
||||
|
||||
|
|
@ -52,15 +52,15 @@ public class JnaSessionPipelineTest extends BaseTest {
|
|||
|
||||
@Test(dependsOnMethods = { "testDrmaa" })
|
||||
public void testSubmitEcho() throws Exception {
|
||||
if ( ! pipelineTestRunModeIsSet ) {
|
||||
throw new SkipException("Skipping testSubmitEcho because we are in pipeline test dry run mode");
|
||||
if ( ! queueTestRunModeIsSet ) {
|
||||
throw new SkipException("Skipping testSubmitEcho because we are in queue test dry run mode");
|
||||
}
|
||||
|
||||
if (implementation.contains("LSF")) {
|
||||
System.err.println(" ***********************************************************");
|
||||
System.err.println(" *************************************************************");
|
||||
System.err.println(" **** ****");
|
||||
System.err.println(" **** Skipping JnaSessionPipelineTest.testSubmitEcho() ****");
|
||||
System.err.println(" **** Skipping JnaSessionQueueTest.testSubmitEcho() ****");
|
||||
System.err.println(" **** Are you using the dotkit .combined_LSF_SGE? ****");
|
||||
System.err.println(" **** ****");
|
||||
System.err.println(" *************************************************************");
|
||||
|
|
@ -68,7 +68,7 @@ public class JnaSessionPipelineTest extends BaseTest {
|
|||
throw new SkipException("Skipping testSubmitEcho because correct DRMAA implementation not found");
|
||||
}
|
||||
|
||||
File outFile = tryCreateNetworkTempFile("JnaSessionPipelineTest.out");
|
||||
File outFile = tryCreateNetworkTempFile("JnaSessionQueueTest.out");
|
||||
Session session = factory.getSession();
|
||||
session.init(null);
|
||||
try {
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue