diff --git a/ant-bridge.sh b/ant-bridge.sh index 9f4713d7c..fe0549fa1 100755 --- a/ant-bridge.sh +++ b/ant-bridge.sh @@ -103,14 +103,14 @@ for arg in "${@}" ; do mvn_args="${mvn_args} -Dsting.packagetests.enabled=true" mvn_args="${mvn_args} -Dsting.packagecommittests.skipped=false" - # TODO: This runs only the pipeline tests (full, non-dry run), but not the commit tests for Queue. + # TODO: This runs only the queue tests (full, non-dry run), but not the commit tests for Queue. elif [[ "${arg}" == "queuefull.binary.release.tests" ]] ; then local_repo="sitetemprepo" mvn_args="install -Dmaven.repo.local=${local_repo} && mvn verify" mvn_args="${mvn_args} -Dmaven.repo.local=${local_repo}" mvn_args="${mvn_args} -Dsting.packagetests.enabled=true" - mvn_args="${mvn_args} -Dsting.packagepipelinetests.skipped=false" - mvn_args="${mvn_args} -Dsting.pipelinetests.run=true" + mvn_args="${mvn_args} -Dsting.packagequeuetests.skipped=false" + mvn_args="${mvn_args} -Dsting.queuetests.run=true" elif [[ "${arg}" == "committests" ]] ; then mvn_args="verify -Dsting.committests.skipped=false" @@ -130,11 +130,11 @@ for arg in "${@}" ; do elif [[ "${arg}" == "knowledgebasetest" ]] ; then mvn_args="verify -Dsting.knowledgebasetests.skipped=false" - elif [[ "${arg}" == "pipelinetest" ]] ; then - mvn_args="verify -Dsting.pipelinetests.skipped=false" + elif [[ "${arg}" == "queuetest" ]] ; then + mvn_args="verify -Dsting.queuetests.skipped=false" - elif [[ "${arg}" == "pipelinetestrun" ]] ; then - mvn_args="verify -Dsting.pipelinetests.skipped=false -Dsting.pipelinetests.run=true" + elif [[ "${arg}" == "queuetestrun" ]] ; then + mvn_args="verify -Dsting.queuetests.skipped=false -Dsting.queuetests.run=true" elif [[ "${arg}" == "fasttest" ]] ; then mvn_args="verify -Dsting.committests.skipped=false -pl private/gatk-private -am -Dresource.bundle.skip=true" diff --git a/pom.xml b/pom.xml index d899506b5..b8802cb88 100644 --- a/pom.xml +++ b/pom.xml @@ -13,7 +13,7 @@ org.broadinstitute.sting 
sting-root - 3.0 + 3.1 public/sting-root @@ -48,7 +48,7 @@ true ${sting.packagecommittests.skipped} ${sting.packagecommittests.skipped} - ${sting.packagecommittests.skipped} + ${sting.packagecommittests.skipped} true true @@ -62,7 +62,7 @@ true ${sting.serialcommittests.skipped} ${sting.serialcommittests.skipped} - ${sting.serialcommittests.skipped} + ${sting.serialcommittests.skipped} true true @@ -340,6 +340,18 @@ org.broadinstitute.sting:*:tar.bz2:example-resources + + + + *:* + + META-INF/services/javax.annotation.processing.Processor + + + @@ -561,7 +573,7 @@ ${sting.packagetests.artifactId} ${project.build.testOutputDirectory} ${project.basedir} - ${sting.pipelinetests.run} + ${sting.queuetests.run} ${maven.surefire.debug} ${maven.failsafe.debug} @@ -613,7 +625,7 @@ - package-pipelinetests + package-queuetests integration-test verify @@ -622,11 +634,11 @@ verify - ${project.build.directory}/invoker-reports/pipeline/${it.test} - ${sting.packagepipelinetests.skipped} + ${project.build.directory}/invoker-reports/queuetest/${it.test} + ${sting.packagequeuetests.skipped} true - ${sting.packagepipelinetests.skipped} + ${sting.packagequeuetests.skipped} diff --git a/protected/gatk-protected/pom.xml b/protected/gatk-protected/pom.xml index 26aabd187..74b5de077 100644 --- a/protected/gatk-protected/pom.xml +++ b/protected/gatk-protected/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-aggregator - 3.0 + 3.1 ../.. 
@@ -99,7 +99,7 @@ package-knowledgebasetests - package-pipelinetests + package-queuetests diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java index a90f555a1..671fd88da 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java @@ -53,13 +53,10 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.genotyper.MostLikelyAllele; import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; import org.broadinstitute.sting.utils.QualityUtils; -import org.broadinstitute.variant.variantcontext.Genotype; -import org.broadinstitute.variant.variantcontext.GenotypesContext; import org.broadinstitute.variant.vcf.VCFHeaderLineType; import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; import org.broadinstitute.sting.utils.pileup.PileupElement; @@ -81,7 +78,7 @@ import java.util.*; *

Caveat

*

The Fisher Strand test may not be calculated for certain complex indel cases or for multi-allelic sites.

*/ -public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation { +public class FisherStrand extends StrandBiasTest implements StandardAnnotation, ActiveRegionBasedAnnotation { private final static boolean ENABLE_DEBUGGING = false; private final static Logger logger = Logger.getLogger(FisherStrand.class); @@ -100,7 +97,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat return null; if ( vc.hasGenotypes() ) { - final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes() ); + final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT ); if ( tableFromPerSampleAnnotations != null ) { return pValueForBestTable(tableFromPerSampleAnnotations, null); } @@ -116,8 +113,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat else if (stratifiedPerReadAlleleLikelihoodMap != null) { // either SNP with no alignment context, or indels: per-read likelihood map needed final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc); -// logger.info("VC " + vc); -// printTable(table, 0.0); + //logger.info("VC " + vc); + //printTable(table, 0.0); return pValueForBestTable(table, null); } else @@ -126,45 +123,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat return null; } - /** - * Create the FisherStrand table by retrieving the per-sample strand bias annotation and adding them together - * @param genotypes the genotypes from which to pull out the per-sample strand bias annotation - * @return the table used for the FisherStrand p-value calculation, will be null if none of the genotypes contain the per-sample SB annotation - */ - private int[][] getTableFromSamples( final GenotypesContext genotypes ) { - if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); } - - final int[] sbArray = {0,0,0,0}; // 
reference-forward-reverse -by- alternate-forward-reverse - boolean foundData = false; - - for( final Genotype g : genotypes ) { - if( g.isNoCall() || ! g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) - continue; - - foundData = true; - final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME); - final int[] data = encodeSBBS(sbbsString); - if ( passesMinimumThreshold(data) ) { - for( int index = 0; index < sbArray.length; index++ ) { - sbArray[index] += data[index]; - } - } - } - - return ( foundData ? decodeSBBS(sbArray) : null ); - } - - /** - * Does this strand data array pass the minimum threshold for inclusion? - * - * @param data the array - * @return true if it passes the minimum threshold, false otherwise - */ - private static boolean passesMinimumThreshold(final int[] data) { - // the ref and alt totals must each be greater than MIN_COUNT - return data[0] + data[1] > MIN_COUNT && data[2] + data[3] > MIN_COUNT; - } - /** * Create an annotation for the highest (i.e., least significant) p-value of table1 and table2 * @@ -190,7 +148,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat * @param pValue * @return a hash map from FS -> phred-scaled pValue */ - private Map annotationForOneTable(final double pValue) { + protected Map annotationForOneTable(final double pValue) { final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(Math.max(pValue, MIN_PVALUE))); // prevent INFINITYs return Collections.singletonMap(FS, value); } @@ -218,36 +176,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat list.add(table[1][1]); return list; } - - /** - * Helper function to parse the genotype annotation into the SB annotation array - * @param string the string that is returned by genotype.getAnnotation("SB") - * @return the array used by the per-sample Strand Bias annotation - */ - private static int[] encodeSBBS( final 
String string ) { - final int[] array = new int[4]; - final StringTokenizer tokenizer = new StringTokenizer(string, ",", false); - for( int index = 0; index < 4; index++ ) { - array[index] = Integer.parseInt(tokenizer.nextToken()); - } - return array; - } - - /** - * Helper function to turn the SB annotation array into the FisherStrand table - * @param array the array used by the per-sample Strand Bias annotation - * @return the table used by the FisherStrand annotation - */ - private static int[][] decodeSBBS( final int[] array ) { - if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); } - final int[][] table = new int[2][2]; - table[0][0] = array[0]; - table[0][1] = array[1]; - table[1][0] = array[2]; - table[1][1] = array[3]; - return table; - } - private Double pValueForContingencyTable(int[][] originalTable) { final int[][] normalizedTable = normalizeContingencyTable(originalTable); @@ -419,7 +347,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat final GATKSAMRecord read = el.getKey(); updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt); } - if ( passesMinimumThreshold(myTable) ) + if ( passesMinimumThreshold(myTable, MIN_COUNT) ) copyToMainTable(myTable, table); } @@ -464,7 +392,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt); } - if ( passesMinimumThreshold(myTable) ) + + if ( passesMinimumThreshold( myTable, MIN_COUNT ) ) copyToMainTable(myTable, table); } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasTest.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasTest.java new file mode 100644 index 000000000..2974ae746 --- /dev/null +++ 
b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasTest.java @@ -0,0 +1,128 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. 
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. 
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. 
ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. 
Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.variant.variantcontext.Genotype; +import org.broadinstitute.variant.variantcontext.GenotypesContext; + +import java.util.*; + +/** + * Class of tests to detect strand bias. + */ +public abstract class StrandBiasTest extends InfoFieldAnnotation { + /** + * Create the contingency table by retrieving the per-sample strand bias annotation and adding them together + * @param genotypes the genotypes from which to pull out the per-sample strand bias annotation + * @param minCount minimum threshold for the sample strand bias counts for each ref and alt. 
+ * If both ref and alt counts are above minCount the whole sample strand bias is added to the resulting table + * @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation + */ + protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) { + if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); } + + final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse + boolean foundData = false; + + for( final Genotype g : genotypes ) { + if( g.isNoCall() || ! g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) ) + continue; + + foundData = true; + final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME); + final int[] data = encodeSBBS(sbbsString); + if ( passesMinimumThreshold(data, minCount) ) { + for( int index = 0; index < sbArray.length; index++ ) { + sbArray[index] += data[index]; + } + } + } + + return ( foundData ? decodeSBBS(sbArray) : null ); + } + /** + * Does this strand data array pass the minimum threshold for inclusion? 
+ * + * @param data the array + * @param minCount the minimum threshold of counts in the array + * @return true if it passes the minimum threshold, false otherwise + */ + protected static boolean passesMinimumThreshold(final int[] data, final int minCount) { + // the ref and alt totals must each be greater than minCount + return data[0] + data[1] > minCount && data[2] + data[3] > minCount; + } + + /** + * Helper function to parse the genotype annotation into the SB annotation array + * @param string the string that is returned by genotype.getAnnotation("SB") + * @return the array used by the per-sample Strand Bias annotation + */ + private static int[] encodeSBBS( final String string ) { + final int[] array = new int[4]; + final StringTokenizer tokenizer = new StringTokenizer(string, ",", false); + for( int index = 0; index < 4; index++ ) { + array[index] = Integer.parseInt(tokenizer.nextToken()); + } + return array; + } + + /** + * Helper function to turn the SB annotation array into a contingency table + * @param array the array used by the per-sample Strand Bias annotation + * @return the 2x2 contingency table used by the strand bias tests + */ + private static int[][] decodeSBBS( final int[] array ) { + if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); } + final int[][] table = new int[2][2]; + table[0][0] = array[0]; + table[0][1] = array[1]; + table[1][0] = array[2]; + table[1][1] = array[3]; + return table; + } +} diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatio.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatio.java new file mode 100644 index 000000000..e500a725a --- /dev/null +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatio.java @@ -0,0 +1,150 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD
INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. 
LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation; +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.variant.variantcontext.VariantContext; +import org.broadinstitute.variant.vcf.VCFHeaderLineType; +import org.broadinstitute.variant.vcf.VCFInfoHeaderLine; + +import java.util.*; + +/** + * Symmetric Odds Ratio to detect strand bias + * + *

Odds Ratios in the 2x2 contingency table below are R = (X[0][0] * X[1][1]) / (X[0][1] * X[1][0]) and its inverse + * + strand - strand + * Ref X[0][0] X[0][1] + * Alt X[1][0] X[1][1] + * The sum R + 1/R is used to detect a difference in strand bias for ref and for alt (the sum makes it symmetric): + * A high value is indicative of a large difference where one entry is very small compared to the others. + *

+ */ +public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBasedAnnotation { + private final static double AUGMENTATION_CONSTANT = 0.1; + private static final int MIN_COUNT = 0; + + private static final String SOR = "SOR"; + + public Map annotate(final RefMetaDataTracker tracker, + final AnnotatorCompatible walker, + final ReferenceContext ref, + final Map stratifiedContexts, + final VariantContext vc, + final Map stratifiedPerReadAlleleLikelihoodMap) { + if ( !vc.isVariant() ) + return null; + + if ( vc.hasGenotypes() ) { + final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT ); + if ( tableFromPerSampleAnnotations != null ) { + final double ratio = symmetricOddsRatio(tableFromPerSampleAnnotations); + return annotationForOneTable(ratio); + } + } + return null; + } + + /** + * Computes the symmetric odds ratio of a table after augmentation. + * Augmentation avoids quotient by zero. + * + * @param originalTable The table before augmentation + * @return the symmetric odds ratio + */ + final protected double symmetricOddsRatio(final int[][] originalTable) { + final double[][] augmentedTable = augmentContingencyTable(originalTable); + + double ratio = 0; + + ratio += (augmentedTable[0][0] / augmentedTable[0][1]) * (augmentedTable[1][1] / augmentedTable[1][0]); + ratio += (augmentedTable[0][1] / augmentedTable[0][0]) * (augmentedTable[1][0] / augmentedTable[1][1]); + + return ratio; + } + + + /** + * Adds the small value AUGMENTATION_CONSTANT to all the entries of the table. 
+ * + * @param table the table to augment + * @return the augmented table + */ + private static double[][] augmentContingencyTable(final int[][] table) { + double[][] augmentedTable = new double[2][2]; + for ( int i = 0; i < 2; i++ ) { + for ( int j = 0; j < 2; j++ ) + augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT; + } + + return augmentedTable; + } + + /** + * Returns an annotation result given a ratio + * + * @param ratio the symmetric odds ratio of the contingency table + * @return a hash map from SOR + */ + protected Map annotationForOneTable(final double ratio) { + final Object value = String.format("%.3f", ratio); + return Collections.singletonMap(SOR, value); + } + + public List getDescriptions() { + return Collections.singletonList(new VCFInfoHeaderLine(SOR, 1, VCFHeaderLineType.Float, "Symmetric Odds Ratio of 2x2 contingency table to detect strand bias")); + } + + public List getKeyNames() { + return Collections.singletonList(SOR); + } +} diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java index 8a35ccb05..8b37e265d 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java @@ -155,4 +155,8 @@ public class GraphBasedLikelihoodCalculationEngine implements LikelihoodCalculat } } } + + @Override + public void close() { + } } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 
91e763a0d..b6eddab51 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -289,6 +289,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In @Argument(fullName="dontRecoverDanglingTails", shortName="dontRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false) protected boolean dontRecoverDanglingTails = false; + @Advanced + @Argument(fullName="consensus", shortName="consensus", doc="In 1000G consensus mode. Inject all provided alleles to the assembly graph but don't forcibly genotype all of them.", required = false) + protected boolean consensusMode = false; + // ----------------------------------------------------------------------------------------------- // general advanced arguments to control haplotype caller behavior // ----------------------------------------------------------------------------------------------- @@ -575,7 +579,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In // initialize the UnifiedGenotyper Engine which is used to call into the exact model final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user // HC GGA mode depends critically on EMIT_ALL_SITES being set for the UG engine - UAC.OutputMode = SCAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES + UAC.OutputMode = SCAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) ? 
UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES : UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY); @@ -598,6 +602,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger)); } + if( SCAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) && consensusMode ) { + throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other."); + } + // initialize the output VCF header final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit()); @@ -878,7 +886,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In regionForGenotyping.getLocation(), getToolkit().getGenomeLocParser(), metaDataTracker, - activeAllelesToGenotype, emitReferenceConfidence() ); + ( consensusMode ? Collections.emptyList() : activeAllelesToGenotype ), + emitReferenceConfidence() ); // TODO -- must disable if we are doing NCT, or set the output type of ! presorted if ( bamWriter != null ) { @@ -1051,7 +1060,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In referenceConfidenceModel.close(); //TODO remove the need to call close here for debugging, the likelihood output stream should be managed //TODO (open & close) at the walker, not the engine. 
- //likelihoodCalculationEngine.close(); + likelihoodCalculationEngine.close(); logger.info("Ran local assembly on " + result + " active regions"); } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 0626f2268..04e64186f 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -89,4 +89,6 @@ public interface LikelihoodCalculationEngine { */ public Map computeReadLikelihoods(AssemblyResultSet assemblyResultSet, Map> perSampleReadList); + + public void close(); } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java index 8dfeed987..b53445933 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java @@ -213,16 +213,18 @@ public abstract class LocalAssemblyEngine { final Map assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) { // add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes final Set returnHaplotypes = new LinkedHashSet<>(); - returnHaplotypes.add( refHaplotype ); final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef(); + final ArrayList finders = new ArrayList<>(graphs.size()); for( final SeqGraph graph : graphs ) { final SeqVertex source = 
graph.getReferenceSourceVertex(); final SeqVertex sink = graph.getReferenceSinkVertex(); if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph); final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph,source,sink); + finders.add(haplotypeFinder); final Iterator bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph); + while (bestHaplotypes.hasNext()) { final KBestHaplotype kBestHaplotype = bestHaplotypes.next(); final Haplotype h = kBestHaplotype.haplotype(); @@ -256,9 +258,19 @@ public abstract class LocalAssemblyEngine { } } - - if ( returnHaplotypes.size() < returnHaplotypes.size() ) - logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc); + // Make sure that the ref haplotype is amongst the return haplotypes and calculate its score as + // the first returned by any finder. 
+ if (!returnHaplotypes.contains(refHaplotype)) { + double refScore = Double.NaN; + for (final KBestHaplotypeFinder finder : finders) { + final double candidate = finder.score(refHaplotype); + if (Double.isNaN(candidate)) continue; + refScore = candidate; + break; + } + refHaplotype.setScore(refScore); + returnHaplotypes.add(refHaplotype); + } if( debug ) { if( returnHaplotypes.size() > 1 ) { diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java index 55a1c5dba..7165e61a5 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java @@ -90,6 +90,18 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation return new LoglessPairHMM(); else return new CnyPairHMM(); + case VECTOR_LOGLESS_CACHING: + try + { + return new VectorLoglessPairHMM(); + } + catch(UnsatisfiedLinkError ule) + { + logger.debug("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING"); + return new LoglessPairHMM(); + } + case DEBUG_VECTOR_LOGLESS_CACHING: + return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING); case ARRAY_LOGLESS: if (noFpga || !CnyPairHMM.isAvailable()) return new ArrayLoglessPairHMM(); @@ -162,10 +174,13 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation } } + @Override public void close() { if ( likelihoodsStream != null ) likelihoodsStream.close(); + pairHMMThreadLocal.get().close(); } + private void writeDebugLikelihoods(final GATKSAMRecord processedRead, final Haplotype haplotype, final double 
log10l){ if ( WRITE_LIKELIHOODS_TO_FILE ) { likelihoodsStream.printf("%s %s %s %s %s %s %f%n", @@ -316,8 +331,8 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation int X_METRIC_LENGTH = 0; for( final Map.Entry> sample : perSampleReadList.entrySet() ) { for( final GATKSAMRecord read : sample.getValue() ) { - final int readLength = read.getReadLength(); - if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; } + final int readLength = read.getReadLength(); + if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; } } } int Y_METRIC_LENGTH = 0; @@ -327,7 +342,12 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation } // initialize arrays to hold the probabilities of being in the match, insertion and deletion cases - pairHMMThreadLocal.get().initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH); + pairHMMThreadLocal.get().initialize(haplotypes, perSampleReadList, X_METRIC_LENGTH, Y_METRIC_LENGTH); + } + + private void finalizePairHMM() + { + pairHMMThreadLocal.get().finalizeRegion(); } @@ -341,12 +361,14 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation // Add likelihoods for each sample's reads to our stratifiedReadMap final Map stratifiedReadMap = new LinkedHashMap<>(); for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) { - // evaluate the likelihood of the reads given those haplotypes + // evaluate the likelihood of the reads given those haplotypes final PerReadAlleleLikelihoodMap map = computeReadLikelihoods(haplotypes, sampleEntry.getValue()); map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE); stratifiedReadMap.put(sampleEntry.getKey(), map); } + //Used mostly by the JNI implementation(s) to free arrays + finalizePairHMM(); return stratifiedReadMap; } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java 
b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java index b8dba7b86..d5d424ca9 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java @@ -79,4 +79,9 @@ public class RandomLikelihoodCalculationEngine implements LikelihoodCalculationE return result; } + + @Override + public void close() { + } + } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java index 8fba6c9d5..d981e6eeb 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java @@ -45,21 +45,21 @@ */ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; -import java.util.ArrayList; -import java.util.Collection; -import java.util.PriorityQueue; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.*; /** * K-best sub-haplotype finder that selects the best solutions out of a collection of sub-haplotype finders. * * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> */ -class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder { +class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder { /** * Collection of subFinders that provided the actual solutions. 
*/ - private final Collection subFinders; + protected final Collection subFinders; /** * Flag indicating whether the sub-finders have been processed or not. @@ -89,17 +89,53 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder { * Creates a new aggregated sub-haplotype finder given its sub-finders. * @param finders set of sub-finders. */ - public AggregatedSubHaplotypeFinder(final Collection finders) { + public AggregatedSubHaplotypeFinder(final Collection finders) { if (finders == null) throw new IllegalArgumentException("finder collection cannot be null"); this.subFinders = finders; } + @Override + public String id() { + final StringBuilder resultBuilder = new StringBuilder(); + for (final KBestSubHaplotypeFinder subFinder : subFinders) + resultBuilder.append(subFinder.id()); + return resultBuilder.toString(); + } + + @Override + public String label() { + return "<OR>"; + } + + @Override + public Set> subFinderLabels() { + final int subFinderCount = subFinders.size(); + final String edgeCost = String.format("%.2f",-Math.log10((double) subFinderCount)); + final Set> result = new LinkedHashSet<>(subFinderCount); + for (final KBestSubHaplotypeFinder subFinder : subFinders) + result.add(new Pair<>(subFinder,edgeCost)); + return result; + } + @Override public int getCount() { processSubFindersIfNeeded(); return count; } + @Override + public double score(final byte[] bases, final int offset, final int length) { + if (bases == null) throw new IllegalArgumentException("bases cannot be null"); + if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative"); + if (length < 0) throw new IllegalArgumentException("the length cannot be negative"); + if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size"); + for (final KBestSubHaplotypeFinder subFinder : subFinders) { + final double score = subFinder.score(bases,offset,length); + if (!Double.isNaN(score)) return score; + } 
+ return Double.NaN; + } + private void processSubFindersIfNeeded() { if (processedSubFinders) return; @@ -144,6 +180,11 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder { return rankedSubHaplotype.get(k); } + @Override + public boolean isReference() { + return false; + } + /** * Custom implementation of {@link KBestHaplotype} to encapsulate sub-finder results. */ @@ -167,7 +208,7 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder { } @Override - public int score() { + public double score() { return result.score(); } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java index 36216bdd2..3abd2e4bc 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java @@ -52,6 +52,7 @@ import com.google.java.contract.Requires; import org.apache.commons.lang.ArrayUtils; import org.apache.log4j.Logger; import org.jgrapht.EdgeFactory; +import org.jgrapht.alg.CycleDetector; import org.jgrapht.graph.DefaultDirectedGraph; import java.io.File; @@ -146,6 +147,39 @@ public class BaseGraph extends Default return set; } + /** + * Convert this kmer graph to a simple sequence graph. + * + * Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer + * graph. 
Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence + * + * @return a newly allocated SequenceGraph + */ + public SeqGraph convertToSequenceGraph() { + + final SeqGraph seqGraph = new SeqGraph(kmerSize); + final Map vertexMap = new HashMap<>(); + + + // create all of the equivalent seq graph vertices + for ( final V dv : vertexSet() ) { + final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv))); + sv.setAdditionalInfo(dv.additionalInfo()); + vertexMap.put(dv, sv); + seqGraph.addVertex(sv); + } + + // walk through the nodes and connect them to their equivalent seq vertices + for( final E e : edgeSet() ) { + final SeqVertex seqInV = vertexMap.get(getEdgeSource(e)); + final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e)); + //logger.info("Adding edge " + seqInV + " -> " + seqOutV); + seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity())); + } + + return seqGraph; + } + /** * Pull out the additional sequence implied by traversing this node in the graph * @param v the vertex from which to pull out the additional base sequence @@ -712,4 +746,13 @@ public class BaseGraph extends Default if (!containsVertex(vertex)) return false; return true; } + + /** + * Checks for the presence of directed cycles in the graph. + * + * @return {@code true} if the graph has cycles, {@code false} otherwise. 
+ */ + public boolean hasCycles() { + return new CycleDetector<>(this).detectCycles(); + } } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java index 69b42cee6..71ce9929b 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java @@ -122,7 +122,7 @@ public class CommonSuffixSplitter { } else { incomingTarget = prefixV; graph.addVertex(prefixV); - graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 0)); + graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 1)); edgesToRemove.add(out); } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java index ae270ed7b..5ceaa29c5 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java @@ -45,6 +45,11 @@ */ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.Collections; +import java.util.Set; + /** * Represents a trivial k-best sub haplotype finder with no solutions. 
* @@ -65,6 +70,21 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder { protected DeadEndKBestSubHaplotypeFinder() { } + @Override + public String id() { + return ""; + } + + @Override + public String label() { + return "<DEAD>"; + } + + @Override + public Set> subFinderLabels() { + return Collections.emptySet(); + } + @Override public int getCount() { return 0; @@ -77,4 +97,18 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder { else throw new IllegalArgumentException("k cannot be equal or greater to the haplotype count"); } + + @Override + public boolean isReference() { + return false; + } + + @Override + public double score(final byte[] bases, final int offset, final int length) { + if (bases == null) throw new IllegalArgumentException("bases cannot be null"); + if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative"); + if (length < 0) throw new IllegalArgumentException("the length cannot be negative"); + if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size"); + return Double.NaN; + } } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java index 0e50ec02b..1a642d200 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java @@ -45,6 +45,12 @@ */ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.Collections; +import java.util.Set; + /** * 
Trivial k-best sub-haplotype finder where the source and sink vertex are the same one. * @@ -67,6 +73,21 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder { singleHaplotypePath = new MyBestHaplotypePath(graph,vertex); } + @Override + public String id() { + return "v" + singleHaplotypePath.head().getId(); + } + + @Override + public String label() { + return singleHaplotypePath.head().getSequenceString(); + } + + @Override + public Set> subFinderLabels() { + return Collections.emptySet(); + } + @Override public int getCount() { return 1; @@ -81,6 +102,24 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder { return singleHaplotypePath; } + @Override + public boolean isReference() { + return singleHaplotypePath.isReference(); + } + + @Override + public double score(final byte[] bases, final int offset, final int length) { + if (bases == null) throw new IllegalArgumentException("bases cannot be null"); + if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative"); + if (length < 0) throw new IllegalArgumentException("the length cannot be negative"); + if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size"); + final byte[] vertexBases = singleHaplotypePath.head().getSequence(); + if (length != vertexBases.length) + return Double.NaN; + else + return Utils.equalRange(bases, offset, vertexBases, 0, length)? 0 : Double.NaN; + } + /** * Custom extension of {@link KBestHaplotype} that implements the single solution behaviour. 
*/ @@ -120,7 +159,7 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder { } @Override - public int score() { + public double score() { return 0; } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java index ca22f17ec..d1b5bd614 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java @@ -68,7 +68,7 @@ public abstract class KBestHaplotype implements Comparable { * * @return 0 or greater. */ - public abstract int score(); + public abstract double score(); /** * Indicates whether this result is the reference haplotype. @@ -122,6 +122,8 @@ public abstract class KBestHaplotype implements Comparable { public Haplotype haplotype() { if (haplotype != null) return haplotype; haplotype = new Haplotype(bases(),isReference()); + if (score() > 0) + throw new IllegalStateException("score cannot be greater than 0: " + score()); haplotype.setScore(score()); return haplotype; } @@ -152,7 +154,35 @@ public abstract class KBestHaplotype implements Comparable { */ public int compareTo(final KBestHaplotype other) { if (other == null) throw new IllegalArgumentException("the other object cannot be null"); - return - 1 * (score() - other.score()); + return - Double.compare(score(), other.score()); + } + + @Override + public int hashCode() { + return haplotype().hashCode(); + } + + @Override + public boolean equals(final Object other) { + return other == null ? false: (other instanceof KBestHaplotype ? 
equals((KBestHaplotype)other) : false); + } + + @Override + public String toString() { + return haplotype().toString() + " Score = " + score(); + } + + /** + * Checks whether both solutions are equal. + *

+ * Both solutions are considered equal when the underlying haplotypes are equal. The path on the respective + * graph might differ though. + *

+ * + * @return {@code true} iff both haplotypes are the same (considering the ref state). + */ + protected boolean equals(final KBestHaplotype other) { + return haplotype().equals(other.haplotype(),false); } /** diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java index f27cca12c..5e971792c 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java @@ -45,8 +45,13 @@ */ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; +import org.broadinstitute.sting.utils.collections.Pair; +import org.broadinstitute.sting.utils.haplotype.Haplotype; import org.jgrapht.alg.CycleDetector; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.PrintWriter; import java.util.*; /** @@ -233,7 +238,7 @@ public class KBestHaplotypeFinder extends AbstractList implement } @Override - public KBestHaplotype get(int index) { + public KBestHaplotype get(final int index) { if (index < 0 || index >= size()) throw new IndexOutOfBoundsException(); return topFinder.getKBest(index); @@ -305,28 +310,28 @@ public class KBestHaplotypeFinder extends AbstractList implement /** * Creates a finder from a vertex. * - * @param source the source vertex for the finder. + * @param vertex the source vertex for the finder. * * @return never {@code null}, perhaps a finder that returns no haplotypes though. 
*/ - protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex source) { - KBestSubHaplotypeFinder node = finderByVertex.get(source); - if (node == null) { - if (sinks.contains(source)) - node = new EmptyPathHaplotypeFinderNode(graph,source); + protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex vertex) { + KBestSubHaplotypeFinder finder = finderByVertex.get(vertex); + if (finder == null) { + if (sinks.contains(vertex)) + finder = new EmptyPathHaplotypeFinderNode(graph,vertex); else { - final Set outgoingEdges = graph.outgoingEdgesOf(source); + final Set outgoingEdges = graph.outgoingEdgesOf(vertex); if (outgoingEdges.isEmpty()) - node = DeadEndKBestSubHaplotypeFinder.INSTANCE; + finder = DeadEndKBestSubHaplotypeFinder.INSTANCE; else { final Map undeadChildren = createChildrenFinders(outgoingEdges); - node = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE : - new RecursiveSubHaplotypeFinder(source,undeadChildren); + finder = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE : + new RecursiveSubHaplotypeFinder(graph,vertex,undeadChildren); } } - finderByVertex.put(source, node); + finderByVertex.put(vertex, finder); } - return node; + return finder; } /** @@ -340,7 +345,7 @@ public class KBestHaplotypeFinder extends AbstractList implement * @return never {@code null}, perhaps an empty map if there is no children with valid paths to any sink for this * finder. */ - private Map createChildrenFinders(Set baseEdges) { + private Map createChildrenFinders(final Set baseEdges) { final Map result = new LinkedHashMap<>(baseEdges.size()); for (final BaseEdge edge : baseEdges) { final KBestSubHaplotypeFinder targetFinder = createVertexFinder(graph.getEdgeTarget(edge)); @@ -349,4 +354,156 @@ public class KBestHaplotypeFinder extends AbstractList implement } return result; } + + /** + * Print a DOT representation of search graph. + * + * @param out character stream printer where to print the DOT representation to. 
+ * + * @throws IllegalArgumentException if {@code out} is {@code null}. + */ + public void printDOT(final PrintWriter out) { + if (out == null) + throw new IllegalArgumentException("the out writer cannot be null"); + out.println("digraph {"); + out.println(" rankdir = LR"); + out.println(" node [shape=box, margin=0.01]"); + out.println(" subgraph cluster_dummy { style = invis; x [label=\"\",shape=none,margin=0] }"); + final StringBuilder referenceCluster = new StringBuilder(1000); + + referenceCluster.append(" subgraph cluster_ref {\n"); + referenceCluster.append(" node [penwidth=2]\n"); + for (final KBestSubHaplotypeFinder finder : finderByVertex.values() ) { + final String id = finder.id(); + final String line = String.format(" %s [label=<%s>]",id,finder.label()); + if (finder.isReference()) + referenceCluster.append(" ").append(line).append('\n'); + else + out.println(line); + } + referenceCluster.append(" }"); + out.println(referenceCluster.toString()); + + for (final KBestSubHaplotypeFinder finder : finderByVertex.values()) + for (final Pair subFinderLabel : finder.subFinderLabels()) { + final KBestSubHaplotypeFinder subFinder = subFinderLabel.getFirst(); + + final String edgeLabel = subFinderLabel.getSecond(); + out.println(String.format(" %s -> %s [label=%s]",finder.id(),subFinder.id(),edgeLabel)); + } + out.println("}"); + } + + /** + * Print a DOT representation of search graph. + * + * @param file file where to print the DOT representation to. + * + * @throws IllegalArgumentException if {@code file} is {@code null}. + * @throws FileNotFoundException if {@code file} cannot be created or written. + * @throws IllegalStateException if there was some trouble when writing the DOT representation. 
+ */ + public void printDOT(final File file) throws FileNotFoundException { + if (file == null) + throw new IllegalArgumentException("the output file cannot be null"); + final PrintWriter out = new PrintWriter(file); + printDOT(out); + if (out.checkError()) + throw new IllegalStateException("error occurred while writing k-best haplotype search graph into file '" + + file.getAbsolutePath() + "'"); + out.close(); + } + + /** + * Print a DOT representation of search graph. + * + * @param fileName name of the file where to print the DOT representation to. + * + * @throws IllegalArgumentException if {@code fileName} is {@code null}. + * @throws FileNotFoundException if no file named {@code fileName} cannot be created or written. + * @throws IllegalStateException if there was some trouble when writing the DOT representation. + */ + @SuppressWarnings("unused") // Available for debugging purposes. + public void printDOTFile(final String fileName) throws FileNotFoundException { + printDOT(new File(fileName)); + } + + /** + * Get the score of a give sequence of bases + * + * @param bases the base sequence. + * + * @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible + * throw this finder. + */ + public double score(final byte[] bases) { + return topFinder.score(bases,0,bases.length); + } + + /** + * Get the score of a give sequence of bases + * + * @param haplotype the haplotype. + * + * @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible + * throw this finder. + */ + public double score(final Haplotype haplotype) { + return score(haplotype.getBases()); + } + + + /** + * Returns a unique list of haplotypes solutions. + *

+ * The result will not contain more than one haplotype with the same base sequence. The solution with the best + * score is returned. + *

+ *

+ * This makes sense when there is more than one possible path through the graph to create the same haplotype. + *

+ *

+ * The resulting list is sorted by the score with more likely haplotype search results first. + *

+ * + * @param maxSize maximum number of unique results to return. + * + * @throws IllegalArgumentException if {@code maxSize} is negative. + * + * @return never {@code null}, perhaps an empty list. + */ + public List unique(final int maxSize) { + if (maxSize < 0) throw new IllegalArgumentException("maxSize cannot be negative"); + final int requiredCapacity = Math.min(maxSize,size()); + final Set haplotypes = new HashSet<>(requiredCapacity); + int resultSize = 0; + final List result = new ArrayList<>(requiredCapacity); + for (final KBestHaplotype kbh : this) { + if (haplotypes.add(kbh.haplotype())) { + result.add(kbh); + if (resultSize == maxSize) break; + } + } + return result; + } + + /** + * Returns a unique list of haplotypes solutions. + * + *

+ * The result will not contain more than one haplotype with the same base sequence. The solution with the best + * score is returned. + *

+ *

+ * This makes sense when there is more than one possible path through the graph to create the same haplotype. + *

+ *

+ * The resulting list is sorted by the score with more likely haplotype search results first. + *

+ * + * @return never {@code null}, perhaps an empty list. + */ + public List unique() { + return unique(size()); + } } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java index 9c185b52c..eb3360500 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java @@ -45,6 +45,10 @@ */ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.Set; + /** * Common interface for K-Best sub-haplotype finders. * @@ -52,6 +56,29 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; */ interface KBestSubHaplotypeFinder { + /** + * Return an unique id for this sub-haplotype finder to be used when outputting diagrams. + * + * @return never {@code null}. + */ + public String id(); + + /** + * Returns a label with human readable representation of this finder. + * + *

This is used when generating a diagram to illustrate the search space and costs

+ * + * @return never {@code null}. + */ + public String label(); + + /** + * Returns the set of subfinder from this finder together with a label for the connection with the current finder. + * + *

The label is used when generating a diagram to illustrate the search space and costs

+ */ + public Set> subFinderLabels(); + /** * Returns the total number of possible sub-haplotypes. * @return 0 or greater. @@ -67,5 +94,22 @@ interface KBestSubHaplotypeFinder { * * @return never {@code null}. */ - public abstract KBestHaplotype getKBest(int k); + public KBestHaplotype getKBest(int k); + + /** + * Checks whether the top vertex for this finder is a reference haplotype vertex. + * + * @return {@code true} iff the top vertex for this finder is a reference vertex. + */ + public boolean isReference(); + + /** + * Calculate the score of a sequence of bases. + * + * @param bases array containing the query base sequence. + * @param offset first position of the query base sequence in {@code bases} . + * @param length length of the query base sequence. + * @return {@link Double#NaN} if there is no score for this sequence, otherwise a valid score value. + */ + public double score(byte[] bases, int offset, int length); } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java index 978d83eb4..657ecfd85 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java @@ -49,20 +49,24 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import java.util.PriorityQueue; /** - * edge class for connecting nodes in the graph that tracks some per-sample information - * + * Edge class for connecting nodes in the graph that tracks some per-sample information. + *

* This class extends BaseEdge with the additional functionality of tracking the maximum * multiplicity seen within any single sample. The workflow for using this class is: - * - * MultiSampleEdge e = new MultiSampleEdge(ref, 1) - * e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1 - * e.getPruningMultiplicity() // = 1 - * e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2 - * e.getPruningMultiplicity() // = 2 - * e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2 - * e.getPruningMultiplicity() // = 2 - * e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3 - * e.getPruningMultiplicity() // = 3 + *

+ *
+ * {@code
+ *      MultiSampleEdge e = new MultiSampleEdge(ref, 1)
+ *      e.incMultiplicity(1)              // total is 2, per sample is 2, max per sample is 1
+ *      e.getPruningMultiplicity()        // = 1
+ *      e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
+ *      e.getPruningMultiplicity()        // = 2
+ *      e.incMultiplicity(3)              // total is 5, per sample is 3, max per sample is 2
+ *      e.getPruningMultiplicity()        // = 2
+ *      e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
+ *      e.getPruningMultiplicity()        // = 3
+ * }
+ * 
*/ public class MultiSampleEdge extends BaseEdge { private int currentSingleSampleMultiplicity; diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java index 0fbbfdc64..fe85befef 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java @@ -45,9 +45,10 @@ */ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Map; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.collections.Pair; + +import java.util.*; /** * General recursive sub-haplotype finder. @@ -67,7 +68,11 @@ import java.util.Map; * * @author Valentin Ruano-Rubio <valentin@broadinstitute.org> */ -class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder { +class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder { + + + private final SeqVertex vertex; + private final boolean isReference; /** * Creates a recursive sub-haplotype finder give the target graph, first vertex and all possible outgoing edges @@ -80,20 +85,83 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder { * @param vertex first vertex for all sub-haplotype solutions provided by this finder * @param children map from outgoing edge to the corresponding sub-sub-haplotype finder. 
*/ - public RecursiveSubHaplotypeFinder(final SeqVertex vertex, + public RecursiveSubHaplotypeFinder(final SeqGraph graph, final SeqVertex vertex, final Map children) { super(createChildFinderCollection(vertex, children)); + this.vertex = vertex; + this.isReference = graph.isReferenceNode(vertex); } - private static Collection createChildFinderCollection(final SeqVertex vertex, final Map finders) { + /** + * Wraps the descendant vertices finders in order to take advantage of the {@link AggregatedSubHaplotypeFinder} + * common code. + *

+ * Automatically calibrates the edge score (cost) so that it takes into account the total across all edges. + *

+ * + * @param vertex the parent vertex. + * @param finders the child vertices indexed by the connecting edge. + * @return never {@code null} but potentially an empty collection if there is child returning some sub-haplotype + * solution. + */ + private static Collection createChildFinderCollection(final SeqVertex vertex, + final Map finders) { if (finders == null) throw new IllegalArgumentException("the edge to child map cannot be null"); - final Collection result = new ArrayList<>(finders.size()); - for (final Map.Entry e : finders.entrySet()) - result.add(new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue())); + final ArrayList result = new ArrayList<>(finders.size()); + for (final Map.Entry e : finders.entrySet()) { + final EdgeSubHaplotypeFinder subFinder = new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue()); + if (subFinder.getCount() == 0) continue; + result.add(subFinder); + } + if (result.size() == 0) + return Collections.emptySet(); + else if (result.size() == 1) // no calibration needed, by default edgeScore is 0. 
+ return Collections.singleton(result.get(0)); + else { + double totalEdgeMultiplicityAcrossEdges = 0; + for (final EdgeSubHaplotypeFinder finder : result) + totalEdgeMultiplicityAcrossEdges += Math.max(0.5,finder.edge.getMultiplicity()); + final double log10TotalEdgeMultiplicityAcrossEdges = Math.log10(totalEdgeMultiplicityAcrossEdges); + for (final EdgeSubHaplotypeFinder finder : result) + finder.calibrateEdgeScore(log10TotalEdgeMultiplicityAcrossEdges); + return result; + } + } + + @Override + public boolean isReference() { + return isReference; + } + + @Override + public String label() { + return vertex.getSequenceString(); + } + + @Override + public Set> subFinderLabels() { + final Set> result = new LinkedHashSet<>(subFinders.size()); + for (final EdgeSubHaplotypeFinder subFinder : subFinders) + result.add(new Pair<>(subFinder,simplifyZeros(String.format("%.4f", subFinder.edgeScore)))); return result; } - private static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder { + /** + * Removes zeros decimal positions from edge-labels. + * + * @param edgeLabel the original label to reformat. + * @return never {@code null}, the reformatted label. + */ + private String simplifyZeros(final String edgeLabel) { + if (edgeLabel.equals("0.000") || edgeLabel.equals("-0.000") ) + return "0."; + int i = edgeLabel.length() - 1; + while (edgeLabel.charAt(i) == '0') + i--; + return (i == edgeLabel.length() - 1) ? 
edgeLabel : edgeLabel.substring(0,i); + } + + protected static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder { private final KBestSubHaplotypeFinder childFinder; @@ -101,10 +169,32 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder { private final BaseEdge edge; + private double edgeScore = 0; + private EdgeSubHaplotypeFinder(final SeqVertex vertex, final BaseEdge edge, final KBestSubHaplotypeFinder childFinder) { this.childFinder = childFinder; this.edge = edge; this.vertex = vertex; + this.edgeScore = 0; + } + + private void calibrateEdgeScore(final double log10TotalMultiplicityAcrossOutgoingEdges) { + edgeScore = Math.log10(Math.max(edge.getMultiplicity(),0.5)) - log10TotalMultiplicityAcrossOutgoingEdges; + } + + @Override + public String id() { + return childFinder.id(); + } + + @Override + public String label() { + return childFinder.label(); + } + + @Override + public Set> subFinderLabels() { + return childFinder.subFinderLabels(); } @Override @@ -114,8 +204,31 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder { @Override public KBestHaplotype getKBest(int k) { - return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k)); + return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k),edgeScore); } + + @Override + public boolean isReference() { + return childFinder.isReference(); + } + + @Override + public double score(final byte[] bases, final int offset, final int length) { + if (length == 0) + return 0; + final byte[] vertexSequence = vertex.getSequence(); + if (length < vertexSequence.length) // query is not long enough to have any score. 
+ return Double.NaN; + else if (!Utils.equalRange(vertexSequence,0,bases,offset,vertexSequence.length)) + return Double.NaN; + else + return edgeScore + childFinder.score(bases,offset + vertexSequence.length,length - vertexSequence.length); + } + } + + @Override + public String id() { + return "v" + vertex.getId(); } /** @@ -129,13 +242,14 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder { */ private static class ChildKBestSubHaplotype extends KBestHaplotype { - private final int score; + private final double score; private final KBestHaplotype child; private final SeqVertex vertex; private final boolean isReference; - public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child) { - this.score = edge.getMultiplicity() + child.score(); + + public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child, final double edgeScore) { + this.score = edgeScore + child.score(); this.vertex = vertex; this.child = child; this.isReference = edge.isRef() && child.isReference(); @@ -147,7 +261,7 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder { } @Override - public int score() { + public double score() { return score; } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java index c8c6abb86..087c07be7 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java @@ -291,16 +291,9 @@ public class SeqGraph extends BaseGraph { final SeqVertex addedVertex = mergeLinearChainVertices(linearChain); addVertex(addedVertex); - final Set inEdges = incomingEdgesOf(first); - final Set outEdges = 
outgoingEdgesOf(last); - - final int nEdges = inEdges.size() + outEdges.size(); - int sharedWeightAmongEdges = nEdges == 0 ? 0 : sumEdgeWeightAlongChain(linearChain) / nEdges; - final BaseEdge inc = new BaseEdge(false, sharedWeightAmongEdges); // template to make .add function call easy - // update the incoming and outgoing edges to point to the new vertex - for( final BaseEdge edge : outEdges ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy().add(inc)); } - for( final BaseEdge edge : inEdges ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy().add(inc)); } + for( final BaseEdge edge : outgoingEdgesOf(last) ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy()); } + for( final BaseEdge edge : incomingEdgesOf(first) ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy()); } removeAllVertices(linearChain); return true; @@ -313,29 +306,6 @@ public class SeqGraph extends BaseGraph { return new SeqVertex( seqsCat ); } - /** - * Get the sum of the edge weights on a linear chain of at least 2 elements - * - * @param chain a linear chain of vertices with at least 2 vertices - * @return the sum of the multiplicities along all edges connecting vertices within the chain - */ - @Requires({"chain != null", "chain.size() >= 2"}) - private int sumEdgeWeightAlongChain(final LinkedList chain) { - int sum = 0; - SeqVertex prev = null; - - for ( final SeqVertex v : chain ) { - if ( prev != null ) { - final BaseEdge e = getEdge(prev, v); - if ( e == null ) throw new IllegalStateException("Something wrong with the linear chain, got a null edge between " + prev + " and " + v); - sum += e.getMultiplicity(); - } - prev = v; - } - - return sum; - } - /** * Base class for transformation operations that need to iterate over proposed vertices, where * each proposed vertex is a seed vertex for a potential transformation. 
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java index 284062749..401cbf18c 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java @@ -247,12 +247,12 @@ public class SharedVertexSequenceSplitter { if ( needPrefixNode ) { outer.addVertex(prefixV); - if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 0)); + if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 1)); } if ( needSuffixNode ) { outer.addVertex(suffixV); - if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 0)); + if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 1)); } if ( topForConnect != null ) { diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java index a7989ac2c..e03e26e0a 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java @@ -52,7 +52,6 @@ import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*; import 
org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.sam.GATKSAMRecord; -import org.jgrapht.alg.CycleDetector; import java.io.File; import java.util.*; @@ -88,8 +87,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme /** * */ - - final boolean debugGraphTransformations; + private final boolean debugGraphTransformations; final byte minBaseQualityToUseInAssembly; protected boolean increaseCountsBackwards = true; @@ -319,13 +317,6 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme removeAllVertices(verticesToRemove); } - /** - * @return true if the graph has cycles, false otherwise - */ - public boolean hasCycles() { - return new CycleDetector<>(this).detectCycles(); - } - /** * Does the graph not have enough complexity? We define low complexity as a situation where the number * of non-unique kmers is more than 20% of the total number of kmers. @@ -419,39 +410,10 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme return counter.getKmersWithCountsAtLeast(2); } - /** - * Convert this kmer graph to a simple sequence graph. - * - * Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer - * graph. 
Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence - * - * @return a newly allocated SequenceGraph - */ - // TODO -- should override base class method + @Override public SeqGraph convertToSequenceGraph() { buildGraphIfNecessary(); - - final SeqGraph seqGraph = new SeqGraph(kmerSize); - final Map vertexMap = new HashMap<>(); - - - // create all of the equivalent seq graph vertices - for ( final MultiDeBruijnVertex dv : vertexSet() ) { - final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv))); - sv.setAdditionalInfo(dv.additionalInfo()); - vertexMap.put(dv, sv); - seqGraph.addVertex(sv); - } - - // walk through the nodes and connect them to their equivalent seq vertices - for( final MultiSampleEdge e : edgeSet() ) { - final SeqVertex seqInV = vertexMap.get(getEdgeSource(e)); - final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e)); - //logger.info("Adding edge " + seqInV + " -> " + seqOutV); - seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity())); - } - - return seqGraph; + return super.convertToSequenceGraph(); } private void increaseCountsInMatchedKmers(final SequenceForKmers seqForKmers, @@ -749,15 +711,15 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme } private static String pathElementId(final String element) { - final int parentesysPos = element.indexOf('('); + final int openBracketPosition = element.indexOf('('); - if (parentesysPos == -1) + if (openBracketPosition == -1) return null; - final int closeParentesysPos = element.lastIndexOf(')'); - if (closeParentesysPos == -1) + final int closeBracketPosition = element.lastIndexOf(')'); + if (closeBracketPosition == -1) throw new IllegalArgumentException("non-closed id parantesys found in element: " + element); - final String result = element.substring(parentesysPos + 1,closeParentesysPos).trim(); + final String result = element.substring(openBracketPosition + 1,closeBracketPosition).trim(); 
if (result.isEmpty()) throw new IllegalArgumentException("empty id found in element: " + element); return result; diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 1f355359d..f16399e62 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -247,7 +247,7 @@ public class VariantDataManager { logger.warn( "WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable." ); } else if( trainingData.size() > VRAC.MAX_NUM_TRAINING_DATA ) { logger.warn( "WARNING: Very large training set detected. Downsampling to " + VRAC.MAX_NUM_TRAINING_DATA + " training variants." 
); - Collections.shuffle(trainingData); + Collections.shuffle(trainingData, GenomeAnalysisEngine.getRandomGenerator()); return trainingData.subList(0, VRAC.MAX_NUM_TRAINING_DATA); } return trainingData; @@ -295,13 +295,13 @@ public class VariantDataManager { public List getRandomDataForPlotting( final int numToAdd, final List trainingData, final List antiTrainingData, final List evaluationData ) { final List returnData = new ExpandingArrayList<>(); - Collections.shuffle(trainingData); - Collections.shuffle(antiTrainingData); - Collections.shuffle(evaluationData); + Collections.shuffle(trainingData, GenomeAnalysisEngine.getRandomGenerator()); + Collections.shuffle(antiTrainingData, GenomeAnalysisEngine.getRandomGenerator()); + Collections.shuffle(evaluationData, GenomeAnalysisEngine.getRandomGenerator()); returnData.addAll(trainingData.subList(0, Math.min(numToAdd, trainingData.size()))); returnData.addAll(antiTrainingData.subList(0, Math.min(numToAdd, antiTrainingData.size()))); returnData.addAll(evaluationData.subList(0, Math.min(numToAdd, evaluationData.size()))); - Collections.shuffle(returnData); + Collections.shuffle(returnData, GenomeAnalysisEngine.getRandomGenerator()); return returnData; } diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java index 3d1a9da57..b8a0585a2 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java @@ -50,6 +50,7 @@ import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Output; import 
org.broadinstitute.sting.commandline.RodBinding; +import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; @@ -59,6 +60,8 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine; import org.broadinstitute.sting.gatk.walkers.haplotypecaller.PairHMMLikelihoodCalculationEngine; import org.broadinstitute.sting.utils.SampleUtils; import org.broadinstitute.sting.utils.exceptions.UserException; +import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; +import org.broadinstitute.sting.utils.help.HelpConstants; import org.broadinstitute.sting.utils.variant.GATKVCFUtils; import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils; import org.broadinstitute.variant.variantcontext.VariantContext; @@ -148,6 +151,7 @@ import java.util.*; * * */ +@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} ) public class CalculateGenotypePosteriors extends RodWalker { /** diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java index 0f577cb23..580380513 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java @@ -134,6 +134,9 @@ public class CombineGVCFs extends RodWalker 1 ? 
startingStates.refBases[1] : (byte)'N'); } @@ -289,7 +292,8 @@ public class CombineGVCFs extends RodWalker attrs = new HashMap<>(1); - attrs.put(VCFConstants.END_KEY, Integer.toString(end)); + if ( !USE_BP_RESOLUTION ) + attrs.put(VCFConstants.END_KEY, Integer.toString(end)); // genotypes final GenotypesContext genotypes = GenotypesContext.create(); diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java new file mode 100644 index 000000000..ea93ebe4a --- /dev/null +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java @@ -0,0 +1,517 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. 
LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. 
The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. 
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. 
Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.utils.pairhmm; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.QualityUtils; + +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.haplotype.Haplotype; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.variant.variantcontext.Allele; +import org.broadinstitute.sting.utils.exceptions.UserException; +import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*; + +import java.util.List; +import java.util.Map; +import java.util.HashMap; +import java.io.File; +import java.io.FileWriter; +import java.io.BufferedWriter; +import java.util.Map; +import java.util.HashMap; +import java.io.IOException; + + +/** + * Debugging wrapper that drives a JNI-backed PairHMM ({@link JNILoglessPairHMM}) next to the + * pure-Java {@link LoglessPairHMM} it extends. + * + * While the {@code verify} switch is on, both implementations are initialized. The comments inside + * computeLikelihoods state that values are compared by absolute and relative error (threshold 1e-5 + * for both) and that the inputs producing an inconsistency are written out through debugDump. + * NOTE(review): confirm the exact comparison against the complete method body. + * + * Created with IntelliJ IDEA. + * User: rpoplin, carneiro + * Date: 10/16/12 + */ +public class DebugJNILoglessPairHMM extends LoglessPairHMM { + + /* Compile-time switches; with the values below verify evaluates to true because of the "|| true" term. */ private static final boolean dumpSandboxOnly = false; //simulates ifdef + private static final boolean debug = false; //simulates ifdef + private static final boolean verify = !dumpSandboxOnly && (debug || true); //simulates ifdef + private static final boolean debug0_1 = false; //simulates ifdef + private static final boolean debug1 = false; //simulates ifdef + private static final boolean debug2 = false; + private static final boolean debug3 = false; + + //Debugging stats + private int numCalls = 0; + private int numComputeLikelihoodCalls = 0; + /* Open dump writers (values are read as BufferedWriter in the close loop at the end of the class); the field name suggests the keys are file names - TODO confirm. */ protected HashMap filenameToWriter = new HashMap(); + + /* JNI implementation under test; assigned once in the constructor. */ private JNILoglessPairHMM jniPairHMM = null; + /** + * Instantiates the JNI implementation selected by hmmType. + * @param hmmType only VECTOR_LOGLESS_CACHING is accepted + * @throws UserException.BadArgumentValue for every other value + */ public DebugJNILoglessPairHMM(final PairHMM.HMM_IMPLEMENTATION hmmType) { + super(); + switch(hmmType) { + case VECTOR_LOGLESS_CACHING: + jniPairHMM = new VectorLoglessPairHMM(); + break; + default: + throw new UserException.BadArgumentValue("pairHMM","Specified JNIPairHMM implementation is unrecognized or incompatible with the HaplotypeCaller.
Acceptable options are VECTOR_LOGLESS_CACHING"); + } + } + + /** + * Closes the wrapped JNI PairHMM first, then calls debugClose(). + */ @Override + public void close() + { + jniPairHMM.close(); + debugClose(); + } + + //Used only when testing parts of the compute kernel + /** + * {@inheritDoc} + * + * The superclass is initialized only while verify is on. The debug3 branch prints the two + * lengths and dumps them to lengths_java.txt; the debug2 branch calls jniInitialize. + */ + @Override + public void initialize( final int readMaxLength, final int haplotypeMaxLength ) { + if(verify) + super.initialize(readMaxLength, haplotypeMaxLength); + if(debug3) + { + System.out.println("Java: alloc initialized readMaxLength : "+readMaxLength+" haplotypeMaxLength : "+haplotypeMaxLength); + debugDump("lengths_java.txt", String.format("%d %d\n",readMaxLength, haplotypeMaxLength), + true); + } + if(debug2) + jniInitialize(readMaxLength, haplotypeMaxLength); + } + + /* Filled from jniPairHMM.getHaplotypeToHaplotypeListIdxMap() in initialize(haplotypes, ...). */ private HashMap haplotypeToHaplotypeListIdxMap = null; + //Used to transfer data to JNI + //Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region + /** + * {@inheritDoc} + * + * Under verify, initializes both the superclass and jniPairHMM with the same arguments, then + * caches jniPairHMM's haplotype-to-index map in haplotypeToHaplotypeListIdxMap. + */ + @Override + public void initialize( final List haplotypes, final Map> perSampleReadList, + final int readMaxLength, final int haplotypeMaxLength ) { + if(verify) + { + super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength); + jniPairHMM.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength); + haplotypeToHaplotypeListIdxMap = jniPairHMM.getHaplotypeToHaplotypeListIdxMap(); + } + } + + /** + * {@inheritDoc} + * + * Forwards to jniPairHMM unless dumpSandboxOnly is set. + */ + @Override + public void finalizeRegion() + { + if(!dumpSandboxOnly) + jniPairHMM.finalizeRegion(); + } + + /** + * {@inheritDoc} + */ + @Override + public PerReadAlleleLikelihoodMap computeLikelihoods( final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap ) { + // (re)initialize the pairHMM only if necessary + final int readMaxLength = verify ? findMaxReadLength(reads) : 0; + final int haplotypeMaxLength = verify ?
findMaxHaplotypeLength(alleleHaplotypeMap) : 0; + if(verify) + { + if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) + { initialize(readMaxLength, haplotypeMaxLength); } + if ( ! initialized ) + throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode"); + } + int readListSize = reads.size(); + int numHaplotypes = alleleHaplotypeMap.size(); + int numTestcases = readListSize*numHaplotypes; + if(debug0_1) + System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes); + int idx = 0; + for(GATKSAMRecord read : reads) + { + byte [] overallGCP = GCPArrayMap.get(read); + if(debug0_1) + System.out.println("Java read length "+read.getReadBases().length); + if(debug3) + { + for(int i=0;i currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always + { + byte[] haplotypeBases = currEntry.getValue().getBases(); + if(debug0_1) + System.out.println("Java haplotype length "+haplotypeBases.length); + if(debug3) + { + for(int i=0;i currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always + { + idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue()); + likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList]; + ++idx; + } + readIdx += numHaplotypes; + } + //for floating point values, no exact equality + //check whether numbers are close in terms of abs_error or relative_error + //For very large values, relative_error is relevant + //For very small values, abs_error is relevant + for(int i=0;i 1e-5 && relative_error > 1e-5) + { + toDump = true; + break; + } + } + } + //if numbers are not close, then dump out the data that produced the inconsistency + if(toDump) + { + idx = 0; + System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes); + boolean firstLine = true; + for(GATKSAMRecord read : reads) + { + byte [] overallGCP =
GCPArrayMap.get(read); + byte[] tmpByteArray = new byte[read.getReadBases().length]; + for (Map.Entry currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always + { + byte[] haplotypeBases = currEntry.getValue().getBases(); + debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true); + debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true); + for(int k=0;k currEntry : filenameToWriter.entrySet()) { + BufferedWriter currWriter = currEntry.getValue(); + try + { + currWriter.flush(); + currWriter.close(); + } + catch(IOException e) + { + e.printStackTrace(); + + } + } + /* Every writer has been flushed and closed (failures are only printed); drop the references. */ filenameToWriter.clear(); + } +} diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java new file mode 100644 index 000000000..f039cc295 --- /dev/null +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java @@ -0,0 +1,63 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. 
NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.utils.pairhmm; + +import org.broadinstitute.sting.utils.haplotype.Haplotype; + +import java.util.HashMap; + + +/** + * Abstract base for {@link LoglessPairHMM} variants that are backed by native code through JNI + * (VectorLoglessPairHMM, added in the same change, is the subclass that declares the native methods). + * + * Created with IntelliJ IDEA. + * User: rpoplin, carneiro + * Date: 10/16/12 + */ +public abstract class JNILoglessPairHMM extends LoglessPairHMM { + /** + * Returns the map from each haplotype to its index in the list of haplotypes that was passed + * to initialize; callers use the index to look up per-haplotype results. + */ public abstract HashMap getHaplotypeToHaplotypeListIdxMap(); + /* Not read or written in this class; presumably accumulated by subclasses - TODO confirm usage and time unit. */ protected long setupTime = 0; + +} diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java index ed35e6970..31a0d1363 100644 --- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java @@ -66,7 +66,7 @@ public class LoglessPairHMM extends N2MemoryPairHMM { protected static final double TRISTATE_CORRECTION = 3.0; - + /** * {@inheritDoc} */ diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java new file mode 100644 index 000000000..e69d9ea50 --- /dev/null +++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java @@ -0,0 +1,340 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. +* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. 
Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. 
INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. 
LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. 
This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. +*/ + +package org.broadinstitute.sting.utils.pairhmm; + +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.QualityUtils; + +import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap; +import org.broadinstitute.sting.utils.haplotype.Haplotype; +import org.broadinstitute.sting.utils.sam.GATKSAMRecord; +import org.broadinstitute.variant.variantcontext.Allele; + +import java.util.List; +import java.util.Map; +import java.util.HashMap; + +//For loading library from jar +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +/** + * Created with IntelliJ IDEA. 
+ * User: rpoplin, carneiro + * Date: 10/16/12 + */ +public class VectorLoglessPairHMM extends JNILoglessPairHMM { + + //For machine capabilities + public static final long sse41Mask = 1; + public static final long sse42Mask = 2; + public static final long avxMask = 4; + public static final long enableAll = 0xFFFFFFFFFFFFFFFFl; + + //Used to copy references to byteArrays to JNI from reads + protected class JNIReadDataHolderClass { + public byte[] readBases = null; + public byte[] readQuals = null; + public byte[] insertionGOP = null; + public byte[] deletionGOP = null; + public byte[] overallGCP = null; + } + + //Used to copy references to byteArrays to JNI from haplotypes + protected class JNIHaplotypeDataHolderClass { + public byte[] haplotypeBases = null; + } + + /** + * Return 64-bit mask representing machine capabilities + * Bit 0 is LSB, bit 63 MSB + * Bit 0 represents sse4.1 availability + * Bit 1 represents sse4.2 availability + * Bit 2 represents AVX availability + */ + public native long jniGetMachineType(); + + /** + * Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM. + * C++ codegets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not + * change per JVM session + * @param readDataHolderClass class type of JNIReadDataHolderClass + * @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass + * @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). 
Users can disable usage of some hardware features by zeroing some bits in the mask + * */ + private native void jniInitializeClassFieldsAndMachineMask(Class readDataHolderClass, Class haplotypeDataHolderClass, long mask); + + private static Boolean isVectorLoglessPairHMMLibraryLoaded = false; + //The constructor is called only once inside PairHMMLikelihoodCalculationEngine + public VectorLoglessPairHMM() { + super(); + + logger.warn("WARNING: the VectorLoglessPairHMM is an experimental implementation still under active development. " + + "Use at your own risk!"); + + synchronized(isVectorLoglessPairHMMLibraryLoaded) { + //Load the library and initialize the FieldIDs + if(!isVectorLoglessPairHMMLibraryLoaded) { + try + { + //Try loading from Java's library path first + //Useful if someone builds his/her own library and wants to override the bundled + //implementation without modifying the Java code + System.loadLibrary("VectorLoglessPairHMM"); + logger.info("libVectorLoglessPairHMM found in JVM library path"); + } + catch(UnsatisfiedLinkError ule) + { + //Could not load from Java's library path - try unpacking from jar + try + { + logger.debug("libVectorLoglessPairHMM not found in JVM library path - trying to unpack from StingUtils.jar"); + loadLibraryFromJar("/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so"); + logger.debug("libVectorLoglessPairHMM unpacked successfully from StingUtils.jar"); + } + catch(IOException ioe) + { + //Throw the UnsatisfiedLinkError to make it clear to the user what failed + throw ule; + } + } + + isVectorLoglessPairHMMLibraryLoaded = true; + jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once + } + } + } + + private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray); + //Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize + //Use 
this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype + private HashMap haplotypeToHaplotypeListIdxMap = new HashMap(); + private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray; + @Override + public HashMap getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; } + + //Used to transfer data to JNI + //Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region + /** + * {@inheritDoc} + */ + @Override + public void initialize( final List haplotypes, final Map> perSampleReadList, + final int readMaxLength, final int haplotypeMaxLength ) { + int numHaplotypes = haplotypes.size(); + mHaplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes]; + int idx = 0; + haplotypeToHaplotypeListIdxMap.clear(); + for(final Haplotype currHaplotype : haplotypes) + { + mHaplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass(); + mHaplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases(); + haplotypeToHaplotypeListIdxMap.put(currHaplotype, idx); + ++idx; + } + jniInitializeHaplotypes(numHaplotypes, mHaplotypeDataArray); + } + /** + * Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not + * accessing Java memory directly, still important to release memory from C++ + */ + private native void jniFinalizeRegion(); + + /** + * {@inheritDoc} + */ + @Override + public void finalizeRegion() + { + jniFinalizeRegion(); + } + + /** + * Real compute kernel + */ + private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray, + JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse); + /** + * {@inheritDoc} + */ + @Override + public PerReadAlleleLikelihoodMap computeLikelihoods( final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap ) { + if(doProfiling) + startTime = 
System.nanoTime(); + int readListSize = reads.size(); + int numHaplotypes = alleleHaplotypeMap.size(); + int numTestcases = readListSize*numHaplotypes; + JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize]; + int idx = 0; + for(GATKSAMRecord read : reads) + { + readDataArray[idx] = new JNIReadDataHolderClass(); + readDataArray[idx].readBases = read.getReadBases(); + readDataArray[idx].readQuals = read.getBaseQualities(); + readDataArray[idx].insertionGOP = read.getBaseInsertionQualities(); + readDataArray[idx].deletionGOP = read.getBaseDeletionQualities(); + readDataArray[idx].overallGCP = GCPArrayMap.get(read); + ++idx; + } + + mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results + if(doProfiling) + setupTime += (System.nanoTime() - startTime); + //for(reads) + // for(haplotypes) + // compute_full_prob() + jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12); + + final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap(); + idx = 0; + int idxInsideHaplotypeList = 0; + int readIdx = 0; + for(GATKSAMRecord read : reads) + { + for (Map.Entry currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always + { + //Since the order of haplotypes in the List and alleleHaplotypeMap is different, + //get idx of current haplotype in the list and use this idx to get the right likelihoodValue + idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue()); + likelihoodMap.add(read, currEntry.getKey(), mLikelihoodArray[readIdx + idxInsideHaplotypeList]); + ++idx; + } + readIdx += numHaplotypes; + } + if(doProfiling) + computeTime += (System.nanoTime() - startTime); + return likelihoodMap; + } + + /** + * Print final profiling information from native code + */ + public native void jniClose(); + @Override + public void close() + { + System.out.println("Time spent in setup for JNI call : 
"+(setupTime*1e-9)); + super.close(); + jniClose(); + } + + //Copied from http://frommyplayground.com/how-to-load-native-jni-library-from-jar + /** + * Loads library from current JAR archive + * + * The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after exiting. + * Method uses String as filename because the pathname is "abstract", not system-dependent. + * + * @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext + * @throws IOException If temporary file creation or read/write operation fails + * @throws IllegalArgumentException If source file (param path) does not exist + * @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters (restriction of {@see File#createTempFile(java.lang.String, java.lang.String)}). + */ + public static void loadLibraryFromJar(String path) throws IOException { + + if (!path.startsWith("/")) { + throw new IllegalArgumentException("The path to be absolute (start with '/')."); + } + + // Obtain filename from path + String[] parts = path.split("/"); + String filename = (parts.length > 1) ? parts[parts.length - 1] : null; + + // Split filename to prexif and suffix (extension) + String prefix = ""; + String suffix = null; + if (filename != null) { + parts = filename.split("\\.", 2); + prefix = parts[0]; + suffix = (parts.length > 1) ? "."+parts[parts.length - 1] : null; // Thanks, davs! 
:-) + } + + // Check if the filename is okay + if (filename == null || prefix.length() < 3) { + throw new IllegalArgumentException("The filename has to be at least 3 characters long."); + } + + // Prepare temporary file + File temp = File.createTempFile(prefix, suffix); + //System.out.println("Temp lib file "+temp.getAbsolutePath()); + temp.deleteOnExit(); + + if (!temp.exists()) { + throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist."); + } + + // Prepare buffer for data copying + byte[] buffer = new byte[1024]; + int readBytes; + + // Open and check input stream + InputStream is = VectorLoglessPairHMM.class.getResourceAsStream(path); + if (is == null) { + throw new FileNotFoundException("File " + path + " was not found inside JAR."); + } + + // Open output stream and copy data between source file in JAR and the temporary file + OutputStream os = new FileOutputStream(temp); + try { + while ((readBytes = is.read(buffer)) != -1) { + os.write(buffer, 0, readBytes); + } + } finally { + // If read/write fails, close streams safely before throwing an exception + os.close(); + is.close(); + } + + // Finally, load the library + System.load(temp.getAbsolutePath()); + } +} diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java index b1c280748..0ec1dd996 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java @@ -46,6 +46,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator; +import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.utils.MannWhitneyU; import org.testng.Assert; import org.testng.annotations.BeforeClass; @@ -75,9 +76,9 @@ public class 
RankSumUnitTest { makeDistribution(distribution20_40, 40, skew, observations/2); // shuffle the observations - Collections.shuffle(distribution20); - Collections.shuffle(distribution30); - Collections.shuffle(distribution20_40); + Collections.shuffle(distribution20, GenomeAnalysisEngine.getRandomGenerator()); + Collections.shuffle(distribution30, GenomeAnalysisEngine.getRandomGenerator()); + Collections.shuffle(distribution20_40, GenomeAnalysisEngine.getRandomGenerator()); } private static void makeDistribution(final List result, final int target, final int skew, final int numObservations) { diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatioUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatioUnitTest.java new file mode 100644 index 000000000..562736f0a --- /dev/null +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatioUnitTest.java @@ -0,0 +1,100 @@ +/* +* By downloading the PROGRAM you agree to the following terms of use: +* +* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY +* +* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE). +* +* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and +* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions. 
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows: +* +* 1. DEFINITIONS +* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE. +* +* 2. LICENSE +* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM. +* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement. +* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement. +* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. 
LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM. +* +* 3. OWNERSHIP OF INTELLECTUAL PROPERTY +* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication. +* Copyright 2012 Broad Institute, Inc. +* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc. +* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes. +* +* 4. INDEMNIFICATION +* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement. +* +* 5. 
NO REPRESENTATIONS OR WARRANTIES +* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME. +* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING. +* +* 6. ASSIGNMENT +* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void. +* +* 7. MISCELLANEOUS +* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries. +* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. 
Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes. +* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4. +* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt. +* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter. +* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement. +* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles. 
+*/ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.lang.Integer; +import java.util.List; + +/** + * Created by haasb on 3/5/14. + */ +public class StrandOddsRatioUnitTest { + private static double DELTA_PRECISION = 0.001; + + @DataProvider(name = "UsingSOR") + public Object[][] makeUsingSORData() { + List tests = new ArrayList<>(); + tests.add(new Object[]{0, 0, 0, 0, 2.0}); + tests.add(new Object[]{100000, 100000, 100000, 100000, 2.0} ); + tests.add(new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, 2.0} ); + + tests.add(new Object[]{0, 0, 100000, 100000, 2.0}); + tests.add(new Object[]{0, 0, Integer.MAX_VALUE, Integer.MAX_VALUE, 2.0}); + + tests.add(new Object[]{100000,100000,100000,0, 1000001.000001}); + tests.add(new Object[]{100,100,100,0, 1001.000999}); + tests.add(new Object[]{Integer.MAX_VALUE,Integer.MAX_VALUE,Integer.MAX_VALUE,0, 21474836471.0}); + + tests.add(new Object[]{13736,9047,41,1433, 52.95947}); + tests.add(new Object[]{66, 14, 64, 4, 3.63482}); + tests.add(new Object[]{351169, 306836, 153739, 2379, 56.48043}); + tests.add(new Object[]{116449, 131216, 289, 16957, 52.07302}); + tests.add(new Object[]{137, 159, 9, 23, 2.64460}); + tests.add(new Object[]{129, 90, 21, 20, 2.09757}); + tests.add(new Object[]{14054, 9160, 16, 7827, 745.89657}); + tests.add(new Object[]{32803, 9184, 32117, 3283, 3.10399}); + tests.add(new Object[]{2068, 6796, 1133, 0, 37235.43791}); + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "UsingSOR") + public void testUsingSOR(final int refpos, final int refneg, final int altpos, final int altneg, double expectedOddsRatio ) { + int[][] contingencyTable = new int[2][2]; + contingencyTable[0][0] = refpos; + contingencyTable[0][1] = refneg; + contingencyTable[1][0] = altpos; + contingencyTable[1][1] = 
altneg; + final double ratio = new StrandOddsRatio().symmetricOddsRatio(contingencyTable); + Assert.assertEquals(ratio, expectedOddsRatio, DELTA_PRECISION, "Pass"); + } +} diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java index a650e0f6f..58ff0a7b3 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceIntegrationTest.java @@ -47,38 +47,63 @@ package org.broadinstitute.sting.gatk.walkers.fasta; import org.broadinstitute.sting.WalkerTest; +import org.broadinstitute.sting.utils.exceptions.UserException; import org.testng.annotations.Test; import java.util.Arrays; public class FastaAlternateReferenceIntegrationTest extends WalkerTest { + @Test - public void testIntervals() { + public void testReferenceOnly() { - String md5_1 = "328d2d52cedfdc52da7d1abff487633d"; - - WalkerTestSpec spec1a = new WalkerTestSpec( - "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s", - 1, - Arrays.asList(md5_1)); - executeTest("testFastaReference", spec1a); - - WalkerTestSpec spec1b = new WalkerTestSpec( + WalkerTestSpec spec = new WalkerTestSpec( "-T FastaReferenceMaker -R " + b36KGReference + " -L 1:10,000,100-10,000,500 -L 1:10,100,000-10,101,000 -L 1:10,900,000-10,900,001 -o %s", 1, - Arrays.asList(md5_1)); - executeTest("testFastaReference", spec1b); + Arrays.asList("328d2d52cedfdc52da7d1abff487633d")); + executeTest("test FastaReference", spec); + } - WalkerTestSpec spec2 = new WalkerTestSpec( + @Test + public void testIndelsAndSnpMask() { + + WalkerTestSpec spec = new 
WalkerTestSpec( "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + validationDataLocation + "NA12878.chr1_10mb_11mb.slx.indels.vcf4 --snpmask:vcf " + b36dbSNP129 + " -L 1:10,075,000-10,075,380 -L 1:10,093,447-10,093,847 -L 1:10,271,252-10,271,452 -o %s", 1, Arrays.asList("ef481be9962e21d09847b8a1d4a4ff65")); - executeTest("testFastaAlternateReferenceIndels", spec2); + executeTest("test indels", spec); + } - WalkerTestSpec spec3 = new WalkerTestSpec( + @Test + public void testSnps() { + + WalkerTestSpec spec = new WalkerTestSpec( "-T FastaAlternateReferenceMaker -R " + b36KGReference + " -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500 -L 1:10,029,200-10,029,500 -o %s", 1, Arrays.asList("8b6cd2e20c381f9819aab2d270f5e641")); - executeTest("testFastaAlternateReferenceSnps", spec3); + executeTest("test SNPs", spec); + } + + @Test + public void testBadIupacInput() { + + // cannot use 'expectedExceptions = UserException.BadInput.class' because it technically gets thrown as a RuntimeException by the engine + try { + WalkerTestSpec spec = new WalkerTestSpec( + "-T FastaAlternateReferenceMaker -R " + b36KGReference + " --useIUPAC -V " + GATKDataLocation + "dbsnp_129_b36.vcf -L 1:10,023,400-10,023,500 -L 1:10,029,200-10,029,500 -o %s", + 1, + Arrays.asList("FAILFAILFAILFAILFAILFAILFAILFAIL")); + executeTest("test bad input", spec); + } catch (Exception e) {} // do nothing + } + + @Test + public void testIupac() { + + WalkerTestSpec spec = new WalkerTestSpec( + "-T FastaAlternateReferenceMaker -R " + b37KGReference + " --useIUPAC -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf -L 20:61050-66380 -o %s", + 1, + Arrays.asList("5feb2a576ff2ed1745a007eaa36448b3")); + executeTest("test iupac", spec); } } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java 
b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java index 2838648d5..24e47879d 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest.java @@ -94,6 +94,19 @@ public class HaplotypeCallerComplexAndSymbolicVariantsIntegrationTest extends Wa @Test public void testHaplotypeCallerMultiSampleGGAMultiAllelic() { HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:133041-133161 -L 20:300207-300337", - "f50e0b35e2240b19b1b8b6dfa0cf9796"); + "5ac3bfe1da1d411b52a98ef3debbd318"); } + + private void HCTestComplexConsensusMode(String bam, String args, String md5) { + final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, bam) + " --no_cmdline_in_header -o %s -consensus -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf -alleles " + validationDataLocation + "phase1.projectConsensus.chr20.raw.snps.vcf"; + final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5)); + executeTest("testHaplotypeCallerComplexConsensusMode: args=" + args, spec); + } + + @Test + public void testHaplotypeCallerMultiSampleConsensusModeComplex() { + HCTestComplexGGA(NA12878_CHR20_BAM, "-L 20:119673-119823 -L 20:121408-121538 -L 20:133041-133161 -L 20:300207-300337", + "61972c7c0d378e756f3b4d99aed9d0cf"); + } + } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java index 8ca67f31d..bde1e7ef5 100644 --- 
a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerGVCFIntegrationTest.java @@ -68,8 +68,8 @@ public class HaplotypeCallerGVCFIntegrationTest extends WalkerTest { // this functionality can be adapted to provide input data for whatever you might want in your data tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.NONE, PCRFreeIntervals, "50323a284788c8220c9226037c7003b5"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "7c16aa8e35de9f418533efac3bae6551"}); - tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "7e1e193d70187774f9740d475e0f1cc1"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, PCRFreeIntervals, "96fea2caf0a40df3feb268e8b14da670"}); + tests.add(new Object[]{NA12878_PCRFREE, HaplotypeCaller.ReferenceConfidenceMode.GVCF, PCRFreeIntervals, "19efc8020f31d1b68d80c50df0629e50"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.NONE, WExIntervals, "39bf5fe3911d0c646eefa8f79894f4df"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.BP_RESOLUTION, WExIntervals, "d926d653500a970280ad7828d9ee2b84"}); tests.add(new Object[]{NA12878_WEx, HaplotypeCaller.ReferenceConfidenceMode.GVCF, WExIntervals, "83ddc16e4f0900429b2da30e582994aa"}); diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index 386fc3800..8db4d5066 100644 --- 
a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -227,7 +227,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("0998be22d7af4372247f5a0338f9446b")); + Arrays.asList("7c3254ead383e2b9a51b242f6de2a5b2")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -244,7 +244,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestDBSNPAnnotationWGSGraphBased() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller -likelihoodEngine GraphBased --disableDithering --pcr_indel_model NONE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_PCRFREE + " -o %s -L 20:10,000,000-10,100,000 -D " + b37dbSNP132, 1, - Arrays.asList("1aeed297a3cb41940d83eac499a2ce07")); + Arrays.asList("eda8f91091fe462205d687ec49fc61e7")); executeTest("HC calling with dbSNP ID annotation on WGS intervals", spec); } @@ -276,7 +276,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestAggressivePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model AGGRESSIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("f426f4c2986e1dea8f3f55951ef8e013")); + Arrays.asList("73c52372a1a80f052ea2b728ee17bf22")); executeTest("HC calling with aggressive indel error modeling on WGS 
intervals", spec); } @@ -284,7 +284,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { public void HCTestConservativePcrIndelModelWGS() { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( "-T HaplotypeCaller --disableDithering --pcr_indel_model CONSERVATIVE -R " + b37KGReference + " --no_cmdline_in_header -I " + NA12878_BAM + " -o %s -L 20:10,000,000-10,300,000", 1, - Arrays.asList("dcb38cb9280f2c3059a09d323db1c633")); + Arrays.asList("4e10d49b8af23d5ef3a28cb702d10a4b")); executeTest("HC calling with conservative indel error modeling on WGS intervals", spec); } @@ -298,4 +298,25 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest { spec.disableShadowBCF(); executeTest("testGraphBasedNoSuchEdgeBugFix", spec); } + + @Test + public void testLackSensitivityDueToBadHaplotypeSelectionFix() { + final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", + b37KGReferenceWithDecoy, privateTestDir + "hc-lack-sensitivity.bam", privateTestDir + "hc-lack-sensitivity.interval_list", + HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("e2e6647f7c96e91aeead7301017dc800")); + spec.disableShadowBCF(); + executeTest("testLackSensitivityDueToBadHaplotypeSelectionFix", spec); + } + + @Test + public void testBadLikelihoodsDueToBadHaplotypeSelectionFix() { + final String commandLine = String.format("-T HaplotypeCaller -R %s -I %s -L %s --no_cmdline_in_header ", + hg19RefereneWithChrPrefixInChromosomeNames, privateTestDir + "bad-likelihoods.bam", privateTestDir + "bad-likelihoods.interval_list", + HaplotypeCaller.OPTIMAL_GVCF_INDEX_TYPE, HaplotypeCaller.OPTIMAL_GVCF_INDEX_PARAMETER); + final WalkerTestSpec spec = new WalkerTestSpec(commandLine + " -o %s", Arrays.asList("cbda30145523bf05e0413157f1a00b3e")); + spec.disableShadowBCF(); + 
executeTest("testBadLikelihoodsDueToBadHaplotypeSelectionFix", spec); + } + } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java index 23513f314..b801f05a9 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerParallelIntegrationTest.java @@ -47,7 +47,6 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller; import org.broadinstitute.sting.WalkerTest; -import org.broadinstitute.sting.utils.haplotypeBAMWriter.HaplotypeBAMWriter; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -61,7 +60,7 @@ public class HaplotypeCallerParallelIntegrationTest extends WalkerTest { List tests = new ArrayList<>(); for ( final int nct : Arrays.asList(1, 2, 4) ) { - tests.add(new Object[]{nct, "1f463bf3a06c401006858bc446ecea54"}); + tests.add(new Object[]{nct, "fd9324a574f9204f7308fc1af422fdcc"}); } return tests.toArray(new Object[][]{}); diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java index 0ddf7544d..da1474db1 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixMergerUnitTest.java @@ -52,10 +52,11 @@ import org.testng.annotations.DataProvider; import 
org.testng.annotations.Test; import java.io.File; +import java.io.IOException; import java.util.*; public class CommonSuffixMergerUnitTest extends BaseTest { - private final static boolean PRINT_GRAPHS = true; + private final static boolean PRINT_GRAPHS = false; @DataProvider(name = "CompleteCycleData") public Object[][] makeCompleteCycleData() { @@ -134,11 +135,35 @@ public class CommonSuffixMergerUnitTest extends BaseTest { return toUse.toArray(new Object[][]{}); } + /** + * Compares KBestHaplotype solutions, first by the haplotype base sequence and the by their score. + */ + private static final Comparator KBESTHAPLOTYPE_COMPARATOR = new Comparator() { + + /** + * Compares KBestHaplotype solutions, first by the haplotype base sequence and the by their score. + * + * @return {@inheritDoc} + */ + @Override + public int compare(final KBestHaplotype o1,final KBestHaplotype o2) { + final int baseCmp = o1.haplotype().getBaseString().compareTo(o2.haplotype().getBaseString()); + if (baseCmp != 0) + return baseCmp; + return - Double.compare(o1.score(),o2.score()); + } + }; + + public static void assertSameHaplotypes(final String name, final SeqGraph actual, final SeqGraph original) { + final KBestHaplotypeFinder originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks()); + final KBestHaplotypeFinder actualKBestHaplotypes = new KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks()); + final List sortedOriginalKBestHaplotypes = new ArrayList<>(originalKBestHaplotypes); + Collections.sort(sortedOriginalKBestHaplotypes, KBESTHAPLOTYPE_COMPARATOR); + final List sortedActualKBestHaplotypes = new ArrayList<>(actualKBestHaplotypes); + Collections.sort(sortedActualKBestHaplotypes, KBESTHAPLOTYPE_COMPARATOR); try { final Set haplotypes = new HashSet(); - final List originalKBestHaplotypes = new KBestHaplotypeFinder(original,original.getSources(),original.getSinks()); - final List actualKBestHaplotypes = new 
KBestHaplotypeFinder(actual,actual.getSources(),actual.getSinks()); for (final KBestHaplotype kbh : originalKBestHaplotypes) haplotypes.add(new String(kbh.bases())); @@ -148,14 +173,16 @@ public class CommonSuffixMergerUnitTest extends BaseTest { Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h); } - if ( actualKBestHaplotypes.size() == originalKBestHaplotypes.size() ) { - for ( int i = 0; i < originalKBestHaplotypes.size(); i++ ) { - Assert.assertTrue(actualKBestHaplotypes.get(i).haplotype().getBaseString().equals(originalKBestHaplotypes.get(i).haplotype().getBaseString()), "Paths not equal " + actualKBestHaplotypes.get(i).haplotype() + " vs. original " + originalKBestHaplotypes.get(i).haplotype()); - } - } + Assert.assertEquals(sortedActualKBestHaplotypes,sortedOriginalKBestHaplotypes); } catch ( AssertionError e ) { if ( PRINT_GRAPHS ) original.printGraph(new File(String.format("%s.original.dot", name, actual.vertexSet().size())), 0); if ( PRINT_GRAPHS ) actual.printGraph(new File(String.format("%s.actual.dot", name, actual.vertexSet().size())), 0); + try { + if ( PRINT_GRAPHS ) originalKBestHaplotypes.printDOTFile(String.format("%s.original.finder.dot",name)); + if ( PRINT_GRAPHS ) actualKBestHaplotypes.printDOTFile(String.format("%s.actual.finder.dot",name)); + } catch (IOException e2) { + // do nothing. 
+ } throw e; } } diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinderUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinderUnitTest.java index 6dc3d5d67..26c511b6e 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinderUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinderUnitTest.java @@ -65,7 +65,6 @@ import java.util.*; * User: rpoplin * Date: 1/31/13 */ - public class KBestHaplotypeFinderUnitTest extends BaseTest { @DataProvider(name = "BasicPathFindingData") @@ -113,11 +112,11 @@ public class KBestHaplotypeFinderUnitTest extends BaseTest { final int expectedNumOfPaths = nStartNodes * nBranchesPerBubble * nEndNodes; Assert.assertEquals(paths.size(), expectedNumOfPaths, "Didn't find the expected number of paths"); - int lastScore = Integer.MAX_VALUE; + double lastScore = 0; for ( final KBestHaplotype kbh : paths ) { final Path path = kbh.path(); - Assert.assertTrue(path.getScore() <= lastScore, "Paths out of order. Path " + path + " has score above previous " + lastScore); - lastScore = path.getScore(); + Assert.assertTrue(kbh.score() <= lastScore, "Paths out of order. 
Path " + path + " has score " + path.getScore() + " above previous " + lastScore); + lastScore = kbh.score(); } // get the best path, and make sure it's the same as our optimal path overall diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitterUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitterUnitTest.java index 2f44129d8..eb0432769 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitterUnitTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitterUnitTest.java @@ -47,6 +47,7 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.collections.Pair; import org.testng.Assert; @@ -226,28 +227,34 @@ public class SharedVertexSequenceSplitterUnitTest extends BaseTest { } final Set haplotypes = new HashSet<>(); - final List originalPaths = new KBestHaplotypeFinder((SeqGraph) graph.clone(),graph.getSources(),graph.getSinks()); + final KBestHaplotypeFinder originalPaths = new KBestHaplotypeFinder((SeqGraph) graph.clone(),graph.getSources(),graph.getSinks()); for ( final KBestHaplotype path : originalPaths ) haplotypes.add(new String(path.bases())); final SharedVertexSequenceSplitter splitter = new SharedVertexSequenceSplitter(graph, v); splitter.split(); - if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".original.dot"), 0); - if ( PRINT_GRAPHS ) splitter.splitGraph.printGraph(new File(Utils.join("_", strings) + ".split.dot"), 0); + if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + "_" + 
hasTop + "_" + hasBot + ".original.dot"), 0); + if ( PRINT_GRAPHS ) splitter.splitGraph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".split.dot"), 0); splitter.updateGraph(top, bot); - if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + ".updated.dot"), 0); + if ( PRINT_GRAPHS ) graph.printGraph(new File(Utils.join("_", strings) + "_" + hasTop + "_" + hasBot + ".updated.dot"), 0); - final List splitPaths = new KBestHaplotypeFinder(graph,graph.getSources(),graph.getSinks()); + final KBestHaplotypeFinder splitPaths = new KBestHaplotypeFinder(graph,graph.getSources(),graph.getSinks()); for ( final KBestHaplotype path : splitPaths ) { final String h = new String(path.bases()); Assert.assertTrue(haplotypes.contains(h), "Failed to find haplotype " + h); } - if ( splitPaths.size() == originalPaths.size() ) { - for ( int i = 0; i < originalPaths.size(); i++ ) { - Assert.assertTrue(splitPaths.get(i).path().equalScoreAndSequence(originalPaths.get(i).path()), "Paths not equal " + splitPaths.get(i) + " vs. 
original " + originalPaths.get(i)); - } - } + + final List sortedOriginalPaths = new ArrayList<>(originalPaths.size()); + for (final KBestHaplotype kbh : originalPaths.unique()) + sortedOriginalPaths.add(kbh.bases()); + Collections.sort(sortedOriginalPaths, BaseUtils.BASES_COMPARATOR); + final List sortedSplitPaths = new ArrayList<>(splitPaths.size()); + for (final KBestHaplotype kbh : splitPaths.unique()) + sortedSplitPaths.add(kbh.bases()); + Collections.sort(sortedSplitPaths, BaseUtils.BASES_COMPARATOR); + + Assert.assertEquals(sortedSplitPaths,sortedOriginalPaths,Utils.join("_", strings) + "_" + hasTop + "_" + hasBot); } @DataProvider(name = "MeetsMinSequenceData") diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java index 275ababda..185082c2d 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerIntegrationTest.java @@ -60,8 +60,8 @@ public class IndelRealignerIntegrationTest extends WalkerTest { private static final String knownIndels = validationDataLocation + "indelRealignerTest.pilot1.ceu.vcf"; private static final String baseCommandPrefix = "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + mainTestBam + " -targetIntervals " + mainTestIntervals + " -compress 0 -L 20:49,500-55,500 "; private static final String baseCommand = baseCommandPrefix + "-o %s "; - private static final String base_md5 = "a102dd55451799e5f053c784b762087e"; - private static final String base_md5_with_SW_or_VCF = "06b8eefcbd785e929027feaa22bb060d"; + private static final String base_md5 = "458588d68c8ea7e54443ea722604b265"; + private static final String base_md5_with_SW_or_VCF = 
"d5ed91bd5b2023c69078a0fc00268d3c"; @Test public void testDefaults() { @@ -84,7 +84,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { WalkerTestSpec spec1 = new WalkerTestSpec( baseCommand + "--consensusDeterminationModel KNOWNS_ONLY -known " + knownIndels, 1, - Arrays.asList("1b24b0f2a20aed1adc726d1b296a3192")); + Arrays.asList("a1b9396f4d5b65f7ae6e0062daf363a3")); executeTest("realigner known indels only from VCF", spec1); } @@ -101,7 +101,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { public void testLods() { HashMap e = new HashMap(); e.put( "-LOD 60", base_md5 ); - e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "4bf28d3c0337682d439257874377a681" ); + e.put( "-LOD 1 --consensusDeterminationModel USE_SW", "dea9bd14323b33348d9cf28e256415f2" ); for ( Map.Entry entry : e.entrySet() ) { WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec( @@ -117,7 +117,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T IndelRealigner -noPG -R " + b36KGReference + " -I " + validationDataLocation + "NA12878.chrom1.SLX.SRP000032.2009_06.bam -L 1:10,000,000-11,000,000 -targetIntervals " + validationDataLocation + "indelRealignerTest.NA12878.chrom1.intervals -compress 0 -o %s", 1, - Arrays.asList("f4f6c3b2a2be0306a0ecd3def334bafe")); + Arrays.asList("b91c0bf803247f703dc1cb6ccdc4f18f")); executeTest("realigner long run", spec); } @@ -126,7 +126,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseCommand + "--noOriginalAlignmentTags --consensusDeterminationModel USE_SW", 1, - Arrays.asList("71fb521f8febfe2dc683fc636e28ae7d")); + Arrays.asList("041e2254f271261fb46dc3878cf638f6")); executeTest("realigner no output tags", spec); } @@ -148,7 +148,7 @@ public class IndelRealignerIntegrationTest extends WalkerTest { @Test public void testMaxReadsInMemory() { HashMap e = new HashMap(); - e.put( 
"--maxReadsInMemory 10000", base_md5 ); + e.put( "--maxReadsInMemory 10000", "0108cd5950f1a4eb90209c3dca8f9e11" ); e.put( "--maxReadsInMemory 40000", base_md5 ); for ( Map.Entry entry : e.entrySet() ) { diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java index 03d136290..1a4d0c1f4 100644 --- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java +++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFsIntegrationTest.java @@ -165,4 +165,12 @@ public class CombineGVCFsIntegrationTest extends WalkerTest { spec.disableShadowBCF(); executeTest("testMD5s", spec); } + + @Test + public void testBasepairResolution() throws Exception { + final String cmd = baseTestString(" -L 1:69485-69791 --convertToBasePairResolution"); + final WalkerTestSpec spec = new WalkerTestSpec(cmd, 1, Arrays.asList("a068fb2c35cdd14df1e8f1f92f4114b4")); + spec.disableShadowBCF(); + executeTest("testBasepairResolution", spec); + } } diff --git a/protected/pom.xml b/protected/pom.xml index 8a9646438..434ea568e 100644 --- a/protected/pom.xml +++ b/protected/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-root - 3.0 + 3.1 ../public/sting-root diff --git a/public/VectorPairHMM/README.md b/public/VectorPairHMM/README.md new file mode 100644 index 000000000..85cc0a04a --- /dev/null +++ b/public/VectorPairHMM/README.md @@ -0,0 +1,71 @@ +Implementation overview: +Created a new Java class called VectorLoglessPairHMM which extends LoglessPairHMM and +overrides functions from both LoglessPairHMM and PairHMM. +1. Constructor: Call base class constructors. 
Then, load the native library located in this +directory and call an init function (with suffix 'jniInitializeClassFieldsAndMachineMask') in the +library to determine field ids for the members of classes JNIReadDataHolder and +JNIHaplotypeDataHolder. The native code stores the field ids (struct offsets) for the classes and +re-uses them for subsequent computations. Optionally, the user can disable the vector +implementation by using the 'mask' argument (see comments for a more detailed explanation). +2. When the library is loaded, it invokes the constructor of the class LoadTimeInitializer (because +a global variable g_load_time_initializer is declared in the library). This constructor +(LoadTimeInitializer.cc) can be used to perform various initializations. Currently, it initializes +two global function pointers to point to the function implementation that is supported on the +machine (AVX/SSE/un-vectorized) on which the program is being run. The two pointers are for float +and double respectively. The global function pointers are declared in utils.cc and are assigned in +the function initialize_function_pointers() defined in utils.cc and invoked from the constructor of +LoadTimeInitializer. +Other initializations in LoadTimeInitializer: +* ConvertChar::init - sets some masks for the vector implementation +* FTZ for performance +* stat counters = 0 +* debug structs (which are never used in non-debug mode) +This initialization is done only once for the whole program. +3. initialize(): Initializes the region for PairHMM. Passes haplotype bases to native code through +the JNIHaplotypeDataHolder class. Since the haplotype list is common across multiple samples in +computeReadLikelihoods(), we can pass the haplotype bases to the native code once and re-use them across +multiple samples. +4. computeLikelihoods(): Copies array references for readBases/quals etc. to an array of +JNIReadDataHolder objects.
Invokes the JNI function to perform the computation and updates the +likelihoodMap. +The JNI function copies the byte array references into an array of testcase structs and invokes the +compute_full_prob function through the function pointers initialized earlier. +The primary native function called is +Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods. It uses +standard JNI calls to get and return data from/to the Java class VectorLoglessPairHMM. The last +argument to the function is the maximum number of OpenMP threads to use while computing PairHMM in +C++. This option is set when the native function call is made from JNILoglessPairHMM +computeLikelihoods - currently it is set to 12 (no logical reason). +Note: OpenMP has been disabled for now - there are too few testcases per call to computeLikelihoods() to +justify multi-threading. +5. finalizeRegion(): Releases the haplotype arrays initialized in step 3 - should be called at the +end of every region (line 351 in PairHMMLikelihoodCalculationEngine). + +Note: Debug code has been moved to a separate class DebugJNILoglessPairHMM.java. + +Compiling: +Make sure you have icc (Intel C compiler) available. Currently, gcc does not seem to support all AVX +intrinsics. +This native library is called libVectorLoglessPairHMM.so. +Using Maven: +Type 'mvn install' in this directory - this will build the library (by invoking 'make') and copy the +native library to the directory +${sting-utils.basedir}/src/main/resources/org/broadinstitute/sting/utils/pairhmm +The GATK maven build process (when run) will bundle the library into the StingUtils jar file from +the copied directory. +Simple build: +cd src/main/c++ +make + +Running: +The default implementation of PairHMM is now VECTOR_LOGLESS_CACHING in HaplotypeCaller.java. To use +the Java version, use the command line argument "--pair_hmm_implementation LOGLESS_CACHING" (see +run.sh in src/main/c++).
+The native library is bundled with the StingUtils jar file. When HaplotypeCaller is invoked, then +the library is unpacked from the jar file, copied to the /tmp directory (with a unique id) and +loaded by the Java class VectorLoglessPairHMM in the constructor (if it has not been loaded +already). +The default library can be overridden by using the -Djava.library.path argument (see +src/main/c++/run.sh for an example) for the JVM to pass the path to the library. If the library +libVectorLoglessPairHMM.so can be found in java.library.path, then it is loaded and the 'packed' +library is not used. diff --git a/public/VectorPairHMM/pom.xml b/public/VectorPairHMM/pom.xml new file mode 100644 index 000000000..41bb73211 --- /dev/null +++ b/public/VectorPairHMM/pom.xml @@ -0,0 +1,119 @@ + + + 4.0.0 + + + org.broadinstitute.sting + sting-root + 2.8-SNAPSHOT + ../../public/sting-root + + + VectorPairHMM + pom + Vectorized PairHMM native libraries + + Builds a GNU/Linux x86_64 library of VectorPairHMM using icc (Intel C++ compiler). During install, copies it into sting-utils. Neither tested nor expected to work on any other platform. + + + UTF-8 + ${sourceEncoding} + ${sourceEncoding} + ${project.basedir}/../.. 
+ ${sting.basedir}/public/sting-utils + + ${sting-utils.basedir}/src/main/resources/org/broadinstitute/sting/utils/pairhmm + + + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + + display-info + + validate + + + + + + + org.codehaus.mojo + exec-maven-plugin + + + + exec + + compile + + make + src/main/c++ + + ${java.home} + ${project.build.directory} + + + + + + + + + org.apache.maven.plugins + maven-install-plugin + + true + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + default-install + + copy-resources + + install + + ${pairhmm.resources.directory} + + + ${project.build.directory} + + **/* + + + + + + + + + + + com.google.code.sortpom + maven-sortpom-plugin + + false + custom_1 + \n + ${sourceEncoding} + true + scope + 4 + false + + + + + diff --git a/public/VectorPairHMM/src/main/c++/.gitignore b/public/VectorPairHMM/src/main/c++/.gitignore new file mode 100644 index 000000000..d791ffd80 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/.gitignore @@ -0,0 +1,16 @@ +.svn +*.o +*.so +tests +.deps +hmm_Mohammad +pairhmm-template-main +*.swp +*.class +checker +reformat +subdir_checkout.sh +avx/ +sse/ +triplicate.sh + diff --git a/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc new file mode 100644 index 000000000..0e3026f65 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/LoadTimeInitializer.cc @@ -0,0 +1,206 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice 
shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#include "LoadTimeInitializer.h" +#include "utils.h" +using namespace std; +char* LoadTimeInitializerStatsNames[] = +{ + "num_regions", + "num_reads", + "num_haplotypes", + "num_testcases", + "num_double_invocations", + "haplotype_length", + "readlength", + "product_read_length_haplotype_length", + "dummy" +}; + +LoadTimeInitializer g_load_time_initializer; + +LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded +{ + ConvertChar::init(); +#ifndef DISABLE_FTZ + //Very important to get good performance on Intel processors + //Function: enabling FTZ converts denormals to 0 in hardware + //Denormals cause microcode to insert uops into the core causing big slowdown + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + cout << "FTZ enabled - may decrease accuracy if denormal numbers encountered\n"; +#else + cout << "FTZ is not set - may slow down performance if denormal numbers encountered\n"; +#endif + //Profiling: times for compute and transfer (either bytes copied or pointers copied) + m_compute_time = 0; + m_data_transfer_time = 0; + m_bytes_copied = 0; + + //Initialize profiling counters + for(unsigned i=0;i::initializeStaticMembers(); + Context::initializeStaticMembers(); + + cout.flush(); +} + +void LoadTimeInitializer::print_profiling() +{ + double mean = 0; + double variance = 0; + uint64_t denominator = 1; + cout << "Time spent in compute_testcases "< C++) "<::iterator mI = 
m_filename_to_fptr.find(filename); + ofstream* fptr = 0; + if(mI == m_filename_to_fptr.end()) + { + m_filename_to_fptr[filename] = new ofstream(); + fptr = m_filename_to_fptr[filename]; + //File never seen before + if(m_written_files_set.find(filename) == m_written_files_set.end()) + { + to_append = false; + m_written_files_set.insert(filename); + } + fptr->open(filename.c_str(), to_append ? ios::app : ios::out); + assert(fptr->is_open()); + } + else + fptr = (*mI).second; + //ofstream fptr; + //fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out); + (*fptr) << s; + if(add_newline) + (*fptr) << "\n"; + //fptr.close(); +} +void LoadTimeInitializer::debug_close() +{ + for(map::iterator mB = m_filename_to_fptr.begin(), mE = m_filename_to_fptr.end(); + mB != mE;mB++) + { + (*mB).second->close(); + delete (*mB).second; + } + m_filename_to_fptr.clear(); +} + +void LoadTimeInitializer::dump_sandbox(testcase& tc, unsigned tc_idx, unsigned numReads, unsigned numHaplotypes) +{ + unsigned haplotypeLength = tc.haplen; + unsigned readLength = tc.rslen; + ofstream& dumpFptr = m_sandbox_fptr; + for(unsigned k=0;k +#include "template.h" + +enum LoadTimeInitializerStatsEnum +{ + NUM_REGIONS_IDX=0, + NUM_READS_IDX, + NUM_HAPLOTYPES_IDX, + NUM_TESTCASES_IDX, + NUM_DOUBLE_INVOCATIONS_IDX, + HAPLOTYPE_LENGTH_IDX, + READ_LENGTH_IDX, + PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX, + TOTAL_NUMBER_STATS +}; +extern char* LoadTimeInitializerStatsNames[]; + +class LoadTimeInitializer +{ + public: + LoadTimeInitializer(); //will be called when library is loaded + ~LoadTimeInitializer() + { + delete[] m_buffer; + } + void print_profiling(); + void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true); + void debug_close(); + + void dump_sandbox(testcase& tc, unsigned tc_idx, unsigned numReads, unsigned numHaplotypes); + void open_sandbox() { m_sandbox_fptr.open("sandbox.txt", std::ios::app); } + void close_sandbox() { m_sandbox_fptr.close(); } + + 
jfieldID m_readBasesFID; + jfieldID m_readQualsFID; + jfieldID m_insertionGOPFID; + jfieldID m_deletionGOPFID; + jfieldID m_overallGCPFID; + jfieldID m_haplotypeBasesFID; + //profiling - update stats + void update_stat(LoadTimeInitializerStatsEnum stat_idx, uint64_t value); + //timing in nanoseconds + uint64_t m_compute_time; + uint64_t m_data_transfer_time; + //bytes copied + uint64_t m_bytes_copied; + unsigned get_buffer_size() { return m_buffer_size; } + char* get_buffer() { return (char*)m_buffer; } + private: + std::map m_filename_to_fptr; + std::set m_written_files_set; + std::ofstream m_sandbox_fptr; + //used to compute various stats + uint64_t m_sum_stats[TOTAL_NUMBER_STATS]; + double m_sum_square_stats[TOTAL_NUMBER_STATS]; + uint64_t m_min_stats[TOTAL_NUMBER_STATS]; + uint64_t m_max_stats[TOTAL_NUMBER_STATS]; + unsigned m_buffer_size; + uint64_t* m_buffer; +}; +extern LoadTimeInitializer g_load_time_initializer; + +#define SIZE_PER_TESTCASE 6*10000 +#define SIZE_PER_BUFFER 10000 + +#endif diff --git a/public/VectorPairHMM/src/main/c++/Makefile b/public/VectorPairHMM/src/main/c++/Makefile new file mode 100644 index 000000000..354bca0bb --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/Makefile @@ -0,0 +1,114 @@ +#Copyright (c) 2012 The Broad Institute + +#Permission is hereby granted, free of charge, to any person +#obtaining a copy of this software and associated documentation +#files (the "Software"), to deal in the Software without +#restriction, including without limitation the rights to use, +#copy, modify, merge, publish, distribute, sublicense, and/or sell +#copies of the Software, and to permit persons to whom the +#Software is furnished to do so, subject to the following +#conditions: + +#The above copyright notice and this permission notice shall be +#included in all copies or substantial portions of the Software. 
+ +#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +#EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +#OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +#NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +#WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +#FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +#THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# + + +#OMPCFLAGS=-fopenmp +#OMPLFLAGS=-fopenmp #-openmp-link static + +#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas +#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas + +JRE_HOME?=/opt/jdk1.7.0_25/jre +JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JRE_HOME}/../include -I${JRE_HOME}/../include/linux + +COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas +CC=icc +CXX=icc + +LDFLAGS=-lm -lrt $(OMPLDFLAGS) +ifdef DISABLE_FTZ + COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz +endif + +PAPI_DIR=/home/karthikg/softwares/papi-5.3.0 +ifdef USE_PAPI + ifeq ($(USE_PAPI),1) + COMMON_COMPILATION_FLAGS+=-I$(PAPI_DIR)/include -DUSE_PAPI + LDFLAGS+=-L$(PAPI_DIR)/lib -lpapi + endif +endif + +ifdef DISABLE_FTZ + COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz +endif + +BIN=libVectorLoglessPairHMM.so pairhmm-template-main checker +#BIN=checker + +DEPDIR=.deps +DF=$(DEPDIR)/$(*).d + +#Common across libJNI and sandbox +COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc LoadTimeInitializer.cc +#Part of libJNI +LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc Sandbox.cc $(COMMON_SOURCES) +SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc +LIBOBJECTS=$(LIBSOURCES:.cc=.o) 
+COMMON_OBJECTS=$(COMMON_SOURCES:.cc=.o) + + +#No vectorization for these files +NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc LoadTimeInitializer.cc Sandbox.cc +#Use -xAVX for these files +AVX_SOURCES=avx_function_instantiations.cc +#Use -xSSE4.2 for these files +SSE_SOURCES=sse_function_instantiations.cc + +NO_VECTOR_OBJECTS=$(NO_VECTOR_SOURCES:.cc=.o) +AVX_OBJECTS=$(AVX_SOURCES:.cc=.o) +SSE_OBJECTS=$(SSE_SOURCES:.cc=.o) +$(NO_VECTOR_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) +$(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xAVX +$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xSSE4.2 +OBJECTS=$(NO_VECTOR_OBJECTS) $(AVX_OBJECTS) $(SSE_OBJECTS) + +all: $(BIN) Sandbox.class copied_lib + +-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d)) + +checker: pairhmm-1-base.o $(COMMON_OBJECTS) + $(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) + +pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS) + $(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) + +libVectorLoglessPairHMM.so: $(LIBOBJECTS) + $(CXX) $(OMPLFLAGS) -shared -static-intel -o $@ $(LIBOBJECTS) ${LDFLAGS} + + +$(OBJECTS): %.o: %.cc + @mkdir -p $(DEPDIR) + $(CXX) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $< + +Sandbox.class: Sandbox.java + javac Sandbox.java + +copied_lib: libVectorLoglessPairHMM.so +ifdef OUTPUT_DIR + mkdir -p $(OUTPUT_DIR) + rsync -a libVectorLoglessPairHMM.so $(OUTPUT_DIR)/ +endif + +clean: + rm -rf $(BIN) *.o $(DEPDIR) *.class diff --git a/public/VectorPairHMM/src/main/c++/Sandbox.cc b/public/VectorPairHMM/src/main/c++/Sandbox.cc new file mode 100644 index 000000000..985b19ae9 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/Sandbox.cc @@ -0,0 +1,106 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated 
documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#include "Sandbox.h" +#include "org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h" +#include "utils.h" +#include "jni_common.h" +/* + * Class: Sandbox + * Method: jniGetMachineType + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_Sandbox_jniGetMachineType + (JNIEnv * env, jobject thisObj) +{ + return 0; +} + +/* + * Class: Sandbox + * Method: jniInitializeClassFieldsAndMachineMask + * Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniInitializeClassFieldsAndMachineMask + (JNIEnv* env, jobject thisObject, jclass readDataHolderClass, jclass haplotypeDataHolderClass, jlong mask) +{ + Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask(env, thisObject, readDataHolderClass, + haplotypeDataHolderClass, mask); +} + +/* + * Class: Sandbox + * Method: jniInitializeHaplotypes + * Signature: (I[LSandbox/JNIHaplotypeDataHolderClass;)V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniInitializeHaplotypes + (JNIEnv * env, jobject 
thisObject, jint numHaplotypes, jobjectArray haplotypeDataArray) +{ + Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeHaplotypes(env, thisObject, numHaplotypes, haplotypeDataArray); +} + +/* + * Class: Sandbox + * Method: jniFinalizeRegion + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniFinalizeRegion + (JNIEnv * env, jobject thisObject) +{ + Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniFinalizeRegion(env, thisObject); +} + + +/* + * Class: Sandbox + * Method: jniComputeLikelihoods + * Signature: (II[LSandbox/JNIReadDataHolderClass;[LSandbox/JNIHaplotypeDataHolderClass;[DI)V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniComputeLikelihoods + (JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes, + jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse) +{ + Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods(env, thisObject, + numReads, numHaplotypes, readDataArray, haplotypeDataArray, likelihoodArray, maxNumThreadsToUse); +} +/* + * Class: Sandbox + * Method: jniClose + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniClose + (JNIEnv* env, jobject thisObject) +{ Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniClose(env, thisObject); } + +JNIEXPORT void JNICALL Java_Sandbox_doEverythingNative + (JNIEnv* env, jobject thisObject, jstring fileNameString) +{ + const char* fileName = env->GetStringUTFChars(fileNameString, 0); + char local_array[800]; + strncpy(local_array, fileName, 200); + env->ReleaseStringUTFChars(fileNameString, fileName); + do_compute(local_array, true, 10000, false); +} + diff --git a/public/VectorPairHMM/src/main/c++/Sandbox.h b/public/VectorPairHMM/src/main/c++/Sandbox.h new file mode 100644 index 000000000..486a1c095 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/Sandbox.h @@ -0,0 +1,96 @@ +/*Copyright (c) 2012 The Broad Institute + 
+*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class Sandbox */ + +#ifndef _Included_Sandbox +#define _Included_Sandbox +#ifdef __cplusplus +extern "C" { +#endif +#undef Sandbox_enableAll +#define Sandbox_enableAll -1LL +/* + * Class: Sandbox + * Method: jniGetMachineType + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_Sandbox_jniGetMachineType + (JNIEnv *, jobject); + +/* + * Class: Sandbox + * Method: jniInitializeClassFieldsAndMachineMask + * Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniInitializeClassFieldsAndMachineMask + (JNIEnv *, jobject, jclass, jclass, jlong); + +/* + * Class: Sandbox + * Method: jniInitializeHaplotypes + * Signature: (I[LSandbox/JNIHaplotypeDataHolderClass;)V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniInitializeHaplotypes + (JNIEnv *, jobject, jint, jobjectArray); + +/* + * Class: Sandbox + * Method: jniFinalizeRegion + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniFinalizeRegion + (JNIEnv *, jobject); + +/* + * Class: Sandbox + * Method: jniComputeLikelihoods + * Signature: (II[LSandbox/JNIReadDataHolderClass;[LSandbox/JNIHaplotypeDataHolderClass;[DI)V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniComputeLikelihoods + (JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint); + +/* + * Class: Sandbox + * Method: jniClose + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_Sandbox_jniClose + (JNIEnv *, jobject); + +/* + * Class: Sandbox + * Method: doEverythingNative + * Signature: ([B)V + */ +JNIEXPORT void JNICALL Java_Sandbox_doEverythingNative + (JNIEnv *, jobject, jstring); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/public/VectorPairHMM/src/main/c++/Sandbox.java b/public/VectorPairHMM/src/main/c++/Sandbox.java new file mode 100644 index 000000000..698c1b48c --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/Sandbox.java @@ -0,0 +1,306 @@ +/* +* Copyright (c) 2012 The Broad Institute 
+* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +package org.broadinstitute.sting.utils.vectorpairhmm; + +import java.util.List; +import java.util.LinkedList; +import java.util.Map; +import java.util.HashMap; +import java.io.File; +import java.util.Scanner; +import java.io.IOException; +import java.io.FileNotFoundException; +import java.io.InputStreamReader; + +public class Sandbox { + + private long setupTime = 0; + private long computeTime = 0; + //Used to copy references to byteArrays to JNI from reads + protected class JNIReadDataHolderClass { + public byte[] readBases = null; + public byte[] readQuals = null; + public byte[] insertionGOP = null; + public byte[] deletionGOP = null; + public byte[] overallGCP = null; + } + + //Used to copy references to byteArrays to JNI from haplotypes + protected class JNIHaplotypeDataHolderClass { + public byte[] haplotypeBases = null; + } + + /** + * Return 64-bit mask representing machine capabilities + * Bit 0 is LSB, bit 63 MSB + * Bit 0 represents sse4.2 availability + * Bit 1 represents AVX availability + */ + public native long jniGetMachineType(); + public static final long enableAll = 0xFFFFFFFFFFFFFFFFl; + + + /** + * Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM. + * C++ code gets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not + * change per JVM session + * @param readDataHolderClass class type of JNIReadDataHolderClass + * @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass + * @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType().
Users can disable usage of some hardware features by zeroing some bits in the mask + * */ + private native void jniInitializeClassFieldsAndMachineMask(Class readDataHolderClass, Class haplotypeDataHolderClass, long mask); + + private static Boolean isVectorLoglessPairHMMLibraryLoaded = false; + //The constructor is called only once inside PairHMMLikelihoodCalculationEngine + public Sandbox() { + synchronized(Sandbox.class) { //lock on the class: the Boolean flag is reassigned below, so it is not a stable monitor object + //Load the library and initialize the FieldIDs + if(!isVectorLoglessPairHMMLibraryLoaded) { + System.loadLibrary("VectorLoglessPairHMM"); + isVectorLoglessPairHMMLibraryLoaded = true; + jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once + } + } + } + + private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray); + private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray = null; + + //Used to transfer data to JNI + //Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region + public void initialize(final List haplotypes) { + int numHaplotypes = haplotypes.size(); + mHaplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes]; + int idx = 0; + for(final JNIHaplotypeDataHolderClass currHaplotype : haplotypes) + { + mHaplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass(); + mHaplotypeDataArray[idx].haplotypeBases = currHaplotype.haplotypeBases; + ++idx; + } + jniInitializeHaplotypes(numHaplotypes, mHaplotypeDataArray); + } + /** + * Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not + * accessing Java memory directly, still important to release memory from C++ + */ + private native void jniFinalizeRegion(); + + + public void finalizeRegion() + { + jniFinalizeRegion(); + } + + /** + * Real compute kernel + */ + private native
void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray, + JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse); + + public void computeLikelihoods(final List reads, final List haplotypes) { + //System.out.println("Region : "+reads.size()+" x "+haplotypes.size()); + long startTime = System.nanoTime(); + int readListSize = reads.size(); + int numHaplotypes = haplotypes.size(); + int numTestcases = readListSize*numHaplotypes; + JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize]; + int idx = 0; + for(JNIReadDataHolderClass read : reads) + { + readDataArray[idx] = new JNIReadDataHolderClass(); + readDataArray[idx].readBases = read.readBases; + readDataArray[idx].readQuals = read.readQuals; + readDataArray[idx].insertionGOP = read.insertionGOP; + readDataArray[idx].deletionGOP = read.deletionGOP; + readDataArray[idx].overallGCP = read.overallGCP; + ++idx; + } + + double[] mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results + setupTime += (System.nanoTime() - startTime); + //for(reads) + // for(haplotypes) + // compute_full_prob() + jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12); + + computeTime += (System.nanoTime() - startTime); + } + + /** + * Print final profiling information from native code + */ + public native void jniClose(); + public void close() + { + System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9)+" compute time : "+(computeTime*1e-9)); + jniClose(); + } + + public void parseSandboxFile(String filename) + { + File file = new File(filename); + Scanner input = null; + try + { + input = new Scanner(file); + } + catch(FileNotFoundException e) + { + System.err.println("File "+filename+" cannot be found/read"); + return; + } + int idx = 0; + int numReads = 0; + int numHaplotypes = 0; + int readIdx = 0, testCaseIdx = 0, haplotypeIdx = 
0; + LinkedList haplotypeList = new LinkedList(); + LinkedList readList = new LinkedList(); + + byte[][] byteArray = new byte[6][]; + boolean firstLine = true; + String[] currTokens = new String[8]; + while(input.hasNextLine()) + { + String line = input.nextLine(); + Scanner lineScanner = new Scanner(line); + idx = 0; + while(lineScanner.hasNext()) + currTokens[idx++] = lineScanner.next(); + if(idx == 0) + break; + assert(idx >= 6); + //start of new region + if(idx == 8) + { + if(!firstLine) + { + initialize(haplotypeList); + computeLikelihoods(readList, haplotypeList); + finalizeRegion(); + } + try + { + numReads = Integer.parseInt(currTokens[6]); + } + catch(NumberFormatException e) + { + numReads = 1; + } + try + { + numHaplotypes = Integer.parseInt(currTokens[7]); + } + catch(NumberFormatException e) + { + numHaplotypes = 1; + } + haplotypeIdx = readIdx = testCaseIdx = 0; + readList.clear(); + haplotypeList.clear(); + } + if(haplotypeIdx < numHaplotypes) + { + JNIHaplotypeDataHolderClass X = new JNIHaplotypeDataHolderClass(); + X.haplotypeBases = currTokens[0].getBytes(); + haplotypeList.add(X); + } + if(testCaseIdx%numHaplotypes == 0) + { + JNIReadDataHolderClass X = new JNIReadDataHolderClass(); + X.readBases = currTokens[1].getBytes(); + for(int i=2;i<6;++i) + { + byteArray[i] = currTokens[i].getBytes(); + for(int j=0;j 0 && readList.size() > 0) + { + initialize(haplotypeList); + computeLikelihoods(readList, haplotypeList); + finalizeRegion(); + } + + close(); + input.close(); + } + + private native void doEverythingNative(String filename); + + public static void main(String[] args) + { + if(args.length <= 0) + { + System.err.println("Needs 1 argument - "); + System.exit(-1); + } + //// Get runtime + //java.lang.Runtime rt = java.lang.Runtime.getRuntime(); + //// Start a new process: UNIX command ls + //String cmd = "/home/karthikg/broad/gsa-unstable/public/c++/VectorPairHMM/checker "+args[0]; + //try + //{ + //System.out.println(cmd); + //java.lang.Process 
p = rt.exec(cmd); + //try + //{ + //p.waitFor(); + //java.io.InputStream is = p.getInputStream(); + //java.io.BufferedReader reader = new java.io.BufferedReader(new InputStreamReader(is)); + //// And print each line + //String s = null; + //while ((s = reader.readLine()) != null) { + //System.out.println(s); + //} + //is.close(); + //} + //catch(InterruptedException e) + //{ + //System.err.println(e); + //} + //} + //catch(IOException e) + //{ + //System.err.println(e); + //} + Sandbox t = new Sandbox(); + //t.doEverythingNative(args[0]); + t.parseSandboxFile(args[0]); + } +} diff --git a/public/VectorPairHMM/src/main/c++/Sandbox_JNIHaplotypeDataHolderClass.h b/public/VectorPairHMM/src/main/c++/Sandbox_JNIHaplotypeDataHolderClass.h new file mode 100644 index 000000000..7f78f0178 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/Sandbox_JNIHaplotypeDataHolderClass.h @@ -0,0 +1,13 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class Sandbox_JNIHaplotypeDataHolderClass */ + +#ifndef _Included_Sandbox_JNIHaplotypeDataHolderClass +#define _Included_Sandbox_JNIHaplotypeDataHolderClass +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif +#endif diff --git a/public/VectorPairHMM/src/main/c++/Sandbox_JNIReadDataHolderClass.h b/public/VectorPairHMM/src/main/c++/Sandbox_JNIReadDataHolderClass.h new file mode 100644 index 000000000..a9312ff3b --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/Sandbox_JNIReadDataHolderClass.h @@ -0,0 +1,13 @@ +/* DO NOT EDIT THIS FILE - it is machine generated */ +#include +/* Header for class Sandbox_JNIReadDataHolderClass */ + +#ifndef _Included_Sandbox_JNIReadDataHolderClass +#define _Included_Sandbox_JNIReadDataHolderClass +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus +} +#endif +#endif diff --git a/public/VectorPairHMM/src/main/c++/avx_function_instantiations.cc b/public/VectorPairHMM/src/main/c++/avx_function_instantiations.cc new file mode 100644 index 
000000000..6d90d5070 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/avx_function_instantiations.cc @@ -0,0 +1,44 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#include "template.h" + +#undef SIMD_ENGINE +#undef SIMD_ENGINE_SSE + +#define SIMD_ENGINE avx +#define SIMD_ENGINE_AVX + +#include "define-float.h" +#include "shift_template.c" +#include "pairhmm-template-kernel.cc" + +#include "define-double.h" +#include "shift_template.c" +#include "pairhmm-template-kernel.cc" + +template double compute_full_prob_avxd(testcase* tc, double* nextlog); +template float compute_full_prob_avxs(testcase* tc, float* nextlog); + diff --git a/public/VectorPairHMM/src/main/c++/baseline.cc b/public/VectorPairHMM/src/main/c++/baseline.cc new file mode 100644 index 000000000..a2dd8d329 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/baseline.cc @@ -0,0 +1,167 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#include "headers.h" +#include "template.h" +#include "utils.h" +#include "LoadTimeInitializer.h" +using namespace std; + +template +NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log) +{ + int r, c; + int ROWS = tc->rslen + 1; + int COLS = tc->haplen + 1; + + Context ctx; + //#define USE_STACK_ALLOCATION 1 +#ifdef USE_STACK_ALLOCATION + NUMBER M[ROWS][COLS]; + NUMBER X[ROWS][COLS]; + NUMBER Y[ROWS][COLS]; + NUMBER p[ROWS][6]; +#else + //allocate on heap in way that simulates a 2D array. Having a 2D array instead of + //a straightforward array of pointers ensures that all data lies 'close' in memory, increasing + //the chance of being stored together in the cache. Also, prefetchers can learn memory access + //patterns for 2D arrays, not possible for array of pointers + //bool locally_allocated = false; + //NUMBER* common_buffer = 0; + NUMBER* common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6]; + //unsigned curr_size = sizeof(NUMBER)*(3*ROWS*COLS + ROWS*6); + //if(true) + //{ + //common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6]; + //locally_allocated = true; + //} + //else + //common_buffer = (NUMBER*)(g_load_time_initializer.get_buffer()); + //pointers to within the allocated buffer + NUMBER** common_pointer_buffer = new NUMBER*[4*ROWS]; + NUMBER* ptr = common_buffer; + unsigned i = 0; + for(i=0;i<3*ROWS;++i, ptr+=COLS) + common_pointer_buffer[i] = ptr; + for(;i<4*ROWS;++i, ptr+=6) + common_pointer_buffer[i] = ptr; + + NUMBER** M = common_pointer_buffer; + NUMBER** X = M + ROWS; + NUMBER** Y = X + ROWS; + NUMBER** p = Y + ROWS; +#endif + + + p[0][MM] = ctx._(0.0); + p[0][GapM] = ctx._(0.0); + p[0][MX] = ctx._(0.0); + p[0][XX] = ctx._(0.0); + p[0][MY] = ctx._(0.0); + p[0][YY] = ctx._(0.0); + + for (r = 1; r < ROWS; r++) + { + int _i = tc->i[r-1] & 127; + int _d = tc->d[r-1] & 127; + int _c = tc->c[r-1] & 127; + //p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127]; + SET_MATCH_TO_MATCH_PROB(p[r][MM], _i, _d); + p[r][GapM] = ctx._(1.0) - 
ctx.ph2pr[_c]; + p[r][MX] = ctx.ph2pr[_i]; + p[r][XX] = ctx.ph2pr[_c]; + p[r][MY] = ctx.ph2pr[_d]; + p[r][YY] = ctx.ph2pr[_c]; + //p[r][MY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d]; + //p[r][YY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c]; + } + for (c = 0; c < COLS; c++) + { + M[0][c] = ctx._(0.0); + X[0][c] = ctx._(0.0); + Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen); + } + + for (r = 1; r < ROWS; r++) + { + M[r][0] = ctx._(0.0); + X[r][0] = X[r-1][0] * p[r][XX]; + Y[r][0] = ctx._(0.0); + } + + NUMBER result = ctx._(0.0); + + for (r = 1; r < ROWS; r++) + for (c = 1; c < COLS; c++) + { + fexcept_t flagp; + char _rs = tc->rs[r-1]; + char _hap = tc->hap[c-1]; + int _q = tc->q[r-1] & 127; + NUMBER distm = ctx.ph2pr[_q]; + if (_rs == _hap || _rs == 'N' || _hap == 'N') + distm = ctx._(1.0) - distm; + else + distm = distm/3; + + + //feclearexcept(FE_ALL_EXCEPT); + M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]); + //STORE_FP_EXCEPTIONS(flagp, exceptions_array); + + //feclearexcept(FE_ALL_EXCEPT); + X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX]; + //STORE_FP_EXCEPTIONS(flagp, exceptions_array); + + //feclearexcept(FE_ALL_EXCEPT); + Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY]; + //STORE_FP_EXCEPTIONS(flagp, exceptions_array); + + //CONVERT_AND_PRINT(M[r][c]); + //CONVERT_AND_PRINT(X[r][c]); + //CONVERT_AND_PRINT(Y[r][c]); + + } + for (c = 0; c < COLS; c++) + { + result += M[ROWS-1][c] + X[ROWS-1][c]; + } + + if (before_last_log != NULL) + *before_last_log = result; + +#ifndef USE_STACK_ALLOCATION + delete[] common_pointer_buffer; + //if(locally_allocated) + delete[] common_buffer; +#endif + + return result; + //return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT; +} + +template double compute_full_prob(testcase* tc, double* nextbuf); +template float compute_full_prob(testcase* tc, float* nextbuf); + diff --git a/public/VectorPairHMM/src/main/c++/define-double.h 
b/public/VectorPairHMM/src/main/c++/define-double.h new file mode 100644 index 000000000..2067d369c --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/define-double.h @@ -0,0 +1,205 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#include + +#ifdef PRECISION +#undef PRECISION +#undef MAIN_TYPE +#undef MAIN_TYPE_SIZE +#undef UNION_TYPE +#undef IF_128 +#undef IF_MAIN_TYPE +#undef SHIFT_CONST1 +#undef SHIFT_CONST2 +#undef SHIFT_CONST3 +#undef _128_TYPE +#undef SIMD_TYPE +#undef AVX_LENGTH +#undef HAP_TYPE +#undef MASK_TYPE +#undef MASK_ALL_ONES + +#undef SET_VEC_ZERO(__vec) +#undef VEC_OR(__v1, __v2) +#undef VEC_ADD(__v1, __v2) +#undef VEC_SUB(__v1, __v2) +#undef VEC_MUL(__v1, __v2) +#undef VEC_DIV(__v1, __v2) +#undef VEC_BLEND(__v1, __v2, __mask) +#undef VEC_BLENDV(__v1, __v2, __maskV) +#undef VEC_CAST_256_128(__v1) +#undef VEC_EXTRACT_128(__v1, __im) +#undef VEC_EXTRACT_UNIT(__v1, __im) +#undef VEC_SET1_VAL128(__val) +#undef VEC_MOVE(__v1, __val) +#undef VEC_CAST_128_256(__v1) +#undef VEC_INSERT_VAL(__v1, __val, __pos) +#undef VEC_CVT_128_256(__v1) +#undef VEC_SET1_VAL(__val) +#undef VEC_POPCVT_CHAR(__ch) +#undef VEC_LDPOPCVT_CHAR(__addr) +#undef VEC_CMP_EQ(__v1, __v2) +#undef VEC_SET_LSE(__val) +#undef SHIFT_HAP(__v1, __val) +#undef MASK_VEC +#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) +#undef VEC_SHIFT_LEFT_1BIT(__vs) +#undef MASK_ALL_ONES +#undef COMPARE_VECS(__v1, __v2) +#undef _256_INT_TYPE +#undef BITMASK_VEC +#endif + +#define PRECISION d +#define MAIN_TYPE double +#define MAIN_TYPE_SIZE 64 +#define UNION_TYPE mix_D +#define IF_128 IF_128d +#define IF_MAIN_TYPE IF_64 +#define SHIFT_CONST1 8 +#define SHIFT_CONST2 1 +#define SHIFT_CONST3 8 +#define _128_TYPE __m128d +#define SIMD_TYPE __m256d +#define _256_INT_TYPE __m256i +#define AVX_LENGTH 4 +#define HAP_TYPE __m128i +#define MASK_TYPE uint64_t +#define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFF +#define MASK_VEC MaskVec_D + +#define SET_VEC_ZERO(__vec) \ + __vec= _mm256_setzero_pd() + +#define VEC_OR(__v1, __v2) \ + _mm256_or_pd(__v1, __v2) + +#define VEC_ADD(__v1, __v2) \ + _mm256_add_pd(__v1, __v2) + +#define VEC_SUB(__v1, __v2) \ + _mm256_sub_pd(__v1, __v2) + +#define VEC_MUL(__v1, __v2) \ + _mm256_mul_pd(__v1, __v2) + 
+#define VEC_DIV(__v1, __v2) \ + _mm256_div_pd(__v1, __v2) + +#define VEC_BLEND(__v1, __v2, __mask) \ + _mm256_blend_pd(__v1, __v2, __mask) + +#define VEC_BLENDV(__v1, __v2, __maskV) \ + _mm256_blendv_pd(__v1, __v2, __maskV) + +#define VEC_CAST_256_128(__v1) \ + _mm256_castpd256_pd128 (__v1) + +#define VEC_EXTRACT_128(__v1, __im) \ + _mm256_extractf128_pd (__v1, __im) + +#define VEC_EXTRACT_UNIT(__v1, __im) \ + _mm_extract_epi64(__v1, __im) + +#define VEC_SET1_VAL128(__val) \ + _mm_set1_pd(__val) + +#define VEC_MOVE(__v1, __val) \ + _mm_move_sd(__v1, __val) + +#define VEC_CAST_128_256(__v1) \ + _mm256_castpd128_pd256(__v1) + +#define VEC_INSERT_VAL(__v1, __val, __pos) \ + _mm256_insertf128_pd(__v1, __val, __pos) + +#define VEC_CVT_128_256(__v1) \ + _mm256_cvtepi32_pd(__v1) + +#define VEC_SET1_VAL(__val) \ + _mm256_set1_pd(__val) + +#define VEC_POPCVT_CHAR(__ch) \ + _mm256_cvtepi32_pd(_mm_set1_epi32(__ch)) + +#define VEC_LDPOPCVT_CHAR(__addr) \ + _mm256_cvtepi32_pd(_mm_load_si128((__m128i const *)__addr)) + +#define VEC_CMP_EQ(__v1, __v2) \ + _mm256_cmp_pd(__v1, __v2, _CMP_EQ_OQ) + +#define VEC_SET_LSE(__val) \ + _mm256_set_pd(zero, zero, zero, __val); + +#define SHIFT_HAP(__v1, __val) \ + __v1 = _mm_insert_epi32(_mm_slli_si128(__v1, 4), __val.i, 0) + +#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \ + __vdst = _mm256_castpd128_pd256(__vsLow) ; \ +__vdst = _mm256_insertf128_pd(__vdst, __vsHigh, 1) ; + +#define VEC_SHIFT_LEFT_1BIT(__vs) \ + __vs = _mm_slli_epi64(__vs, 1) + + +#define COMPARE_VECS(__v1, __v2, __first, __last) { \ + double* ptr1 = (double*) (&__v1) ; \ + double* ptr2 = (double*) (&__v2) ; \ + for (int ei=__first; ei <= __last; ++ei) { \ + if (ptr1[ei] != ptr2[ei]) { \ + std::cout << "Double Mismatch at " << ei << ": " \ + << ptr1[ei] << " vs. 
" << ptr2[ei] << std::endl ; \ + exit(0) ; \ + } \ + } \ +} + +class BitMaskVec_double { + + MASK_VEC low_, high_ ; + SIMD_TYPE combined_ ; + + public: + inline MASK_TYPE& getLowEntry(int index) { + return low_.masks[index] ; + } + inline MASK_TYPE& getHighEntry(int index) { + return high_.masks[index] ; + } + + inline const SIMD_TYPE& getCombinedMask() { + VEC_SSE_TO_AVX(low_.vecf, high_.vecf, combined_) ; + return combined_ ; + } + + inline void shift_left_1bit() { + VEC_SHIFT_LEFT_1BIT(low_.vec) ; + VEC_SHIFT_LEFT_1BIT(high_.vec) ; + } + +} ; + +#define BITMASK_VEC BitMaskVec_double diff --git a/public/VectorPairHMM/src/main/c++/define-float.h b/public/VectorPairHMM/src/main/c++/define-float.h new file mode 100644 index 000000000..318f78280 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/define-float.h @@ -0,0 +1,206 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#include + +#ifdef PRECISION +#undef PRECISION +#undef MAIN_TYPE +#undef MAIN_TYPE_SIZE +#undef UNION_TYPE +#undef IF_128 +#undef IF_MAIN_TYPE +#undef SHIFT_CONST1 +#undef SHIFT_CONST2 +#undef SHIFT_CONST3 +#undef _128_TYPE +#undef SIMD_TYPE +#undef AVX_LENGTH +#undef HAP_TYPE +#undef MASK_TYPE +#undef MASK_ALL_ONES + +#undef SET_VEC_ZERO(__vec) +#undef VEC_OR(__v1, __v2) +#undef VEC_ADD(__v1, __v2) +#undef VEC_SUB(__v1, __v2) +#undef VEC_MUL(__v1, __v2) +#undef VEC_DIV(__v1, __v2) +#undef VEC_BLEND(__v1, __v2, __mask) +#undef VEC_BLENDV(__v1, __v2, __maskV) +#undef VEC_CAST_256_128(__v1) +#undef VEC_EXTRACT_128(__v1, __im) +#undef VEC_EXTRACT_UNIT(__v1, __im) +#undef VEC_SET1_VAL128(__val) +#undef VEC_MOVE(__v1, __val) +#undef VEC_CAST_128_256(__v1) +#undef VEC_INSERT_VAL(__v1, __val, __pos) +#undef VEC_CVT_128_256(__v1) +#undef VEC_SET1_VAL(__val) +#undef VEC_POPCVT_CHAR(__ch) +#undef VEC_LDPOPCVT_CHAR(__addr) +#undef VEC_CMP_EQ(__v1, __v2) +#undef VEC_SET_LSE(__val) +#undef SHIFT_HAP(__v1, __val) +#undef MASK_VEC +#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) +#undef VEC_SHIFT_LEFT_1BIT(__vs) +#undef MASK_ALL_ONES +#undef COMPARE_VECS(__v1, __v2) +#undef _256_INT_TYPE +#undef BITMASK_VEC +#endif + +#define PRECISION s + +#define MAIN_TYPE float +#define MAIN_TYPE_SIZE 32 +#define UNION_TYPE mix_F +#define IF_128 IF_128f +#define IF_MAIN_TYPE IF_32 +#define SHIFT_CONST1 12 +#define SHIFT_CONST2 3 +#define SHIFT_CONST3 4 +#define _128_TYPE __m128 +#define SIMD_TYPE __m256 +#define _256_INT_TYPE __m256i +#define AVX_LENGTH 8 +#define HAP_TYPE UNION_TYPE +#define MASK_TYPE uint32_t +#define MASK_ALL_ONES 0xFFFFFFFF +#define MASK_VEC MaskVec_F + +#define SET_VEC_ZERO(__vec) \ + __vec= _mm256_setzero_ps() + +#define VEC_OR(__v1, __v2) \ + _mm256_or_ps(__v1, __v2) + +#define VEC_ADD(__v1, __v2) \ + _mm256_add_ps(__v1, __v2) + +#define VEC_SUB(__v1, __v2) \ + _mm256_sub_ps(__v1, __v2) + +#define VEC_MUL(__v1, __v2) \ + _mm256_mul_ps(__v1, __v2) + +#define 
VEC_DIV(__v1, __v2) \ + _mm256_div_ps(__v1, __v2) + +#define VEC_BLEND(__v1, __v2, __mask) \ + _mm256_blend_ps(__v1, __v2, __mask) + +#define VEC_BLENDV(__v1, __v2, __maskV) \ + _mm256_blendv_ps(__v1, __v2, __maskV) + +#define VEC_CAST_256_128(__v1) \ + _mm256_castps256_ps128 (__v1) + +#define VEC_EXTRACT_128(__v1, __im) \ + _mm256_extractf128_ps (__v1, __im) + +#define VEC_EXTRACT_UNIT(__v1, __im) \ + _mm_extract_epi32(__v1, __im) + +#define VEC_SET1_VAL128(__val) \ + _mm_set1_ps(__val) + +#define VEC_MOVE(__v1, __val) \ + _mm_move_ss(__v1, __val) + +#define VEC_CAST_128_256(__v1) \ + _mm256_castps128_ps256(__v1) + +#define VEC_INSERT_VAL(__v1, __val, __pos) \ + _mm256_insertf128_ps(__v1, __val, __pos) + +#define VEC_CVT_128_256(__v1) \ + _mm256_cvtepi32_ps(__v1.i) + +#define VEC_SET1_VAL(__val) \ + _mm256_set1_ps(__val) + +#define VEC_POPCVT_CHAR(__ch) \ + _mm256_cvtepi32_ps(_mm256_set1_epi32(__ch)) + +#define VEC_LDPOPCVT_CHAR(__addr) \ + _mm256_cvtepi32_ps(_mm256_loadu_si256((__m256i const *)__addr)) + +#define VEC_CMP_EQ(__v1, __v2) \ + _mm256_cmp_ps(__v1, __v2, _CMP_EQ_OQ) + +#define VEC_SET_LSE(__val) \ + _mm256_set_ps(zero, zero, zero, zero, zero, zero, zero, __val); + +#define SHIFT_HAP(__v1, __val) \ + _vector_shift_lastavxs(__v1, __val.f); + +#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \ + __vdst = _mm256_castps128_ps256(__vsLow) ; \ +__vdst = _mm256_insertf128_ps(__vdst, __vsHigh, 1) ; + +#define VEC_SHIFT_LEFT_1BIT(__vs) \ + __vs = _mm_slli_epi32(__vs, 1) + +#define COMPARE_VECS(__v1, __v2, __first, __last) { \ + float* ptr1 = (float*) (&__v1) ; \ + float* ptr2 = (float*) (&__v2) ; \ + for (int ei=__first; ei <= __last; ++ei) { \ + if (ptr1[ei] != ptr2[ei]) { \ + std::cout << "Float Mismatch at " << ei << ": " \ + << ptr1[ei] << " vs. 
" << ptr2[ei] << std::endl ; \ + exit(0) ; \ + } \ + } \ +} + +class BitMaskVec_float { + + MASK_VEC low_, high_ ; + SIMD_TYPE combined_ ; + + public: + + inline MASK_TYPE& getLowEntry(int index) { + return low_.masks[index] ; + } + inline MASK_TYPE& getHighEntry(int index) { + return high_.masks[index] ; + } + + inline const SIMD_TYPE& getCombinedMask() { + VEC_SSE_TO_AVX(low_.vecf, high_.vecf, combined_) ; + return combined_ ; + } + + inline void shift_left_1bit() { + VEC_SHIFT_LEFT_1BIT(low_.vec) ; + VEC_SHIFT_LEFT_1BIT(high_.vec) ; + } + +} ; + +#define BITMASK_VEC BitMaskVec_float diff --git a/public/VectorPairHMM/src/main/c++/define-sse-double.h b/public/VectorPairHMM/src/main/c++/define-sse-double.h new file mode 100644 index 000000000..2d271a854 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/define-sse-double.h @@ -0,0 +1,173 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#ifdef PRECISION +#undef PRECISION +#undef MAIN_TYPE +#undef MAIN_TYPE_SIZE +#undef UNION_TYPE +#undef IF_128 +#undef IF_MAIN_TYPE +#undef SHIFT_CONST1 +#undef SHIFT_CONST2 +#undef SHIFT_CONST3 +#undef _128_TYPE +#undef SIMD_TYPE +#undef AVX_LENGTH +#undef HAP_TYPE +#undef MASK_TYPE +#undef MASK_ALL_ONES + +#undef VEC_EXTRACT_UNIT(__v1, __im) +#undef VEC_INSERT_UNIT(__v1,__ins,__im) +#undef SET_VEC_ZERO(__vec) +#undef VEC_OR(__v1, __v2) +#undef VEC_ADD(__v1, __v2) +#undef VEC_SUB(__v1, __v2) +#undef VEC_MUL(__v1, __v2) +#undef VEC_DIV(__v1, __v2) +#undef VEC_BLEND(__v1, __v2, __mask) +#undef VEC_BLENDV(__v1, __v2, __maskV) +#undef VEC_CAST_256_128(__v1) +#undef VEC_EXTRACT_128(__v1, __im) +#undef VEC_EXTRACT_UNIT(__v1, __im) +#undef VEC_SET1_VAL128(__val) +#undef VEC_MOVE(__v1, __val) +#undef VEC_CAST_128_256(__v1) +#undef VEC_INSERT_VAL(__v1, __val, __pos) +#undef VEC_CVT_128_256(__v1) +#undef VEC_SET1_VAL(__val) +#undef VEC_POPCVT_CHAR(__ch) +#undef VEC_LDPOPCVT_CHAR(__addr) +#undef VEC_CMP_EQ(__v1, __v2) +#undef VEC_SET_LSE(__val) +#undef SHIFT_HAP(__v1, __val) +#undef MASK_VEC +#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) +#undef VEC_SHIFT_LEFT_1BIT(__vs) +#undef MASK_ALL_ONES +#undef COMPARE_VECS(__v1, __v2) +#undef _256_INT_TYPE +#undef BITMASK_VEC +#endif + +#define SSE +#define PRECISION d + +#define MAIN_TYPE double +#define MAIN_TYPE_SIZE 64 +#define UNION_TYPE mix_D128 +#define IF_128 IF_128d +#define IF_MAIN_TYPE IF_64 +#define SHIFT_CONST1 1 +#define SHIFT_CONST2 8 +#define SHIFT_CONST3 0 +#define _128_TYPE __m128d +#define SIMD_TYPE __m128d +#define _256_INT_TYPE __m128i +#define AVX_LENGTH 2 +#define HAP_TYPE __m128i +#define MASK_TYPE uint64_t +#define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFFL +#define MASK_VEC MaskVec_D + +#define VEC_EXTRACT_UNIT(__v1, __im) \ + _mm_extract_epi64(__v1, __im) + +#define VEC_INSERT_UNIT(__v1,__ins,__im) \ + _mm_insert_epi64(__v1,__ins,__im) + +#define VEC_OR(__v1, __v2) \ + _mm_or_pd(__v1, __v2) + +#define 
VEC_ADD(__v1, __v2) \ + _mm_add_pd(__v1, __v2) + +#define VEC_SUB(__v1, __v2) \ + _mm_sub_pd(__v1, __v2) + +#define VEC_MUL(__v1, __v2) \ + _mm_mul_pd(__v1, __v2) + +#define VEC_DIV(__v1, __v2) \ + _mm_div_pd(__v1, __v2) + +#define VEC_CMP_EQ(__v1, __v2) \ + _mm_cmpeq_pd(__v1, __v2) + +#define VEC_BLEND(__v1, __v2, __mask) \ + _mm_blend_pd(__v1, __v2, __mask) + +#define VEC_BLENDV(__v1, __v2, __maskV) \ + _mm_blendv_pd(__v1, __v2, __maskV) + +#define SHIFT_HAP(__v1, __val) \ + __v1 = _mm_insert_epi32(_mm_slli_si128(__v1, 4), __val.i, 0) + +#define VEC_CVT_128_256(__v1) \ + _mm_cvtepi32_pd(__v1) + +#define VEC_SET1_VAL(__val) \ + _mm_set1_pd(__val) + +#define VEC_POPCVT_CHAR(__ch) \ + _mm_cvtepi32_pd(_mm_set1_epi32(__ch)) + +#define VEC_SET_LSE(__val) \ + _mm_set_pd(zero, __val); + +#define VEC_LDPOPCVT_CHAR(__addr) \ + _mm_cvtepi32_pd(_mm_loadu_si128((__m128i const *)__addr)) + +#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \ + __vdst = _mm_castsi128_pd(_mm_set_epi64(__vsHigh, __vsLow)) + +#define VEC_SHIFT_LEFT_1BIT(__vs) \ + __vs = _mm_slli_epi64(__vs, 1) + + +class BitMaskVec_sse_double { + + MASK_VEC combined_ ; + public: + inline MASK_TYPE& getLowEntry(int index) { + return combined_.masks[index] ; + } + inline MASK_TYPE& getHighEntry(int index) { + return combined_.masks[AVX_LENGTH/2+index] ; + } + + inline const SIMD_TYPE& getCombinedMask() { + return combined_.vecf ; + } + + inline void shift_left_1bit() { + VEC_SHIFT_LEFT_1BIT(combined_.vec) ; + } + +} ; + +#define BITMASK_VEC BitMaskVec_sse_double + diff --git a/public/VectorPairHMM/src/main/c++/define-sse-float.h b/public/VectorPairHMM/src/main/c++/define-sse-float.h new file mode 100644 index 000000000..20af947dd --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/define-sse-float.h @@ -0,0 +1,173 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to 
deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#ifdef PRECISION +#undef PRECISION +#undef MAIN_TYPE +#undef MAIN_TYPE_SIZE +#undef UNION_TYPE +#undef IF_128 +#undef IF_MAIN_TYPE +#undef SHIFT_CONST1 +#undef SHIFT_CONST2 +#undef SHIFT_CONST3 +#undef _128_TYPE +#undef SIMD_TYPE +#undef AVX_LENGTH +#undef HAP_TYPE +#undef MASK_TYPE +#undef MASK_ALL_ONES + +#undef VEC_EXTRACT_UNIT(__v1, __im) +#undef VEC_INSERT_UNIT(__v1,__ins,__im) +#undef SET_VEC_ZERO(__vec) +#undef VEC_OR(__v1, __v2) +#undef VEC_ADD(__v1, __v2) +#undef VEC_SUB(__v1, __v2) +#undef VEC_MUL(__v1, __v2) +#undef VEC_DIV(__v1, __v2) +#undef VEC_BLEND(__v1, __v2, __mask) +#undef VEC_BLENDV(__v1, __v2, __maskV) +#undef VEC_CAST_256_128(__v1) +#undef VEC_EXTRACT_128(__v1, __im) +#undef VEC_EXTRACT_UNIT(__v1, __im) +#undef VEC_SET1_VAL128(__val) +#undef VEC_MOVE(__v1, __val) +#undef VEC_CAST_128_256(__v1) +#undef VEC_INSERT_VAL(__v1, __val, __pos) +#undef VEC_CVT_128_256(__v1) +#undef VEC_SET1_VAL(__val) +#undef VEC_POPCVT_CHAR(__ch) +#undef VEC_LDPOPCVT_CHAR(__addr) +#undef VEC_CMP_EQ(__v1, __v2) +#undef VEC_SET_LSE(__val) +#undef SHIFT_HAP(__v1, __val) 
+#undef MASK_VEC +#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) +#undef VEC_SHIFT_LEFT_1BIT(__vs) +#undef MASK_ALL_ONES +#undef COMPARE_VECS(__v1, __v2) +#undef _256_INT_TYPE +#undef BITMASK_VEC +#endif + +#define SSE +#define PRECISION s + +#define MAIN_TYPE float +#define MAIN_TYPE_SIZE 32 +#define UNION_TYPE mix_F128 +#define IF_128 IF_128f +#define IF_MAIN_TYPE IF_32 +#define SHIFT_CONST1 3 +#define SHIFT_CONST2 4 +#define SHIFT_CONST3 0 +#define _128_TYPE __m128 +#define SIMD_TYPE __m128 +#define _256_INT_TYPE __m128i +#define AVX_LENGTH 4 +//#define MAVX_COUNT (MROWS+3)/AVX_LENGTH +#define HAP_TYPE UNION_TYPE +#define MASK_TYPE uint32_t +#define MASK_ALL_ONES 0xFFFFFFFF +#define MASK_VEC MaskVec_F + +#define VEC_EXTRACT_UNIT(__v1, __im) \ + _mm_extract_epi32(__v1, __im) + +#define VEC_INSERT_UNIT(__v1,__ins,__im) \ + _mm_insert_epi32(__v1,__ins,__im) + +#define VEC_OR(__v1, __v2) \ + _mm_or_ps(__v1, __v2) + +#define VEC_ADD(__v1, __v2) \ + _mm_add_ps(__v1, __v2) + +#define VEC_SUB(__v1, __v2) \ + _mm_sub_ps(__v1, __v2) + +#define VEC_MUL(__v1, __v2) \ + _mm_mul_ps(__v1, __v2) + +#define VEC_DIV(__v1, __v2) \ + _mm_div_ps(__v1, __v2) + +#define VEC_CMP_EQ(__v1, __v2) \ + _mm_cmpeq_ps(__v1, __v2) + +#define VEC_BLEND(__v1, __v2, __mask) \ + _mm_blend_ps(__v1, __v2, __mask) + +#define VEC_BLENDV(__v1, __v2, __maskV) \ + _mm_blendv_ps(__v1, __v2, __maskV) + +#define SHIFT_HAP(__v1, __val) \ + _vector_shift_lastsses(__v1, __val.f) + +#define VEC_CVT_128_256(__v1) \ + _mm_cvtepi32_ps(__v1.i) + +#define VEC_SET1_VAL(__val) \ + _mm_set1_ps(__val) + +#define VEC_POPCVT_CHAR(__ch) \ + _mm_cvtepi32_ps(_mm_set1_epi32(__ch)) + +#define VEC_SET_LSE(__val) \ + _mm_set_ps(zero, zero, zero, __val); + +#define VEC_LDPOPCVT_CHAR(__addr) \ + _mm_cvtepi32_ps(_mm_loadu_si128((__m128i const *)__addr)) + +#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \ + __vdst = _mm_cvtpi32x2_ps(__vsLow, __vsHigh) + +#define VEC_SHIFT_LEFT_1BIT(__vs) \ + __vs = _mm_slli_epi32(__vs, 1) + 
+class BitMaskVec_sse_float { + + MASK_VEC combined_ ; + + public: + inline MASK_TYPE& getLowEntry(int index) { + return combined_.masks[index] ; + } + inline MASK_TYPE& getHighEntry(int index) { + return combined_.masks[AVX_LENGTH/2+index] ; + } + + inline const SIMD_TYPE& getCombinedMask() { + return combined_.vecf ; + } + + inline void shift_left_1bit() { + VEC_SHIFT_LEFT_1BIT(combined_.vec) ; + } + +} ; + +#define BITMASK_VEC BitMaskVec_sse_float diff --git a/public/VectorPairHMM/src/main/c++/headers.h b/public/VectorPairHMM/src/main/c++/headers.h new file mode 100644 index 000000000..4a0d89b57 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/headers.h @@ -0,0 +1,71 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + + +#ifndef COMMON_HEADERS_H +#define COMMON_HEADERS_H + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern uint64_t exceptions_array[128]; +extern FILE* g_debug_fptr; +#define STORE_FP_EXCEPTIONS(flagp, exceptions_array) \ + fegetexceptflag(&flagp, FE_ALL_EXCEPT | __FE_DENORM); \ + exceptions_array[FE_INVALID] += ((flagp & FE_INVALID)); \ + exceptions_array[__FE_DENORM] += ((flagp & __FE_DENORM) >> 1); \ + exceptions_array[FE_DIVBYZERO] += ((flagp & FE_DIVBYZERO) >> 2); \ + exceptions_array[FE_OVERFLOW] += ((flagp & FE_OVERFLOW) >> 3); \ + exceptions_array[FE_UNDERFLOW] += ((flagp & FE_UNDERFLOW) >> 4); \ + feclearexcept(FE_ALL_EXCEPT | __FE_DENORM); + +#define CONVERT_AND_PRINT(X) \ + g_converter.f = (X); \ + fwrite(&(g_converter.i),4,1,g_debug_fptr); \ + +#endif diff --git a/public/VectorPairHMM/src/main/c++/jni_common.h b/public/VectorPairHMM/src/main/c++/jni_common.h new file mode 100644 index 000000000..ee43da2ec --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/jni_common.h @@ -0,0 +1,60 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. 
+ +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef JNI_COMMON_H +#define JNI_COMMON_H + +/*#define SINGLE_THREADED_ONLY 1*/ +#include +/*#define ENABLE_ASSERTIONS 1*/ +#ifdef SINGLE_THREADED_ONLY +#define DO_PROFILING 1 +#endif +/*#define DEBUG0_1 1*/ +/*#define DEBUG3 1*/ +/*#define DUMP_TO_SANDBOX 1*/ + + +#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1 + +#ifdef DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY +//Gets direct access to Java arrays +#define GET_BYTE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical +#define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical +#define JNI_RO_RELEASE_MODE JNI_ABORT +#define GET_DOUBLE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical +#define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical + +#else +//Likely makes copy of Java arrays to JNI C++ space +#define GET_BYTE_ARRAY_ELEMENTS env->GetByteArrayElements +#define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleaseByteArrayElements +#define JNI_RO_RELEASE_MODE JNI_ABORT +#define GET_DOUBLE_ARRAY_ELEMENTS env->GetDoubleArrayElements +#define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleaseDoubleArrayElements + +#endif //ifdef DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY + +#endif //ifndef JNI_COMMON_H diff --git a/public/VectorPairHMM/src/main/c++/jnidebug.h b/public/VectorPairHMM/src/main/c++/jnidebug.h new file mode 100644 index 000000000..df2e207b6 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/jnidebug.h @@ -0,0 +1,191 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a 
copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef JNI_DEBUG_H +#define JNI_DEBUG_H + +template +class DataHolder +{ +#define INIT_MATRIX(X) \ + X = new NUMBER*[m_paddedMaxReadLength]; \ + for(int i=0;i ctx; + for (int r = 1; r <= length;r++) //in original code, r < ROWS (where ROWS = paddedReadLength) + { + int _i = insertionGOP[r-1]; //insertionGOP + int _d = deletionGOP[r-1]; //deletionGOP + int _c = overallGCP[r-1]; //overallGCP + m_transition[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127]; //lines 161-162 + m_transition[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c]; //line 163 + m_transition[r][MX] = ctx.ph2pr[_i]; //164 + m_transition[r][XX] = ctx.ph2pr[_c]; //165 + m_transition[r][MY] = ctx.ph2pr[_d];//last row seems different, compared to line 166 + m_transition[r][YY] = ctx.ph2pr[_c];//same as above for line 167 + //m_transition[r][MY] = (r == length) ? ctx._(1.0) : ctx.ph2pr[_d];//last row seems different, compared to line 166 + //m_transition[r][YY] = (r == length) ? 
ctx._(1.0) : ctx.ph2pr[_c];//same as above for line 167 +#ifdef DEBUG3 + for(int j=0;j<6;++j) + debug_dump("transitions_jni.txt", to_string(m_transition[r][j]),true); +#endif + } + ++g_num_prob_init; + } + bool m_is_initialized; + int m_readMaxLength; + int m_haplotypeMaxLength; + int m_paddedMaxReadLength; + int m_paddedMaxHaplotypeLength; + NUMBER** m_matchMatrix; + NUMBER** m_insertionMatrix; + NUMBER** m_deletionMatrix; + NUMBER** m_prior; + NUMBER (*m_transition)[6]; +}; +extern DataHolder g_double_dataholder; + +template +NUMBER compute_full_prob(testcase *tc, NUMBER** M, NUMBER** X, NUMBER** Y, NUMBER (*p)[6], + bool do_initialization, jint hapStartIndex, NUMBER *before_last_log = NULL) +{ + int r, c; + int ROWS = tc->rslen + 1; //ROWS = paddedReadLength + int COLS = tc->haplen + 1; //COLS = paddedHaplotypeLength + + Context ctx; + //////NOTES + ////ctx.ph2pr[quality]; //This quantity is QualityUtils.qualToErrorProb(quality) + ////1-ctx.ph2pr[quality]; //This corresponds to QualityUtils.qualToProb(quality); + + //Initialization + if(do_initialization) + { + for (c = 0; c < COLS; c++) + { + M[0][c] = ctx._(0.0); + X[0][c] = ctx._(0.0); + Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen); //code from 87-90 in LoglessPairHMM + } + + for (r = 1; r < ROWS; r++) + { + M[r][0] = ctx._(0.0); + //deletionMatrix row 0 in above nest is initialized in the Java code + //However, insertionMatrix column 0 is not initialized in Java code, could it be that + //values are re-used from a previous iteration? 
+ //Why even do this, X[0][0] = 0 from above loop nest, X[idx][0] = 0 from this computation + X[r][0] = X[r-1][0] * p[r][XX]; + Y[r][0] = ctx._(0.0); + } + } + + for (r = 1; r < ROWS; r++) + for (c = hapStartIndex+1; c < COLS; c++) + { + //The following lines correspond to initializePriors() + char _rs = tc->rs[r-1]; //line 137 + char _hap = tc->hap[c-1]; //line 140 + //int _q = tc->q[r-1] & 127; //line 138 - q is the quality (qual), should be byte hence int ANDed with 0xFF + int _q = tc->q[r-1]; //line 138 - q is the quality (qual), should be byte hence int ANDed with 0xFF + NUMBER distm = ctx.ph2pr[_q]; //This quantity is QualityUtils.qualToErrorProb(_q) + //The assumption here is that doNotUseTristateCorrection is true + //TOASK + if (_rs == _hap || _rs == 'N' || _hap == 'N') + distm = ctx._(1.0) - distm; //This is the quantity QualityUtils.qualToProb(qual) + else + distm = distm/3; +#ifdef DEBUG3 + debug_dump("priors_jni.txt",to_string(distm),true); +#endif + + //Computation inside updateCell + M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]); + X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX]; + Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY]; +#ifdef DEBUG3 + debug_dump("matrices_jni.txt",to_string(M[r][c]),true); + debug_dump("matrices_jni.txt",to_string(X[r][c]),true); + debug_dump("matrices_jni.txt",to_string(Y[r][c]),true); +#endif + } + + NUMBER result = ctx._(0.0); + for (c = 0; c < COLS; c++) + result += M[ROWS-1][c] + X[ROWS-1][c]; + + if (before_last_log != NULL) + *before_last_log = result; + +#ifdef DEBUG + debug_dump("return_values_jni.txt",to_string(ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT),true); +#endif + return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT; +} + +#endif diff --git a/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc b/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc 
new file mode 100644 index 000000000..8a3f8b5bc --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc @@ -0,0 +1,176 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#include "headers.h" +#include "jni_common.h" +#include "org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.h" +#include "template.h" +#include "utils.h" +#include "LoadTimeInitializer.h" +#include "jnidebug.h" +DataHolder g_double_dataholder; + +using namespace std; + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitialize +(JNIEnv* env, jobject thisObject, + jint readMaxLength, jint haplotypeMaxLength) +{ + static int g_num_init_calls = 0; +#ifdef DEBUG3 + cout << "Entered alloc initialized .. readMaxLength "<GetArrayLength(insertionGOP); +#ifdef DEBUG3 + cout << "Entered initializeProbabilities .. 
length "<GetByteArrayElements(insertionGOP, &is_copy); + jbyte* deletionGOPArray = (env)->GetByteArrayElements(deletionGOP, &is_copy); + jbyte* overallGCPArray = (env)->GetByteArrayElements(overallGCP, &is_copy); +#ifdef DEBUG + if(insertionGOPArray == 0) + cerr << "insertionGOP array not initialized in JNI\n"; + ////assert(insertionGOPArray && "insertionGOP array not initialized in JNI"); + if(deletionGOPArray == 0) + cerr << "deletionGOP array not initialized in JNI\n"; + ////assert(deletionGOPArray && "deletionGOP array not initialized in JNI"); + assert(overallGCPArray && "OverallGCP array not initialized in JNI"); +#endif + + g_double_dataholder.initializeProbabilities(length, insertionGOPArray, deletionGOPArray, overallGCPArray); + + env->ReleaseByteArrayElements(overallGCP, overallGCPArray, JNI_ABORT); + env->ReleaseByteArrayElements(deletionGOP, deletionGOPArray, JNI_ABORT); + env->ReleaseByteArrayElements(insertionGOP, insertionGOPArray, JNI_ABORT); +} + +JNIEXPORT jdouble JNICALL +Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitializePriorsAndUpdateCells( + JNIEnv* env, jobject thisObject, + jboolean doInitialization, jint paddedReadLength, jint paddedHaplotypeLength, + jbyteArray readBases, jbyteArray haplotypeBases, jbyteArray readQuals, + jint hapStartIndex + ) +{ +#ifdef DEBUG3 + cout << "Entered mainCompute .. 
doInitialization "<<(doInitialization == JNI_TRUE)<<" hapStartIndex "<GetByteArrayElements(readBases, &is_copy); + jbyte* haplotypeBasesArray = (env)->GetByteArrayElements(haplotypeBases, &is_copy); + jbyte* readQualsArray = (env)->GetByteArrayElements(readQuals, &is_copy); +#ifdef DEBUG + assert(readBasesArray && "readBasesArray not initialized in JNI"); + assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI"); + assert(readQualsArray && "readQualsArray not initialized in JNI"); +#endif + testcase tc; + + tc.rslen = paddedReadLength-1; + tc.haplen = paddedHaplotypeLength-1; + + tc.rs = (char*)readBasesArray; + tc.hap = (char*)haplotypeBasesArray; + tc.q = (char*)readQualsArray; //TOASK - q is now char* + + compute_full_prob(&tc, g_double_dataholder.m_matchMatrix, g_double_dataholder.m_insertionMatrix, + g_double_dataholder.m_deletionMatrix, g_double_dataholder.m_transition, + doInitialization == JNI_TRUE, hapStartIndex, NULL); + + env->ReleaseByteArrayElements(readBases, readBasesArray, JNI_ABORT); + env->ReleaseByteArrayElements(haplotypeBases, haplotypeBasesArray, JNI_ABORT); + env->ReleaseByteArrayElements(readQuals, readQualsArray, JNI_ABORT); + return 0.0; +} + +JNIEXPORT jdouble JNICALL +Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniSubComputeReadLikelihoodGivenHaplotypeLog10( + JNIEnv* env, jobject thisObject, + jint readLength, jint haplotypeLength, + jbyteArray readBases, jbyteArray haplotypeBases, jbyteArray readQuals, + jbyteArray insertionGOP, jbyteArray deletionGOP, jbyteArray overallGCP, + jint hapStartIndex + ) +{ + jboolean is_copy = JNI_FALSE; + jbyte* readBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readBases, &is_copy); + jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBases, &is_copy); + jbyte* readQualsArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readQuals, &is_copy); + jbyte* insertionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(insertionGOP, &is_copy); + jbyte* deletionGOPArray = 
(jbyte*)GET_BYTE_ARRAY_ELEMENTS(deletionGOP, &is_copy); + jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy); +#ifdef DEBUG + assert(readBasesArray && "readBasesArray not initialized in JNI"); + assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI"); + assert(readQualsArray && "readQualsArray not initialized in JNI"); + assert(insertionGOPArray && "insertionGOP array not initialized in JNI"); + assert(deletionGOPArray && "deletionGOP array not initialized in JNI"); + assert(overallGCPArray && "OverallGCP array not initialized in JNI"); + //assert(readLength < MROWS); +#endif + testcase tc; + tc.rslen = readLength; + tc.haplen = haplotypeLength; + tc.rs = (char*)readBasesArray; + tc.hap = (char*)haplotypeBasesArray; + for(unsigned i=0;i +/* Header for class org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM */ + +#ifndef _Included_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM +#define _Included_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM +#ifdef __cplusplus +extern "C" { +#endif +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_TRISTATE_CORRECTION +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_TRISTATE_CORRECTION 3.0 +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToMatch +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToMatch 0L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_indelToMatch +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_indelToMatch 1L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToInsertion +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToInsertion 2L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_insertionToInsertion +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_insertionToInsertion 3L +#undef 
org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToDeletion +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_matchToDeletion 4L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_deletionToDeletion +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_deletionToDeletion 5L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug 0L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_verify +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_verify 0L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug0_1 +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug0_1 0L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug1 +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug1 0L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug2 +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug2 0L +#undef org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug3 +#define org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_debug3 0L +/* + * Class: org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM + * Method: jniInitialize + * Signature: (II)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitialize + (JNIEnv *, jobject, jint, jint); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM + * Method: jniInitializeProbabilities + * Signature: ([[D[B[B[B)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitializeProbabilities + (JNIEnv *, jclass, jobjectArray, jbyteArray, jbyteArray, jbyteArray); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM + * 
Method: jniInitializePriorsAndUpdateCells + * Signature: (ZII[B[B[BI)D + */ +JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniInitializePriorsAndUpdateCells + (JNIEnv *, jobject, jboolean, jint, jint, jbyteArray, jbyteArray, jbyteArray, jint); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM + * Method: jniSubComputeReadLikelihoodGivenHaplotypeLog10 + * Signature: (II[B[B[B[B[B[BI)D + */ +JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM_jniSubComputeReadLikelihoodGivenHaplotypeLog10 + (JNIEnv *, jobject, jint, jint, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc b/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc new file mode 100644 index 000000000..220b1aa60 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc @@ -0,0 +1,416 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#include "headers.h" +#include "jni_common.h" +#include "org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h" +#include "template.h" +#include "utils.h" +#include "LoadTimeInitializer.h" + +using namespace std; + +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType + (JNIEnv* env, jobject thisObject) +{ + return (jlong)get_machine_capabilities(); +} + +//Should be called only once for the whole Java process - initializes field ids for the classes JNIReadDataHolderClass +//and JNIHaplotypeDataHolderClass +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask + (JNIEnv* env, jobject thisObject, jclass readDataHolderClass, jclass haplotypeDataHolderClass, jlong mask) +{ + assert(readDataHolderClass); + assert(haplotypeDataHolderClass); + jfieldID fid; + fid = env->GetFieldID(readDataHolderClass, "readBases", "[B"); + assert(fid && "JNI pairHMM: Could not get FID for readBases"); + g_load_time_initializer.m_readBasesFID = fid; + fid = env->GetFieldID(readDataHolderClass, "readQuals", "[B"); + assert(fid && "JNI pairHMM: Could not get FID for readQuals"); + g_load_time_initializer.m_readQualsFID = fid; + fid = env->GetFieldID(readDataHolderClass, "insertionGOP", "[B"); + assert(fid && "JNI pairHMM: Could not get FID for insertionGOP"); + g_load_time_initializer.m_insertionGOPFID = fid; + fid = env->GetFieldID(readDataHolderClass, "deletionGOP", "[B"); + assert(fid && "JNI pairHMM: Could not get FID for deletionGOP"); + g_load_time_initializer.m_deletionGOPFID = fid; + fid = env->GetFieldID(readDataHolderClass, "overallGCP", "[B"); + assert(fid && "JNI pairHMM: 
Could not get FID for overallGCP"); + g_load_time_initializer.m_overallGCPFID = fid; + + fid = env->GetFieldID(haplotypeDataHolderClass, "haplotypeBases", "[B"); + assert(fid && "JNI pairHMM: Could not get FID for haplotypeBases"); + g_load_time_initializer.m_haplotypeBasesFID = fid; + if(mask != ENABLE_ALL_HARDWARE_FEATURES) + { + cout << "Using user supplied hardware mask to re-initialize function pointers for PairHMM\n"; + initialize_function_pointers((uint64_t)mask); + cout.flush(); + } +} + +JNIEXPORT void JNICALL initializeHaplotypes + (JNIEnv * env, jobject& thisObject, jint numHaplotypes, jobjectArray& haplotypeDataArray, + vector >& haplotypeBasesArrayVector, vector& haplotypeBasesLengths) +{ + jboolean is_copy = JNI_FALSE; + haplotypeBasesArrayVector.clear(); + haplotypeBasesLengths.clear(); + haplotypeBasesArrayVector.resize(numHaplotypes); + haplotypeBasesLengths.resize(numHaplotypes); + jsize haplotypeBasesLength = 0; + for(unsigned j=0;jGetObjectArrayElement(haplotypeDataArray, j); + jbyteArray haplotypeBases = (jbyteArray)env->GetObjectField(haplotypeObject, g_load_time_initializer.m_haplotypeBasesFID); +#ifdef ENABLE_ASSERTIONS + assert(haplotypeBases && ("haplotypeBases is NULL at index : "+to_string(j)+"\n").c_str()); +#endif + //Need a global reference as this will be accessed across multiple JNI calls to JNIComputeLikelihoods() + jbyteArray haplotypeBasesGlobalRef = (jbyteArray)env->NewGlobalRef(haplotypeBases); +#ifdef ENABLE_ASSERTIONS + assert(haplotypeBasesGlobalRef && ("Could not get global ref to haplotypeBases at index : "+to_string(j)+"\n").c_str()); +#endif + env->DeleteLocalRef(haplotypeBases); //free the local reference + jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBasesGlobalRef, &is_copy); + haplotypeBasesLength = env->GetArrayLength(haplotypeBasesGlobalRef); +#ifdef ENABLE_ASSERTIONS + assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI"); + //assert(haplotypeBasesLength < MCOLS); 
+#endif +#ifdef DEBUG0_1 + cout << "JNI haplotype length "< >& haplotypeBasesArrayVector, vector& haplotypeBasesLengths + ) +{ + //Now release haplotype arrays + for(int j=haplotypeBasesArrayVector.size()-1;j>=0;--j) //note the order - reverse of GET + { + RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBasesArrayVector[j].first, haplotypeBasesArrayVector[j].second, JNI_RO_RELEASE_MODE); + env->DeleteGlobalRef(haplotypeBasesArrayVector[j].first); //free the global reference + } + haplotypeBasesArrayVector.clear(); + haplotypeBasesLengths.clear(); +} + + +vector > g_haplotypeBasesArrayVector; +vector g_haplotypeBasesLengths; +//Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region, +//transfer the list only once +//Works only for ST case as the haplotype data is stored in global variables +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeHaplotypes + (JNIEnv * env, jobject thisObject, jint numHaplotypes, jobjectArray haplotypeDataArray) +{ +#ifdef SINGLE_THREADED_ONLY + //To ensure, GET_BYTE_ARRAY_ELEMENTS is invoked only once for each haplotype, store bytearrays in a vector + initializeHaplotypes(env, thisObject, numHaplotypes, haplotypeDataArray, g_haplotypeBasesArrayVector, g_haplotypeBasesLengths); +#endif +} + + +//Create a vector of testcases for computation - copy the references to bytearrays read/readQuals etc into the appropriate +//testcase struct +inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector + (JNIEnv* env, jint numReads, jint numHaplotypes, jobjectArray& readDataArray, + vector > >& readBasesArrayVector, + vector >& haplotypeBasesArrayVector, vector& haplotypeBasesLengths, + vector& tc_array) +{ + jboolean is_copy = JNI_FALSE; + unsigned tc_idx = 0; + for(unsigned i=0;iGetObjectArrayElement(readDataArray, i); + jbyteArray readBases = (jbyteArray)env->GetObjectField(readObject, 
g_load_time_initializer.m_readBasesFID); + jbyteArray insertionGOP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_insertionGOPFID); + jbyteArray deletionGOP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_deletionGOPFID); + jbyteArray overallGCP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_overallGCPFID); + jbyteArray readQuals = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_readQualsFID); + +#ifdef ENABLE_ASSERTIONS + assert(readBases && ("readBases is NULL at index : "+to_string(i)+"\n").c_str()); + assert(insertionGOP && ("insertionGOP is NULL at index : "+to_string(i)+"\n").c_str()); + assert(deletionGOP && ("deletionGOP is NULL at index : "+to_string(i)+"\n").c_str()); + assert(overallGCP && ("overallGCP is NULL at index : "+to_string(i)+"\n").c_str()); + assert(readQuals && ("readQuals is NULL at index : "+to_string(i)+"\n").c_str()); +#endif + jsize readLength = env->GetArrayLength(readBases); + + jbyte* readBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readBases, &is_copy); //order of GET-RELEASE is important + jbyte* readQualsArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readQuals, &is_copy); + jbyte* insertionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(insertionGOP, &is_copy); + jbyte* deletionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(deletionGOP, &is_copy); + jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy); +#ifdef DO_PROFILING + g_load_time_initializer.m_bytes_copied += (is_copy ? 
readLength*5 : 0); + g_load_time_initializer.update_stat(READ_LENGTH_IDX, readLength); +#endif +#ifdef ENABLE_ASSERTIONS + assert(readBasesArray && "readBasesArray not initialized in JNI"); + assert(readQualsArray && "readQualsArray not initialized in JNI"); + assert(insertionGOPArray && "insertionGOP array not initialized in JNI"); + assert(deletionGOPArray && "deletionGOP array not initialized in JNI"); + assert(overallGCPArray && "overallGCP array not initialized in JNI"); + //assert(readLength < MROWS); + assert(readLength == env->GetArrayLength(readQuals)); + assert(readLength == env->GetArrayLength(insertionGOP)); + assert(readLength == env->GetArrayLength(deletionGOP)); + assert(readLength == env->GetArrayLength(overallGCP)); +#endif +#ifdef DEBUG0_1 + cout << "JNI read length "<& tc_array, unsigned numTestCases, double* likelihoodDoubleArray, + unsigned maxNumThreadsToUse) +{ +#ifdef DO_REPEAT_PROFILING + for(unsigned i=0;i<10;++i) +#endif + { +#pragma omp parallel for schedule (dynamic,10000) num_threads(maxNumThreadsToUse) + for(unsigned tc_idx=0;tc_idx > >& readBasesArrayVector) +{ + //Release read arrays first + for(int i=readBasesArrayVector.size()-1;i>=0;--i)//note the order - reverse of GET + { + for(int j=readBasesArrayVector[i].size()-1;j>=0;--j) + RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RO_RELEASE_MODE); + readBasesArrayVector[i].clear(); + } + readBasesArrayVector.clear(); +} + + +#ifdef DO_WARMUP +uint64_t g_sum = 0; +#endif +//JNI function to invoke compute_full_prob_avx +//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc +//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases +//likelihoodArray - array of doubles to return results back to Java. 
Memory allocated by Java prior to JNI call +//maxNumThreadsToUse - Max number of threads that OpenMP can use for the HMM computation +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods + (JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes, + jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse) +{ +#ifdef DEBUG0_1 + cout << "JNI numReads "< tc_array; + tc_array.clear(); + tc_array.resize(numTestCases); + //Store read arrays for release later + vector > > readBasesArrayVector; + readBasesArrayVector.clear(); + readBasesArrayVector.resize(numReads); +#ifdef DUMP_TO_SANDBOX + g_load_time_initializer.open_sandbox(); +#endif +#ifdef DO_PROFILING + get_time(&start_time); +#endif + +#ifdef SINGLE_THREADED_ONLY + vector >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector; + vector& haplotypeBasesLengths = g_haplotypeBasesLengths; +#else + vector > l_haplotypeBasesArrayVector; + vector >& haplotypeBasesArrayVector = l_haplotypeBasesArrayVector; + vector l_haplotypeBasesLengths; + vector& haplotypeBasesLengths = l_haplotypeBasesLengths; + initializeHaplotypes(env, thisObject, numHaplotypes, haplotypeDataArray, haplotypeBasesArrayVector, haplotypeBasesLengths); +#endif + //Copy byte array references from Java memory into vector of testcase structs + Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector(env, + numReads, numHaplotypes, readDataArray, readBasesArrayVector, haplotypeBasesArrayVector, haplotypeBasesLengths, tc_array); + +#ifdef DO_PROFILING + g_load_time_initializer.m_data_transfer_time += diff_time(start_time); +#endif + + //Get double array where results are stored (to pass back to java) + jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy); +#ifdef ENABLE_ASSERTIONS + assert(likelihoodDoubleArray && "likelihoodArray is NULL"); + 
assert(env->GetArrayLength(likelihoodArray) == numTestCases); +#endif +#ifdef DO_WARMUP //ignore - only for crazy profiling + for(unsigned i=0;iGetArrayLength(haplotypeBasesArrayVector[i].first); + for(unsigned j=0;jGetArrayLength(readBasesArrayVector[i][j].first); + for(unsigned k=0;k +/* Header for class org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM */ + +#ifndef _Included_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM +#define _Included_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM +#ifdef __cplusplus +extern "C" { +#endif +#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_TRISTATE_CORRECTION +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_TRISTATE_CORRECTION 3.0 +#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToMatch +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToMatch 0L +#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_indelToMatch +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_indelToMatch 1L +#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToInsertion +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToInsertion 2L +#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_insertionToInsertion +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_insertionToInsertion 3L +#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToDeletion +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_matchToDeletion 4L +#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_deletionToDeletion +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_deletionToDeletion 5L +#undef org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_sse42Mask +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_sse42Mask 1LL +#undef 
org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_avxMask +#define org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_avxMask 2LL +/* + * Class: org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM + * Method: jniGetMachineType + * Signature: ()J + */ +JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType + (JNIEnv *, jobject); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM + * Method: jniClose + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniClose + (JNIEnv *, jobject); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM + * Method: jniInitializeClassFieldsAndMachineMask + * Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask + (JNIEnv *, jobject, jclass, jclass, jlong); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM + * Method: jniInitializeHaplotypes + * Signature: (I[Lorg/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM/JNIHaplotypeDataHolderClass;)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeHaplotypes + (JNIEnv *, jobject, jint, jobjectArray); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM + * Method: jniFinalizeRegion + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniFinalizeRegion + (JNIEnv *, jobject); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM + * Method: jniComputeLikelihoods + * Signature: (II[Lorg/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM/JNIReadDataHolderClass;[Lorg/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM/JNIHaplotypeDataHolderClass;[DI)V + */ +JNIEXPORT void JNICALL 
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods + (JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/public/VectorPairHMM/src/main/c++/pairhmm-1-base.cc b/public/VectorPairHMM/src/main/c++/pairhmm-1-base.cc new file mode 100644 index 000000000..d2cc7d903 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/pairhmm-1-base.cc @@ -0,0 +1,66 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+*/ + +#include "headers.h" +#include "utils.h" +#include "LoadTimeInitializer.h" +using namespace std; + +int main(int argc, char** argv) +{ +#define BATCH_SIZE 10000 + if(argc < 2) + { + cerr << "Needs path to input file as argument\n"; + exit(0); + } + bool use_old_read_testcase = false; + if(argc >= 3 && string(argv[2]) == "1") + use_old_read_testcase = true; + unsigned chunk_size = 10000; + bool do_check = true; + uint64_t mask = ~(0ull); + for(int i=3;i +#include +#include + + +void CONCAT(CONCAT(precompute_masks_,SIMD_ENGINE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]) { + + const int maskBitCnt = MAIN_TYPE_SIZE ; + + for (int vi=0; vi < numMaskVecs; ++vi) { + for (int rs=0; rs < NUM_DISTINCT_CHARS; ++rs) { + maskArr[vi][rs] = 0 ; + } + maskArr[vi][AMBIG_CHAR] = MASK_ALL_ONES ; + } + + for (int col=1; col < COLS; ++col) { + int mIndex = (col-1) / maskBitCnt ; + int mOffset = (col-1) % maskBitCnt ; + MASK_TYPE bitMask = ((MASK_TYPE)0x1) << (maskBitCnt-1-mOffset) ; + + char hapChar = ConvertChar::get(tc.hap[col-1]); + + if (hapChar == AMBIG_CHAR) { + for (int ci=0; ci < NUM_DISTINCT_CHARS; ++ci) + maskArr[mIndex][ci] |= bitMask ; + } + + maskArr[mIndex][hapChar] |= bitMask ; + // bit corresponding to col 1 will be the MSB of the mask 0 + // bit corresponding to col 2 will be the MSB-1 of the mask 0 + // ... + // bit corresponding to col 32 will be the LSB of the mask 0 + // bit corresponding to col 33 will be the MSB of the mask 1 + // ... 
+ } + +} + +void CONCAT(CONCAT(init_masks_for_row_,SIMD_ENGINE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess) { + + for (int ri=0; ri < numRowsToProcess; ++ri) { + rsArr[ri] = ConvertChar::get(tc.rs[ri+beginRowIndex-1]) ; + } + + for (int ei=0; ei < AVX_LENGTH; ++ei) { + lastMaskShiftOut[ei] = 0 ; + } +} + +#define SET_MASK_WORD(__dstMask, __srcMask, __lastShiftOut, __shiftBy, __maskBitCnt){ \ + MASK_TYPE __bitMask = (((MASK_TYPE)0x1) << __shiftBy) - 1 ; \ + MASK_TYPE __nextShiftOut = (__srcMask & __bitMask) << (__maskBitCnt - __shiftBy) ; \ + __dstMask = (__srcMask >> __shiftBy) | __lastShiftOut ; \ + __lastShiftOut = __nextShiftOut ; \ +} + + +void CONCAT(CONCAT(update_masks_for_cols_,SIMD_ENGINE), PRECISION)(int maskIndex, BITMASK_VEC& bitMaskVec, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, int maskBitCnt) { + + for (int ei=0; ei < AVX_LENGTH/2; ++ei) { + SET_MASK_WORD(bitMaskVec.getLowEntry(ei), maskArr[maskIndex][rsArr[ei]], + lastMaskShiftOut[ei], ei, maskBitCnt) ; + + int ei2 = ei + AVX_LENGTH/2 ; // the second entry index + SET_MASK_WORD(bitMaskVec.getHighEntry(ei), maskArr[maskIndex][rsArr[ei2]], + lastMaskShiftOut[ei2], ei2, maskBitCnt) ; + } + +} + + +inline void CONCAT(CONCAT(computeDistVec,SIMD_ENGINE), PRECISION) (BITMASK_VEC& bitMaskVec, SIMD_TYPE& distm, SIMD_TYPE& _1_distm, SIMD_TYPE& distmChosen) { + + distmChosen = VEC_BLENDV(distm, _1_distm, bitMaskVec.getCombinedMask()) ; + + bitMaskVec.shift_left_1bit() ; +} + +/* + * This function: + * 1- Initializes probability values p_MM, p_XX, p_YY, p_MX, p_GAPM and packs them into vectors (SSE or AVX) + * 2- Precomputes the parts of "distm" which only depend on a row number and packs them into a vector + */ + +template void CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context ctx, testcase *tc, SIMD_TYPE *p_MM,
SIMD_TYPE *p_GAPM, SIMD_TYPE *p_MX, SIMD_TYPE *p_XX, SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, SIMD_TYPE *distm1D) +{ + NUMBER zero = ctx._(0.0); + NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen); + for (int s=0;si[r-1] & 127; + int _d = tc->d[r-1] & 127; + int _c = tc->c[r-1] & 127; + + //*(ptr_p_MM+r-1) = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127]; + SET_MATCH_TO_MATCH_PROB(*(ptr_p_MM+r-1), _i, _d); + *(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c]; + *(ptr_p_MX+r-1) = ctx.ph2pr[_i]; + *(ptr_p_XX+r-1) = ctx.ph2pr[_c]; + *(ptr_p_MY+r-1) = ctx.ph2pr[_d]; + *(ptr_p_YY+r-1) = ctx.ph2pr[_c]; + } + + NUMBER *ptr_distm1D = (NUMBER *)distm1D; + for (int r = 1; r < ROWS; r++) + { + int _q = tc->q[r-1] & 127; + ptr_distm1D[r-1] = ctx.ph2pr[_q]; + } +} + +/* + * This function handles pre-stripe computation: + * 1- Retrieves probability vectors from memory + * 2- Initializes M, X, Y vectors with all 0's (for the first stripe) and shifts in the last row from the previous stripe for the rest + */ + +template inline void CONCAT(CONCAT(stripeINITIALIZATION,SIMD_ENGINE), PRECISION)( + int stripeIdx, Context ctx, testcase *tc, SIMD_TYPE &pGAPM, SIMD_TYPE &pMM, SIMD_TYPE &pMX, SIMD_TYPE &pXX, SIMD_TYPE &pMY, SIMD_TYPE &pYY, + SIMD_TYPE &rs, UNION_TYPE &rsN, SIMD_TYPE &distm, SIMD_TYPE &_1_distm, SIMD_TYPE *distm1D, SIMD_TYPE N_packed256, SIMD_TYPE *p_MM , SIMD_TYPE *p_GAPM , + SIMD_TYPE *p_MX, SIMD_TYPE *p_XX , SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1, + UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM) +{ + int i = stripeIdx; + pGAPM = p_GAPM[i]; + pMM = p_MM[i]; + pMX = p_MX[i]; + pXX = p_XX[i]; + pMY = p_MY[i]; + pYY = p_YY[i]; + + NUMBER zero = ctx._(0.0); + NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen); + UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0); + UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(3.0); + + distm = distm1D[i]; + _1_distm = VEC_SUB(packed1.d, distm); + +
distm = VEC_DIV(distm, packed3.d); + + /* initialize M_t_2, M_t_1, X_t_2, X_t_1, Y_t_2, Y_t_1 */ + M_t_2.d = VEC_SET1_VAL(zero); + X_t_2.d = VEC_SET1_VAL(zero); + + if (i==0) { + M_t_1.d = VEC_SET1_VAL(zero); + X_t_1.d = VEC_SET1_VAL(zero); + Y_t_2.d = VEC_SET_LSE(init_Y); + Y_t_1.d = VEC_SET1_VAL(zero); + } + else { + X_t_1.d = VEC_SET_LSE(shiftOutX[AVX_LENGTH]); + M_t_1.d = VEC_SET_LSE(shiftOutM[AVX_LENGTH]); + Y_t_2.d = VEC_SET1_VAL(zero); + Y_t_1.d = VEC_SET1_VAL(zero); + } + M_t_1_y = M_t_1; +} + +/* + * This function is the main compute kernel to compute M, X and Y + */ + +inline void CONCAT(CONCAT(computeMXY,SIMD_ENGINE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y, + UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1, + SIMD_TYPE pMM, SIMD_TYPE pGAPM, SIMD_TYPE pMX, SIMD_TYPE pXX, SIMD_TYPE pMY, SIMD_TYPE pYY, SIMD_TYPE distmSel) +{ + /* Compute M_t <= distm * (p_MM*M_t_2 + p_GAPM*X_t_2 + p_GAPM*Y_t_2) */ + M_t.d = VEC_MUL(VEC_ADD(VEC_ADD(VEC_MUL(M_t_2.d, pMM), VEC_MUL(X_t_2.d, pGAPM)), VEC_MUL(Y_t_2.d, pGAPM)), distmSel); + //M_t.d = VEC_MUL( VEC_ADD(VEC_MUL(M_t_2.d, pMM), VEC_MUL(VEC_ADD(X_t_2.d, Y_t_2.d), pGAPM)), distmSel); + + M_t_y = M_t; + + /* Compute X_t */ + X_t.d = VEC_ADD(VEC_MUL(M_t_1.d, pMX) , VEC_MUL(X_t_1.d, pXX)); + + /* Compute Y_t */ + Y_t.d = VEC_ADD(VEC_MUL(M_t_1_y.d, pMY) , VEC_MUL(Y_t_1.d, pYY)); +} + +/* + * This is the main compute function. It operates on the matrix in a stripe-wise manner. + * The stripe height is determined by the SIMD engine type. + * Stripe height: "AVX float": 8, "AVX double": 4, "SSE float": 4, "SSE double": 2 + * For each stripe the operations are anti-diagonal based. + * Each anti-diagonal (M_t, Y_t, X_t) depends on the two previous anti-diagonals (M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, Y_t_1). + * Each stripe (except the first one) depends on the last row of the previous stripe.
+ * The last stripe computation handles the addition of the last row of M and X, which is the reason for the loop splitting. + */ + +template NUMBER CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL) +{ + int ROWS = tc->rslen + 1; + int COLS = tc->haplen + 1; + int MAVX_COUNT = (ROWS+AVX_LENGTH-1)/AVX_LENGTH; + + /* Probability arrays */ + SIMD_TYPE p_MM [MAVX_COUNT], p_GAPM [MAVX_COUNT], p_MX [MAVX_COUNT]; + SIMD_TYPE p_XX [MAVX_COUNT], p_MY [MAVX_COUNT], p_YY [MAVX_COUNT]; + + /* For distm precomputation */ + SIMD_TYPE distm1D[MAVX_COUNT]; + + /* Carries the values from each stripe to the next stripe */ + NUMBER shiftOutM[ROWS+COLS+AVX_LENGTH], shiftOutX[ROWS+COLS+AVX_LENGTH], shiftOutY[ROWS+COLS+AVX_LENGTH]; + + /* The vector to keep the anti-diagonals of M, X, Y*/ + /* Current: M_t, X_t, Y_t */ + /* Previous: M_t_1, X_t_1, Y_t_1 */ + /* Previous to previous: M_t_2, X_t_2, Y_t_2 */ + UNION_TYPE M_t, M_t_1, M_t_2, X_t, X_t_1, X_t_2, Y_t, Y_t_1, Y_t_2, M_t_y, M_t_1_y; + + /* Probability vectors */ + SIMD_TYPE pGAPM, pMM, pMX, pXX, pMY, pYY; + + struct timeval start, end; + NUMBER result_avx2; + Context ctx; + UNION_TYPE rs , rsN; + HAP_TYPE hap; + SIMD_TYPE distmSel, distmChosen ; + SIMD_TYPE distm, _1_distm; + + int r, c; + NUMBER zero = ctx._(0.0); + UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0); + SIMD_TYPE N_packed256 = VEC_POPCVT_CHAR('N'); + NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen); + int remainingRows = (ROWS-1) % AVX_LENGTH; + int stripe_cnt = ((ROWS-1) / AVX_LENGTH) + (remainingRows!=0); + + const int maskBitCnt = MAIN_TYPE_SIZE ; + const int numMaskVecs = (COLS+ROWS+maskBitCnt-1)/maskBitCnt ; // ceil function + + /* Mask precomputation for distm*/ + MASK_TYPE maskArr[numMaskVecs][NUM_DISTINCT_CHARS] ; + CONCAT(CONCAT(precompute_masks_,SIMD_ENGINE), PRECISION)(*tc, COLS, numMaskVecs, maskArr) ; + + char rsArr[AVX_LENGTH] ; + MASK_TYPE lastMaskShiftOut[AVX_LENGTH] ; + + /* Precompute initialization
for probabilities and shift vector*/ + CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECISION)(ROWS, COLS, shiftOutM, shiftOutX, shiftOutY, + ctx, tc, p_MM, p_GAPM, p_MX, p_XX, p_MY, p_YY, distm1D); + + for (int i=0;i(&tc[b]); + +#ifdef RUN_HYBRID +#define MIN_ACCEPTED 1e-28f + if (result_avxf < MIN_ACCEPTED) { + count++; + result_avxd = CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), d)(&tc[b]); + result[b] = log10(result_avxd) - log10(ldexp(1.0, 1020.f)); + } + else + result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)); +#endif + +#ifndef RUN_HYBRID + result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)); +#endif + } + aggregateTimeCompute += (getCurrClk() - lastClk) ; + lastClk = getCurrClk() ; + for (int b=0;b(testcase* tc, double* nextlog); +template float compute_full_prob_sses(testcase* tc, float* nextlog); diff --git a/public/VectorPairHMM/src/main/c++/template.h b/public/VectorPairHMM/src/main/c++/template.h new file mode 100644 index 000000000..ce4dbfc86 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/template.h @@ -0,0 +1,320 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef TEMPLATES_H_ +#define TEMPLATES_H_ + +#include "headers.h" + +#define MM 0 +#define GapM 1 +#define MX 2 +#define XX 3 +#define MY 4 +#define YY 5 + +//#define MROWS 500 +//#define MCOLS 1000 + +#define CAT(X,Y) X####Y +#define CONCAT(X,Y) CAT(X,Y) + +#define ALIGNED __attribute__((aligned(32))) + +typedef union __attribute__((aligned(32))) { + ALIGNED __m256 ALIGNED d; + ALIGNED __m128i ALIGNED s[2]; + ALIGNED float ALIGNED f[8]; + ALIGNED __m256i ALIGNED i; +} ALIGNED mix_F ALIGNED; + +typedef union __attribute__((aligned(32))) { + ALIGNED __m128 ALIGNED d; + ALIGNED __m64 ALIGNED s[2]; + ALIGNED float ALIGNED f[4]; + ALIGNED __m128i ALIGNED i; +} ALIGNED mix_F128 ALIGNED; + +typedef union ALIGNED { + __m128i vec ; + __m128 vecf ; + uint32_t masks[4] ; +} MaskVec_F ; + +typedef union ALIGNED { + __m64 vec ; + __m64 vecf ; + uint32_t masks[2] ; +} MaskVec_F128 ; + +typedef union ALIGNED +{ + ALIGNED __m128i ALIGNED i; + ALIGNED __m128 ALIGNED f; +} ALIGNED IF_128f ALIGNED; + +typedef union ALIGNED +{ + ALIGNED int ALIGNED i; + ALIGNED float ALIGNED f; +} ALIGNED IF_32 ALIGNED; + +typedef union __attribute__((aligned(32))) { + ALIGNED __m256d ALIGNED d; + ALIGNED __m128i ALIGNED s[2]; + ALIGNED double ALIGNED f[4]; + ALIGNED __m256i ALIGNED i; +} ALIGNED mix_D ALIGNED; + +typedef union __attribute__((aligned(32))) { + ALIGNED __m128d ALIGNED d; + ALIGNED __m64 ALIGNED s[2]; + ALIGNED double ALIGNED f[2]; + ALIGNED __m128i ALIGNED i; +} ALIGNED mix_D128 ALIGNED; + +typedef union ALIGNED { + __m128i vec ; + __m128d vecf ; + uint64_t masks[2] ; +} MaskVec_D ; + +typedef union ALIGNED { + __m64 vec ; + __m64 vecf ; + uint64_t masks[1] ; +} MaskVec_D128 ; + +typedef union ALIGNED +{ + 
ALIGNED __m128i ALIGNED i; + ALIGNED __m128d ALIGNED f; +} ALIGNED IF_128d ALIGNED; + +typedef union ALIGNED +{ + ALIGNED int64_t ALIGNED i; + ALIGNED double ALIGNED f; +} ALIGNED IF_64 ALIGNED; + + +#define MAX_QUAL 254 +#define MAX_JACOBIAN_TOLERANCE 8.0 +#define JACOBIAN_LOG_TABLE_STEP 0.0001 +#define JACOBIAN_LOG_TABLE_INV_STEP (1.0 / JACOBIAN_LOG_TABLE_STEP) +#define MAXN 70000 +#define LOG10_CACHE_SIZE (4*MAXN) // we need to be able to go up to 2*(2N) when calculating some of the coefficients +#define JACOBIAN_LOG_TABLE_SIZE ((int) (MAX_JACOBIAN_TOLERANCE / JACOBIAN_LOG_TABLE_STEP) + 1) + +template +struct ContextBase +{ + public: + NUMBER ph2pr[128]; + NUMBER INITIAL_CONSTANT; + NUMBER LOG10_INITIAL_CONSTANT; + NUMBER RESULT_THRESHOLD; + + static bool staticMembersInitializedFlag; + static NUMBER jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE]; + static NUMBER matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1]; + + static void initializeStaticMembers() + { + if(!staticMembersInitializedFlag) + { + //Order of calls important - Jacobian first, then MatchToMatch + initializeJacobianLogTable(); + initializeMatchToMatchProb(); + staticMembersInitializedFlag = true; + } + } + + static void deleteStaticMembers() + { + if(staticMembersInitializedFlag) + { + staticMembersInitializedFlag = false; + } + } + + //Called only once during library load - don't bother to optimize with single precision fp + static void initializeJacobianLogTable() + { + for (int k = 0; k < JACOBIAN_LOG_TABLE_SIZE; k++) { + jacobianLogTable[k] = (NUMBER)(log10(1.0 + pow(10.0, -((double) k) * JACOBIAN_LOG_TABLE_STEP))); + } + } + + //Called only once per library load - don't bother optimizing with single fp + static void initializeMatchToMatchProb() + { + double LN10 = log(10); + double INV_LN10 = 1.0/LN10; + for (int i = 0, offset = 0; i <= MAX_QUAL; offset += ++i) + for (int j = 0; j <= i; j++) { + double log10Sum = approximateLog10SumLog10(-0.1*i, -0.1*j); + double matchToMatchLog10 = + 
log1p(-std::min(1.0,pow(10,log10Sum))) * INV_LN10; + matchToMatchProb[offset + j] = (NUMBER)(pow(10,matchToMatchLog10)); + } + } + //Called during computation - use single precision where possible + static int fastRound(NUMBER d) { + return (d > ((NUMBER)0.0)) ? (int) (d + ((NUMBER)0.5)) : (int) (d - ((NUMBER)0.5)); + } + //Called during computation - use single precision where possible + static NUMBER approximateLog10SumLog10(NUMBER small, NUMBER big) { + // make sure small is really the smaller value + if (small > big) { + NUMBER t = big; + big = small; + small = t; + } + + if (isinf(small) == -1 || isinf(big) == -1) + return big; + + NUMBER diff = big - small; + if (diff >= ((NUMBER)MAX_JACOBIAN_TOLERANCE)) + return big; + + // OK, so |y-x| < tol: we use the following identity then: + // we need to compute log10(10^x + 10^y) + // By Jacobian logarithm identity, this is equal to + // max(x,y) + log10(1+10^-abs(x-y)) + // we compute the second term as a table lookup with integer quantization + // we have pre-stored correction for 0,0.1,0.2,... 
10.0 + int ind = fastRound((NUMBER)(diff * ((NUMBER)JACOBIAN_LOG_TABLE_INV_STEP))); // hard rounding + return big + jacobianLogTable[ind]; + } +}; + +template +struct Context : public ContextBase +{}; + +template<> +struct Context : public ContextBase +{ + Context():ContextBase() + { + for (int x = 0; x < 128; x++) + ph2pr[x] = pow(10.0, -((double)x) / 10.0); + + INITIAL_CONSTANT = ldexp(1.0, 1020.0); + LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT); + RESULT_THRESHOLD = 0.0; + } + + double LOG10(double v){ return log10(v); } + inline double POW(double b, double e) { return pow(b,e); } + + static double _(double n){ return n; } + static double _(float n){ return ((double) n); } +}; + +template<> +struct Context : public ContextBase +{ + Context() : ContextBase() + { + for (int x = 0; x < 128; x++) + { + ph2pr[x] = powf(10.f, -((float)x) / 10.f); + } + + INITIAL_CONSTANT = ldexpf(1.f, 120.f); + LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT); + RESULT_THRESHOLD = ldexpf(1.f, -110.f); + } + + float LOG10(float v){ return log10f(v); } + inline float POW(float b, float e) { return powf(b,e); } + + static float _(double n){ return ((float) n); } + static float _(float n){ return n; } +}; + +#define SET_MATCH_TO_MATCH_PROB(output, insQual, delQual) \ +{ \ + int minQual = delQual; \ + int maxQual = insQual; \ + if (insQual <= delQual) \ + { \ + minQual = insQual; \ + maxQual = delQual; \ + } \ + (output) = (MAX_QUAL < maxQual) ? 
\ + ((NUMBER)1.0) - ctx.POW(((NUMBER)10), ctx.approximateLog10SumLog10(((NUMBER)-0.1)*minQual, ((NUMBER)-0.1)*maxQual)) \ + : ctx.matchToMatchProb[((maxQual * (maxQual + 1)) >> 1) + minQual]; \ +} + +typedef struct +{ + int rslen, haplen; + /*int *q, *i, *d, *c;*/ + /*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/ + char *q, *i, *d, *c; + char *hap, *rs; + int *ihap; + int *irs; +} testcase; + +int normalize(char c); +int read_testcase(testcase *tc, FILE* ifp=0); + + +#define MIN_ACCEPTED 1e-28f +#define NUM_DISTINCT_CHARS 5 +#define AMBIG_CHAR 4 + +class ConvertChar { + + static uint8_t conversionTable[255] ; + +public: + + static void init() { + assert (NUM_DISTINCT_CHARS == 5) ; + assert (AMBIG_CHAR == 4) ; + + conversionTable['A'] = 0 ; + conversionTable['C'] = 1 ; + conversionTable['T'] = 2 ; + conversionTable['G'] = 3 ; + conversionTable['N'] = 4 ; + } + + static inline uint8_t get(uint8_t input) { + return conversionTable[input] ; + } + +}; + + +#endif + + diff --git a/public/VectorPairHMM/src/main/c++/utils.cc b/public/VectorPairHMM/src/main/c++/utils.cc new file mode 100644 index 000000000..6c623e9e5 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/utils.cc @@ -0,0 +1,496 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. 
+ +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#include "headers.h" +#include "template.h" +#include "utils.h" +#include "vector_defs.h" +#include "LoadTimeInitializer.h" +using namespace std; + +//static members from ConvertChar +uint8_t ConvertChar::conversionTable[255]; +//Global function pointers in utils.h +float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0; +double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0; +//Static members in ContextBase +bool ContextBase::staticMembersInitializedFlag = false; +double ContextBase::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE]; +double ContextBase::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1]; +bool ContextBase::staticMembersInitializedFlag = false; +float ContextBase::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE]; +float ContextBase::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1]; + + +bool is_avx_supported() +{ + return (_may_i_use_cpu_feature(_FEATURE_AVX) > 0); + //int ecx = 0, edx = 0, ebx = 0; + //__asm__("cpuid" + //: "=b" (ebx), + //"=c" (ecx), + //"=d" (edx) + //: "a" (1) + //); + //return ((ecx >> 28)&1) == 1; +} + +bool is_sse41_supported() +{ + return (_may_i_use_cpu_feature(_FEATURE_SSE4_1) > 0); + //int ecx = 0, edx = 0, ebx = 0; + //__asm__("cpuid" + //: "=b" (ebx), + //"=c" (ecx), + //"=d" (edx) + //: "a" (1) + //); + //return ((ecx >> 19)&1) == 1; +} + +bool is_sse42_supported() +{ + return (_may_i_use_cpu_feature(_FEATURE_SSE4_2) > 0); + //int ecx = 0, edx = 0, ebx = 0; + //__asm__("cpuid" + //: "=b" 
(ebx), + //"=c" (ecx), + //"=d" (edx) + //: "a" (1) + //); + //return ((ecx >> 20)&1) == 1; +} + +uint64_t get_machine_capabilities() +{ + uint64_t machine_mask = 0ull; + if(is_avx_supported()) + machine_mask |= (1 << AVX_CUSTOM_IDX); + if(is_sse42_supported()) + machine_mask |= (1 << SSE42_CUSTOM_IDX); + if(is_sse41_supported()) + machine_mask |= (1 << SSE41_CUSTOM_IDX); + return machine_mask; +} + +void initialize_function_pointers(uint64_t mask) +{ + //mask = 0ull; + //mask = (1 << SSE41_CUSTOM_IDX); + if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX))) + { + cout << "Using AVX accelerated implementation of PairHMM\n"; + g_compute_full_prob_float = compute_full_prob_avxs; + g_compute_full_prob_double = compute_full_prob_avxd; + } + else + if(is_sse41_supported() && (mask & ((1<< SSE41_CUSTOM_IDX) | (1<; + g_compute_full_prob_double = compute_full_prob_ssed; + } + else + { + cout << "Using un-vectorized C++ implementation of PairHMM\n"; + g_compute_full_prob_float = compute_full_prob; + g_compute_full_prob_double = compute_full_prob; + } +} + +int normalize(char c) +{ + return ((int) (c - 33)); +} + +int read_testcase(testcase *tc, FILE* ifp) +{ + char *q, *i, *d, *c, *line = NULL; + int _q, _i, _d, _c; + int x, size = 0; + ssize_t read; + + + read = getline(&line, (size_t *) &size, ifp == 0 ? 
stdin : ifp); + if (read == -1) + { + free(line); + return -1; + } + + + tc->hap = (char *) malloc(size); + tc->rs = (char *) malloc(size); + q = (char *) malloc(size); + i = (char *) malloc(size); + d = (char *) malloc(size); + c = (char *) malloc(size); + + if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6) + return -1; + + + tc->haplen = strlen(tc->hap); + tc->rslen = strlen(tc->rs); + assert(strlen(q) == tc->rslen); + assert(strlen(i) == tc->rslen); + assert(strlen(d) == tc->rslen); + assert(strlen(c) == tc->rslen); + //assert(tc->rslen < MROWS); + //tc->ihap = (int *) malloc(tc->haplen*sizeof(int)); + //tc->irs = (int *) malloc(tc->rslen*sizeof(int)); + + tc->q = (char *) malloc(sizeof(char) * tc->rslen); + tc->i = (char *) malloc(sizeof(char) * tc->rslen); + tc->d = (char *) malloc(sizeof(char) * tc->rslen); + tc->c = (char *) malloc(sizeof(char) * tc->rslen); + + for (x = 0; x < tc->rslen; x++) + { + _q = normalize(q[x]); + _i = normalize(i[x]); + _d = normalize(d[x]); + _c = normalize(c[x]); + tc->q[x] = (_q < 6) ? 
6 : _q; + //tc->q[x] = _q; + tc->i[x] = _i; + tc->d[x] = _d; + tc->c[x] = _c; + //tc->irs[x] = tc->rs[x]; + } + //for (x = 0; x < tc->haplen; x++) + //tc->ihap[x] = tc->hap[x]; + + free(q); + free(i); + free(d); + free(c); + free(line); + + + + return 0; +} + +unsigned MAX_LINE_LENGTH = 65536; +int convToInt(std::string s) +{ + int i; + std::istringstream strin(s); + strin >> i; + return i; +} + +void tokenize(std::ifstream& fptr, std::vector& tokens) +{ + int i = 0; + std::string tmp; + std::vector myVec; + vector line; + line.clear(); + line.resize(MAX_LINE_LENGTH); + vector tmpline; + tmpline.clear(); + tmpline.resize(MAX_LINE_LENGTH); + myVec.clear(); + + while(!fptr.eof()) + { + i = 0; + bool still_read_line = true; + unsigned line_position = 0; + while(still_read_line) + { + fptr.getline(&(tmpline[0]), MAX_LINE_LENGTH); + if(line_position + MAX_LINE_LENGTH > line.size()) + line.resize(2*line.size()); + for(unsigned i=0;i> std::skipws >> tmp; + if(tmp != "") + { + myVec.push_back(tmp); + ++i; + //std::cout < 0) + break; + } + tokens.clear(); + //std::cout << "Why "< tokens; + tokens.clear(); + tokenize(fptr, tokens); + if(tokens.size() == 0) + return -1; + tc->hap = new char[tokens[0].size()+2]; + tc->haplen = tokens[0].size(); + memcpy(tc->hap, tokens[0].c_str(), tokens[0].size()); + tc->rs = new char[tokens[1].size()+2]; + tc->rslen = tokens[1].size(); + tc->q = new char[tc->rslen]; + tc->i = new char[tc->rslen]; + tc->d = new char[tc->rslen]; + tc->c = new char[tc->rslen]; + //cout << "Lengths "<haplen <<" "<rslen<<"\n"; + memcpy(tc->rs, tokens[1].c_str(),tokens[1].size()); + assert(tokens.size() == 2 + 4*(tc->rslen)); + //assert(tc->rslen < MROWS); + for(unsigned j=0;jrslen;++j) + tc->q[j] = (char)convToInt(tokens[2+0*tc->rslen+j]); + for(unsigned j=0;jrslen;++j) + tc->i[j] = (char)convToInt(tokens[2+1*tc->rslen+j]); + for(unsigned j=0;jrslen;++j) + tc->d[j] = (char)convToInt(tokens[2+2*tc->rslen+j]); + for(unsigned j=0;jrslen;++j) + tc->c[j] = 
(char)convToInt(tokens[2+3*tc->rslen+j]); + + if(reformat) + { + ofstream ofptr; + ofptr.open("reformat/debug_dump.txt",first_call ? ios::out : ios::app); + assert(ofptr.is_open()); + ofptr << tokens[0] << " "; + ofptr << tokens[1] << " "; + for(unsigned j=0;jrslen;++j) + ofptr << ((char)(tc->q[j]+33)); + ofptr << " "; + for(unsigned j=0;jrslen;++j) + ofptr << ((char)(tc->i[j]+33)); + ofptr << " "; + for(unsigned j=0;jrslen;++j) + ofptr << ((char)(tc->d[j]+33)); + ofptr << " "; + for(unsigned j=0;jrslen;++j) + ofptr << ((char)(tc->c[j]+33)); + ofptr << " 0 false\n"; + + ofptr.close(); + first_call = false; + } + + + return tokens.size(); +} + +double getCurrClk() { + struct timeval tv ; + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; +} + +inline unsigned long long rdtsc(void) +{ + unsigned hi, lo; + __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); + return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); +} + +void get_time(struct timespec* store_struct) +{ + clock_gettime(CLOCK_REALTIME, store_struct); +} + +uint64_t diff_time(struct timespec& prev_time) +{ + struct timespec curr_time; + clock_gettime(CLOCK_REALTIME, &curr_time); + return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec)); +} + + +#ifdef USE_PAPI +#include "papi.h" +#define NUM_PAPI_COUNTERS 4 +#endif + +void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, bool do_check) +{ + FILE* fptr = 0; + ifstream ifptr; + if(use_old_read_testcase) + { + fptr = fopen(filename,"r"); + assert(fptr); + } + else + { + ifptr.open(filename); + assert(ifptr.is_open()); + } + vector tc_vector; + tc_vector.clear(); + testcase tc; + uint64_t vector_compute_time = 0; + uint64_t baseline_compute_time = 0; + uint64_t num_double_calls = 0; + unsigned num_testcases = 0; + bool all_ok = do_check ? 
true : false; +#ifdef USE_PAPI + uint32_t all_mask = (0); + uint32_t no_usr_mask = (1 << 16); //bit 16 user mode, bit 17 kernel mode + uint32_t no_kernel_mask = (1 << 17); //bit 16 user mode, bit 17 kernel mode + PAPI_num_counters(); + int events[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; + char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "itlb_walk_cycles", "dtlb_load_walk_cycles", "dtlb_store_walk_cycles" }; + assert(PAPI_event_name_to_code("UNHALTED_REFERENCE_CYCLES:u=1:k=1",&(events[0])) == PAPI_OK); + assert(PAPI_event_name_to_code("ITLB_MISSES:WALK_DURATION", &(events[1])) == PAPI_OK); + assert(PAPI_event_name_to_code("DTLB_LOAD_MISSES:WALK_DURATION", &(events[2])) == PAPI_OK); + assert(PAPI_event_name_to_code("DTLB_STORE_MISSES:WALK_DURATION", &(events[3])) == PAPI_OK); + long long values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; + long long accum_values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; +#endif + while(1) + { + int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true); + if(break_value >= 0) + tc_vector.push_back(tc); + if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0)) + { + vector results_vec; + vector baseline_results_vec; + results_vec.clear(); + baseline_results_vec.clear(); + results_vec.resize(tc_vector.size()); + baseline_results_vec.resize(tc_vector.size()); + struct timespec start_time; +#ifdef USE_PAPI + assert(PAPI_start_counters(events, NUM_PAPI_COUNTERS) == PAPI_OK); +#endif + get_time(&start_time); +#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12) +#ifdef DO_REPEAT_PROFILING + for(unsigned z=0;z<10;++z) +#endif + { + for(unsigned i=0;i(&tc); + baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0)); + baseline_results_vec[i] = baseline_result; + } + baseline_compute_time += diff_time(start_time); + for(unsigned i=0;i 1e-5 && rel_error > 1e-5) + { + cout << std::scientific << baseline_result << " "< +std::string to_string(T obj) +{ + 
std::stringstream ss; + std::string ret_string; + ss.clear(); + ss << std::scientific << obj; + ss >> ret_string; + ss.clear(); + return ret_string; +} +void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true); + +int read_mod_testcase(std::ifstream& fptr, testcase* tc, bool reformat=false); + +bool is_avx_supported(); +bool is_sse42_supported(); +extern float (*g_compute_full_prob_float)(testcase *tc, float *before_last_log); +extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log); +void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline); +template +NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0); +double getCurrClk(); +void get_time(struct timespec* x); +uint64_t diff_time(struct timespec& prev_time); + +//bit 0 is sse4.2, bit 1 is AVX +enum ProcessorCapabilitiesEnum +{ + SSE41_CUSTOM_IDX=0, + SSE42_CUSTOM_IDX, + AVX_CUSTOM_IDX +}; +#define ENABLE_ALL_HARDWARE_FEATURES 0xFFFFFFFFFFFFFFFFull +uint64_t get_machine_capabilities(); +void initialize_function_pointers(uint64_t mask=ENABLE_ALL_HARDWARE_FEATURES); +void do_compute(char* filename, bool use_old_read_testcase=true, unsigned chunk_size=10000, bool do_check=true); + +//#define DO_WARMUP +//#define DO_REPEAT_PROFILING +/*#define DUMP_COMPUTE_VALUES 1*/ +#define BATCH_SIZE 10000 +#define RUN_HYBRID + +#endif diff --git a/public/VectorPairHMM/src/main/c++/vector_defs.h b/public/VectorPairHMM/src/main/c++/vector_defs.h new file mode 100644 index 000000000..2aca9565f --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/vector_defs.h @@ -0,0 +1,55 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell 
+*copies of the Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +#undef SIMD_ENGINE +#undef SIMD_ENGINE_AVX +#undef SIMD_ENGINE_SSE + +#define SIMD_ENGINE avx +#define SIMD_ENGINE_AVX + +#include "define-float.h" +#include "vector_function_prototypes.h" + +#include "define-double.h" +#include "vector_function_prototypes.h" + +#undef SIMD_ENGINE +#undef SIMD_ENGINE_AVX + +#define SIMD_ENGINE sse +#define SIMD_ENGINE_SSE + + +#include "define-sse-float.h" +#include "vector_function_prototypes.h" + +#include "define-sse-double.h" +#include "vector_function_prototypes.h" + +#undef SIMD_ENGINE +#undef SIMD_ENGINE_AVX +#undef SIMD_ENGINE_SSE + diff --git a/public/VectorPairHMM/src/main/c++/vector_function_prototypes.h b/public/VectorPairHMM/src/main/c++/vector_function_prototypes.h new file mode 100644 index 000000000..c0fddc394 --- /dev/null +++ b/public/VectorPairHMM/src/main/c++/vector_function_prototypes.h @@ -0,0 +1,44 @@ +/*Copyright (c) 2012 The Broad Institute + +*Permission is hereby granted, free of charge, to any person +*obtaining a copy of this software and associated documentation +*files (the "Software"), to deal in the Software without +*restriction, including without limitation the rights to use, +*copy, modify, merge, publish, distribute, sublicense, and/or sell +*copies of the 
Software, and to permit persons to whom the +*Software is furnished to do so, subject to the following +*conditions: + +*The above copyright notice and this permission notice shall be +*included in all copies or substantial portions of the Software. + +*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +*THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + + +inline void CONCAT(CONCAT(_vector_shift,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut); +inline void CONCAT(CONCAT(_vector_shift_last,SIMD_ENGINE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn); +inline void CONCAT(CONCAT(precompute_masks_,SIMD_ENGINE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]); +inline void CONCAT(CONCAT(init_masks_for_row_,SIMD_ENGINE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess); +inline void CONCAT(CONCAT(update_masks_for_cols_,SIMD_ENGINE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt); +inline void CONCAT(CONCAT(computeDistVec,SIMD_ENGINE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, SIMD_TYPE& distm, SIMD_TYPE& _1_distm, SIMD_TYPE& distmChosen); +template inline void CONCAT(CONCAT(initializeVectors,SIMD_ENGINE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context ctx, testcase *tc, SIMD_TYPE *p_MM, SIMD_TYPE *p_GAPM, SIMD_TYPE *p_MX, SIMD_TYPE *p_XX, 
SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, SIMD_TYPE *distm1D); +template inline void CONCAT(CONCAT(stripINITIALIZATION,SIMD_ENGINE), PRECISION)( + int stripIdx, Context ctx, testcase *tc, SIMD_TYPE &pGAPM, SIMD_TYPE &pMM, SIMD_TYPE &pMX, SIMD_TYPE &pXX, SIMD_TYPE &pMY, SIMD_TYPE &pYY, + SIMD_TYPE &rs, UNION_TYPE &rsN, SIMD_TYPE &distm, SIMD_TYPE &_1_distm, SIMD_TYPE *distm1D, SIMD_TYPE N_packed256, SIMD_TYPE *p_MM , SIMD_TYPE *p_GAPM , + SIMD_TYPE *p_MX, SIMD_TYPE *p_XX , SIMD_TYPE *p_MY, SIMD_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1, + UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM); +inline SIMD_TYPE CONCAT(CONCAT(computeDISTM,SIMD_ENGINE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, SIMD_TYPE rs, UNION_TYPE rsN, SIMD_TYPE N_packed256, + SIMD_TYPE distm, SIMD_TYPE _1_distm); +inline void CONCAT(CONCAT(computeMXY,SIMD_ENGINE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y, + UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1, + SIMD_TYPE pMM, SIMD_TYPE pGAPM, SIMD_TYPE pMX, SIMD_TYPE pXX, SIMD_TYPE pMY, SIMD_TYPE pYY, SIMD_TYPE distmSel); +template NUMBER CONCAT(CONCAT(compute_full_prob_,SIMD_ENGINE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL); + diff --git a/public/external-example/pom.xml b/public/external-example/pom.xml index 9c05867a8..ff20fbb33 100644 --- a/public/external-example/pom.xml +++ b/public/external-example/pom.xml @@ -9,7 +9,7 @@ GATK External Example - 3.0 + 3.1 + + commons-httpclient + commons-httpclient + ${project.groupId} @@ -105,9 +110,9 @@ - pipeline-tests + queue-tests - ${sting.serialpipelinetests.skipped} + ${sting.serialqueuetests.skipped} org.broadinstitute.sting:.* diff --git a/public/gatk-queue-extgen/pom.xml b/public/gatk-queue-extgen/pom.xml index 99e9b23bc..967bf4743 100644 --- 
a/public/gatk-queue-extgen/pom.xml +++ b/public/gatk-queue-extgen/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-aggregator - 3.0 + 3.1 ../.. diff --git a/public/gsalib/pom.xml b/public/gsalib/pom.xml index a242145c2..3ac9e5bd8 100644 --- a/public/gsalib/pom.xml +++ b/public/gsalib/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-aggregator - 3.0 + 3.1 ../.. diff --git a/public/package-tests/pom.xml b/public/package-tests/pom.xml index 0eda2ae00..1a0b77ef0 100644 --- a/public/package-tests/pom.xml +++ b/public/package-tests/pom.xml @@ -9,7 +9,7 @@ org.broadinstitute.sting sting-root - 3.0 + 3.1 ../sting-root @@ -21,7 +21,7 @@ ${project.basedir}/../.. true true - true + true true true @@ -50,13 +50,15 @@ ${project.groupId} gatk-framework @@ -72,6 +74,7 @@ + --> org.testng @@ -111,6 +114,10 @@ ${sting.packagetests.basedir} ${project.build.outputDirectory}/ignored_by_package_test ${sting.packagetests.testClasses} + + + ${sting.basedir}/public/gatk-framework/target/gatk-framework-${project.version}-tests.jar + @@ -148,6 +155,10 @@ ${sting.packagetests.basedir} ${project.build.outputDirectory}/ignored_by_package_test ${sting.packagetests.testClasses} + + + ${sting.basedir}/public/gatk-framework/target/gatk-framework-${project.version}-tests.jar + @@ -161,13 +172,13 @@ - pipeline-tests + queue-tests verify - + - ${sting.packagepipelinetests.skipped} + ${sting.packagequeuetests.skipped} diff --git a/public/pom.xml b/public/pom.xml index 40560cfbf..9f42721c2 100644 --- a/public/pom.xml +++ b/public/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-root - 3.0 + 3.1 sting-root @@ -19,7 +19,7 @@ sting-utils gatk-framework gatk-package - + diff --git a/public/queue-framework/pom.xml b/public/queue-framework/pom.xml index 2accd1175..b7403e611 100644 --- a/public/queue-framework/pom.xml +++ b/public/queue-framework/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-aggregator - 3.0 + 3.1 ../.. 
@@ -198,7 +198,7 @@ package-knowledgebasetests - package-pipelinetests + package-queuetests diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTest.scala similarity index 85% rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTest.scala index 2800ba2da..9c32a40be 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTest.scala @@ -39,13 +39,13 @@ import org.broadinstitute.sting.gatk.report.GATKReport import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.WildcardFileFilter -object PipelineTest extends BaseTest with Logging { +object QueueTest extends BaseTest with Logging { private val validationReportsDataLocation = "/humgen/gsa-hpprojects/GATK/validationreports/submitted/" private val md5DB = new MD5DB() /** - * All the job runners configured to run PipelineTests at The Broad. + * All the job runners configured to run QueueTests at The Broad. */ final val allJobRunners = Seq("Lsf706", "GridEngine", "Shell") @@ -56,15 +56,15 @@ object PipelineTest extends BaseTest with Logging { /** * Returns the top level output path to this test. - * @param testName The name of the test passed to PipelineTest.executeTest() + * @param testName The name of the test passed to QueueTest.executeTest() * @param jobRunner The name of the job manager to run the jobs. * @return the top level output path to this test. 
*/ - def testDir(testName: String, jobRunner: String) = "pipelinetests/%s/%s/".format(testName, jobRunner) + def testDir(testName: String, jobRunner: String) = "queuetests/%s/%s/".format(testName, jobRunner) /** * Returns the directory where relative output files will be written for this test. - * @param testName The name of the test passed to PipelineTest.executeTest() + * @param testName The name of the test passed to QueueTest.executeTest() * @param jobRunner The name of the job manager to run the jobs. * @return the directory where relative output files will be written for this test. */ @@ -72,44 +72,44 @@ object PipelineTest extends BaseTest with Logging { /** * Returns the directory where temp files will be written for this test. - * @param testName The name of the test passed to PipelineTest.executeTest() + * @param testName The name of the test passed to QueueTest.executeTest() * @param jobRunner The name of the job manager to run the jobs. * @return the directory where temp files will be written for this test. */ private def tempDir(testName: String, jobRunner: String) = testDir(testName, jobRunner) + "temp/" /** - * Runs the pipelineTest. - * @param pipelineTest test to run. + * Runs the queueTest. + * @param queueTest test to run. */ - def executeTest(pipelineTest: PipelineTestSpec) { - var jobRunners = pipelineTest.jobRunners + def executeTest(queueTest: QueueTestSpec) { + var jobRunners = queueTest.jobRunners if (jobRunners == null) jobRunners = defaultJobRunners - jobRunners.foreach(executeTest(pipelineTest, _)) + jobRunners.foreach(executeTest(queueTest, _)) } /** - * Runs the pipelineTest. - * @param pipelineTest test to run. + * Runs the queueTest. + * @param queueTest test to run. * @param jobRunner The name of the job manager to run the jobs. */ - def executeTest(pipelineTest: PipelineTestSpec, jobRunner: String) { + def executeTest(queueTest: QueueTestSpec, jobRunner: String) { // Reset the order of functions added to the graph. 
QScript.resetAddOrder() - val name = pipelineTest.name + val name = queueTest.name if (name == null) - Assert.fail("PipelineTestSpec.name is null") + Assert.fail("QueueTestSpec.name is null") println(Utils.dupString('-', 80)) - executeTest(name, pipelineTest.args, pipelineTest.jobQueue, pipelineTest.expectedException, jobRunner) - if (BaseTest.pipelineTestRunModeIsSet) { - assertMatchingMD5s(name, pipelineTest.fileMD5s.map{case (file, md5) => new File(runDir(name, jobRunner), file) -> md5}, pipelineTest.parameterize) - if (pipelineTest.evalSpec != null) - validateEval(name, pipelineTest.evalSpec, jobRunner) - for (path <- pipelineTest.expectedFilePaths) + executeTest(name, queueTest.args, queueTest.jobQueue, queueTest.expectedException, jobRunner) + if (BaseTest.queueTestRunModeIsSet) { + assertMatchingMD5s(name, queueTest.fileMD5s.map{case (file, md5) => new File(runDir(name, jobRunner), file) -> md5}, queueTest.parameterize) + if (queueTest.evalSpec != null) + validateEval(name, queueTest.evalSpec, jobRunner) + for (path <- queueTest.expectedFilePaths) assertPathExists(runDir(name, jobRunner), path) - for (path <- pipelineTest.unexpectedFilePaths) + for (path <- queueTest.unexpectedFilePaths) assertPathDoesNotExist(runDir(name, jobRunner), path) println(" => %s PASSED (%s)".format(name, jobRunner)) } @@ -128,7 +128,7 @@ object PipelineTest extends BaseTest with Logging { Assert.fail("%d of %d MD5s did not match".format(failed, fileMD5s.size)) } - private def validateEval(name: String, evalSpec: PipelineTestEvalSpec, jobRunner: String) { + private def validateEval(name: String, evalSpec: QueueTestEvalSpec, jobRunner: String) { // write the report to the shared validation data location val formatter = new SimpleDateFormat("yyyy.MM.dd.HH.mm.ss") val reportLocation = "%s%s/%s/validation.%s.eval".format(validationReportsDataLocation, jobRunner, name, formatter.format(new Date)) @@ -176,7 +176,7 @@ object PipelineTest extends BaseTest with Logging { if (jobQueue != 
null) command = Utils.appendArray(command, "-jobQueue", jobQueue) - if (BaseTest.pipelineTestRunModeIsSet) + if (BaseTest.queueTestRunModeIsSet) command = Utils.appendArray(command, "-run") // run the executable diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTestEvalSpec.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTestEvalSpec.scala similarity index 98% rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTestEvalSpec.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTestEvalSpec.scala index 4beb81c93..2295d7185 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTestEvalSpec.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTestEvalSpec.scala @@ -28,7 +28,7 @@ package org.broadinstitute.sting.queue.pipeline /** * Data validations to evaluate on a GATKReport. */ -class PipelineTestEvalSpec { +class QueueTestEvalSpec { /** Eval modules to output. 
*/ var evalReport: String = _ diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTestSpec.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTestSpec.scala similarity index 96% rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTestSpec.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTestSpec.scala index 3dc761382..e05350bce 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/PipelineTestSpec.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/QueueTestSpec.scala @@ -25,7 +25,7 @@ package org.broadinstitute.sting.queue.pipeline -class PipelineTestSpec(var name: String = null) { +class QueueTestSpec(var name: String = null) { /** The arguments to pass to the Queue test, ex: "-S scala/qscript/examples/HelloWorld.scala" */ var args: String = _ @@ -40,7 +40,7 @@ class PipelineTestSpec(var name: String = null) { var fileMD5s = Map.empty[String, String] /** VariantEval validations to run on a VCF after the pipeline has completed. */ - var evalSpec: PipelineTestEvalSpec = _ + var evalSpec: QueueTestEvalSpec = _ /** Expected exception from the test. 
*/ var expectedException: Class[_ <: Exception] = null diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociQueueTest.scala similarity index 90% rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociQueueTest.scala index 291894244..b1149054a 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociQueueTest.scala @@ -26,14 +26,14 @@ package org.broadinstitute.sting.queue.pipeline.examples import org.testng.annotations.Test -import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.queue.pipeline.{QueueTest, QueueTestSpec} import org.broadinstitute.sting.BaseTest -class ExampleCountLociPipelineTest { +class ExampleCountLociQueueTest { @Test(timeOut=36000000) def testCountLoci() { val testOut = "count.out" - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "countloci" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountLoci.scala", @@ -41,6 +41,6 @@ class ExampleCountLociPipelineTest { " -I " + BaseTest.publicTestDir + "exampleBAM.bam", " -o " + testOut).mkString spec.fileMD5s += testOut -> "ade93df31a6150321c1067e749cae9be" - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } } \ No newline at end of file diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala 
b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsQueueTest.scala similarity index 89% rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsQueueTest.scala index 2ec84e85f..e023a9dda 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsQueueTest.scala @@ -26,18 +26,18 @@ package org.broadinstitute.sting.queue.pipeline.examples import org.testng.annotations.Test -import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.queue.pipeline.{QueueTest, QueueTestSpec} import org.broadinstitute.sting.BaseTest -class ExampleCountReadsPipelineTest { +class ExampleCountReadsQueueTest { @Test(timeOut=36000000) def testCountReads() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "countreads" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleCountReads.scala", " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + BaseTest.publicTestDir + "exampleBAM.bam").mkString - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } } diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExamplePrintReadsPipelineTest.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExamplePrintReadsQueueTest.scala similarity index 90% rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExamplePrintReadsPipelineTest.scala rename to 
public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExamplePrintReadsQueueTest.scala index b9964d187..667187bd2 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExamplePrintReadsPipelineTest.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExamplePrintReadsQueueTest.scala @@ -50,34 +50,34 @@ package org.broadinstitute.sting.queue.pipeline.examples */ import org.testng.annotations.Test -import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.queue.pipeline.{QueueTest, QueueTestSpec} import org.broadinstitute.sting.BaseTest -class ExamplePrintReadsPipelineTest { +class ExamplePrintReadsQueueTest { @Test(timeOut=36000000) def testDevNullOutput() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "devnulloutput" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExamplePrintReads.scala", " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + BaseTest.publicTestDir + "exampleBAM.bam", " -out /dev/null").mkString - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testCleanupBai() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "cleanupbai" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExamplePrintReads.scala", " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + BaseTest.publicTestDir + "exampleBAM.bam", " -out exampleOut.bam").mkString - spec.jobRunners = PipelineTest.allJobRunners + spec.jobRunners = QueueTest.allJobRunners spec.unexpectedFilePaths :+= ".queue/scatterGather/ExamplePrintReads-1-sg/temp_1_of_1/exampleOut.bai" - PipelineTest.executeTest(spec) + 
QueueTest.executeTest(spec) } } diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterQueueTest.scala similarity index 95% rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterQueueTest.scala index 7d74e1a5c..82fd57ee3 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterQueueTest.scala @@ -74,18 +74,18 @@ package org.broadinstitute.sting.queue.pipeline.examples */ import org.testng.annotations.Test -import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.queue.pipeline.{QueueTest, QueueTestSpec} import org.broadinstitute.sting.BaseTest -class ExampleReadFilterPipelineTest { +class ExampleReadFilterQueueTest { @Test(timeOut=36000000) def testExampleReadFilter() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "examplereadfilter" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleReadFilter.scala", " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + BaseTest.publicTestDir + "exampleBAM.bam").mkString - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } } diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitQueueTest.scala similarity index 87% 
rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitQueueTest.scala index e98ca6756..3e314708c 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitQueueTest.scala @@ -26,22 +26,22 @@ package org.broadinstitute.sting.queue.pipeline.examples import org.testng.annotations.Test -import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.queue.pipeline.{QueueTest, QueueTestSpec} import org.broadinstitute.sting.BaseTest -class ExampleRetryMemoryLimitPipelineTest { +class ExampleRetryMemoryLimitQueueTest { // This test is currently disabled due to unexplained intermittent failures (see GSA-943) @Test(timeOut=36000000,enabled = false) def testRetryMemoryLimit() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "RetryMemoryLimit" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleRetryMemoryLimit.scala", " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + BaseTest.publicTestDir + "exampleBAM.bam", " -retry 1").mkString - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } } diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperQueueTest.scala similarity index 87% rename from 
public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperQueueTest.scala index b054164a1..20f97ea61 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperQueueTest.scala @@ -26,13 +26,13 @@ package org.broadinstitute.sting.queue.pipeline.examples import org.testng.annotations.{DataProvider, Test} -import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.queue.pipeline.{QueueTest, QueueTestSpec} import org.broadinstitute.sting.BaseTest -class ExampleUnifiedGenotyperPipelineTest { +class ExampleUnifiedGenotyperQueueTest { @Test(timeOut=36000000) def testUnifiedGenotyper() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "unifiedgenotyper" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", @@ -40,8 +40,8 @@ class ExampleUnifiedGenotyperPipelineTest { " -I " + BaseTest.publicTestDir + "exampleBAM.bam", " -filter QD", " -filterExpression 'QD < 2.0'").mkString - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } @DataProvider(name = "ugIntervals") @@ -54,7 +54,7 @@ class ExampleUnifiedGenotyperPipelineTest { @Test(dataProvider = "ugIntervals", timeOut=36000000) def testUnifiedGenotyperWithIntervals(intervalsName: String, intervalsPath: String) { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "unifiedgenotyper_with_" + intervalsName spec.args = Array( " -S 
public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", @@ -62,20 +62,20 @@ class ExampleUnifiedGenotyperPipelineTest { " -R " + BaseTest.hg18Reference, " -L " + intervalsPath).mkString spec.jobRunners = Seq("Lsf706") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testUnifiedGenotyperNoGCOpt() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "unifiedgenotyper_no_gc_opt" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + BaseTest.publicTestDir + "exampleBAM.bam", " -noGCOpt").mkString - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } @DataProvider(name="resMemReqParams") @@ -83,7 +83,7 @@ class ExampleUnifiedGenotyperPipelineTest { @Test(dataProvider = "resMemReqParams", timeOut=36000000) def testUnifiedGenotyperResMemReqParam(reqParam: String) { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "unifiedgenotyper_" + reqParam spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", @@ -91,21 +91,21 @@ class ExampleUnifiedGenotyperPipelineTest { " -I " + BaseTest.publicTestDir + "exampleBAM.bam", " -resMemReqParam " + reqParam).mkString spec.jobRunners = Seq("GridEngine") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testUnifiedGenotyperLogDirectory() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "unifiedgenotyper_with_log_directory" spec.args = Array( " -S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/ExampleUnifiedGenotyper.scala", " -R " + BaseTest.publicTestDir + "exampleFASTA.fasta", " -I " + 
BaseTest.publicTestDir + "exampleBAM.bam", " -logDir exampleUGLogDir").mkString - spec.jobRunners = PipelineTest.allJobRunners + spec.jobRunners = QueueTest.allJobRunners spec.expectedFilePaths :+= "exampleUGLogDir/exampleBAM.unfiltered.vcf.out" spec.expectedFilePaths :+= "exampleUGLogDir/exampleBAM.unfiltered.eval.out" - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } } diff --git a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldQueueTest.scala similarity index 79% rename from public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala rename to public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldQueueTest.scala index 0f645cb2a..5d3923250 100644 --- a/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala +++ b/public/queue-framework/src/test/scala/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldQueueTest.scala @@ -26,127 +26,127 @@ package org.broadinstitute.sting.queue.pipeline.examples import org.testng.annotations.Test -import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec} +import org.broadinstitute.sting.queue.pipeline.{QueueTest, QueueTestSpec} -class HelloWorldPipelineTest { +class HelloWorldQueueTest { @Test(timeOut=36000000) def testHelloWorld() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorld" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithRunName() { - val spec = new PipelineTestSpec 
+ val spec = new QueueTestSpec spec.name = "HelloWorldWithRunName" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -runName HelloWorld" - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithMemoryLimit() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldMemoryLimit" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -memLimit 1.25" - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithPriority() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithPriority" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -jobPriority 100" - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithLsfResource() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithLsfResource" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -jobResReq rusage[iodine_io=1] -jobResReq select[swp>0] -jobResReq order[swp]" spec.jobRunners = Seq("Lsf706") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithLsfResourceAndMemoryLimit() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithLsfResourceAndMemoryLimit" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " 
-memLimit 1.25 -jobResReq rusage[iodine_io=1] -jobResReq select[swp>0] -jobResReq order[swp]" spec.jobRunners = Seq("Lsf706") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithLsfEnvironment() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithLsfEnvironment" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -jobEnv tv" spec.jobRunners = Seq("Lsf706") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithGridEngineResource() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithGridEngineResource" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -jobResReq s_core=1000M" spec.jobRunners = Seq("GridEngine") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithGridEngineResourceAndMemoryLimit() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithGridEngineResourceAndMemoryLimit" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -memLimit 1.25 -jobResReq s_core=1000M" spec.jobRunners = Seq("GridEngine") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithGridEngineEnvironment() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithGridEngineEnvironment" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -jobEnv \"make 1\"" spec.jobRunners = Seq("GridEngine") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } // disabled because our DRMAA implementation doesn't support wallTime @Test(enabled=false, timeOut=36000000) def 
testHelloWorldWithWalltime() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithWalltime" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -wallTime 100" - spec.jobRunners = PipelineTest.allJobRunners - PipelineTest.executeTest(spec) + spec.jobRunners = QueueTest.allJobRunners + QueueTest.executeTest(spec) } @Test(timeOut=36000000) def testHelloWorldWithLogDirectory() { - val spec = new PipelineTestSpec + val spec = new QueueTestSpec spec.name = "HelloWorldWithLogDirectory" spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" + " -logDir pipelineLogDir" - spec.jobRunners = PipelineTest.allJobRunners + spec.jobRunners = QueueTest.allJobRunners spec.expectedFilePaths = Seq("pipelineLogDir/HelloWorld-1.out") - PipelineTest.executeTest(spec) + QueueTest.executeTest(spec) } } diff --git a/public/queue-package/pom.xml b/public/queue-package/pom.xml index 591f2c5bd..35f5d327a 100644 --- a/public/queue-package/pom.xml +++ b/public/queue-package/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-aggregator - 3.0 + 3.1 ../.. 
@@ -67,6 +67,11 @@ net.sf.snpeff snpeff + + + commons-httpclient + commons-httpclient + ${project.groupId} @@ -144,9 +149,9 @@ - pipeline-tests + queue-tests - ${sting.serialpipelinetests.skipped} + ${sting.serialqueuetests.skipped} org.broadinstitute.sting:.* diff --git a/public/repo/net/sf/picard/1.107.1683/picard-1.107.1683.jar b/public/repo/net/sf/picard/1.109.1722/picard-1.109.1722.jar similarity index 89% rename from public/repo/net/sf/picard/1.107.1683/picard-1.107.1683.jar rename to public/repo/net/sf/picard/1.109.1722/picard-1.109.1722.jar index 089b71385..38d1e5958 100644 Binary files a/public/repo/net/sf/picard/1.107.1683/picard-1.107.1683.jar and b/public/repo/net/sf/picard/1.109.1722/picard-1.109.1722.jar differ diff --git a/public/repo/net/sf/picard/1.107.1683/picard-1.107.1683.pom b/public/repo/net/sf/picard/1.109.1722/picard-1.109.1722.pom similarity index 90% rename from public/repo/net/sf/picard/1.107.1683/picard-1.107.1683.pom rename to public/repo/net/sf/picard/1.109.1722/picard-1.109.1722.pom index fd8a61917..075b2606e 100644 --- a/public/repo/net/sf/picard/1.107.1683/picard-1.107.1683.pom +++ b/public/repo/net/sf/picard/1.109.1722/picard-1.109.1722.pom @@ -3,23 +3,23 @@ 4.0.0 net.sf picard - 1.107.1683 + 1.109.1722 picard net.sf sam - 1.107.1683 + 1.109.1722 org.broadinstitute variant - 1.107.1683 + 1.109.1722 org.broad tribble - 1.107.1683 + 1.109.1722 diff --git a/public/repo/net/sf/sam/1.107.1683/sam-1.107.1683.jar b/public/repo/net/sf/sam/1.109.1722/sam-1.109.1722.jar similarity index 71% rename from public/repo/net/sf/sam/1.107.1683/sam-1.107.1683.jar rename to public/repo/net/sf/sam/1.109.1722/sam-1.109.1722.jar index 928838707..88a396a9e 100644 Binary files a/public/repo/net/sf/sam/1.107.1683/sam-1.107.1683.jar and b/public/repo/net/sf/sam/1.109.1722/sam-1.109.1722.jar differ diff --git a/public/repo/net/sf/sam/1.107.1683/sam-1.107.1683.pom b/public/repo/net/sf/sam/1.109.1722/sam-1.109.1722.pom similarity index 95% rename from 
public/repo/net/sf/sam/1.107.1683/sam-1.107.1683.pom rename to public/repo/net/sf/sam/1.109.1722/sam-1.109.1722.pom index 89114f546..59a0e5df7 100644 --- a/public/repo/net/sf/sam/1.107.1683/sam-1.107.1683.pom +++ b/public/repo/net/sf/sam/1.109.1722/sam-1.109.1722.pom @@ -3,7 +3,7 @@ 4.0.0 net.sf sam - 1.107.1683 + 1.109.1722 sam-jdk diff --git a/public/repo/org/broad/tribble/1.107.1683/tribble-1.107.1683.jar b/public/repo/org/broad/tribble/1.109.1722/tribble-1.109.1722.jar similarity index 67% rename from public/repo/org/broad/tribble/1.107.1683/tribble-1.107.1683.jar rename to public/repo/org/broad/tribble/1.109.1722/tribble-1.109.1722.jar index efa04ad2c..7b60393ee 100644 Binary files a/public/repo/org/broad/tribble/1.107.1683/tribble-1.107.1683.jar and b/public/repo/org/broad/tribble/1.109.1722/tribble-1.109.1722.jar differ diff --git a/public/repo/org/broad/tribble/1.107.1683/tribble-1.107.1683.pom b/public/repo/org/broad/tribble/1.109.1722/tribble-1.109.1722.pom similarity index 87% rename from public/repo/org/broad/tribble/1.107.1683/tribble-1.107.1683.pom rename to public/repo/org/broad/tribble/1.109.1722/tribble-1.109.1722.pom index 7bf169bd4..21ab0d712 100644 --- a/public/repo/org/broad/tribble/1.107.1683/tribble-1.107.1683.pom +++ b/public/repo/org/broad/tribble/1.109.1722/tribble-1.109.1722.pom @@ -3,13 +3,13 @@ 4.0.0 org.broad tribble - 1.107.1683 + 1.109.1722 tribble net.sf sam - 1.107.1683 + 1.109.1722 diff --git a/public/repo/org/broadinstitute/variant/1.107.1683/variant-1.107.1683.jar b/public/repo/org/broadinstitute/variant/1.109.1722/variant-1.109.1722.jar similarity index 94% rename from public/repo/org/broadinstitute/variant/1.107.1683/variant-1.107.1683.jar rename to public/repo/org/broadinstitute/variant/1.109.1722/variant-1.109.1722.jar index ea4ebe35e..273b32d60 100644 Binary files a/public/repo/org/broadinstitute/variant/1.107.1683/variant-1.107.1683.jar and b/public/repo/org/broadinstitute/variant/1.109.1722/variant-1.109.1722.jar differ 
diff --git a/public/repo/org/broadinstitute/variant/1.107.1683/variant-1.107.1683.pom b/public/repo/org/broadinstitute/variant/1.109.1722/variant-1.109.1722.pom similarity index 90% rename from public/repo/org/broadinstitute/variant/1.107.1683/variant-1.107.1683.pom rename to public/repo/org/broadinstitute/variant/1.109.1722/variant-1.109.1722.pom index 256963812..5e393b752 100644 --- a/public/repo/org/broadinstitute/variant/1.107.1683/variant-1.107.1683.pom +++ b/public/repo/org/broadinstitute/variant/1.109.1722/variant-1.109.1722.pom @@ -3,18 +3,18 @@ 4.0.0 org.broadinstitute variant - 1.107.1683 + 1.109.1722 variant org.broad tribble - 1.107.1683 + 1.109.1722 net.sf sam - 1.107.1683 + 1.109.1722 org.apache.commons diff --git a/public/sting-root/pom.xml b/public/sting-root/pom.xml index 00cecf981..79b003cf5 100644 --- a/public/sting-root/pom.xml +++ b/public/sting-root/pom.xml @@ -12,7 +12,7 @@ org.broadinstitute.sting sting-root - 3.0 + 3.1 pom Sting Root @@ -31,10 +31,10 @@ true ${sting.committests.skipped} ${sting.committests.skipped} - ${sting.committests.skipped} + ${sting.committests.skipped} true true - false + false 1g 4g 4 @@ -43,7 +43,7 @@ -Xmx${test.maxmemory} -XX:+UseParallelOldGC -XX:ParallelGCThreads=${java.gc.threads} -XX:GCTimeLimit=${java.gc.timeLimit} -XX:GCHeapFreeLimit=${java.gc.heapFreeLimit} - 1.107.1683 + 1.109.1722 ${picard.public.version} ${picard.public.version} ${picard.public.version} @@ -188,6 +188,11 @@ commons-collections 3.2.1 + + commons-httpclient + commons-httpclient + 3.1 + org.apache.commons commons-math @@ -335,7 +340,11 @@ maven-assembly-plugin 2.4 - + + org.apache.maven.plugins + maven-enforcer-plugin + 1.3.1 + - ${sting.pipelinetests.run} + ${sting.queuetests.run} ${java.io.tmpdir} @@ -431,19 +440,19 @@ - pipeline-tests + queue-tests integration-test verify - + - - ${sting.pipelinetests.skipped} - ${project.build.directory}/failsafe-reports/pipeline/${it.test} - 
${project.build.directory}/failsafe-reports/pipeline/failsafe-summary-${it.test}.xml + + ${sting.queuetests.skipped} + ${project.build.directory}/failsafe-reports/queuetest/${it.test} + ${project.build.directory}/failsafe-reports/queuetest/failsafe-summary-${it.test}.xml - **/*PipelineTest.class + **/*QueueTest.class diff --git a/public/sting-utils/pom.xml b/public/sting-utils/pom.xml index 6abf98515..c51d882b6 100644 --- a/public/sting-utils/pom.xml +++ b/public/sting-utils/pom.xml @@ -5,7 +5,7 @@ org.broadinstitute.sting sting-aggregator - 3.0 + 3.1 ../.. diff --git a/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so b/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so new file mode 100644 index 000000000..7cd8b1f73 Binary files /dev/null and b/public/sting-utils/src/main/resources/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so differ