The Fisher Strand test may not be calculated for certain complex indel cases or for multi-allelic sites.
*/
-public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation, ActiveRegionBasedAnnotation {
+public class FisherStrand extends StrandBiasTest implements StandardAnnotation, ActiveRegionBasedAnnotation {
private final static boolean ENABLE_DEBUGGING = false;
private final static Logger logger = Logger.getLogger(FisherStrand.class);
@@ -100,7 +97,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return null;
if ( vc.hasGenotypes() ) {
- final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes() );
+ final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
if ( tableFromPerSampleAnnotations != null ) {
return pValueForBestTable(tableFromPerSampleAnnotations, null);
}
@@ -116,8 +113,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
else if (stratifiedPerReadAlleleLikelihoodMap != null) {
// either SNP with no alignment context, or indels: per-read likelihood map needed
final int[][] table = getContingencyTable(stratifiedPerReadAlleleLikelihoodMap, vc);
-// logger.info("VC " + vc);
-// printTable(table, 0.0);
+ //logger.info("VC " + vc);
+ //printTable(table, 0.0);
return pValueForBestTable(table, null);
}
else
@@ -126,45 +123,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
return null;
}
- /**
- * Create the FisherStrand table by retrieving the per-sample strand bias annotation and adding them together
- * @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
- * @return the table used for the FisherStrand p-value calculation, will be null if none of the genotypes contain the per-sample SB annotation
- */
- private int[][] getTableFromSamples( final GenotypesContext genotypes ) {
- if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
-
- final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
- boolean foundData = false;
-
- for( final Genotype g : genotypes ) {
- if( g.isNoCall() || ! g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) )
- continue;
-
- foundData = true;
- final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME);
- final int[] data = encodeSBBS(sbbsString);
- if ( passesMinimumThreshold(data) ) {
- for( int index = 0; index < sbArray.length; index++ ) {
- sbArray[index] += data[index];
- }
- }
- }
-
- return ( foundData ? decodeSBBS(sbArray) : null );
- }
-
- /**
- * Does this strand data array pass the minimum threshold for inclusion?
- *
- * @param data the array
- * @return true if it passes the minimum threshold, false otherwise
- */
- private static boolean passesMinimumThreshold(final int[] data) {
- // the ref and alt totals must each be greater than MIN_COUNT
- return data[0] + data[1] > MIN_COUNT && data[2] + data[3] > MIN_COUNT;
- }
-
/**
* Create an annotation for the highest (i.e., least significant) p-value of table1 and table2
*
@@ -190,7 +148,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
* @param pValue
* @return a hash map from FS -> phred-scaled pValue
*/
- private Map annotationForOneTable(final double pValue) {
+ protected Map annotationForOneTable(final double pValue) {
final Object value = String.format("%.3f", QualityUtils.phredScaleErrorRate(Math.max(pValue, MIN_PVALUE))); // prevent INFINITYs
return Collections.singletonMap(FS, value);
}
@@ -218,36 +176,6 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
list.add(table[1][1]);
return list;
}
-
- /**
- * Helper function to parse the genotype annotation into the SB annotation array
- * @param string the string that is returned by genotype.getAnnotation("SB")
- * @return the array used by the per-sample Strand Bias annotation
- */
- private static int[] encodeSBBS( final String string ) {
- final int[] array = new int[4];
- final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
- for( int index = 0; index < 4; index++ ) {
- array[index] = Integer.parseInt(tokenizer.nextToken());
- }
- return array;
- }
-
- /**
- * Helper function to turn the SB annotation array into the FisherStrand table
- * @param array the array used by the per-sample Strand Bias annotation
- * @return the table used by the FisherStrand annotation
- */
- private static int[][] decodeSBBS( final int[] array ) {
- if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); }
- final int[][] table = new int[2][2];
- table[0][0] = array[0];
- table[0][1] = array[1];
- table[1][0] = array[2];
- table[1][1] = array[3];
- return table;
- }
-
private Double pValueForContingencyTable(int[][] originalTable) {
final int[][] normalizedTable = normalizeContingencyTable(originalTable);
@@ -419,7 +347,7 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
final GATKSAMRecord read = el.getKey();
updateTable(myTable, mostLikelyAllele.getAlleleIfInformative(), read, ref, alt);
}
- if ( passesMinimumThreshold(myTable) )
+ if ( passesMinimumThreshold(myTable, MIN_COUNT) )
copyToMainTable(myTable, table);
}
@@ -464,7 +392,8 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
updateTable(myTable, Allele.create(p.getBase(), false), p.getRead(), ref, alt);
}
- if ( passesMinimumThreshold(myTable) )
+
+ if ( passesMinimumThreshold( myTable, MIN_COUNT ) )
copyToMainTable(myTable, table);
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasTest.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasTest.java
new file mode 100644
index 000000000..2974ae746
--- /dev/null
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandBiasTest.java
@@ -0,0 +1,128 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1. DEFINITIONS
+* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
+*
+* 2. LICENSE
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
+* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+* Copyright 2012 Broad Institute, Inc.
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+* 4. INDEMNIFICATION
+* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+* 5. NO REPRESENTATIONS OR WARRANTIES
+* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+* 6. ASSIGNMENT
+* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+* 7. MISCELLANEOUS
+* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.gatk.walkers.annotator;
+
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
+import org.broadinstitute.variant.variantcontext.Genotype;
+import org.broadinstitute.variant.variantcontext.GenotypesContext;
+
+import java.util.*;
+
+/**
+ * Class of tests to detect strand bias.
+ */
+public abstract class StrandBiasTest extends InfoFieldAnnotation {
+ /**
+ * Create the contingency table by retrieving the per-sample strand bias annotation and adding them together
+ * @param genotypes the genotypes from which to pull out the per-sample strand bias annotation
+ * @param minCount minimum threshold for the sample strand bias counts for each ref and alt.
+ * If both ref and alt counts are above minCount the whole sample strand bias is added to the resulting table
+ * @return the table used for several strand bias tests, will be null if none of the genotypes contain the per-sample SB annotation
+ */
+ protected int[][] getTableFromSamples( final GenotypesContext genotypes, final int minCount ) {
+ if( genotypes == null ) { throw new IllegalArgumentException("Genotypes cannot be null."); }
+
+ final int[] sbArray = {0,0,0,0}; // reference-forward-reverse -by- alternate-forward-reverse
+ boolean foundData = false;
+
+ for( final Genotype g : genotypes ) {
+ if( g.isNoCall() || ! g.hasAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME) )
+ continue;
+
+ foundData = true;
+ final String sbbsString = (String) g.getAnyAttribute(StrandBiasBySample.STRAND_BIAS_BY_SAMPLE_KEY_NAME);
+ final int[] data = encodeSBBS(sbbsString);
+ if ( passesMinimumThreshold(data, minCount) ) {
+ for( int index = 0; index < sbArray.length; index++ ) {
+ sbArray[index] += data[index];
+ }
+ }
+ }
+
+ return ( foundData ? decodeSBBS(sbArray) : null );
+ }
+ /**
+ * Does this strand data array pass the minimum threshold for inclusion?
+ *
+ * @param data the array
+ * @minCount The minimum threshold of counts in the array
+ * @return true if it passes the minimum threshold, false otherwise
+ */
+ protected static boolean passesMinimumThreshold(final int[] data, final int minCount) {
+ // the ref and alt totals must each be greater than MIN_COUNT
+ return data[0] + data[1] > minCount && data[2] + data[3] > minCount;
+ }
+
+ /**
+ * Helper function to parse the genotype annotation into the SB annotation array
+ * @param string the string that is returned by genotype.getAnnotation("SB")
+ * @return the array used by the per-sample Strand Bias annotation
+ */
+ private static int[] encodeSBBS( final String string ) {
+ final int[] array = new int[4];
+ final StringTokenizer tokenizer = new StringTokenizer(string, ",", false);
+ for( int index = 0; index < 4; index++ ) {
+ array[index] = Integer.parseInt(tokenizer.nextToken());
+ }
+ return array;
+ }
+
+ /**
+ * Helper function to turn the SB annotation array into a contingency table
+ * @param array the array used by the per-sample Strand Bias annotation
+ * @return the table used by the StrandOddsRatio annotation
+ */
+ private static int[][] decodeSBBS( final int[] array ) {
+ if(array.length != 4) { throw new IllegalArgumentException("Expecting a length = 4 strand bias array."); }
+ final int[][] table = new int[2][2];
+ table[0][0] = array[0];
+ table[0][1] = array[1];
+ table[1][0] = array[2];
+ table[1][1] = array[3];
+ return table;
+ }
+}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatio.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatio.java
new file mode 100644
index 000000000..e500a725a
--- /dev/null
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatio.java
@@ -0,0 +1,150 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1. DEFINITIONS
+* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
+*
+* 2. LICENSE
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
+* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+* Copyright 2012 Broad Institute, Inc.
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+* 4. INDEMNIFICATION
+* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+* 5. NO REPRESENTATIONS OR WARRANTIES
+* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+* 6. ASSIGNMENT
+* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+* 7. MISCELLANEOUS
+* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.gatk.walkers.annotator;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ActiveRegionBasedAnnotation;
+import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.vcf.VCFHeaderLineType;
+import org.broadinstitute.variant.vcf.VCFInfoHeaderLine;
+
+import java.util.*;
+
+/**
+ * Symmetric Odds Ratio to detect strand bias
+ *
+ *
Odds Ratios in the 2x2 contingency table below are R = (X[0][0] * X[1][1]) / (X[0][1] * X[1][0]) and its inverse
+ * + strand - strand
+ * Ref X[0][0] X[0][1]
+ * Alt X[1][0] X[1][0]
+ * The sum R + 1/R is used to detect a difference in strand bias for ref and for alt (the sum makes it symmetric):
+ * A high value is indicative of large difference where one entry is very small compared to the others.
+ *
+ */
+public class StrandOddsRatio extends StrandBiasTest implements ActiveRegionBasedAnnotation {
+ private final static double AUGMENTATION_CONSTANT = 0.1;
+ private static final int MIN_COUNT = 0;
+
+ private static final String SOR = "SOR";
+
+ public Map annotate(final RefMetaDataTracker tracker,
+ final AnnotatorCompatible walker,
+ final ReferenceContext ref,
+ final Map stratifiedContexts,
+ final VariantContext vc,
+ final Map stratifiedPerReadAlleleLikelihoodMap) {
+ if ( !vc.isVariant() )
+ return null;
+
+ if ( vc.hasGenotypes() ) {
+ final int[][] tableFromPerSampleAnnotations = getTableFromSamples( vc.getGenotypes(), MIN_COUNT );
+ if ( tableFromPerSampleAnnotations != null ) {
+ final double ratio = symmetricOddsRatio(tableFromPerSampleAnnotations);
+ return annotationForOneTable(ratio);
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Computes the symmetric odds ratio of a table after augmentation.
+ * Augmentation avoids quotient by zero.
+ *
+ * @param originalTable The table before augmentation
+ * @return the symmetric odds ratio
+ */
+ final protected double symmetricOddsRatio(final int[][] originalTable) {
+ final double[][] augmentedTable = augmentContingencyTable(originalTable);
+
+ double ratio = 0;
+
+ ratio += (augmentedTable[0][0] / augmentedTable[0][1]) * (augmentedTable[1][1] / augmentedTable[1][0]);
+ ratio += (augmentedTable[0][1] / augmentedTable[0][0]) * (augmentedTable[1][0] / augmentedTable[1][1]);
+
+ return ratio;
+ }
+
+
+ /**
+ * Adds the small value AUGMENTATION_CONSTANT to all the entries of the table.
+ *
+ * @param table the table to augment
+ * @return the augmented table
+ */
+ private static double[][] augmentContingencyTable(final int[][] table) {
+ double[][] augmentedTable = new double[2][2];
+ for ( int i = 0; i < 2; i++ ) {
+ for ( int j = 0; j < 2; j++ )
+ augmentedTable[i][j] = table[i][j] + AUGMENTATION_CONSTANT;
+ }
+
+ return augmentedTable;
+ }
+
+ /**
+ * Returns an annotation result given a ratio
+ *
+ * @param ratio the symmetric odds ratio of the contingency table
+ * @return a hash map from SOR
+ */
+ protected Map annotationForOneTable(final double ratio) {
+ final Object value = String.format("%.3f", ratio);
+ return Collections.singletonMap(SOR, value);
+ }
+
+ public List getDescriptions() {
+ return Collections.singletonList(new VCFInfoHeaderLine(SOR, 1, VCFHeaderLineType.Float, "Symmetric Odds Ratio of 2x2 contingency table to detect strand bias"));
+ }
+
+ public List getKeyNames() {
+ return Collections.singletonList(SOR);
+ }
+}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java
index 8a35ccb05..8b37e265d 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GraphBasedLikelihoodCalculationEngine.java
@@ -155,4 +155,8 @@ public class GraphBasedLikelihoodCalculationEngine implements LikelihoodCalculat
}
}
}
+
+ @Override
+ public void close() {
+ }
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index 91e763a0d..b6eddab51 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -289,6 +289,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
@Argument(fullName="dontRecoverDanglingTails", shortName="dontRecoverDanglingTails", doc="Should we disable dangling tail recovery in the read threading assembler?", required = false)
protected boolean dontRecoverDanglingTails = false;
+ @Advanced
+ @Argument(fullName="consensus", shortName="consensus", doc="In 1000G consensus mode. Inject all provided alleles to the assembly graph but don't forcibly genotype all of them.", required = false)
+ protected boolean consensusMode = false;
+
// -----------------------------------------------------------------------------------------------
// general advanced arguments to control haplotype caller behavior
// -----------------------------------------------------------------------------------------------
@@ -575,7 +579,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
// initialize the UnifiedGenotyper Engine which is used to call into the exact model
final UnifiedArgumentCollection UAC = new UnifiedArgumentCollection( SCAC ); // this adapter is used so that the full set of unused UG arguments aren't exposed to the HC user
// HC GGA mode depends critically on EMIT_ALL_SITES being set for the UG engine
- UAC.OutputMode = SCAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
+ UAC.OutputMode = SCAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES)
? UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_ALL_SITES : UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, null, null, samples, GATKVariantContextUtils.DEFAULT_PLOIDY);
@@ -598,6 +602,10 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
UAC.setSampleContamination(AlleleBiasedDownsamplingUtils.loadContaminationFile(UAC.CONTAMINATION_FRACTION_FILE, UAC.CONTAMINATION_FRACTION, samples, logger));
}
+ if( SCAC.GenotypingMode.equals(GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) && consensusMode ) {
+ throw new UserException("HaplotypeCaller cannot be run in both GENOTYPE_GIVEN_ALLELES mode and in consensus mode. Please choose one or the other.");
+ }
+
// initialize the output VCF header
final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
@@ -878,7 +886,8 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
regionForGenotyping.getLocation(),
getToolkit().getGenomeLocParser(),
metaDataTracker,
- activeAllelesToGenotype, emitReferenceConfidence() );
+ ( consensusMode ? Collections.emptyList() : activeAllelesToGenotype ),
+ emitReferenceConfidence() );
// TODO -- must disable if we are doing NCT, or set the output type of ! presorted
if ( bamWriter != null ) {
@@ -1051,7 +1060,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In
referenceConfidenceModel.close();
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed
//TODO (open & close) at the walker, not the engine.
- //likelihoodCalculationEngine.close();
+ likelihoodCalculationEngine.close();
logger.info("Ran local assembly on " + result + " active regions");
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
index 0626f2268..04e64186f 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@@ -89,4 +89,6 @@ public interface LikelihoodCalculationEngine {
*/
public Map computeReadLikelihoods(AssemblyResultSet assemblyResultSet,
Map> perSampleReadList);
+
+ public void close();
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
index 8dfeed987..b53445933 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LocalAssemblyEngine.java
@@ -213,16 +213,18 @@ public abstract class LocalAssemblyEngine {
final Map assemblyResultByGraph, final AssemblyResultSet assemblyResultSet) {
// add the reference haplotype separately from all the others to ensure that it is present in the list of haplotypes
final Set returnHaplotypes = new LinkedHashSet<>();
- returnHaplotypes.add( refHaplotype );
final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
+ final ArrayList finders = new ArrayList<>(graphs.size());
for( final SeqGraph graph : graphs ) {
final SeqVertex source = graph.getReferenceSourceVertex();
final SeqVertex sink = graph.getReferenceSinkVertex();
if ( source == null || sink == null ) throw new IllegalArgumentException("Both source and sink cannot be null but got " + source + " and sink " + sink + " for graph "+ graph);
final KBestHaplotypeFinder haplotypeFinder = new KBestHaplotypeFinder(graph,source,sink);
+ finders.add(haplotypeFinder);
final Iterator bestHaplotypes = haplotypeFinder.iterator(numBestHaplotypesPerGraph);
+
while (bestHaplotypes.hasNext()) {
final KBestHaplotype kBestHaplotype = bestHaplotypes.next();
final Haplotype h = kBestHaplotype.haplotype();
@@ -256,9 +258,19 @@ public abstract class LocalAssemblyEngine {
}
}
-
- if ( returnHaplotypes.size() < returnHaplotypes.size() )
- logger.info("Found " + returnHaplotypes.size() + " candidate haplotypes of " + returnHaplotypes.size() + " possible combinations to evaluate every read against at " + refLoc);
+ // Make sure that the ref haplotype is amongst the return haplotypes and calculate its score as
+ // the first returned by any finder.
+ if (!returnHaplotypes.contains(refHaplotype)) {
+ double refScore = Double.NaN;
+ for (final KBestHaplotypeFinder finder : finders) {
+ final double candidate = finder.score(refHaplotype);
+ if (Double.isNaN(candidate)) continue;
+ refScore = candidate;
+ break;
+ }
+ refHaplotype.setScore(refScore);
+ returnHaplotypes.add(refHaplotype);
+ }
if( debug ) {
if( returnHaplotypes.size() > 1 ) {
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
index 55a1c5dba..7165e61a5 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java
@@ -90,6 +90,18 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
return new LoglessPairHMM();
else
return new CnyPairHMM();
+ case VECTOR_LOGLESS_CACHING:
+ try
+ {
+ return new VectorLoglessPairHMM();
+ }
+ catch(UnsatisfiedLinkError ule)
+ {
+ logger.debug("Failed to load native library for VectorLoglessPairHMM - using Java implementation of LOGLESS_CACHING");
+ return new LoglessPairHMM();
+ }
+ case DEBUG_VECTOR_LOGLESS_CACHING:
+ return new DebugJNILoglessPairHMM(PairHMM.HMM_IMPLEMENTATION.VECTOR_LOGLESS_CACHING);
case ARRAY_LOGLESS:
if (noFpga || !CnyPairHMM.isAvailable())
return new ArrayLoglessPairHMM();
@@ -162,10 +174,13 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
}
}
+ @Override
public void close() {
if ( likelihoodsStream != null ) likelihoodsStream.close();
+ pairHMMThreadLocal.get().close();
}
+
private void writeDebugLikelihoods(final GATKSAMRecord processedRead, final Haplotype haplotype, final double log10l){
if ( WRITE_LIKELIHOODS_TO_FILE ) {
likelihoodsStream.printf("%s %s %s %s %s %s %f%n",
@@ -316,8 +331,8 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
int X_METRIC_LENGTH = 0;
for( final Map.Entry> sample : perSampleReadList.entrySet() ) {
for( final GATKSAMRecord read : sample.getValue() ) {
- final int readLength = read.getReadLength();
- if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
+ final int readLength = read.getReadLength();
+ if( readLength > X_METRIC_LENGTH ) { X_METRIC_LENGTH = readLength; }
}
}
int Y_METRIC_LENGTH = 0;
@@ -327,7 +342,12 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
}
// initialize arrays to hold the probabilities of being in the match, insertion and deletion cases
- pairHMMThreadLocal.get().initialize(X_METRIC_LENGTH, Y_METRIC_LENGTH);
+ pairHMMThreadLocal.get().initialize(haplotypes, perSampleReadList, X_METRIC_LENGTH, Y_METRIC_LENGTH);
+ }
+
+ private void finalizePairHMM()
+ {
+ pairHMMThreadLocal.get().finalizeRegion();
}
@@ -341,12 +361,14 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
// Add likelihoods for each sample's reads to our stratifiedReadMap
final Map stratifiedReadMap = new LinkedHashMap<>();
for( final Map.Entry> sampleEntry : perSampleReadList.entrySet() ) {
- // evaluate the likelihood of the reads given those haplotypes
+ // evaluate the likelihood of the reads given those haplotypes
final PerReadAlleleLikelihoodMap map = computeReadLikelihoods(haplotypes, sampleEntry.getValue());
map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE);
stratifiedReadMap.put(sampleEntry.getKey(), map);
}
+ //Used mostly by the JNI implementation(s) to free arrays
+ finalizePairHMM();
return stratifiedReadMap;
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java
index b8dba7b86..d5d424ca9 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/RandomLikelihoodCalculationEngine.java
@@ -79,4 +79,9 @@ public class RandomLikelihoodCalculationEngine implements LikelihoodCalculationE
return result;
}
+
+ @Override
+ public void close() {
+ }
+
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java
index 8fba6c9d5..d981e6eeb 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/AggregatedSubHaplotypeFinder.java
@@ -45,21 +45,21 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.PriorityQueue;
+import org.broadinstitute.sting.utils.collections.Pair;
+
+import java.util.*;
/**
* K-best sub-haplotype finder that selects the best solutions out of a collection of sub-haplotype finders.
*
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
*/
-class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
+class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
/**
* Collection of subFinders that provided the actual solutions.
*/
- private final Collection extends KBestSubHaplotypeFinder> subFinders;
+ protected final Collection subFinders;
/**
* Flag indicating whether the sub-finders have been processed or not.
@@ -89,17 +89,53 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
* Creates a new aggregated sub-haplotype finder given its sub-finders.
* @param finders set of sub-finders.
*/
- public AggregatedSubHaplotypeFinder(final Collection extends KBestSubHaplotypeFinder> finders) {
+ public AggregatedSubHaplotypeFinder(final Collection finders) {
if (finders == null) throw new IllegalArgumentException("finder collection cannot be null");
this.subFinders = finders;
}
+ @Override
+ public String id() {
+ final StringBuilder resultBuilder = new StringBuilder();
+ for (final KBestSubHaplotypeFinder subFinder : subFinders)
+ resultBuilder.append(subFinder.id());
+ return resultBuilder.toString();
+ }
+
+ @Override
+ public String label() {
+ return "<OR>";
+ }
+
+ @Override
+ public Set> subFinderLabels() {
+ final int subFinderCount = subFinders.size();
+ final String edgeCost = String.format("%.2f",-Math.log10((double) subFinderCount));
+ final Set> result = new LinkedHashSet<>(subFinderCount);
+ for (final KBestSubHaplotypeFinder subFinder : subFinders)
+ result.add(new Pair<>(subFinder,edgeCost));
+ return result;
+ }
+
@Override
public int getCount() {
processSubFindersIfNeeded();
return count;
}
+ @Override
+ public double score(final byte[] bases, final int offset, final int length) {
+ if (bases == null) throw new IllegalArgumentException("bases cannot be null");
+ if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
+ if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
+ if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
+ for (final KBestSubHaplotypeFinder subFinder : subFinders) {
+ final double score = subFinder.score(bases,offset,length);
+ if (!Double.isNaN(score)) return score;
+ }
+ return Double.NaN;
+ }
+
private void processSubFindersIfNeeded() {
if (processedSubFinders) return;
@@ -144,6 +180,11 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
return rankedSubHaplotype.get(k);
}
+ @Override
+ public boolean isReference() {
+ return false;
+ }
+
/**
* Custom implementation of {@link KBestHaplotype} to encapsulate sub-finder results.
*/
@@ -167,7 +208,7 @@ class AggregatedSubHaplotypeFinder implements KBestSubHaplotypeFinder {
}
@Override
- public int score() {
+ public double score() {
return result.score();
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java
index 36216bdd2..3abd2e4bc 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/BaseGraph.java
@@ -52,6 +52,7 @@ import com.google.java.contract.Requires;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import org.jgrapht.EdgeFactory;
+import org.jgrapht.alg.CycleDetector;
import org.jgrapht.graph.DefaultDirectedGraph;
import java.io.File;
@@ -146,6 +147,39 @@ public class BaseGraph extends Default
return set;
}
+ /**
+ * Convert this kmer graph to a simple sequence graph.
+ *
+ * Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
+ * graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
+ *
+ * @return a newly allocated SequenceGraph
+ */
+ public SeqGraph convertToSequenceGraph() {
+
+ final SeqGraph seqGraph = new SeqGraph(kmerSize);
+ final Map vertexMap = new HashMap<>();
+
+
+ // create all of the equivalent seq graph vertices
+ for ( final V dv : vertexSet() ) {
+ final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
+ sv.setAdditionalInfo(dv.additionalInfo());
+ vertexMap.put(dv, sv);
+ seqGraph.addVertex(sv);
+ }
+
+ // walk through the nodes and connect them to their equivalent seq vertices
+ for( final E e : edgeSet() ) {
+ final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
+ final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
+ //logger.info("Adding edge " + seqInV + " -> " + seqOutV);
+ seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));
+ }
+
+ return seqGraph;
+ }
+
/**
* Pull out the additional sequence implied by traversing this node in the graph
* @param v the vertex from which to pull out the additional base sequence
@@ -712,4 +746,13 @@ public class BaseGraph extends Default
if (!containsVertex(vertex)) return false;
return true;
}
+
+ /**
+ * Checks for the presence of directed cycles in the graph.
+ *
+ * @return {@code true} if the graph has cycles, {@code false} otherwise.
+ */
+ public boolean hasCycles() {
+ return new CycleDetector<>(this).detectCycles();
+ }
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java
index 69b42cee6..71ce9929b 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/CommonSuffixSplitter.java
@@ -122,7 +122,7 @@ public class CommonSuffixSplitter {
} else {
incomingTarget = prefixV;
graph.addVertex(prefixV);
- graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 0));
+ graph.addEdge(prefixV, suffixV, new BaseEdge(out.isRef(), 1));
edgesToRemove.add(out);
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java
index ae270ed7b..5ceaa29c5 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/DeadEndKBestSubHaplotypeFinder.java
@@ -45,6 +45,11 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
+import org.broadinstitute.sting.utils.collections.Pair;
+
+import java.util.Collections;
+import java.util.Set;
+
/**
* Represents a trivial k-best sub haplotype finder with no solutions.
*
@@ -65,6 +70,21 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder {
protected DeadEndKBestSubHaplotypeFinder() {
}
+ @Override
+ public String id() {
+ return "";
+ }
+
+ @Override
+ public String label() {
+ return "<DEAD>";
+ }
+
+ @Override
+ public Set> subFinderLabels() {
+ return Collections.emptySet();
+ }
+
@Override
public int getCount() {
return 0;
@@ -77,4 +97,18 @@ final class DeadEndKBestSubHaplotypeFinder implements KBestSubHaplotypeFinder {
else
throw new IllegalArgumentException("k cannot be equal or greater to the haplotype count");
}
+
+ @Override
+ public boolean isReference() {
+ return false;
+ }
+
+ @Override
+ public double score(final byte[] bases, final int offset, final int length) {
+ if (bases == null) throw new IllegalArgumentException("bases cannot be null");
+ if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
+ if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
+ if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
+ return Double.NaN;
+ }
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java
index 0e50ec02b..1a642d200 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/EmptyPathHaplotypeFinder.java
@@ -45,6 +45,12 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.collections.Pair;
+
+import java.util.Collections;
+import java.util.Set;
+
/**
* Trivial k-best sub-haplotype finder where the source and sink vertex are the same one.
*
@@ -67,6 +73,21 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
singleHaplotypePath = new MyBestHaplotypePath(graph,vertex);
}
+ @Override
+ public String id() {
+ return "v" + singleHaplotypePath.head().getId();
+ }
+
+ @Override
+ public String label() {
+ return singleHaplotypePath.head().getSequenceString();
+ }
+
+ @Override
+ public Set> subFinderLabels() {
+ return Collections.emptySet();
+ }
+
@Override
public int getCount() {
return 1;
@@ -81,6 +102,24 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
return singleHaplotypePath;
}
+ @Override
+ public boolean isReference() {
+ return singleHaplotypePath.isReference();
+ }
+
+ @Override
+ public double score(final byte[] bases, final int offset, final int length) {
+ if (bases == null) throw new IllegalArgumentException("bases cannot be null");
+ if (offset < 0) throw new IllegalArgumentException("the offset cannot be negative");
+ if (length < 0) throw new IllegalArgumentException("the length cannot be negative");
+ if (offset + length > bases.length) throw new IllegalArgumentException("the offset and length go beyond the array size");
+ final byte[] vertexBases = singleHaplotypePath.head().getSequence();
+ if (length != vertexBases.length)
+ return Double.NaN;
+ else
+ return Utils.equalRange(bases, offset, vertexBases, 0, length)? 0 : Double.NaN;
+ }
+
/**
* Custom extension of {@link KBestHaplotype} that implements the single solution behaviour.
*/
@@ -120,7 +159,7 @@ class EmptyPathHaplotypeFinderNode implements KBestSubHaplotypeFinder {
}
@Override
- public int score() {
+ public double score() {
return 0;
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java
index ca22f17ec..d1b5bd614 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotype.java
@@ -68,7 +68,7 @@ public abstract class KBestHaplotype implements Comparable {
*
* @return 0 or greater.
*/
- public abstract int score();
+ public abstract double score();
/**
* Indicates whether this result is the reference haplotype.
@@ -122,6 +122,8 @@ public abstract class KBestHaplotype implements Comparable {
public Haplotype haplotype() {
if (haplotype != null) return haplotype;
haplotype = new Haplotype(bases(),isReference());
+ if (score() > 0)
+ throw new IllegalStateException("score cannot be greater than 0: " + score());
haplotype.setScore(score());
return haplotype;
}
@@ -152,7 +154,35 @@ public abstract class KBestHaplotype implements Comparable {
*/
public int compareTo(final KBestHaplotype other) {
if (other == null) throw new IllegalArgumentException("the other object cannot be null");
- return - 1 * (score() - other.score());
+ return - Double.compare(score(), other.score());
+ }
+
+ @Override
+ public int hashCode() {
+ return haplotype().hashCode();
+ }
+
+ @Override
+ public boolean equals(final Object other) {
+ return other == null ? false: (other instanceof KBestHaplotype ? equals((KBestHaplotype)other) : false);
+ }
+
+ @Override
+ public String toString() {
+ return haplotype().toString() + " Score = " + score();
+ }
+
+ /**
+ * Checks whether both solutions are equal.
+ *
+ * Both solutions are considered equal when the underlying haplotypes are equal. The path on the respective
+ * graph might deffer though.
+ *
+ *
+ * @return {@code true} iff both haplotypes are the same (considering the ref state).
+ */
+ protected boolean equals(final KBestHaplotype other) {
+ return haplotype().equals(other.haplotype(),false);
}
/**
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java
index f27cca12c..5e971792c 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestHaplotypeFinder.java
@@ -45,8 +45,13 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
+import org.broadinstitute.sting.utils.collections.Pair;
+import org.broadinstitute.sting.utils.haplotype.Haplotype;
import org.jgrapht.alg.CycleDetector;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintWriter;
import java.util.*;
/**
@@ -233,7 +238,7 @@ public class KBestHaplotypeFinder extends AbstractList implement
}
@Override
- public KBestHaplotype get(int index) {
+ public KBestHaplotype get(final int index) {
if (index < 0 || index >= size())
throw new IndexOutOfBoundsException();
return topFinder.getKBest(index);
@@ -305,28 +310,28 @@ public class KBestHaplotypeFinder extends AbstractList implement
/**
* Creates a finder from a vertex.
*
- * @param source the source vertex for the finder.
+ * @param vertex the source vertex for the finder.
*
* @return never {@code null}, perhaps a finder that returns no haplotypes though.
*/
- protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex source) {
- KBestSubHaplotypeFinder node = finderByVertex.get(source);
- if (node == null) {
- if (sinks.contains(source))
- node = new EmptyPathHaplotypeFinderNode(graph,source);
+ protected KBestSubHaplotypeFinder createVertexFinder(final SeqVertex vertex) {
+ KBestSubHaplotypeFinder finder = finderByVertex.get(vertex);
+ if (finder == null) {
+ if (sinks.contains(vertex))
+ finder = new EmptyPathHaplotypeFinderNode(graph,vertex);
else {
- final Set outgoingEdges = graph.outgoingEdgesOf(source);
+ final Set outgoingEdges = graph.outgoingEdgesOf(vertex);
if (outgoingEdges.isEmpty())
- node = DeadEndKBestSubHaplotypeFinder.INSTANCE;
+ finder = DeadEndKBestSubHaplotypeFinder.INSTANCE;
else {
final Map undeadChildren = createChildrenFinders(outgoingEdges);
- node = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
- new RecursiveSubHaplotypeFinder(source,undeadChildren);
+ finder = undeadChildren.isEmpty() ? DeadEndKBestSubHaplotypeFinder.INSTANCE :
+ new RecursiveSubHaplotypeFinder(graph,vertex,undeadChildren);
}
}
- finderByVertex.put(source, node);
+ finderByVertex.put(vertex, finder);
}
- return node;
+ return finder;
}
/**
@@ -340,7 +345,7 @@ public class KBestHaplotypeFinder extends AbstractList implement
* @return never {@code null}, perhaps an empty map if there is no children with valid paths to any sink for this
* finder.
*/
- private Map createChildrenFinders(Set baseEdges) {
+ private Map createChildrenFinders(final Set baseEdges) {
final Map result = new LinkedHashMap<>(baseEdges.size());
for (final BaseEdge edge : baseEdges) {
final KBestSubHaplotypeFinder targetFinder = createVertexFinder(graph.getEdgeTarget(edge));
@@ -349,4 +354,156 @@ public class KBestHaplotypeFinder extends AbstractList implement
}
return result;
}
+
+ /**
+ * Print a DOT representation of search graph.
+ *
+ * @param out character stream printer where to print the DOT representation to.
+ *
+ * @throws IllegalArgumentException if {@code out} is {@code null}.
+ */
+ public void printDOT(final PrintWriter out) {
+ if (out == null)
+ throw new IllegalArgumentException("the out writer cannot be null");
+ out.println("digraph {");
+ out.println(" rankdir = LR");
+ out.println(" node [shape=box, margin=0.01]");
+ out.println(" subgraph cluster_dummy { style = invis; x [label=\"\",shape=none,margin=0] }");
+ final StringBuilder referenceCluster = new StringBuilder(1000);
+
+ referenceCluster.append(" subgraph cluster_ref {\n");
+ referenceCluster.append(" node [penwidth=2]\n");
+ for (final KBestSubHaplotypeFinder finder : finderByVertex.values() ) {
+ final String id = finder.id();
+ final String line = String.format(" %s [label=<%s>]",id,finder.label());
+ if (finder.isReference())
+ referenceCluster.append(" ").append(line).append('\n');
+ else
+ out.println(line);
+ }
+ referenceCluster.append(" }");
+ out.println(referenceCluster.toString());
+
+ for (final KBestSubHaplotypeFinder finder : finderByVertex.values())
+ for (final Pair extends KBestSubHaplotypeFinder,String> subFinderLabel : finder.subFinderLabels()) {
+ final KBestSubHaplotypeFinder subFinder = subFinderLabel.getFirst();
+
+ final String edgeLabel = subFinderLabel.getSecond();
+ out.println(String.format(" %s -> %s [label=%s]",finder.id(),subFinder.id(),edgeLabel));
+ }
+ out.println("}");
+ }
+
+ /**
+ * Print a DOT representation of search graph.
+ *
+ * @param file file where to print the DOT representation to.
+ *
+ * @throws IllegalArgumentException if {@code file} is {@code null}.
+ * @throws FileNotFoundException if {@code file} cannot be created or written.
+ * @throws IllegalStateException if there was some trouble when writing the DOT representation.
+ */
+ public void printDOT(final File file) throws FileNotFoundException {
+ if (file == null)
+ throw new IllegalArgumentException("the output file cannot be null");
+ final PrintWriter out = new PrintWriter(file);
+ printDOT(out);
+ if (out.checkError())
+ throw new IllegalStateException("error occurred while writing k-best haplotype search graph into file '"
+ + file.getAbsolutePath() + "'");
+ out.close();
+ }
+
+ /**
+ * Print a DOT representation of search graph.
+ *
+ * @param fileName name of the file where to print the DOT representation to.
+ *
+ * @throws IllegalArgumentException if {@code fileName} is {@code null}.
+ * @throws FileNotFoundException if no file named {@code fileName} cannot be created or written.
+ * @throws IllegalStateException if there was some trouble when writing the DOT representation.
+ */
+ @SuppressWarnings("unused") // Available for debugging purposes.
+ public void printDOTFile(final String fileName) throws FileNotFoundException {
+ printDOT(new File(fileName));
+ }
+
+ /**
+ * Get the score of a give sequence of bases
+ *
+ * @param bases the base sequence.
+ *
+ * @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible
+ * throw this finder.
+ */
+ public double score(final byte[] bases) {
+ return topFinder.score(bases,0,bases.length);
+ }
+
+ /**
+ * Get the score of a give sequence of bases
+ *
+ * @param haplotype the haplotype.
+ *
+ * @return {@link Double#NaN} if there is no score for the sequence, i.e. there is no such a haplotype accessible
+ * throw this finder.
+ */
+ public double score(final Haplotype haplotype) {
+ return score(haplotype.getBases());
+ }
+
+
+ /**
+ * Returns a unique list of haplotypes solutions.
+ *
+ * The result will not contain more than one haplotype with the same base sequence. The solution of the best
+ * score is returned.
+ *
+ *
+ * This makes sense when there are more than one possible path through the graph to create the same haplotype.
+ *
+ *
+ * The resulting list is sorted by the score with more likely haplotype search results first.
+ *
+ *
+ * @param maxSize maximum number of unique results to return.
+ *
+ * @throws IllegalArgumentException if {@code maxSize} is negative.
+ *
+ * @return never {@code null}, perhaps an empty list.
+ */
+ public List unique(final int maxSize) {
+ if (maxSize < 0) throw new IllegalArgumentException("maxSize cannot be negative");
+ final int requiredCapacity = Math.min(maxSize,size());
+ final Set haplotypes = new HashSet<>(requiredCapacity);
+ int resultSize = 0;
+ final List result = new ArrayList<>(requiredCapacity);
+ for (final KBestHaplotype kbh : this) {
+ if (haplotypes.add(kbh.haplotype())) {
+ result.add(kbh);
+ if (resultSize == maxSize) break;
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Returns a unique list of haplotypes solutions.
+ *
+ *
+ * The result will not contain more than one haplotype with the same base sequence. The solution of the best
+ * score is returned.
+ *
+ *
+ * This makes sense when there are more than one possible path through the graph to create the same haplotype.
+ *
+ *
+ * The resulting list is sorted by the score with more likely haplotype search results first.
+ *
+ *
+ * @return never {@code null}, perhaps an empty list.
+ */
+ public List unique() {
+ return unique(size());
+ }
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java
index 9c185b52c..eb3360500 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/KBestSubHaplotypeFinder.java
@@ -45,6 +45,10 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
+import org.broadinstitute.sting.utils.collections.Pair;
+
+import java.util.Set;
+
/**
* Common interface for K-Best sub-haplotype finders.
*
@@ -52,6 +56,29 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
*/
interface KBestSubHaplotypeFinder {
+ /**
+ * Return an unique id for this sub-haplotype finder to be used when outputting diagrams.
+ *
+ * @return never {@code null}.
+ */
+ public String id();
+
+ /**
+ * Returns a label with human readable representation of this finder.
+ *
+ *
This is used when generating a diagram to illustrate the search space and costs
+ *
+ * @return never {@code null}.
+ */
+ public String label();
+
+ /**
+ * Returns the set of subfinder from this finder together with a label for the connection with the current finder.
+ *
+ *
The label is used when generating a diagram to illustrate the search space and costs
+ */
+ public Set> subFinderLabels();
+
/**
* Returns the total number of possible sub-haplotypes.
* @return 0 or greater.
@@ -67,5 +94,22 @@ interface KBestSubHaplotypeFinder {
*
* @return never {@code null}.
*/
- public abstract KBestHaplotype getKBest(int k);
+ public KBestHaplotype getKBest(int k);
+
+ /**
+ * Checks whether the top vertex for this finder is a reference haplotype vertex.
+ *
+ * @return {@code true} iff the top vertex for this finder is a reference vertex.
+ */
+ public boolean isReference();
+
+ /**
+ * Calculate the score of a sequence of bases.
+ *
+ * @param bases array containing the query base sequence.
+ * @param offset first position of the query base sequence in {@code bases} .
+ * @param length length of the query base sequence.
+ * @return {@link Double#NaN} if there is no score for this sequence, otherwise a valid score value.
+ */
+ public double score(byte[] bases, int offset, int length);
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java
index 978d83eb4..657ecfd85 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/MultiSampleEdge.java
@@ -49,20 +49,24 @@ package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
import java.util.PriorityQueue;
/**
- * edge class for connecting nodes in the graph that tracks some per-sample information
- *
+ * Edge class for connecting nodes in the graph that tracks some per-sample information.
+ *
* This class extends BaseEdge with the additional functionality of tracking the maximum
* multiplicity seen within any single sample. The workflow for using this class is:
- *
- * MultiSampleEdge e = new MultiSampleEdge(ref, 1)
- * e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1
- * e.getPruningMultiplicity() // = 1
- * e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
- * e.getPruningMultiplicity() // = 2
- * e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2
- * e.getPruningMultiplicity() // = 2
- * e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
- * e.getPruningMultiplicity() // = 3
+ *
+ *
+ * {@code
+ * MultiSampleEdge e = new MultiSampleEdge(ref, 1)
+ * e.incMultiplicity(1) // total is 2, per sample is 2, max per sample is 1
+ * e.getPruningMultiplicity() // = 1
+ * e.flushSingleSampleMultiplicity() // total is 2, per sample is 0, max per sample is 2
+ * e.getPruningMultiplicity() // = 2
+ * e.incMultiplicity(3) // total is 5, per sample is 3, max per sample is 2
+ * e.getPruningMultiplicity() // = 2
+ * e.flushSingleSampleMultiplicity() // total is 5, per sample is 0, max per sample is 3
+ * e.getPruningMultiplicity() // = 3
+ * }
+ *
*/
public class MultiSampleEdge extends BaseEdge {
private int currentSingleSampleMultiplicity;
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java
index 0fbbfdc64..fe85befef 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/RecursiveSubHaplotypeFinder.java
@@ -45,9 +45,10 @@
*/
package org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Map;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.collections.Pair;
+
+import java.util.*;
/**
* General recursive sub-haplotype finder.
@@ -67,7 +68,11 @@ import java.util.Map;
*
* @author Valentin Ruano-Rubio <valentin@broadinstitute.org>
*/
-class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
+class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
+
+
+ private final SeqVertex vertex;
+ private final boolean isReference;
/**
* Creates a recursive sub-haplotype finder give the target graph, first vertex and all possible outgoing edges
@@ -80,20 +85,83 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
* @param vertex first vertex for all sub-haplotype solutions provided by this finder
* @param children map from outgoing edge to the corresponding sub-sub-haplotype finder.
*/
- public RecursiveSubHaplotypeFinder(final SeqVertex vertex,
+ public RecursiveSubHaplotypeFinder(final SeqGraph graph, final SeqVertex vertex,
final Map children) {
super(createChildFinderCollection(vertex, children));
+ this.vertex = vertex;
+ this.isReference = graph.isReferenceNode(vertex);
}
- private static Collection createChildFinderCollection(final SeqVertex vertex, final Map finders) {
+ /**
+ * Wraps the descendant vertices finders in order to take advantage of the {@link AggregatedSubHaplotypeFinder}
+ * common code.
+ *
+ * Automatically calibrates the edge score (cost) so that it takes into account the total across all edges.
+ *
+ *
+ * @param vertex the parent vertex.
+ * @param finders the child vertices indexed by the connecting edge.
+ * @return never {@code null} but potentially an empty collection if there is child returning some sub-haplotype
+ * solution.
+ */
+ private static Collection createChildFinderCollection(final SeqVertex vertex,
+ final Map finders) {
if (finders == null) throw new IllegalArgumentException("the edge to child map cannot be null");
- final Collection result = new ArrayList<>(finders.size());
- for (final Map.Entry e : finders.entrySet())
- result.add(new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue()));
+ final ArrayList result = new ArrayList<>(finders.size());
+ for (final Map.Entry e : finders.entrySet()) {
+ final EdgeSubHaplotypeFinder subFinder = new EdgeSubHaplotypeFinder(vertex,e.getKey(), e.getValue());
+ if (subFinder.getCount() == 0) continue;
+ result.add(subFinder);
+ }
+ if (result.size() == 0)
+ return Collections.emptySet();
+ else if (result.size() == 1) // no calibration needed, by default edgeScore is 0.
+ return Collections.singleton(result.get(0));
+ else {
+ double totalEdgeMultiplicityAcrossEdges = 0;
+ for (final EdgeSubHaplotypeFinder finder : result)
+ totalEdgeMultiplicityAcrossEdges += Math.max(0.5,finder.edge.getMultiplicity());
+ final double log10TotalEdgeMultiplicityAcrossEdges = Math.log10(totalEdgeMultiplicityAcrossEdges);
+ for (final EdgeSubHaplotypeFinder finder : result)
+ finder.calibrateEdgeScore(log10TotalEdgeMultiplicityAcrossEdges);
+ return result;
+ }
+ }
+
+ @Override
+ public boolean isReference() {
+ return isReference;
+ }
+
+ @Override
+ public String label() {
+ return vertex.getSequenceString();
+ }
+
+ @Override
+ public Set> subFinderLabels() {
+ final Set> result = new LinkedHashSet<>(subFinders.size());
+ for (final EdgeSubHaplotypeFinder subFinder : subFinders)
+ result.add(new Pair<>(subFinder,simplifyZeros(String.format("%.4f", subFinder.edgeScore))));
return result;
}
- private static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
+ /**
+ * Removes zeros decimal positions from edge-labels.
+ *
+ * @param edgeLabel the original label to reformat.
+ * @return never {@code null}, the reformatted label.
+ */
+ private String simplifyZeros(final String edgeLabel) {
+ if (edgeLabel.equals("0.000") || edgeLabel.equals("-0.000") )
+ return "0.";
+ int i = edgeLabel.length() - 1;
+ while (edgeLabel.charAt(i) == '0')
+ i--;
+ return (i == edgeLabel.length() - 1) ? edgeLabel : edgeLabel.substring(0,i);
+ }
+
+ protected static class EdgeSubHaplotypeFinder implements KBestSubHaplotypeFinder {
private final KBestSubHaplotypeFinder childFinder;
@@ -101,10 +169,32 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
private final BaseEdge edge;
+ private double edgeScore = 0;
+
private EdgeSubHaplotypeFinder(final SeqVertex vertex, final BaseEdge edge, final KBestSubHaplotypeFinder childFinder) {
this.childFinder = childFinder;
this.edge = edge;
this.vertex = vertex;
+ this.edgeScore = 0;
+ }
+
+ private void calibrateEdgeScore(final double log10TotalMultiplicityAcrossOutgoingEdges) {
+ edgeScore = Math.log10(Math.max(edge.getMultiplicity(),0.5)) - log10TotalMultiplicityAcrossOutgoingEdges;
+ }
+
+ @Override
+ public String id() {
+ return childFinder.id();
+ }
+
+ @Override
+ public String label() {
+ return childFinder.label();
+ }
+
+ @Override
+ public Set> subFinderLabels() {
+ return childFinder.subFinderLabels();
}
@Override
@@ -114,8 +204,31 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
@Override
public KBestHaplotype getKBest(int k) {
- return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k));
+ return new ChildKBestSubHaplotype(vertex,edge,childFinder.getKBest(k),edgeScore);
}
+
+ @Override
+ public boolean isReference() {
+ return childFinder.isReference();
+ }
+
+ @Override
+ public double score(final byte[] bases, final int offset, final int length) {
+ if (length == 0)
+ return 0;
+ final byte[] vertexSequence = vertex.getSequence();
+ if (length < vertexSequence.length) // query is not long enough to have any score.
+ return Double.NaN;
+ else if (!Utils.equalRange(vertexSequence,0,bases,offset,vertexSequence.length))
+ return Double.NaN;
+ else
+ return edgeScore + childFinder.score(bases,offset + vertexSequence.length,length - vertexSequence.length);
+ }
+ }
+
+ @Override
+ public String id() {
+ return "v" + vertex.getId();
}
/**
@@ -129,13 +242,14 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
*/
private static class ChildKBestSubHaplotype extends KBestHaplotype {
- private final int score;
+ private final double score;
private final KBestHaplotype child;
private final SeqVertex vertex;
private final boolean isReference;
- public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child) {
- this.score = edge.getMultiplicity() + child.score();
+
+ public ChildKBestSubHaplotype(final SeqVertex vertex, final BaseEdge edge, final KBestHaplotype child, final double edgeScore) {
+ this.score = edgeScore + child.score();
this.vertex = vertex;
this.child = child;
this.isReference = edge.isRef() && child.isReference();
@@ -147,7 +261,7 @@ class RecursiveSubHaplotypeFinder extends AggregatedSubHaplotypeFinder {
}
@Override
- public int score() {
+ public double score() {
return score;
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java
index c8c6abb86..087c07be7 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SeqGraph.java
@@ -291,16 +291,9 @@ public class SeqGraph extends BaseGraph {
final SeqVertex addedVertex = mergeLinearChainVertices(linearChain);
addVertex(addedVertex);
- final Set inEdges = incomingEdgesOf(first);
- final Set outEdges = outgoingEdgesOf(last);
-
- final int nEdges = inEdges.size() + outEdges.size();
- int sharedWeightAmongEdges = nEdges == 0 ? 0 : sumEdgeWeightAlongChain(linearChain) / nEdges;
- final BaseEdge inc = new BaseEdge(false, sharedWeightAmongEdges); // template to make .add function call easy
-
// update the incoming and outgoing edges to point to the new vertex
- for( final BaseEdge edge : outEdges ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy().add(inc)); }
- for( final BaseEdge edge : inEdges ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy().add(inc)); }
+ for( final BaseEdge edge : outgoingEdgesOf(last) ) { addEdge(addedVertex, getEdgeTarget(edge), edge.copy()); }
+ for( final BaseEdge edge : incomingEdgesOf(first) ) { addEdge(getEdgeSource(edge), addedVertex, edge.copy()); }
removeAllVertices(linearChain);
return true;
@@ -313,29 +306,6 @@ public class SeqGraph extends BaseGraph {
return new SeqVertex( seqsCat );
}
- /**
- * Get the sum of the edge weights on a linear chain of at least 2 elements
- *
- * @param chain a linear chain of vertices with at least 2 vertices
- * @return the sum of the multiplicities along all edges connecting vertices within the chain
- */
- @Requires({"chain != null", "chain.size() >= 2"})
- private int sumEdgeWeightAlongChain(final LinkedList chain) {
- int sum = 0;
- SeqVertex prev = null;
-
- for ( final SeqVertex v : chain ) {
- if ( prev != null ) {
- final BaseEdge e = getEdge(prev, v);
- if ( e == null ) throw new IllegalStateException("Something wrong with the linear chain, got a null edge between " + prev + " and " + v);
- sum += e.getMultiplicity();
- }
- prev = v;
- }
-
- return sum;
- }
-
/**
* Base class for transformation operations that need to iterate over proposed vertices, where
* each proposed vertex is a seed vertex for a potential transformation.
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java
index 284062749..401cbf18c 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/graphs/SharedVertexSequenceSplitter.java
@@ -247,12 +247,12 @@ public class SharedVertexSequenceSplitter {
if ( needPrefixNode ) {
outer.addVertex(prefixV);
- if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 0));
+ if ( top != null ) outer.addEdge(top, prefixV, BaseEdge.orRef(splitGraph.outgoingEdgesOf(prefixV), 1));
}
if ( needSuffixNode ) {
outer.addVertex(suffixV);
- if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 0));
+ if ( bot != null ) outer.addEdge(suffixV, bot, BaseEdge.orRef(splitGraph.incomingEdgesOf(suffixV), 1));
}
if ( topForConnect != null ) {
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
index a7989ac2c..e03e26e0a 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/haplotypecaller/readthreading/ReadThreadingGraph.java
@@ -52,7 +52,6 @@ import org.broadinstitute.sting.gatk.walkers.haplotypecaller.Kmer;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.graphs.*;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.jgrapht.alg.CycleDetector;
import java.io.File;
import java.util.*;
@@ -88,8 +87,7 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
/**
*
*/
-
- final boolean debugGraphTransformations;
+ private final boolean debugGraphTransformations;
final byte minBaseQualityToUseInAssembly;
protected boolean increaseCountsBackwards = true;
@@ -319,13 +317,6 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
removeAllVertices(verticesToRemove);
}
- /**
- * @return true if the graph has cycles, false otherwise
- */
- public boolean hasCycles() {
- return new CycleDetector<>(this).detectCycles();
- }
-
/**
* Does the graph not have enough complexity? We define low complexity as a situation where the number
* of non-unique kmers is more than 20% of the total number of kmers.
@@ -419,39 +410,10 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
return counter.getKmersWithCountsAtLeast(2);
}
- /**
- * Convert this kmer graph to a simple sequence graph.
- *
- * Each kmer suffix shows up as a distinct SeqVertex, attached in the same structure as in the kmer
- * graph. Nodes that are sources are mapped to SeqVertex nodes that contain all of their sequence
- *
- * @return a newly allocated SequenceGraph
- */
- // TODO -- should override base class method
+ @Override
public SeqGraph convertToSequenceGraph() {
buildGraphIfNecessary();
-
- final SeqGraph seqGraph = new SeqGraph(kmerSize);
- final Map vertexMap = new HashMap<>();
-
-
- // create all of the equivalent seq graph vertices
- for ( final MultiDeBruijnVertex dv : vertexSet() ) {
- final SeqVertex sv = new SeqVertex(dv.getAdditionalSequence(isSource(dv)));
- sv.setAdditionalInfo(dv.additionalInfo());
- vertexMap.put(dv, sv);
- seqGraph.addVertex(sv);
- }
-
- // walk through the nodes and connect them to their equivalent seq vertices
- for( final MultiSampleEdge e : edgeSet() ) {
- final SeqVertex seqInV = vertexMap.get(getEdgeSource(e));
- final SeqVertex seqOutV = vertexMap.get(getEdgeTarget(e));
- //logger.info("Adding edge " + seqInV + " -> " + seqOutV);
- seqGraph.addEdge(seqInV, seqOutV, new BaseEdge(e.isRef(), e.getMultiplicity()));
- }
-
- return seqGraph;
+ return super.convertToSequenceGraph();
}
private void increaseCountsInMatchedKmers(final SequenceForKmers seqForKmers,
@@ -749,15 +711,15 @@ public class ReadThreadingGraph extends DanglingChainMergingGraph implements Kme
}
private static String pathElementId(final String element) {
- final int parentesysPos = element.indexOf('(');
+ final int openBracketPosition = element.indexOf('(');
- if (parentesysPos == -1)
+ if (openBracketPosition == -1)
return null;
- final int closeParentesysPos = element.lastIndexOf(')');
- if (closeParentesysPos == -1)
+ final int closeBracketPosition = element.lastIndexOf(')');
+ if (closeBracketPosition == -1)
throw new IllegalArgumentException("non-closed id parantesys found in element: " + element);
- final String result = element.substring(parentesysPos + 1,closeParentesysPos).trim();
+ final String result = element.substring(openBracketPosition + 1,closeBracketPosition).trim();
if (result.isEmpty())
throw new IllegalArgumentException("empty id found in element: " + element);
return result;
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
index 1f355359d..f16399e62 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@@ -247,7 +247,7 @@ public class VariantDataManager {
logger.warn( "WARNING: Training with very few variant sites! Please check the model reporting PDF to ensure the quality of the model is reliable." );
} else if( trainingData.size() > VRAC.MAX_NUM_TRAINING_DATA ) {
logger.warn( "WARNING: Very large training set detected. Downsampling to " + VRAC.MAX_NUM_TRAINING_DATA + " training variants." );
- Collections.shuffle(trainingData);
+ Collections.shuffle(trainingData, GenomeAnalysisEngine.getRandomGenerator());
return trainingData.subList(0, VRAC.MAX_NUM_TRAINING_DATA);
}
return trainingData;
@@ -295,13 +295,13 @@ public class VariantDataManager {
public List getRandomDataForPlotting( final int numToAdd, final List trainingData, final List antiTrainingData, final List evaluationData ) {
final List returnData = new ExpandingArrayList<>();
- Collections.shuffle(trainingData);
- Collections.shuffle(antiTrainingData);
- Collections.shuffle(evaluationData);
+ Collections.shuffle(trainingData, GenomeAnalysisEngine.getRandomGenerator());
+ Collections.shuffle(antiTrainingData, GenomeAnalysisEngine.getRandomGenerator());
+ Collections.shuffle(evaluationData, GenomeAnalysisEngine.getRandomGenerator());
returnData.addAll(trainingData.subList(0, Math.min(numToAdd, trainingData.size())));
returnData.addAll(antiTrainingData.subList(0, Math.min(numToAdd, antiTrainingData.size())));
returnData.addAll(evaluationData.subList(0, Math.min(numToAdd, evaluationData.size())));
- Collections.shuffle(returnData);
+ Collections.shuffle(returnData, GenomeAnalysisEngine.getRandomGenerator());
return returnData;
}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java
index 3d1a9da57..b8a0585a2 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CalculateGenotypePosteriors.java
@@ -50,6 +50,7 @@ import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@@ -59,6 +60,8 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
import org.broadinstitute.sting.gatk.walkers.haplotypecaller.PairHMMLikelihoodCalculationEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.help.HelpConstants;
import org.broadinstitute.sting.utils.variant.GATKVCFUtils;
import org.broadinstitute.sting.utils.variant.GATKVariantContextUtils;
import org.broadinstitute.variant.variantcontext.VariantContext;
@@ -148,6 +151,7 @@ import java.util.*;
*
*
*/
+@DocumentedGATKFeature( groupName = HelpConstants.DOCS_CAT_VARMANIP, extraDocs = {CommandLineGATK.class} )
public class CalculateGenotypePosteriors extends RodWalker {
/**
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java
index 0f577cb23..580380513 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/gatk/walkers/variantutils/CombineGVCFs.java
@@ -134,6 +134,9 @@ public class CombineGVCFs extends RodWalker 1 ? startingStates.refBases[1] : (byte)'N');
}
@@ -289,7 +292,8 @@ public class CombineGVCFs extends RodWalker attrs = new HashMap<>(1);
- attrs.put(VCFConstants.END_KEY, Integer.toString(end));
+ if ( !USE_BP_RESOLUTION )
+ attrs.put(VCFConstants.END_KEY, Integer.toString(end));
// genotypes
final GenotypesContext genotypes = GenotypesContext.create();
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java
new file mode 100644
index 000000000..ea93ebe4a
--- /dev/null
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/DebugJNILoglessPairHMM.java
@@ -0,0 +1,517 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1. DEFINITIONS
+* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
+*
+* 2. LICENSE
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
+* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+* Copyright 2012 Broad Institute, Inc.
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+* 4. INDEMNIFICATION
+* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+* 5. NO REPRESENTATIONS OR WARRANTIES
+* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+* 6. ASSIGNMENT
+* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+* 7. MISCELLANEOUS
+* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.utils.pairhmm;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.sting.utils.haplotype.Haplotype;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import static org.broadinstitute.sting.utils.pairhmm.PairHMMModel.*;
+
+import java.util.List;
+import java.util.Map;
+import java.util.HashMap;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.BufferedWriter;
+import java.util.Map;
+import java.util.HashMap;
+import java.io.IOException;
+
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin, carneiro
+ * Date: 10/16/12
+ */
+public class DebugJNILoglessPairHMM extends LoglessPairHMM {
+
+ private static final boolean dumpSandboxOnly = false; //simulates ifdef
+ private static final boolean debug = false; //simulates ifdef
+ private static final boolean verify = !dumpSandboxOnly && (debug || true); //simulates ifdef
+ private static final boolean debug0_1 = false; //simulates ifdef
+ private static final boolean debug1 = false; //simulates ifdef
+ private static final boolean debug2 = false;
+ private static final boolean debug3 = false;
+
+ //Debugging stats
+ private int numCalls = 0;
+ private int numComputeLikelihoodCalls = 0;
+ protected HashMap filenameToWriter = new HashMap();
+
+ private JNILoglessPairHMM jniPairHMM = null;
+ public DebugJNILoglessPairHMM(final PairHMM.HMM_IMPLEMENTATION hmmType) {
+ super();
+ switch(hmmType) {
+ case VECTOR_LOGLESS_CACHING:
+ jniPairHMM = new VectorLoglessPairHMM();
+ break;
+ default:
+ throw new UserException.BadArgumentValue("pairHMM","Specified JNIPairHMM implementation is unrecognized or incompatible with the HaplotypeCaller. Acceptable options are VECTOR_LOGLESS_CACHING");
+ }
+ }
+
+ @Override
+ public void close()
+ {
+ jniPairHMM.close();
+ debugClose();
+ }
+
+ //Used only when testing parts of the compute kernel
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void initialize( final int readMaxLength, final int haplotypeMaxLength ) {
+ if(verify)
+ super.initialize(readMaxLength, haplotypeMaxLength);
+ if(debug3)
+ {
+ System.out.println("Java: alloc initialized readMaxLength : "+readMaxLength+" haplotypeMaxLength : "+haplotypeMaxLength);
+ debugDump("lengths_java.txt", String.format("%d %d\n",readMaxLength, haplotypeMaxLength),
+ true);
+ }
+ if(debug2)
+ jniInitialize(readMaxLength, haplotypeMaxLength);
+ }
+
+ private HashMap haplotypeToHaplotypeListIdxMap = null;
+ //Used to transfer data to JNI
+ //Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void initialize( final List haplotypes, final Map> perSampleReadList,
+ final int readMaxLength, final int haplotypeMaxLength ) {
+ if(verify)
+ {
+ super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
+ jniPairHMM.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
+ haplotypeToHaplotypeListIdxMap = jniPairHMM.getHaplotypeToHaplotypeListIdxMap();
+ }
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void finalizeRegion()
+ {
+ if(!dumpSandboxOnly)
+ jniPairHMM.finalizeRegion();
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public PerReadAlleleLikelihoodMap computeLikelihoods( final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap ) {
+ // (re)initialize the pairHMM only if necessary
+ final int readMaxLength = verify ? findMaxReadLength(reads) : 0;
+ final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0;
+ if(verify)
+ {
+ if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength)
+ { initialize(readMaxLength, haplotypeMaxLength); }
+ if ( ! initialized )
+ throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode");
+ }
+ int readListSize = reads.size();
+ int numHaplotypes = alleleHaplotypeMap.size();
+ int numTestcases = readListSize*numHaplotypes;
+ if(debug0_1)
+ System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
+ int idx = 0;
+ for(GATKSAMRecord read : reads)
+ {
+ byte [] overallGCP = GCPArrayMap.get(read);
+ if(debug0_1)
+ System.out.println("Java read length "+read.getReadBases().length);
+ if(debug3)
+ {
+ for(int i=0;i currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
+ {
+ byte[] haplotypeBases = currEntry.getValue().getBases();
+ if(debug0_1)
+ System.out.println("Java haplotype length "+haplotypeBases.length);
+ if(debug3)
+ {
+ for(int i=0;i currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
+ {
+ idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
+ likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
+ ++idx;
+ }
+ readIdx += numHaplotypes;
+ }
+ //for floating point values, no exact equality
+ //check whether numbers are close in terms of abs_error or relative_error
+ //For very large values, relative_error is relevant
+ //For very small values, abs_error is relevant
+ for(int i=0;i 1e-5 && relative_error > 1e-5)
+ {
+ toDump = true;
+ break;
+ }
+ }
+ }
+ //if numbers are not close, then dump out the data that produced the inconsistency
+ if(toDump)
+ {
+ idx = 0;
+ System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
+ boolean firstLine = true;
+ for(GATKSAMRecord read : reads)
+ {
+ byte [] overallGCP = GCPArrayMap.get(read);
+ byte[] tmpByteArray = new byte[read.getReadBases().length];
+ for (Map.Entry currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
+ {
+ byte[] haplotypeBases = currEntry.getValue().getBases();
+ debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true);
+ debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true);
+ for(int k=0;k currEntry : filenameToWriter.entrySet()) {
+ BufferedWriter currWriter = currEntry.getValue();
+ try
+ {
+ currWriter.flush();
+ currWriter.close();
+ }
+ catch(IOException e)
+ {
+ e.printStackTrace();
+
+ }
+ }
+ filenameToWriter.clear();
+ }
+}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java
new file mode 100644
index 000000000..f039cc295
--- /dev/null
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java
@@ -0,0 +1,63 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1. DEFINITIONS
+* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
+*
+* 2. LICENSE
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
+* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+* Copyright 2012 Broad Institute, Inc.
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+* 4. INDEMNIFICATION
+* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+* 5. NO REPRESENTATIONS OR WARRANTIES
+* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+* 6. ASSIGNMENT
+* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+* 7. MISCELLANEOUS
+* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.utils.pairhmm;
+
+import org.broadinstitute.sting.utils.haplotype.Haplotype;
+
+import java.util.HashMap;
+
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin, carneiro
+ * Date: 10/16/12
+ */
+public abstract class JNILoglessPairHMM extends LoglessPairHMM {
+ public abstract HashMap getHaplotypeToHaplotypeListIdxMap();
+ protected long setupTime = 0;
+
+}
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java
index ed35e6970..31a0d1363 100644
--- a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/LoglessPairHMM.java
@@ -66,7 +66,7 @@ public class LoglessPairHMM extends N2MemoryPairHMM {
protected static final double TRISTATE_CORRECTION = 3.0;
-
+
/**
* {@inheritDoc}
*/
diff --git a/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java
new file mode 100644
index 000000000..e69d9ea50
--- /dev/null
+++ b/protected/gatk-protected/src/main/java/org/broadinstitute/sting/utils/pairhmm/VectorLoglessPairHMM.java
@@ -0,0 +1,340 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1. DEFINITIONS
+* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
+*
+* 2. LICENSE
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
+* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+* Copyright 2012 Broad Institute, Inc.
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+* 4. INDEMNIFICATION
+* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+* 5. NO REPRESENTATIONS OR WARRANTIES
+* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+* 6. ASSIGNMENT
+* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+* 7. MISCELLANEOUS
+* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.utils.pairhmm;
+
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.QualityUtils;
+
+import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.sting.utils.haplotype.Haplotype;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.variant.variantcontext.Allele;
+
+import java.util.List;
+import java.util.Map;
+import java.util.HashMap;
+
+//For loading library from jar
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin, carneiro
+ * Date: 10/16/12
+ */
+public class VectorLoglessPairHMM extends JNILoglessPairHMM {
+
+ //For machine capabilities
+ public static final long sse41Mask = 1;
+ public static final long sse42Mask = 2;
+ public static final long avxMask = 4;
+ public static final long enableAll = 0xFFFFFFFFFFFFFFFFl;
+
+ //Used to copy references to byteArrays to JNI from reads
+ protected class JNIReadDataHolderClass {
+ public byte[] readBases = null;
+ public byte[] readQuals = null;
+ public byte[] insertionGOP = null;
+ public byte[] deletionGOP = null;
+ public byte[] overallGCP = null;
+ }
+
+ //Used to copy references to byteArrays to JNI from haplotypes
+ protected class JNIHaplotypeDataHolderClass {
+ public byte[] haplotypeBases = null;
+ }
+
+ /**
+ * Return 64-bit mask representing machine capabilities
+ * Bit 0 is LSB, bit 63 MSB
+ * Bit 0 represents sse4.1 availability
+ * Bit 1 represents sse4.2 availability
+ * Bit 2 represents AVX availability
+ */
+ public native long jniGetMachineType();
+
+ /**
+ * Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM.
+ * C++ codegets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not
+ * change per JVM session
+ * @param readDataHolderClass class type of JNIReadDataHolderClass
+ * @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass
+ * @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask
+ * */
+ private native void jniInitializeClassFieldsAndMachineMask(Class> readDataHolderClass, Class> haplotypeDataHolderClass, long mask);
+
+ private static Boolean isVectorLoglessPairHMMLibraryLoaded = false;
+ //The constructor is called only once inside PairHMMLikelihoodCalculationEngine
+ public VectorLoglessPairHMM() {
+ super();
+
+ logger.warn("WARNING: the VectorLoglessPairHMM is an experimental implementation still under active development. " +
+ "Use at your own risk!");
+
+ synchronized(isVectorLoglessPairHMMLibraryLoaded) {
+ //Load the library and initialize the FieldIDs
+ if(!isVectorLoglessPairHMMLibraryLoaded) {
+ try
+ {
+ //Try loading from Java's library path first
+ //Useful if someone builds his/her own library and wants to override the bundled
+ //implementation without modifying the Java code
+ System.loadLibrary("VectorLoglessPairHMM");
+ logger.info("libVectorLoglessPairHMM found in JVM library path");
+ }
+ catch(UnsatisfiedLinkError ule)
+ {
+ //Could not load from Java's library path - try unpacking from jar
+ try
+ {
+ logger.debug("libVectorLoglessPairHMM not found in JVM library path - trying to unpack from StingUtils.jar");
+ loadLibraryFromJar("/org/broadinstitute/sting/utils/pairhmm/libVectorLoglessPairHMM.so");
+ logger.debug("libVectorLoglessPairHMM unpacked successfully from StingUtils.jar");
+ }
+ catch(IOException ioe)
+ {
+ //Throw the UnsatisfiedLinkError to make it clear to the user what failed
+ throw ule;
+ }
+ }
+
+ isVectorLoglessPairHMMLibraryLoaded = true;
+ jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once
+ }
+ }
+ }
+
+ private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
+ //Hold the mapping between haplotype and index in the list of Haplotypes passed to initialize
+ //Use this mapping in computeLikelihoods to find the likelihood value corresponding to a given Haplotype
+ private HashMap haplotypeToHaplotypeListIdxMap = new HashMap();
+ private JNIHaplotypeDataHolderClass[] mHaplotypeDataArray;
+ @Override
+ public HashMap getHaplotypeToHaplotypeListIdxMap() { return haplotypeToHaplotypeListIdxMap; }
+
+ //Used to transfer data to JNI
+ //Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void initialize( final List haplotypes, final Map> perSampleReadList,
+ final int readMaxLength, final int haplotypeMaxLength ) {
+ int numHaplotypes = haplotypes.size();
+ mHaplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
+ int idx = 0;
+ haplotypeToHaplotypeListIdxMap.clear();
+ for(final Haplotype currHaplotype : haplotypes)
+ {
+ mHaplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
+ mHaplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases();
+ haplotypeToHaplotypeListIdxMap.put(currHaplotype, idx);
+ ++idx;
+ }
+ jniInitializeHaplotypes(numHaplotypes, mHaplotypeDataArray);
+ }
+ /**
+ * Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
+ * accessing Java memory directly, still important to release memory from C++
+ */
+ private native void jniFinalizeRegion();
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void finalizeRegion()
+ {
+ jniFinalizeRegion();
+ }
+
+ /**
+ * Real compute kernel
+ */
+ private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray,
+ JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse);
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public PerReadAlleleLikelihoodMap computeLikelihoods( final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap ) {
+ if(doProfiling)
+ startTime = System.nanoTime();
+ int readListSize = reads.size();
+ int numHaplotypes = alleleHaplotypeMap.size();
+ int numTestcases = readListSize*numHaplotypes;
+ JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
+ int idx = 0;
+ for(GATKSAMRecord read : reads)
+ {
+ readDataArray[idx] = new JNIReadDataHolderClass();
+ readDataArray[idx].readBases = read.getReadBases();
+ readDataArray[idx].readQuals = read.getBaseQualities();
+ readDataArray[idx].insertionGOP = read.getBaseInsertionQualities();
+ readDataArray[idx].deletionGOP = read.getBaseDeletionQualities();
+ readDataArray[idx].overallGCP = GCPArrayMap.get(read);
+ ++idx;
+ }
+
+ mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results
+ if(doProfiling)
+ setupTime += (System.nanoTime() - startTime);
+ //for(reads)
+ // for(haplotypes)
+ // compute_full_prob()
+ jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, mHaplotypeDataArray, mLikelihoodArray, 12);
+
+ final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
+ idx = 0;
+ int idxInsideHaplotypeList = 0;
+ int readIdx = 0;
+ for(GATKSAMRecord read : reads)
+ {
+ for (Map.Entry currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
+ {
+ //Since the order of haplotypes in the List and alleleHaplotypeMap is different,
+ //get idx of current haplotype in the list and use this idx to get the right likelihoodValue
+ idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
+ likelihoodMap.add(read, currEntry.getKey(), mLikelihoodArray[readIdx + idxInsideHaplotypeList]);
+ ++idx;
+ }
+ readIdx += numHaplotypes;
+ }
+ if(doProfiling)
+ computeTime += (System.nanoTime() - startTime);
+ return likelihoodMap;
+ }
+
+ /**
+ * Print final profiling information from native code
+ */
+ public native void jniClose();
+ @Override
+ public void close()
+ {
+ System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9));
+ super.close();
+ jniClose();
+ }
+
+ //Copied from http://frommyplayground.com/how-to-load-native-jni-library-from-jar
+ /**
+ * Loads library from current JAR archive
+ *
+ * The file from JAR is copied into system temporary directory and then loaded. The temporary file is deleted after exiting.
+ * Method uses String as filename because the pathname is "abstract", not system-dependent.
+ *
+ * @param path The filename inside JAR as absolute path (beginning with '/'), e.g. /package/File.ext
+ * @throws IOException If temporary file creation or read/write operation fails
+ * @throws IllegalArgumentException If source file (param path) does not exist
+ * @throws IllegalArgumentException If the path is not absolute or if the filename is shorter than three characters (restriction of {@see File#createTempFile(java.lang.String, java.lang.String)}).
+ */
+ public static void loadLibraryFromJar(String path) throws IOException {
+
+ if (!path.startsWith("/")) {
+ throw new IllegalArgumentException("The path to be absolute (start with '/').");
+ }
+
+ // Obtain filename from path
+ String[] parts = path.split("/");
+ String filename = (parts.length > 1) ? parts[parts.length - 1] : null;
+
+ // Split filename to prexif and suffix (extension)
+ String prefix = "";
+ String suffix = null;
+ if (filename != null) {
+ parts = filename.split("\\.", 2);
+ prefix = parts[0];
+ suffix = (parts.length > 1) ? "."+parts[parts.length - 1] : null; // Thanks, davs! :-)
+ }
+
+ // Check if the filename is okay
+ if (filename == null || prefix.length() < 3) {
+ throw new IllegalArgumentException("The filename has to be at least 3 characters long.");
+ }
+
+ // Prepare temporary file
+ File temp = File.createTempFile(prefix, suffix);
+ //System.out.println("Temp lib file "+temp.getAbsolutePath());
+ temp.deleteOnExit();
+
+ if (!temp.exists()) {
+ throw new FileNotFoundException("File " + temp.getAbsolutePath() + " does not exist.");
+ }
+
+ // Prepare buffer for data copying
+ byte[] buffer = new byte[1024];
+ int readBytes;
+
+ // Open and check input stream
+ InputStream is = VectorLoglessPairHMM.class.getResourceAsStream(path);
+ if (is == null) {
+ throw new FileNotFoundException("File " + path + " was not found inside JAR.");
+ }
+
+ // Open output stream and copy data between source file in JAR and the temporary file
+ OutputStream os = new FileOutputStream(temp);
+ try {
+ while ((readBytes = is.read(buffer)) != -1) {
+ os.write(buffer, 0, readBytes);
+ }
+ } finally {
+ // If read/write fails, close streams safely before throwing an exception
+ os.close();
+ is.close();
+ }
+
+ // Finally, load the library
+ System.load(temp.getAbsolutePath());
+ }
+}
diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java
index b1c280748..0ec1dd996 100644
--- a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java
+++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/RankSumUnitTest.java
@@ -46,6 +46,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.MannWhitneyU;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
@@ -75,9 +76,9 @@ public class RankSumUnitTest {
makeDistribution(distribution20_40, 40, skew, observations/2);
// shuffle the observations
- Collections.shuffle(distribution20);
- Collections.shuffle(distribution30);
- Collections.shuffle(distribution20_40);
+ Collections.shuffle(distribution20, GenomeAnalysisEngine.getRandomGenerator());
+ Collections.shuffle(distribution30, GenomeAnalysisEngine.getRandomGenerator());
+ Collections.shuffle(distribution20_40, GenomeAnalysisEngine.getRandomGenerator());
}
private static void makeDistribution(final List result, final int target, final int skew, final int numObservations) {
diff --git a/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatioUnitTest.java b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatioUnitTest.java
new file mode 100644
index 000000000..562736f0a
--- /dev/null
+++ b/protected/gatk-protected/src/test/java/org/broadinstitute/sting/gatk/walkers/annotator/StrandOddsRatioUnitTest.java
@@ -0,0 +1,100 @@
+/*
+* By downloading the PROGRAM you agree to the following terms of use:
+*
+* BROAD INSTITUTE - SOFTWARE LICENSE AGREEMENT - FOR ACADEMIC NON-COMMERCIAL RESEARCH PURPOSES ONLY
+*
+* This Agreement is made between the Broad Institute, Inc. with a principal address at 7 Cambridge Center, Cambridge, MA 02142 (BROAD) and the LICENSEE and is effective at the date the downloading is completed (EFFECTIVE DATE).
+*
+* WHEREAS, LICENSEE desires to license the PROGRAM, as defined hereinafter, and BROAD wishes to have this PROGRAM utilized in the public interest, subject only to the royalty-free, nonexclusive, nontransferable license rights of the United States Government pursuant to 48 CFR 52.227-14; and
+* WHEREAS, LICENSEE desires to license the PROGRAM and BROAD desires to grant a license on the following terms and conditions.
+* NOW, THEREFORE, in consideration of the promises and covenants made herein, the parties hereto agree as follows:
+*
+* 1. DEFINITIONS
+* 1.1 PROGRAM shall mean copyright in the object code and source code known as GATK2 and related documentation, if any, as they exist on the EFFECTIVE DATE and can be downloaded from http://www.broadinstitute/GATK on the EFFECTIVE DATE.
+*
+* 2. LICENSE
+* 2.1 Grant. Subject to the terms of this Agreement, BROAD hereby grants to LICENSEE, solely for academic non-commercial research purposes, a non-exclusive, non-transferable license to: (a) download, execute and display the PROGRAM and (b) create bug fixes and modify the PROGRAM.
+* The LICENSEE may apply the PROGRAM in a pipeline to data owned by users other than the LICENSEE and provide these users the results of the PROGRAM provided LICENSEE does so for academic non-commercial purposes only. For clarification purposes, academic sponsored research is not a commercial use under the terms of this Agreement.
+* 2.2 No Sublicensing or Additional Rights. LICENSEE shall not sublicense or distribute the PROGRAM, in whole or in part, without prior written permission from BROAD. LICENSEE shall ensure that all of its users agree to the terms of this Agreement. LICENSEE further agrees that it shall not put the PROGRAM on a network, server, or other similar technology that may be accessed by anyone other than the LICENSEE and its employees and users who have agreed to the terms of this agreement.
+* 2.3 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon LICENSEE by implication, estoppel, or otherwise to any computer software, trademark, intellectual property, or patent rights of BROAD, or of any other entity, except as expressly granted herein. LICENSEE agrees that the PROGRAM, in whole or part, shall not be used for any commercial purpose, including without limitation, as the basis of a commercial software or hardware product or to provide services. LICENSEE further agrees that the PROGRAM shall not be copied or otherwise adapted in order to circumvent the need for obtaining a license for use of the PROGRAM.
+*
+* 3. OWNERSHIP OF INTELLECTUAL PROPERTY
+* LICENSEE acknowledges that title to the PROGRAM shall remain with BROAD. The PROGRAM is marked with the following BROAD copyright notice and notice of attribution to contributors. LICENSEE shall retain such notice on all copies. LICENSEE agrees to include appropriate attribution if any results obtained from use of the PROGRAM are included in any publication.
+* Copyright 2012 Broad Institute, Inc.
+* Notice of attribution: The GATK2 program was made available through the generosity of Medical and Population Genetics program at the Broad Institute, Inc.
+* LICENSEE shall not use any trademark or trade name of BROAD, or any variation, adaptation, or abbreviation, of such marks or trade names, or any names of officers, faculty, students, employees, or agents of BROAD except as states above for attribution purposes.
+*
+* 4. INDEMNIFICATION
+* LICENSEE shall indemnify, defend, and hold harmless BROAD, and their respective officers, faculty, students, employees, associated investigators and agents, and their respective successors, heirs and assigns, (Indemnitees), against any liability, damage, loss, or expense (including reasonable attorneys fees and expenses) incurred by or imposed upon any of the Indemnitees in connection with any claims, suits, actions, demands or judgments arising out of any theory of liability (including, without limitation, actions in the form of tort, warranty, or strict liability and regardless of whether such action has any factual basis) pursuant to any right or license granted under this Agreement.
+*
+* 5. NO REPRESENTATIONS OR WARRANTIES
+* THE PROGRAM IS DELIVERED AS IS. BROAD MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY KIND CONCERNING THE PROGRAM OR THE COPYRIGHT, EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR THE ABSENCE OF LATENT OR OTHER DEFECTS, WHETHER OR NOT DISCOVERABLE. BROAD EXTENDS NO WARRANTIES OF ANY KIND AS TO PROGRAM CONFORMITY WITH WHATEVER USER MANUALS OR OTHER LITERATURE MAY BE ISSUED FROM TIME TO TIME.
+* IN NO EVENT SHALL BROAD OR ITS RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AFFILIATED INVESTIGATORS AND AFFILIATES BE LIABLE FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES OF ANY KIND, INCLUDING, WITHOUT LIMITATION, ECONOMIC DAMAGES OR INJURY TO PROPERTY AND LOST PROFITS, REGARDLESS OF WHETHER BROAD SHALL BE ADVISED, SHALL HAVE OTHER REASON TO KNOW, OR IN FACT SHALL KNOW OF THE POSSIBILITY OF THE FOREGOING.
+*
+* 6. ASSIGNMENT
+* This Agreement is personal to LICENSEE and any rights or obligations assigned by LICENSEE without the prior written consent of BROAD shall be null and void.
+*
+* 7. MISCELLANEOUS
+* 7.1 Export Control. LICENSEE gives assurance that it will comply with all United States export control laws and regulations controlling the export of the PROGRAM, including, without limitation, all Export Administration Regulations of the United States Department of Commerce. Among other things, these laws and regulations prohibit, or require a license for, the export of certain types of software to specified countries.
+* 7.2 Termination. LICENSEE shall have the right to terminate this Agreement for any reason upon prior written notice to BROAD. If LICENSEE breaches any provision hereunder, and fails to cure such breach within thirty (30) days, BROAD may terminate this Agreement immediately. Upon termination, LICENSEE shall provide BROAD with written assurance that the original and all copies of the PROGRAM have been destroyed, except that, upon prior written authorization from BROAD, LICENSEE may retain a copy for archive purposes.
+* 7.3 Survival. The following provisions shall survive the expiration or termination of this Agreement: Articles 1, 3, 4, 5 and Sections 2.2, 2.3, 7.3, and 7.4.
+* 7.4 Notice. Any notices under this Agreement shall be in writing, shall specifically refer to this Agreement, and shall be sent by hand, recognized national overnight courier, confirmed facsimile transmission, confirmed electronic mail, or registered or certified mail, postage prepaid, return receipt requested. All notices under this Agreement shall be deemed effective upon receipt.
+* 7.5 Amendment and Waiver; Entire Agreement. This Agreement may be amended, supplemented, or otherwise modified only by means of a written instrument signed by all parties. Any waiver of any rights or failure to act in a specific instance shall relate only to such instance and shall not be construed as an agreement to waive any rights or fail to act in any other instance, whether or not similar. This Agreement constitutes the entire agreement among the parties with respect to its subject matter and supersedes prior agreements or understandings between the parties relating to its subject matter.
+* 7.6 Binding Effect; Headings. This Agreement shall be binding upon and inure to the benefit of the parties and their respective permitted successors and assigns. All headings are for convenience only and shall not affect the meaning of any provision of this Agreement.
+* 7.7 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the Commonwealth of Massachusetts, U.S.A., without regard to conflict of laws principles.
+*/
+
+package org.broadinstitute.sting.gatk.walkers.annotator;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.lang.Integer;
+import java.util.List;
+
+/**
+ * Created by haasb on 3/5/14.
+ */
+public class StrandOddsRatioUnitTest {
+ private static double DELTA_PRECISION = 0.001;
+
+ @DataProvider(name = "UsingSOR")
+ public Object[][] makeUsingSORData() {
+ List